topic Re: Is there a faster way to loop? in Discussions

Is there a faster way to loop?

Jackie_ — Thu, 08 Jun 2023 16:28:30 GMT

Hello JMP community,

I am analyzing larger amounts of data, typically 350k rows.

For each row, the code runs a loop to calculate the mean offset by "Wafer ID". I’m looking for a faster way of doing this.

Is there a way to make the processing faster? The current script takes ~1 hour to complete the loop.

Names Default To Here( 1 );

dt1 = Data Table( "Reference Data Table.jmp" );
dt2 = Data Table("Data table");
					
									
Col_List1 = dt2 << Get Column Names( "String" );
					
Summation(
	c = 1,
	N Col( dt2 ) - 1,  
	values = [];
	name = Column( dt2, c ) << Get Name;
	If( Contains( name, "Currents" ) | Contains( name, "Voltage" ) | Contains( name, "Resistance" ),
		rows = dt1 << Get Rows Where( :tests == name );		
		col = Column( dt2, c );
		col mean = Col Mean( col );
		For Each Row(
			dt2,
			mean = dt1:Mean[rows];
			measure = mean - Col Mean( As Column( dt2, c ), :Wafer ID );
			values |/= col[] + measure
			;
		);
		col << Set Values( values );
	);
	0	
	
);

Re: Is there a faster way to loop?

jthi — Wed, 15 Mar 2023 14:29:46 GMT

Is there a specific need to have all those 350k rows if in the end you end up with 4 values each column (if I understood the code correctly), one mean per wafer per column? You could possibly use Summary to make much smaller table.

Re: Is there a faster way to loop?

Jackie_ — Wed, 15 Mar 2023 14:34:04 GMT

Yes, Correct one mean per wafer. Yes I would like to process all at once because I'm developing a script which severs as a multi function tool for my team. Any suggestion would be appreciated jarmo

Re: Is there a faster way to loop?

jthi — Wed, 15 Mar 2023 14:49:12 GMT

Using Summary (or Summarize if you don't need data table) should be fairly fast (you can join/update Summary table back to large data if needed)

Names Default To Here(1);

dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

aa_means = Associative Array(Column(dt_ref, "Tests"), Column(dt_ref, "Mean"));

col_list = Filter Each({col_name}, dt_dt << Get Column Names("String"),
	Contains({"Currents", "Voltage", "Resistance"}, Word(1, col_name));
);

dt_summary = dt_dt << Summary(
	Group(:Wafer ID),
	Mean(Eval(col_list)),
	Freq("None"),
	Weight("None"),
	statistics column name format("column"), // this is important here
	Link to original data table(0)
);

For Each({col_name}, col_list,
	Eval(EvalExpr(
		Column(dt_summary, col_name) << Set Each Value(
			aa_means[col_name] - AsColumn(col_name)
		);
	));
);

Remember to check that the values are correct

Re: Is there a faster way to loop?

Jackie_ — Thu, 16 Mar 2023 15:47:46 GMT

....

Re: Is there a faster way to loop?

jthi — Wed, 15 Mar 2023 15:33:26 GMT

Ah.. missed the col[] in your script. I'll write new version soon

Re: Is there a faster way to loop?

jthi — Wed, 15 Mar 2023 16:57:08 GMT

Hopefully this is calculating correctly. It isn't the most simple script but it is somewhat optimized for speed (on my laptop calculation for one column took 200ms, so in total approximately 6s).

Names Default To Here(1);

dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

aa_means = Associative Array(Column(dt_ref, "Tests"), Column(dt_ref, "Mean"));

col_list = Filter Each({col_name}, dt_dt << Get Column Names("String"),
	Contains({"Currents", "Voltage", "Resistance"}, Word(1, col_name));
);

dt_dt << Add Row(1, At Start); 
dt_dt << Begin Data Update;
For Each({col_name, idx}, col_list,
	start = Hp Time();
	Eval(Eval Expr(
		Column(dt_dt, col_name) << Set Each Value(
			//AsColumn(col_name) + aa_means[col_name] - Col Mean(AsColumn(col_name), :Wafer ID);
			//Col Mean(Expr(Name Expr(AsColumn(col_name))), :Wafer ID);
			//Col Mean(:Currents A17, :Wafer ID);
			Expr(Name Expr(AsColumn(col_name)))+ Expr(aa_means[col_name]) - Col Mean(Expr(Name Expr(AsColumn(col_name))), :Wafer ID);
		);
	));
	show((Hp Time() - start)/1e6);
);
dt_dt << Delete Row(1);
dt_dt << End Data Update;

//Summarize(dt_dt, b = by(:WAFER ID), m = Mean(:Currents A17));
//show(b,m, Col Mean(:Currents A17));

I had to add 1 row to beginning and then remove it later to avoid some weird calculation issue when using << Set Each Value (might be just my expressions) and that's why it has some weird commented lines in the code. I'll contact JMP support about it when I have time

Edit: Ticket created TS-00036082. Depending on the response I get, I will make separate post regarding Col Mean

Re: Is there a faster way to loop?

Jackie_ — Wed, 15 Mar 2023 16:52:30 GMT

Thanks Jarmo!<3<3

Re: Is there a faster way to loop?

xxvvcczz — Thu, 16 Mar 2023 08:26:05 GMT

Here's my attempt, the two tables at the bottom "stacked_table" and "summary_table_delta_by_wafer" might be the right shape for this.

It's fast at less than 1 second for the whole thing

(HP Time() - start) / 1000000 = 0.664238;

Names Default To Here(1);
dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

start = Hp Time();

curr_col_names_list = dt_dt << Get Column Names("string");


obj = dt_dt << Tabulate(
		Show Control Panel( 0 ),
		Add Table(
			Column Table(
				Analysis Columns(
					eval list(curr_col_names_list)
				),
				Statistics( Mean )
			),
			Row Table( Grouping Columns( :Wafer ID ) )
		)
	);
	my_new_table = obj << Make Into Data Table;


curr_col_names_list = my_new_table << Get Column Names("string");

transposed_table = my_new_table << Transpose(
	columns(
	eval list(curr_col_names_list)
	),
	Label( :Wafer ID ),
	Output Table( "transposed_table" )
);

transposed_table << new column ("join_key"
	,char
	,formula(
		regex(:Label,"\(([^\)]+)\)", "\1")		
	)	
);


transposed_table_join = (
	transposed_table << Join(
	With( dt_ref )
	,Match Columns( :join_key = :Tests),
	,Merge same name columns
	,output table("transposed_table_join")
	)
	);
	
// clean up
close(transposed_table,nosave);
close(my_new_table,nosave);
transposed_table_join << delete columns(:"Match Flag", :"Label","join_key");
transposed_table_join:"Mean" << Set Name( "Ref_Value" );

	
curr_col_names_list = transposed_table_join << Get Column Names("string");
matches_i_dont_want_to_stack = {"Tests","Ref_Value"};
clean_col_list = Associative Array(curr_col_names_list) << Remove(Associative Array(matches_i_dont_want_to_stack)) << get keys;


stacked_table = transposed_table_join << Stack(
	columns( clean_col_list ),
	Source Label Column( "Wafer_ID" ),
	Stacked Data Column( "Measured_Value" ),
	Output Table( "stacked_table" )
);


//Is this the formula you wanted on each value? A-B?
stacked_table << new column ("delta"
	,numeric
	,continuous
	,formula(
		(:Ref_Value -:Measured_Value)	
	)	
);
//stacked table and final summary_table_delta_by_wafer seems like the useful combo here.

close(transposed_table_join,nosave);

//final output?
summary_table_delta_by_wafer = stacked_table << Transpose(
	columns( :delta ),
	By( :Wafer_ID ),
	Label( :Tests ),
	Output Table( "summary_table_delta_by_wafer" )
);
show((Hp Time() - start)/1e6);