cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Browse apps to extend the software in the new JMP Marketplace
Choose Language Hide Translation Bar
Jackie_
Level VI

Is there a faster way to loop?

Hello JMP community,

 

I am analyzing larger amounts of data, typically 350k rows. 

For each row, the code runs a loop to calculate the mean offset by "Wafer ID". I’m looking for a faster way of doing this.

Is there a way to make the processing faster? The current script takes ~1 hour to complete the loop.

 

Names Default To Here( 1 );

dt1 = Data Table( "Reference Data Table.jmp" );
dt2 = Data Table("Data table");
					
									
Col_List1 = dt2 << Get Column Names( "String" );
					
Summation(
	c = 1,
	N Col( dt2 ) - 1,  
	values = [];
	name = Column( dt2, c ) << Get Name;
	If( Contains( name, "Currents" ) | Contains( name, "Voltage" ) | Contains( name, "Resistance" ),
		rows = dt1 << Get Rows Where( :tests == name );		
		col = Column( dt2, c );
		col mean = Col Mean( col );
		For Each Row(
			dt2,
			mean = dt1:Mean[rows];
			measure = mean - Col Mean( As Column( dt2, c ), :Wafer ID );
			values |/= col[] + measure
			;
		);
		col << Set Values( values );
	);
	0	
	
);
1 ACCEPTED SOLUTION

Accepted Solutions
jthi
Super User

Re: Is there a faster way to loop?

Hopefully this is calculating correctly. It isn't the most simple script but it is somewhat optimized for speed (on my laptop calculation for one column took 200ms, so in total approximately 6s). 

Names Default To Here(1);

dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

aa_means = Associative Array(Column(dt_ref, "Tests"), Column(dt_ref, "Mean"));

col_list = Filter Each({col_name}, dt_dt << Get Column Names("String"),
	Contains({"Currents", "Voltage", "Resistance"}, Word(1, col_name));
);

dt_dt << Add Row(1, At Start); 
dt_dt << Begin Data Update;
For Each({col_name, idx}, col_list,
	start = Hp Time();
	Eval(Eval Expr(
		Column(dt_dt, col_name) << Set Each Value(
			//AsColumn(col_name) + aa_means[col_name] - Col Mean(AsColumn(col_name), :Wafer ID);
			//Col Mean(Expr(Name Expr(AsColumn(col_name))), :Wafer ID);
			//Col Mean(:Currents A17, :Wafer ID);
			Expr(Name Expr(AsColumn(col_name)))+ Expr(aa_means[col_name]) - Col Mean(Expr(Name Expr(AsColumn(col_name))), :Wafer ID);
		);
	));
	show((Hp Time() - start)/1e6);
);
dt_dt << Delete Row(1);
dt_dt << End Data Update;

//Summarize(dt_dt, b = by(:WAFER ID), m = Mean(:Currents A17));
//show(b,m, Col Mean(:Currents A17));

I had to add 1 row to beginning and then remove it later to avoid some weird calculation issue when using << Set Each Value (might be just my expressions) and that's why it has some weird commented lines in the code. I'll contact JMP support about it when I have time

 

Edit: Ticket created TS-00036082. Depending on the response I get, I will make separate post regarding Col Mean

-Jarmo

View solution in original post

8 REPLIES 8
jthi
Super User

Re: Is there a faster way to loop?

Is there a specific need to have all those 350k rows if in the end you end up with 4 values each column (if I understood the code correctly), one mean per wafer per column? You could possibly use Summary to make much smaller table.

-Jarmo
Jackie_
Level VI

Re: Is there a faster way to loop?

Yes, Correct one mean per wafer. Yes I would like to process all at once because I'm developing a script which severs as a multi function tool for my team. Any suggestion would be appreciated jarmo

jthi
Super User

Re: Is there a faster way to loop?

Using Summary (or Summarize if you don't need data table) should be fairly fast (you can join/update Summary table back to large data if needed)

Names Default To Here(1);

dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

aa_means = Associative Array(Column(dt_ref, "Tests"), Column(dt_ref, "Mean"));

col_list = Filter Each({col_name}, dt_dt << Get Column Names("String"),
	Contains({"Currents", "Voltage", "Resistance"}, Word(1, col_name));
);

dt_summary = dt_dt << Summary(
	Group(:Wafer ID),
	Mean(Eval(col_list)),
	Freq("None"),
	Weight("None"),
	statistics column name format("column"), // this is important here
	Link to original data table(0)
);

For Each({col_name}, col_list,
	Eval(EvalExpr(
		Column(dt_summary, col_name) << Set Each Value(
			aa_means[col_name] - AsColumn(col_name)
		);
	));
);

Remember to check that the values are correct

-Jarmo
Jackie_
Level VI

Re: Is there a faster way to loop?

....

jthi
Super User

Re: Is there a faster way to loop?

Ah.. missed the col[] in your script. I'll write new version soon

-Jarmo
jthi
Super User

Re: Is there a faster way to loop?

Hopefully this is calculating correctly. It isn't the most simple script but it is somewhat optimized for speed (on my laptop calculation for one column took 200ms, so in total approximately 6s). 

Names Default To Here(1);

dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

aa_means = Associative Array(Column(dt_ref, "Tests"), Column(dt_ref, "Mean"));

col_list = Filter Each({col_name}, dt_dt << Get Column Names("String"),
	Contains({"Currents", "Voltage", "Resistance"}, Word(1, col_name));
);

dt_dt << Add Row(1, At Start); 
dt_dt << Begin Data Update;
For Each({col_name, idx}, col_list,
	start = Hp Time();
	Eval(Eval Expr(
		Column(dt_dt, col_name) << Set Each Value(
			//AsColumn(col_name) + aa_means[col_name] - Col Mean(AsColumn(col_name), :Wafer ID);
			//Col Mean(Expr(Name Expr(AsColumn(col_name))), :Wafer ID);
			//Col Mean(:Currents A17, :Wafer ID);
			Expr(Name Expr(AsColumn(col_name)))+ Expr(aa_means[col_name]) - Col Mean(Expr(Name Expr(AsColumn(col_name))), :Wafer ID);
		);
	));
	show((Hp Time() - start)/1e6);
);
dt_dt << Delete Row(1);
dt_dt << End Data Update;

//Summarize(dt_dt, b = by(:WAFER ID), m = Mean(:Currents A17));
//show(b,m, Col Mean(:Currents A17));

I had to add 1 row to beginning and then remove it later to avoid some weird calculation issue when using << Set Each Value (might be just my expressions) and that's why it has some weird commented lines in the code. I'll contact JMP support about it when I have time

 

Edit: Ticket created TS-00036082. Depending on the response I get, I will make separate post regarding Col Mean

-Jarmo
Jackie_
Level VI

Re: Is there a faster way to loop?

 Thanks Jarmo!

xxvvcczz
Level III

Re: Is there a faster way to loop?

Here's my attempt, the two tables at the bottom "stacked_table" and "summary_table_delta_by_wafer" might be the right shape for this.

 

It's fast at less than 1 second for the whole thing


(HP Time() - start) / 1000000 = 0.664238;

Names Default To Here(1);
dt_ref = Open("$DOWNLOADS/Reference data table.jmp", invisible);
dt_dt = Open("$DOWNLOADS/Data table.jmp", invisible);
dt_dt << clear select;
dt_ref << clear select;

start = Hp Time();

curr_col_names_list = dt_dt << Get Column Names("string");


obj = dt_dt << Tabulate(
		Show Control Panel( 0 ),
		Add Table(
			Column Table(
				Analysis Columns(
					eval list(curr_col_names_list)
				),
				Statistics( Mean )
			),
			Row Table( Grouping Columns( :Wafer ID ) )
		)
	);
	my_new_table = obj << Make Into Data Table;


curr_col_names_list = my_new_table << Get Column Names("string");

transposed_table = my_new_table << Transpose(
	columns(
	eval list(curr_col_names_list)
	),
	Label( :Wafer ID ),
	Output Table( "transposed_table" )
);

transposed_table << new column ("join_key"
	,char
	,formula(
		regex(:Label,"\(([^\)]+)\)", "\1")		
	)	
);


transposed_table_join = (
	transposed_table << Join(
	With( dt_ref )
	,Match Columns( :join_key = :Tests),
	,Merge same name columns
	,output table("transposed_table_join")
	)
	);
	
// clean up
close(transposed_table,nosave);
close(my_new_table,nosave);
transposed_table_join << delete columns(:"Match Flag", :"Label","join_key");
transposed_table_join:"Mean" << Set Name( "Ref_Value" );

	
curr_col_names_list = transposed_table_join << Get Column Names("string");
matches_i_dont_want_to_stack = {"Tests","Ref_Value"};
clean_col_list = Associative Array(curr_col_names_list) << Remove(Associative Array(matches_i_dont_want_to_stack)) << get keys;


stacked_table = transposed_table_join << Stack(
	columns( clean_col_list ),
	Source Label Column( "Wafer_ID" ),
	Stacked Data Column( "Measured_Value" ),
	Output Table( "stacked_table" )
);


//Is this the formula you wanted on each value? A-B?
stacked_table << new column ("delta"
	,numeric
	,continuous
	,formula(
		(:Ref_Value -:Measured_Value)	
	)	
);
//stacked table and final summary_table_delta_by_wafer seems like the useful combo here.

close(transposed_table_join,nosave);

//final output?
summary_table_delta_by_wafer = stacked_table << Transpose(
	columns( :delta ),
	By( :Wafer_ID ),
	Label( :Tests ),
	Output Table( "summary_table_delta_by_wafer" )
);
show((Hp Time() - start)/1e6);