cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Try the Materials Informatics Toolkit, which is designed to easily handle SMILES data. This and other helpful add-ins are available in the JMP® Marketplace
Choose Language Hide Translation Bar
Jackie_
Level VI

Optimize the loop

Hi, 

I have some JSL code that takes a long time to execute for larger datasets.

The code runs a for loop within a for loop to calculate the mean offset by strips. I’m looking for a faster way of doing this.

Is there a way to make the processing faster? The current script takes ~20 mins to complete the loop

 

Here's the script below:

// Shifts the values of selected measurement columns in dt2 using a reference
// mean looked up in dt1 (columns :Tests and :Mean) and a Col Mean computed
// over dt2. Each processed column is overwritten in place.
Names Default To Here( 1 );

dt1 =Data table( "Reference data table.jmp" );
dt2 = Current data table(); /// Select Data table.jmp
Col_List1 = dt2 << Get Column Names( "String" );

// Visit every column of dt2 except the last one (loop bound is N Col - 1).
For( c = 1, c <= N Col( dt2 ) -1, c++,
	// Accumulates the shifted values for column c, one entry per row.
	theMatrix = [];
	name = Column( Col_List1[c] ) << Get name;
	// Only columns whose name contains "Currents", "Voltage" or "Resistance" are processed.
	if (Contains( Col_List1[c], "Currents" )| Contains( Col_List1[c], "Voltage" )| Contains( Col_List1[c], "Resistance" ),
	For( r = 1, r <= N Rows( dt2 ), r++,
		// Reference Mean for the dt1 row(s) whose Tests equals this column's name.
		// NOTE(review): this does not depend on r, yet the row search is repeated for every row.
		mean = dt1:Mean[dt1 << Get Rows Where( :Tests == name )];
		// Offset = reference mean minus a Col Mean over an If() that compares each row's
		// Strips with row r's Strips. Presumably intended as the per-strip mean of
		// column c -- TODO confirm; the Col Mean is re-evaluated for every r.
		measure = mean - Col Mean( If( dt2:Strips == dt2:Strips[r], Column( dt2, c ), . ) );
		// Append the shifted value of row r; the matrix is grown one element at a time.
		theMatrix = theMatrix || Column( dt2, c )[r] + measure;
	);
	// Overwrite column c with the shifted values.
	Column( dt2, c ) << Set Values( theMatrix );
););

 

2 ACCEPTED SOLUTIONS

Accepted Solutions
txnelson
Super User

Re: Optimize the loop

The two places where I see issues are

mean = dt1:Mean[dt1 << Get Rows Where( :Tests == name )];

and

measure = mean - Col Mean( If( dt2:Strips == dt2:Strips[r], Column( dt2, c ), . ) );

Here is a workaround for the issues. Check it out and see if the answers are correct.

// Offsets each matching measurement column of dt2 so that its per-strip mean
// lines up with the reference mean stored in dt1 (columns :Tests and :Mean).
Names Default To Here( 1 );
start = Today();
dt1 = Data Table( "Reference data table.jmp" );
dt2 = Current Data Table(); /// Select Data table.jmp

// Stash each reference mean on the like-named dt2 column as a "TheMean"
// column property, so the main loop never has to search dt1 again.
// Try() skips reference tests that have no matching column in dt2.
For( i = 1, i <= N Rows( dt1 ), i++,
	theTest = dt1:Tests[i];
	Try( Column( dt2, theTest ) << set property( "TheMean", dt1:mean[i] ) );
);

Col_List1 = dt2 << Get Column Names( "String" );

// Visit every column except the last one (loop bound is N Col - 1).
For( c = 1, c <= N Col( dt2 ) - 1, c++,
	If( Contains( Col_List1[c], "Currents" ) | Contains( Col_List1[c], "Voltage" ) | Contains( Col_List1[c], "Resistance" ),
		theMatrix = [];
		// Read the reference mean once per column; it does not change from row to row.
		// NOTE: a matching column whose test is absent from the reference table has no
		// "TheMean" property; its values are then overwritten with missing results.
		// Guard with Is Missing( mean ) if such columns must be left untouched.
		mean = Column( dt2, Col_List1[c] ) << get property( "theMean" );
		For Each Row(
			r = Row();
			// Col Mean with :Strips as the By argument gives the mean of the current row's strip.
			measure = mean - Col Mean( As Column( dt2, c ), :Strips );
			theMatrix = theMatrix || Column( dt2, c )[r] + measure;
		);
		Column( dt2, c ) << Set Values( theMatrix );
	);
);
// Elapsed minutes (end minus start, so the value is positive).
Show( (Today() - start) / 60 );

 

Jim

View solution in original post

txnelson
Super User

Re: Optimize the loop

I modified your code to bypass the processing if the Column Property "theMean" has not been set.

// Offsets each matching measurement column of dt2 so that its per-strip mean
// lines up with the reference mean stored in dt1 (columns :Tests and :Mean).
// Columns whose test is not listed in the reference table are left untouched.
Names Default To Here( 1 );
start = Today();
dt1 = Data Table( "Reference data table.jmp" );
dt2 = Current Data Table(); /// Select Data table.jmp

// Stash each reference mean on the like-named dt2 column as a "TheMean"
// column property. Try() skips reference tests with no matching column in dt2.
For( i = 1, i <= N Rows( dt1 ), i++,
	theTest = dt1:Tests[i];
	Try( Column( dt2, theTest ) << set property( "TheMean", dt1:mean[i] ) );
);

Col_List1 = dt2 << Get Column Names( "String" );

// Visit every column except the last one (loop bound is N Col - 1).
For( c = 1, c <= N Col( dt2 ) - 1, c++,
	If( Contains( Col_List1[c], "Currents" ) | Contains( Col_List1[c], "Voltage" ) | Contains( Col_List1[c], "Resistance" ),
		mean = .;
		Try( mean = Column( dt2, Col_List1[c] ) << get property( "theMean" ) );
		// Don't process if a value for theMean does not exist
		If( Is Missing( mean ) == 0,
			// Preallocate the 1 x n result instead of growing it one element per row.
			theMatrix = J( 1, N Rows( dt2 ), . );
			For Each Row(
				r = Row();
				// Col Mean with :Strips as the By argument gives the mean of the current row's strip.
				measure = mean - Col Mean( As Column( dt2, c ), :Strips );
				theMatrix[r] = Column( dt2, c )[r] + measure;
			);
			Column( dt2, c ) << Set Values( theMatrix );
		);
	);
);
// Elapsed minutes (end minus start, so the value is positive).
Show( (Today() - start) / 60 );
Jim

View solution in original post

6 REPLIES 6
txnelson
Super User

Re: Optimize the loop

The two places where I see issues are

mean = dt1:Mean[dt1 << Get Rows Where( :Tests == name )];

and

measure = mean - Col Mean( If( dt2:Strips == dt2:Strips[r], Column( dt2, c ), . ) );

Here is a workaround for the issues. Check it out and see if the answers are correct.

// Offsets each matching measurement column of dt2 so that its per-strip mean
// lines up with the reference mean stored in dt1 (columns :Tests and :Mean).
Names Default To Here( 1 );
start = Today();
dt1 = Data Table( "Reference data table.jmp" );
dt2 = Current Data Table(); /// Select Data table.jmp

// Stash each reference mean on the like-named dt2 column as a "TheMean"
// column property, so the main loop never has to search dt1 again.
// Try() skips reference tests that have no matching column in dt2.
For( i = 1, i <= N Rows( dt1 ), i++,
	theTest = dt1:Tests[i];
	Try( Column( dt2, theTest ) << set property( "TheMean", dt1:mean[i] ) );
);

Col_List1 = dt2 << Get Column Names( "String" );

// Visit every column except the last one (loop bound is N Col - 1).
For( c = 1, c <= N Col( dt2 ) - 1, c++,
	If( Contains( Col_List1[c], "Currents" ) | Contains( Col_List1[c], "Voltage" ) | Contains( Col_List1[c], "Resistance" ),
		theMatrix = [];
		// Read the reference mean once per column; it does not change from row to row.
		// NOTE: a matching column whose test is absent from the reference table has no
		// "TheMean" property; its values are then overwritten with missing results.
		// Guard with Is Missing( mean ) if such columns must be left untouched.
		mean = Column( dt2, Col_List1[c] ) << get property( "theMean" );
		For Each Row(
			r = Row();
			// Col Mean with :Strips as the By argument gives the mean of the current row's strip.
			measure = mean - Col Mean( As Column( dt2, c ), :Strips );
			theMatrix = theMatrix || Column( dt2, c )[r] + measure;
		);
		Column( dt2, c ) << Set Values( theMatrix );
	);
);
// Elapsed minutes (end minus start, so the value is positive).
Show( (Today() - start) / 60 );

 

Jim
Jackie_
Level VI

Re: Optimize the loop

Thanks, Jim. It works like a charm.

Jackie_
Level VI

Re: Optimize the loop

Hi @txnelson,

There's an issue in the logic that you suggested. If the reference data table doesn't contain the tests that are in the main data table, it will delete the row values from those tests. I only want to offset the tests that are in the reference data table. Any help would be much appreciated.

 

Jacksmith12_2-1658873805995.png

I have attached the data tables below. I only want to offset the tests that are specified in the reference table

 

txnelson
Super User

Re: Optimize the loop

I modified your code to bypass the processing if the Column Property "theMean" has not been set.

// Offsets each matching measurement column of dt2 so that its per-strip mean
// lines up with the reference mean stored in dt1 (columns :Tests and :Mean).
// Columns whose test is not listed in the reference table are left untouched.
Names Default To Here( 1 );
start = Today();
dt1 = Data Table( "Reference data table.jmp" );
dt2 = Current Data Table(); /// Select Data table.jmp

// Stash each reference mean on the like-named dt2 column as a "TheMean"
// column property. Try() skips reference tests with no matching column in dt2.
For( i = 1, i <= N Rows( dt1 ), i++,
	theTest = dt1:Tests[i];
	Try( Column( dt2, theTest ) << set property( "TheMean", dt1:mean[i] ) );
);

Col_List1 = dt2 << Get Column Names( "String" );

// Visit every column except the last one (loop bound is N Col - 1).
For( c = 1, c <= N Col( dt2 ) - 1, c++,
	If( Contains( Col_List1[c], "Currents" ) | Contains( Col_List1[c], "Voltage" ) | Contains( Col_List1[c], "Resistance" ),
		mean = .;
		Try( mean = Column( dt2, Col_List1[c] ) << get property( "theMean" ) );
		// Don't process if a value for theMean does not exist
		If( Is Missing( mean ) == 0,
			// Preallocate the 1 x n result instead of growing it one element per row.
			theMatrix = J( 1, N Rows( dt2 ), . );
			For Each Row(
				r = Row();
				// Col Mean with :Strips as the By argument gives the mean of the current row's strip.
				measure = mean - Col Mean( As Column( dt2, c ), :Strips );
				theMatrix[r] = Column( dt2, c )[r] + measure;
			);
			Column( dt2, c ) << Set Values( theMatrix );
		);
	);
);
// Elapsed minutes (end minus start, so the value is positive).
Show( (Today() - start) / 60 );
Jim
Jackie_
Level VI

Re: Optimize the loop

Thanks a lot Jim.

ErraticAttack
Level VI

Re: Optimize the loop

@Jackie_ , I'm not sure what the conditional dt2:Strips == dt2:Strips[r] is supposed to resolve to, but it is probably not doing what you want it to do.

 

Without any test data to test on, here is how I would start optimizing this -- the main one is to take the dt1 << Get Rows Where( :Tests == name ) outside of the loop.

 

// Starting point for a faster version: for each matching measurement column of
// dt2, look up the reference mean in dt1 once, then shift every row by
// (reference mean - column mean) and write the result back to the column.
Names Default to Here( 1 );

dt1 = Data Table( "Reference Data Table.jmp" );
dt2 = Current Data Table();

Summation( c = 1, N Col( dt2 ) - 1,  // Summation is faster than For() if you don't need to use Break() or Continue()
	name = Column( dt2, c ) << Get Name;
	If( Contains( name, "Currents" ) | Contains( name, "Voltage" ) | Contains( name, "Resistance" ),
		values = [];
		// Both reference lookups are hoisted out of the row loop: neither depends
		// on the current row, so they run once per column instead of once per row.
		rows = dt1 << Get Rows Where( :tests == name );
		mean = dt1:Mean[rows];
		col = Column( dt2, c );
		col mean = Col Mean( col );
		For Each Row( dt2,
			// NOTE(review): the condition compares Strips with itself, so this uses the
			// whole-column mean rather than a per-strip mean -- confirm the intended grouping.
			measure = mean - If( dt2:Strips == dt2:Strips /* always true */, col mean, 0 );
			values |/= col[] + measure 	// append the shifted value of the current row
		);
		col << Set Values( values )
	);
	0	// Summation needs to end in a numeric
)
Jordan