- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Optimization of a (slow) JSL code based on FOR() loops
//Create dummy table
RawData = New Table( "RawData",
Add Rows( 20 ),
New Column( "serial_number", Character, set values( {"A","A","A","A","A","A","A","B","B","B","B","B","B","C","C","C","C","C","C","C"})),
New Column( "rel_event", Character, set values( {
"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5","DROP6",
"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5",
"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5", "DROP6"})),
New Column("metric1", Continuous, set values({1,2,3,5,7,10,13,2,4,5,6,8,11,1,2,4,5,5,9,12}) ),
New Column("metric2", Continuous, set values({2,3,4,7,9,14,18,2,3,4,7,9,14,2,5,7,8,9,11,13}) );
);
RawData << save( "$Desktop\RawData.jmp" );
Column(RawData, "rel_event") << Set Property( "Value ordering", {"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5","DROP6"} );
//Create handle for data table to be analyzed
RawData = Current Data Table();
ChosenDataCol = {"metric1", "metric2"};
NumDataCol = N Items( ChosenDataCol );
NumRow = N Row( RawData );
RawDataCol = RawData << get column names;
Show( NumDataCol );
Show( ChosenDataCol );
Show( RawData );
Show( NumRow );
Show( RawDataCol );
//Scan each selected data row for which Deltas must be computed
For( cpt = 1, cpt <= NumDataCol, cpt++,
//Create column to receive delta data
DeltaCol = RawData << New Column( "∆_" || Char( ChosenDataCol[cpt] ), Numeric, "Continuous" );
TargetCol = Column( RawData, ChosenDataCol[cpt] );
serial_numberlist = {}; //List of unique serial_number's within the data table of interest
Show( cpt );
Show( ChosenDataCol[cpt] );
For( i = 1, i <= NumRow, i++,
Currentserial_number = :serial_number[i];
If( Contains( serial_numberlist, Currentserial_number ) == 0,
Insert Into( serial_numberlist, Currentserial_number )
);
);//For i
Show( serial_numberList );
Numserial_number = N Items( serial_numberlist );
Show( Numserial_number );
//Scan data table for each unique serial_number
For( i = 1, i <= Numserial_number, i++,
Currentserial_number = serial_numberList[i];
Currentserial_numberrows = RawData << GetRowsWhere( :serial_number == Currentserial_number ); //matrix of rows containing current serial_number
Show( Currentserial_numberrows );
NumCurrentserial_numberrows = N Row( Currentserial_numberrows );
Show( NumCurrentserial_numberrows );
show(i);
TzeroExist = 0;
TzeroVal = 0;
//identify Time Zero value for each serial_number under consideration
For( j = 1, j <= NumCurrentserial_numberrows, j++,
CurrentRead = :rel_event[Currentserial_numberrows[j]];
Show( CurrentRead );
If( CurrentRead == "REL_T0",
TzeroVal = TargetCol[Currentserial_numberrows[j]];
Show( TzeroVal );
TzeroExist = 1;
);
); //For j
//Calculate Drift
For( j = 1, j <= NumCurrentserial_numberrows, j++,
If( TzeroExist == 1,
DeltaCol[Currentserial_numberrows[j]] = TargetCol[Currentserial_numberrows[j]] - TzeroVal
)
); //For j
);//For i
);//For cpt
Accepted Solutions
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
Yes, it is possible with small changes directly to that script. I added debug column which you can remove to show that it matches correct rows
Names Default To Here(1);
dt = New Table("RawData",
Add Rows(27),
Compress File When Saved(1),
New Column("serial_number",
Character,
"Nominal",
Set Values(
{"A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", "D", "D"}
)
),
New Column("rel_event",
Character,
"Nominal",
Set Values(
{"REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "REL_T0", "DROP1",
"DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "DROP1", "DROP2", "DROP3", "REL_T0", "DROP4", "DROP5", "DROP6"}
)
),
New Column("metric1",
Numeric,
"Continuous",
Format("Best", 10),
Set Values([1, 2, 3, 5, 7, 10, 13, 2, 4, 5, 6, 8, 11, 1, 2, 4, 5, 5, 9, 12, 1, 2, 4, 5, 5, 9, 12])
),
New Column("metric2",
Numeric,
"Continuous",
Format("Best", 10),
Set Values([2, 3, 4, 7, 9, 14, 18, 2, 3, 4, 7, 9, 14, 2, 5, 7, 8, 9, 11, 13, 2, 5, 7, 8, 9, 11, 13])
)
);
debugcol = dt << New Column("REL_T0_ROW", Numeric, Continuous);
delta1col = dt << New Column("delta1", Numeric, Continuous);
delta2col = dt << New Column("delta2", Numeric, Continuous);
For Each Row(dt,
// find REL_T0 row for current serial_number
start_row = Col Min(If(:rel_event == "REL_T0", Row(), .), :serial_number);
debugcol[Row()] = start_row;
delta1col[Row()] = :metric1 - :metric1[start_row];
delta2col[Row()] = :metric2 - :metric2[start_row];
);
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
One thing which will provide (generally) a speed up is to remove all prints from loops but in this case rewriting it without those loops is faster and not to use for loop when going over data table (for each row is better option).
Here is one option which hopefully yields faster results (this can also be turned into a formula and it should still be fairly fast if needed). It assumes that data is ordered in such a manner, that REL_T0 is always the first row for each serialnumber, if that is not the case it will require a small change
Names Default To Here(1);
//Create dummy table
RawData = New Table("RawData",
Add Rows(20),
New Column("serial_number",
Character,
set values({"A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C"})
),
New Column("rel_event",
Character,
set values(
{"REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "REL_T0", "DROP1",
"DROP2", "DROP3", "DROP4", "DROP5", "DROP6"}
)
),
New Column("metric1", Continuous, set values({1, 2, 3, 5, 7, 10, 13, 2, 4, 5, 6, 8, 11, 1, 2, 4, 5, 5, 9, 12})),
New Column("metric2", Continuous, set values({2, 3, 4, 7, 9, 14, 18, 2, 3, 4, 7, 9, 14, 2, 5, 7, 8, 9, 11, 13}))
);
delta1col = RawData << New Column("delta1", Numeric, Continuous);
delta2col = RawData << New Column("delta2", Numeric, Continuous);
For Each Row(RawData,
// find REL_T0 row for current serial_number
start_row = Col Min(Row(), :serial_number);
delta1col[Row()] = :metric1 - :metric1[start_row];
delta2col[Row()] = :metric2 - :metric2[start_row];
);
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
Thanks Jarmo for your reply.
I like the idea of using For Each Row(), it simplifies the whole script.
But to add more context and make the script more robust, REL_T0 might not always be the first row per serial number (the database doesn't always query the data in the right order in the JMP table), and also the drift might be positive OR negative, so I don't know if Col Min() is well suited here (maybe I don't read it right).
Is it possible to anchor start_row to the REL_T0 value per serial number directly?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
Yes, it is possible with small changes directly to that script. I added debug column which you can remove to show that it matches correct rows
Names Default To Here(1);
dt = New Table("RawData",
Add Rows(27),
Compress File When Saved(1),
New Column("serial_number",
Character,
"Nominal",
Set Values(
{"A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", "D", "D"}
)
),
New Column("rel_event",
Character,
"Nominal",
Set Values(
{"REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "REL_T0", "DROP1",
"DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "DROP1", "DROP2", "DROP3", "REL_T0", "DROP4", "DROP5", "DROP6"}
)
),
New Column("metric1",
Numeric,
"Continuous",
Format("Best", 10),
Set Values([1, 2, 3, 5, 7, 10, 13, 2, 4, 5, 6, 8, 11, 1, 2, 4, 5, 5, 9, 12, 1, 2, 4, 5, 5, 9, 12])
),
New Column("metric2",
Numeric,
"Continuous",
Format("Best", 10),
Set Values([2, 3, 4, 7, 9, 14, 18, 2, 3, 4, 7, 9, 14, 2, 5, 7, 8, 9, 11, 13, 2, 5, 7, 8, 9, 11, 13])
)
);
debugcol = dt << New Column("REL_T0_ROW", Numeric, Continuous);
delta1col = dt << New Column("delta1", Numeric, Continuous);
delta2col = dt << New Column("delta2", Numeric, Continuous);
For Each Row(dt,
// find REL_T0 row for current serial_number
start_row = Col Min(If(:rel_event == "REL_T0", Row(), .), :serial_number);
debugcol[Row()] = start_row;
delta1col[Row()] = :metric1 - :metric1[start_row];
delta2col[Row()] = :metric2 - :metric2[start_row];
);
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
Thanks a lot Jarmo!
I tried this new version to a data table with 18000 rows and 2 metrics:
- Old method : ~5min
- your method : <2min
It's already a terrific improvement!!!
Thanks again!!
Voiz
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
Further speed improvements can be most likely done quite easily using matrices to perform those calculations. Also adding
dt << begin data update;
before the loop and
dt << end data update;
most likely will also provide some speed improvements, especially if your table isn't opened as invisible/private as it will "delay" then the visuals in table are updated.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
The normalization of measurement data is a common task in Data Analysis.
The Normalization GUI provides many options to do so.
In the GUI the user can specify many different settings - and the AddIn generates the corresponding code.
The generated formulas are based on Col ... aggregations, like Col Min, Col Max, Col Median ...
other options:
For this application case , you could use "first" (based on the ColMin(row()) logic)
... or specify the reference via "use a subset for normalization" [for cases where the reference values are not the "first" ones]:
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Get Direct Link
- Report Inappropriate Content
Re: Optimization of a (slow) JSL code based on FOR() loops
Hello Hogi,
Thanks for your suggestion, I checked your post related to the Normalization add-in. I will definitely keep it in mind for future projects.
Voiz