// Create a dummy table: 3 serial numbers (A, B, C) with a REL_T0 baseline
// read followed by a series of DROP readings for two metrics.
RawData = New Table( "RawData",
	Add Rows( 20 ),
	New Column( "serial_number", Character, "Nominal",
		Set Values( {"A","A","A","A","A","A","A","B","B","B","B","B","B","C","C","C","C","C","C","C"} )
	),
	New Column( "rel_event", Character, "Nominal",
		Set Values( {
			"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5","DROP6",
			"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5",
			"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5","DROP6"
		} )
	),
	New Column( "metric1", Numeric, "Continuous", Set Values( {1,2,3,5,7,10,13,2,4,5,6,8,11,1,2,4,5,5,9,12} ) ),
	New Column( "metric2", Numeric, "Continuous", Set Values( {2,3,4,7,9,14,18,2,3,4,7,9,14,2,5,7,8,9,11,13} ) )
);
// Use "/" as the path separator: JMP resolves it on both Windows and macOS,
// whereas "\" is Windows-only.
RawData << Save( "$Desktop/RawData.jmp" );
// Force rel_event levels to display/sort in chronological order rather than alphabetically.
Column( RawData, "rel_event" ) << Set Property( "Value Ordering", {"REL_T0","DROP1","DROP2","DROP3","DROP4","DROP5","DROP6"} );
//Create handle for data table to be analyzed
// NOTE(review): this overwrites the handle returned by New Table above with
// whatever table is currently frontmost — it assumes RawData is still the
// active table; confirm this holds when other tables are open.
RawData = Current Data Table();
// Data columns for which a delta-to-REL_T0 column will be computed below.
ChosenDataCol = {"metric1", "metric2"};
NumDataCol = N Items( ChosenDataCol );
NumRow = N Row( RawData );
// All column names in the table (logged for debugging; not read by the loop below).
RawDataCol = RawData << get column names;
// Echo the setup values to the log.
Show( NumDataCol );
Show( ChosenDataCol );
Show( RawData );
Show( NumRow );
Show( RawDataCol );
//Scan each selected data row for which Deltas must be computed
// Build the list of unique serial numbers ONCE: it does not depend on which
// metric column is being processed, so it does not belong inside the cpt loop.
serial_number_list = {};
For( i = 1, i <= NumRow, i++,
	current_sn = :serial_number[i];
	If( !Contains( serial_number_list, current_sn ),
		Insert Into( serial_number_list, current_sn )
	);
);
num_serial_numbers = N Items( serial_number_list );
// Compute the delta relative to the REL_T0 reading for each selected data column.
// Debug Show() calls were removed from the inner loops: printing to the log on
// every iteration dominates the runtime on large tables.
For( cpt = 1, cpt <= NumDataCol, cpt++,
	// Column that receives the delta data for this metric.
	DeltaCol = RawData << New Column( "∆_" || Char( ChosenDataCol[cpt] ), Numeric, "Continuous" );
	TargetCol = Column( RawData, ChosenDataCol[cpt] );
	// Process the rows belonging to each unique serial number.
	For( i = 1, i <= num_serial_numbers, i++,
		sn_rows = RawData << Get Rows Where( :serial_number == serial_number_list[i] );
		num_sn_rows = N Row( sn_rows );
		// Identify the time-zero (REL_T0) value for this serial number, if any.
		TzeroExist = 0;
		TzeroVal = 0;
		For( j = 1, j <= num_sn_rows, j++,
			If( :rel_event[sn_rows[j]] == "REL_T0",
				TzeroVal = TargetCol[sn_rows[j]];
				TzeroExist = 1;
			);
		); //For j
		// Calculate drift relative to time zero; serial numbers without a
		// REL_T0 reference keep missing deltas (same behavior as before).
		If( TzeroExist == 1,
			For( j = 1, j <= num_sn_rows, j++,
				DeltaCol[sn_rows[j]] = TargetCol[sn_rows[j]] - TzeroVal
			)
		);
	); //For i
); //For cpt
Names Default To Here( 1 );
// Demo table with four serial numbers; for "D" the REL_T0 record is placed in
// the MIDDLE of its group, to show the anchor lookup does not rely on row order.
drift_dt = New Table( "RawData",
	Add Rows( 27 ),
	Compress File When Saved( 1 ),
	New Column( "serial_number", Character, "Nominal",
		Set Values(
			{"A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", "D", "D"}
		)
	),
	New Column( "rel_event", Character, "Nominal",
		Set Values(
			{"REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "REL_T0", "DROP1",
			"DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "DROP1", "DROP2", "DROP3", "REL_T0", "DROP4", "DROP5", "DROP6"}
		)
	),
	New Column( "metric1", Numeric, "Continuous", Format( "Best", 10 ),
		Set Values( [1, 2, 3, 5, 7, 10, 13, 2, 4, 5, 6, 8, 11, 1, 2, 4, 5, 5, 9, 12, 1, 2, 4, 5, 5, 9, 12] )
	),
	New Column( "metric2", Numeric, "Continuous", Format( "Best", 10 ),
		Set Values( [2, 3, 4, 7, 9, 14, 18, 2, 3, 4, 7, 9, 14, 2, 5, 7, 8, 9, 11, 13, 2, 5, 7, 8, 9, 11, 13] )
	)
);
// Debug column recording which row each delta is anchored to (safe to remove).
t0_row_col = drift_dt << New Column( "REL_T0_ROW", Numeric, Continuous );
d1_col = drift_dt << New Column( "delta1", Numeric, Continuous );
d2_col = drift_dt << New Column( "delta2", Numeric, Continuous );
For Each Row( drift_dt,
	// Row number of the REL_T0 record within this row's serial_number group.
	anchor = Col Min( If( :rel_event == "REL_T0", Row(), . ), :serial_number );
	t0_row_col[Row()] = anchor;
	d1_col[Row()] = :metric1 - :metric1[anchor];
	d2_col[Row()] = :metric2 - :metric2[anchor];
);
Generally, removing all prints (Show/Print) from inside loops will provide a speed-up, but in this case the bigger win comes from rewriting the script without those loops entirely — and when iterating over a data table, For Each Row() is a better option than a For loop.
Here is one option which should yield faster results (it can also be turned into a column formula and remain fairly fast, if needed). It assumes the data is ordered so that REL_T0 is always the first row for each serial number; if that is not the case, a small change is required.
Names Default To Here( 1 );
// Recreate the dummy table from the original post.
demo_dt = New Table( "RawData",
	Add Rows( 20 ),
	New Column( "serial_number", Character,
		Set Values( {"A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C"} )
	),
	New Column( "rel_event", Character,
		Set Values(
			{"REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "REL_T0", "DROP1",
			"DROP2", "DROP3", "DROP4", "DROP5", "DROP6"}
		)
	),
	New Column( "metric1", Continuous, Set Values( {1, 2, 3, 5, 7, 10, 13, 2, 4, 5, 6, 8, 11, 1, 2, 4, 5, 5, 9, 12} ) ),
	New Column( "metric2", Continuous, Set Values( {2, 3, 4, 7, 9, 14, 18, 2, 3, 4, 7, 9, 14, 2, 5, 7, 8, 9, 11, 13} ) )
);
d1_col = demo_dt << New Column( "delta1", Numeric, Continuous );
d2_col = demo_dt << New Column( "delta2", Numeric, Continuous );
For Each Row( demo_dt,
	// First row of this serial_number group — valid only because the data is
	// ordered so that REL_T0 is always the first record of each group.
	anchor = Col Min( Row(), :serial_number );
	d1_col[Row()] = :metric1 - :metric1[anchor];
	d2_col[Row()] = :metric2 - :metric2[anchor];
);
Thanks Jarmo for your reply.
I like the idea of using For Each Row(), it simplifies the whole script.
But to add more context and make the script more robust: REL_T0 might not always be the first row per serial number (the database doesn't always return the data in the right order in the JMP table), and the drift might be positive or negative, so I'm not sure Col Min() is well suited here (maybe I'm not reading it right).
Is it possible to anchor start_row to the REL_T0 value per serial number directly?
Yes, it is possible with small changes directly to that script. I added debug column which you can remove to show that it matches correct rows
Names Default To Here( 1 );
// Dummy table: serial number "D" has its REL_T0 row in the middle of its
// group, demonstrating the anchor lookup does not depend on row order.
dt = New Table( "RawData",
	Add Rows( 27 ),
	Compress File When Saved( 1 ),
	New Column( "serial_number",
		Character,
		"Nominal",
		Set Values(
			{"A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D", "D", "D"}
		)
	),
	New Column( "rel_event",
		Character,
		"Nominal",
		Set Values(
			{"REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "REL_T0", "DROP1", "DROP2", "DROP3", "DROP4", "DROP5", "REL_T0", "DROP1",
			"DROP2", "DROP3", "DROP4", "DROP5", "DROP6", "DROP1", "DROP2", "DROP3", "REL_T0", "DROP4", "DROP5", "DROP6"}
		)
	),
	New Column( "metric1",
		Numeric,
		"Continuous",
		Format( "Best", 10 ),
		Set Values( [1, 2, 3, 5, 7, 10, 13, 2, 4, 5, 6, 8, 11, 1, 2, 4, 5, 5, 9, 12, 1, 2, 4, 5, 5, 9, 12] )
	),
	New Column( "metric2",
		Numeric,
		"Continuous",
		Format( "Best", 10 ),
		Set Values( [2, 3, 4, 7, 9, 14, 18, 2, 3, 4, 7, 9, 14, 2, 5, 7, 8, 9, 11, 13, 2, 5, 7, 8, 9, 11, 13] )
	)
);
// Debug column showing which row each delta is anchored to (safe to remove).
debugcol = dt << New Column( "REL_T0_ROW", Numeric, Continuous );
delta1col = dt << New Column( "delta1", Numeric, Continuous );
delta2col = dt << New Column( "delta2", Numeric, Continuous );
For Each Row( dt,
	// Find the REL_T0 row for the current serial_number, wherever it sits in the group.
	start_row = Col Min( If( :rel_event == "REL_T0", Row(), . ), :serial_number );
	debugcol[Row()] = start_row;
	// Guard against serial numbers with no REL_T0 record at all: start_row is
	// missing in that case, and subscripting a column with a missing row index
	// errors out. Leave those deltas missing instead.
	If( !Is Missing( start_row ),
		delta1col[Row()] = :metric1 - :metric1[start_row];
		delta2col[Row()] = :metric2 - :metric2[start_row];
	);
);
Thanks a lot Jarmo!
I tried this new version to a data table with 18000 rows and 2 metrics:
- Old method : ~5min
- your method : <2min
It's already a terrific improvement!!!
Thanks again!!
Voiz
Further speed improvements can most likely be achieved quite easily by using matrices to perform those calculations. Also, adding
dt << begin data update;
before the loop and
dt << end data update;
will most likely also provide some speed improvement, especially if your table isn't opened as invisible/private, because it delays updating the table's display until the loop finishes.
The normalization of measurement data is a common task in Data Analysis.
The Normalization GUI provides many options to do so.
In the GUI the user can specify many different settings - and the AddIn generates the corresponding code.
The generated formulas are based on Col ... aggregations, like Col Min, Col Max, Col Median ...
other options:
For this application case, you could use "first" (based on the Col Min(Row()) logic)
... or specify the reference via "use a subset for normalization" [for cases where the reference values are not the "first" ones]:
Hello Hogi,
Thanks for your suggestion, I checked your post related to the Normalization add-in. I will definitely keep it in mind for future projects.
Voiz