Let's take the sample data set *) and do some benchmarking.
- Let's start with an example of how you should NOT do it ...
*) Actual use cases are ~500 x 500 dies and hundreds of wafers.
// Keep all unscoped variables of this script in its own (Here) namespace.
Names Default To Here( 1 );

// Distance limit used further down when filtering neighbor candidates;
// 1 keeps only the direct neighbors.
max dist = 1;

// Benchmark input: the stacked wafer table from the JMP sample data folder.
dt0 = Open( "$SAMPLE_DATA/Wafer Stacked.jmp" );

// Yield to JMP once, then start the clock for the first timed phase.
Wait( 0 );
t0 = HP Time();

// Store every row's own row number in a column named "row".
dt0 << New Column( "row", Set Each Value( Row() ) );
// Create a lookup table with one row per (x, y) grid position, both coordinates
// running from -21 to 21. Two single-column helper tables are cross-joined into
// "xyLocations" and closed again right away.
dtx = As Table( (-21 :: 21)`, <<Invisible );
dty = As Table( (-21 :: 21)`, <<Invisible );
dtLookup = dty << Join( With( dtx ), Cartesian Join, Output Table( "xyLocations" ) );
Close( dtx, NoSave );
Close( dty, NoSave );
Column( dtLookup, 1 ) << Set Name( "x" );
Column( dtLookup, 2 ) << Set Name( "y" );

// Create the KDTable from the coordinates only: the matrix is taken BEFORE the
// "row_lookup" column is added, so that column is not part of the matrix.
mat = dtLookup << Get As Matrix;
tbl = KDTable( mat );
dtLookup << New Column( "row_lookup", Set Each Value( Row() ) );

// Add an Expression column holding, for every lookup row, the rows of its neighbors.
// Eval( Eval Expr( ... ) ) substitutes the current value of max dist into the
// expression before the column is created.
// NOTE(review): 8 candidates cover the 8 grid cells surrounding a position, which
// is enough for max dist = 1; the count presumably has to grow if max dist is raised.
Eval(
	Eval Expr(
		dtLookup << New Column( "neighbors",
			Expression,
			Set Each Value(
				// 8 nearest candidates for the current row, with their distances
				{neighbors, dist} = tbl << K nearest rows( 8, Row() );
				// keep only the candidates within the distance limit
				neighbors[Where( dist <= Expr( max dist ) )];
			)
		)
	)
);
// Bring the lookup information into the main table: rows are paired on the die
// coordinates (X_Die with x, Y_Die with y). Only "row_lookup" and "neighbors"
// are added; no existing column of dt0 is replaced.
dt0 << Update(
	With( dtLookup ),
	Match Columns( :X_Die = :x, :Y_Die = :y ),
	Add Columns From Update Table( :row_lookup, :neighbors ),
	Replace Columns In Main Table( None )
);

// Checkpoint t1: end of the phase that is printed as "create lookup".
Wait( 0 );
t1 = HP Time();
// Break the main table into one private table per (Lot, Wafer) combination so
// that the row search below only has to look at a single wafer.
// (The neighbor averaging could be done right here to save time, but that is
// not the goal of this benchmark.)
mySubsets = dt0 << Subset( By( :Lot, :Wafer ), All Rows, Selected Columns Only( 0 ), Private( 1 ) );

// This is the slow part being measured: for every row of every wafer table,
// each single neighbor entry issues its own Where() query against that table.
For Each( {waferDt}, mySubsets,
	waferDt:neighbors << Set Each Value(
		// neighbor rows as numbered in the lookup table
		lookupRows = :neighbors;
		// replace each of them with the matching row of the actual wafer
		Transform Each( {lookupRow}, lookupRows,
			// "row" values of the wafer rows whose row_lookup equals this entry
			hits = :row[Where( waferDt, :row_lookup == lookupRow )];
			// first hit if there is one, otherwise missing
			If( N Items( hits ) > 0,
				hits[1],
				.
			);
		);
	)
);
// Stack all per-wafer tables into one private table again ...
tmp = mySubsets[1] << Concatenate( mySubsets[2 :: N Items( mySubsets )], Private() );

// ... and close the individual subsets without saving.
For( i = 1, i <= N Items( mySubsets ), i++,
	Close( mySubsets[i], NoSave )
);

// Write the translated neighbors back into the main table. Rows are matched on
// Lot, Wafer and both die coordinates; only :neighbors is replaced and no new
// column is added.
dt0 << Update(
	With( tmp ),
	Match Columns( :Lot = :Lot, :Wafer = :Wafer, :X_Die = :X_Die, :Y_Die = :Y_Die ),
	Add Columns From Update Table( None ),
	Replace Columns In Main Table( :neighbors )
);
Close( tmp, NoSave );

// Checkpoint t2: end of the phase that is printed as "correct the rows".
Wait( 0 );
t2 = HP Time();
// Use the new column: for every row, average Defects over the rows listed in :neighbors.
dt0 << New Column( "Defects average", Formula( Mean( :Defects[:neighbors] ) ) );

// Make sure the formula has actually been evaluated before the clock is read;
// otherwise the evaluation may still be pending and this phase is under-reported.
// Wait( 0 ) mirrors the other two checkpoints.
dt0 << Run Formulas;
Wait( 0 );
t3 = HP Time();

// HP Time() counts microseconds, so dividing by 1000000 reports seconds.
Print( "create lookup", (t1 - t0) / 1000000 );
Print( "correct the rows", (t2 - t1) / 1000000 );
Print( "calculate neighbor average", (t3 - t2) / 1000000 );