I have 7000 rows, that consists of multiple batches. I am doing a distribution by batch and using the outlier box plot to look for outliers. But I have 100+ batches, is there a way to have JMP automatically select which rows are outliers?
attached is an example of my data I was trying to use this with. Ideally it would do this for each Y.
now see if it works for you...
the script you sent was not adapted to your data at all...
Seems to work. thanks.
is this what you need?
Names Default To Here( 1 );
// make a sample data table
dt = New Table( "Data",
add rows( 10000 ),
New Column( "it", Numeric, Ordinal, Format( "Best", 8 ), Formula( Row() ) ),
New Column( "Batch", Numeric, Ordinal, Format( "Best", 8 ), formula( Random Integer( 110 ) ) ),
New Column( "y1", Numeric, Continues, Format( "Best", 8 ), formula( Random Lognormal( :Batch / 10, 5 ) ) ),
New Column( "y2", Numeric, Continues, Format( "Best", 8 ), formula( Random Lognormal( :Batch / 10, 5 ) ) ),
New Column( "Out",
Numeric,
Ordinal,
Format( "Best", 8 ),
Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
Color Cell by Value,
formula( 0 )
),
New Column( "Outy1",
Numeric,
Ordinal,
Format( "Best", 8 ),
Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
Color Cell by Value,
formula( 0 )
),
New Column( "Outy2",
Numeric,
Ordinal,
Format( "Best", 8 ),
Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
Color Cell by Value,
formula( 0 )
),
);
dt << run formulas;
Column( dt, "it" ) << delete formula;
Column( dt, "Batch" ) << delete formula;
Column( dt, "y1" ) << delete formula;
Column( dt, "y2" ) << delete formula;
Column( dt, "Out" ) << delete formula;
Column( dt, "Outy1" ) << delete formula;
Column( dt, "Outy2" ) << delete formula;
// now we get to work
// make a list of batches
byvarlist = Associative Array( dt:Batch ) << get keys;
// make a list of columns
cols = dt << get column names( Continuous ); //List all Numeric columns
// loop through the batches
For( i = 1, i <= N Items( byvarlist ), i++,
// select each batch
dt << select where( dt:Batch == byvarlist[i] );
// subset each batch
subdt = dt << Subset( Selected Rows( 1 ) );
// clear selection for smoth workflow
dt << clear select;
// finde outliers in each variable within each batch
For( ii = 1, ii <= N Items( cols ), ii++,
out = Eval(
Parse(
"subdt << get rows where(
subdt:" || Char( cols[ii] ) || " > Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.75 ) + 1.5 * (Col Quantile( subdt:" || Char( cols[ii] ) || ", 0.75 ) - Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.25 )) | subdt:" || Char( cols[ii] ) || " <
Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.25 ) - 1.5 * (Col Quantile( subdt:" || Char( cols[ii] ) || ", 0.75 ) - Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.25 )));"
)
)`;
Eval( Parse( "column (subdt, \!"Out" || Char( cols[ii] || "\!" )[" || Char( out ) || "] = 1;" ) ) );
// also indicate the global outlier
Column( subdt, "Out" )[out] = 1;
//wait (0.00001);
);
// update original data table with new information for the batch
subdt << delete columns( "Batch" );
subdt << delete columns( cols );
dt << Update( With( subdt ), Match Columns( :it = :it ) );
Close( subdt, No Save );
//Wait( 0.00001 );
);
dt << color by column( :Out );