cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Browse apps to extend the software in the new JMP Marketplace
Choose Language Hide Translation Bar
lmaley
Level I

Automatically selecting outliers

I have 7000 rows, that consists of multiple batches.   I am doing a distribution by batch and using the outlier box plot to look for outliers.  But I have 100+ batches, is there a way to have JMP automatically select which rows are outliers?

 

24 REPLIES 24
lmaley
Level I

Re: Automatically selecting outliers

attached is an example of my data I was trying to use this with.  Ideally it would do this for each Y.  

 

 

ron_horne
Super User (Alumni)

Re: Automatically selecting outliers

now see if it works for you...

the script you sent was not adapted to your data at all...

lmaley
Level I

Re: Automatically selecting outliers

Seems to work.  thanks.

lmaley
Level I

Re: Automatically selecting outliers

Just one more quick question. I deleted about 90 columns, can this work
with n columns or does it always have to be 3 columns?
ron_horne
Super User (Alumni)

Re: Automatically selecting outliers

is this what you need?


Names Default To Here( 1 );

// make a sample data table
dt = New Table( "Data",
	add rows( 10000 ),
	New Column( "it", Numeric, Ordinal, Format( "Best", 8 ), Formula( Row() ) ),
	New Column( "Batch", Numeric, Ordinal, Format( "Best", 8 ), formula( Random Integer( 110 ) ) ),
	New Column( "y1", Numeric, Continues, Format( "Best", 8 ), formula( Random Lognormal( :Batch / 10, 5 ) ) ),
	New Column( "y2", Numeric, Continues, Format( "Best", 8 ), formula( Random Lognormal( :Batch / 10, 5 ) ) ),
	New Column( "Out",
		Numeric,
		Ordinal,
		Format( "Best", 8 ),
		Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
		Color Cell by Value,
		formula( 0 )
	),
	New Column( "Outy1",
		Numeric,
		Ordinal,
		Format( "Best", 8 ),
		Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
		Color Cell by Value,
		formula( 0 )
	),
	New Column( "Outy2",
		Numeric,
		Ordinal,
		Format( "Best", 8 ),
		Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
		Color Cell by Value,
		formula( 0 )
	), 

);

dt << run formulas;
Column( dt, "it" ) << delete formula;
Column( dt, "Batch" ) << delete formula;
Column( dt, "y1" ) << delete formula;
Column( dt, "y2" ) << delete formula;
Column( dt, "Out" ) << delete formula;
Column( dt, "Outy1" ) << delete formula;
Column( dt, "Outy2" ) << delete formula;


// now we get to work

// make a list of batches
byvarlist = Associative Array( dt:Batch ) << get keys;

// make a list of columns
cols = dt << get column names( Continuous ); //List all Numeric columns


// loop through the batches
For( i = 1, i <= N Items( byvarlist ), i++, 

// select each batch
	dt << select where( dt:Batch == byvarlist[i] );

// subset each batch
	subdt = dt << Subset( Selected Rows( 1 ) );

// clear selection for smoth workflow
	dt << clear select;

// finde outliers in each variable within each batch
	For( ii = 1, ii <= N Items( cols ), ii++, 
	
		out = Eval(
			Parse(
				"subdt << get rows where(
	subdt:" || Char( cols[ii] ) || " > Col Quantile( subdt:" || Char( cols[ii] ) ||
				", 0.75 ) + 1.5 * (Col Quantile( subdt:" || Char( cols[ii] ) || ", 0.75 ) - Col Quantile( subdt:" || Char( cols[ii] ) ||
				", 0.25 )) | subdt:" || Char( cols[ii] ) || " <
	Col Quantile( subdt:" || Char( cols[ii] ) ||
				", 0.25 ) - 1.5 * (Col Quantile( subdt:" || Char( cols[ii] ) || ", 0.75 ) - Col Quantile( subdt:" || Char( cols[ii] ) ||
				", 0.25 )));"
			)
		)`;
	
		Eval( Parse( "column (subdt, \!"Out" || Char( cols[ii] || "\!" )[" || Char( out ) || "] = 1;" ) ) );
	// also indicate the global outlier
		Column( subdt, "Out" )[out] = 1;
		//wait (0.00001);

	);

// update original data table with new information for the batch
	subdt << delete columns( "Batch" );
	subdt << delete columns( cols );
	dt << Update( With( subdt ), Match Columns( :it = :it ) );
	Close( subdt, No Save );
	//Wait( 0.00001 );

);

dt << color by column( :Out );