Hi @Victor60
i have once written a script that earmarks rows if they are outliers in any of the groups in any of the variables. i have updated it to include different days in once column and a couple of columns of temperature. it produces a new binary column indicating outliers for each original column and one more indicating whether the observation is an outlier in any of the variables.
lets us know if it works.
ron
Names Default To Here( 1 );
// make a sample data table
dt = New Table( "Data",
add rows( 10000 ),
New Column( "it", Numeric, Ordinal, Format( "Best", 8 ), Formula( Row() ) ),
New Column( "Day", Numeric, Ordinal, Format( "Best", 8 ), formula( Random Integer( 110 ) ) ),
New Column( "temp1", Numeric, Continues, Format( "Best", 8 ), formula( Random Lognormal( :Day / 10, 5 ) ) ),
New Column( "temp2", Numeric, Continues, Format( "Best", 8 ), formula( Random Lognormal( :Day / 10, 5 ) ) ),
New Column( "Out",
Numeric,
Ordinal,
Format( "Best", 8 ),
Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
Color Cell by Value,
formula( 0 )
),
New Column( "Outtemp1",
Numeric,
Ordinal,
Format( "Best", 8 ),
Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
Color Cell by Value,
formula( 0 )
),
New Column( "Outtemp2",
Numeric,
Ordinal,
Format( "Best", 8 ),
Set Property( "Value Colors", {0 = -13977687, 1 = -4222943} ),
Color Cell by Value,
formula( 0 )
),
);
dt << run formulas;
Column( dt, "it" ) << delete formula;
Column( dt, "Day" ) << delete formula;
Column( dt, "temp1" ) << delete formula;
Column( dt, "temp2" ) << delete formula;
Column( dt, "Out" ) << delete formula;
Column( dt, "Outtemp1" ) << delete formula;
Column( dt, "Outtemp2" ) << delete formula;
// now we get to work
// make a list of days
byvarlist = Associative Array( dt:Day ) << get keys;
// make a list of columns
cols = dt << get column names( Continuous ); //List all Numeric columns
// loop through the days
For( i = 1, i <= N Items( byvarlist ), i++,
// select each Day
dt << select where( dt:Day == byvarlist[i] );
// subset each Day
subdt = dt << Subset( Selected Rows( 1 ) );
// clear selection for smooth workflow
dt << clear select;
// fined outliers in each variable within each Day
For( ii = 1, ii <= N Items( cols ), ii++,
out = Eval(
Parse(
"subdt << get rows where(
subdt:" || Char( cols[ii] ) || " > Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.75 ) + 1.5 * (Col Quantile( subdt:" || Char( cols[ii] ) || ", 0.75 ) - Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.25 )) | subdt:" || Char( cols[ii] ) || " <
Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.25 ) - 1.5 * (Col Quantile( subdt:" || Char( cols[ii] ) || ", 0.75 ) - Col Quantile( subdt:" || Char( cols[ii] ) ||
", 0.25 )));"
)
)`;
Eval( Parse( "column (subdt, \!"Out" || Char( cols[ii] || "\!" )[" || Char( out ) || "] = 1;" ) ) );
// also indicate the global outlier
Column( subdt, "Out" )[out] = 1;
//wait (0.00001);
);
// update original data table with new information for the Day
subdt << delete columns( "Day" );
subdt << delete columns( cols );
dt << Update( With( subdt ), Match Columns( :it = :it ) );
Close( subdt, No Save );
//Wait( 0.00001 );
);
dt << color by column( :Out );
... View more