A similar idea, with a different implementation:
// Make sample data: a complete nx-by-ny integer grid (x = 0..nx-1, y = 0..ny-1)
// with a normally distributed z at every grid point.
// The values are collected in column vectors first and the table is created
// in a single step, instead of appending one row at a time; rows end up in the
// same order (x outer, y inner) and z is drawn in the same sequence.
nx = 200; // number of distinct x values
ny = 250; // number of distinct y values
xvals = J( nx * ny, 1, . );
yvals = J( nx * ny, 1, . );
zvals = J( nx * ny, 1, . );
irow = 0; // running row number, 1-based once incremented
For( ix = 0, ix < nx, ix += 1,
	For( iy = 0, iy < ny, iy += 1,
		irow += 1;
		xvals[irow] = ix;
		yvals[irow] = iy;
		zvals[irow] = Random Normal( 30, .25 ); // mean 30, std dev 0.25
	)
);
dt = New Table( "sample",
	New Column( "x", Numeric, "Continuous", Format( "Best", 12 ), Set Values( xvals ) ),
	New Column( "y", Numeric, "Continuous", Format( "Best", 12 ), Set Values( yvals ) ),
	New Column( "z", Numeric, "Continuous", Format( "Best", 12 ), Set Values( zvals ) )
);
// Copy z into a matrix that is one row/column larger than the grid on every side.
// Assumes the x/y data is gridded, integer-valued and has no holes; the order of
// the rows in the table does not matter.
xmin = Col Min( dt:x );
xmax = Col Max( dt:x );
ymin = Col Min( dt:y );
ymax = Col Max( dt:y );
// grid extent is (max - min + 1); two more rows/columns give the missing-value border
m = J( ymax - ymin + 3, xmax - xmin + 3, . );
// pull the columns into vectors once and subscript those inside the loop
xs = dt:x << Get Values;
ys = dt:y << Get Values;
zs = dt:z << Get Values;
For( i = 1, i <= N Rows( zs ), i += 1,
	// value - minimum is zero-based; +1 makes it one-based, +1 more skips the border.
	// rows of m are indexed by y, columns by x
	m[ys[i] - ymin + 2, xs[i] - xmin + 2] = zs[i];
);
// meanmat has the original grid size (m without its border). Each element holds
// the mean of the up-to-8 neighbours of the matching grid point in m.
nr = N Rows( m ) - 2; // same as ymax - ymin + 1
nc = N Cols( m ) - 2; // same as xmax - xmin + 1
meanmat = J( nr, nc, . );
// r and c are meanmat coordinates; the matching cell in m is [r + 1, c + 1],
// so the 3x3 window around it spans r..r+2 and c..c+2 in m
For( c = 1, c <= nc, c += 1,
	For( r = 1, r <= nr, r += 1,
		// the window may contain missing values taken from the border
		neighborhood = m[r :: (r + 2), c :: (c + 2)];
		neighborhood[2, 2] = .; // leave the centre point out of its own average
		// Mean ignores missing values: Mean( [8 2 .] ) == 5
		meanmat[r, c] = Mean( neighborhood );
	)
);
threshold = .5; // detection threshold
// Strip the border from m so that it lines up element-for-element with meanmat.
lastrow = ymax - ymin + 2;
lastcol = xmax - xmin + 2;
interior = m[2 :: lastrow, 2 :: lastcol];
// error is 1 where a point differs from its neighbour mean by more than the
// threshold, and 0 where it is ok
error = Abs( interior - meanmat ) > threshold;
// Add the outlier indicator back to the table: look up each row's x,y cell in
// the error matrix, collect the flags, then create the column in one step.
flags = J( N Rows( dt ), 1, . );
For( i = 1, i <= N Rows( dt ), i += 1,
	// value - minimum + 1 converts a coordinate into a one-based matrix index
	flags[i] = error[dt:y[i] - ymin + 1, dt:x[i] - xmin + 1];
);
dt << New Column( "outliers", Set Values( flags ) );
// Adjust the table's row states for the graph: colour rows by the outlier flag
// and select the rows that were flagged.
dt << Color or Mark by Column( :outliers );
dt << Select Where( :outliers == 1 );
// a graph
// Launch a Surface Plot of z over x and y on the table; the row states set on
// the table are expected to carry through to the plotted points.
dt << Surface Plot(
Columns( :x, :y, :z ),
// data points are displayed using the "Points" choice
Datapoints Choice( "Points" ),
Response( :z ),
Surface Color Method( "Solid", "Solid", "Solid", "Solid" ),
// x and y are the variable axes (empty axis-data lists)
SetVariableAxis( :x, Axis Data( {} ) ),
SetVariableAxis( :y, Axis Data( {} ) ),
// z axis current value is set to 30.5 (the sample z data is generated around 30)
SetZAxis( :z, Current Value( 30.5 ) ),
SetXVariable( :x ),
SetYVariable( :y ),
// 3D frame: 900 x 900 graph size and a fixed starting rotation
Frame3D(
Set Graph Size( 900, 900 ),
Set Rotation( -89, 1, -35 )
)
);
Red outliers above and below blue cloud
This does assume the data's x and y coordinates are consecutive integers. Make sure the edge behavior is what you expect and make sure the center is left out the way you expect. Test carefully! For example, a 2x2 case like this:
Tiny test case making use of the missing value border, a lot!
Craige