I am using the script below to get the robust mean and robust standard deviation. It works fine up to about 1M rows, but on a table with ~5M rows it took 45 seconds. Is there a more efficient way to get these values?
// Keep unqualified names in the Here namespace for this script.
Names Default To Here( 1 );
// 0. Precondition: the script operates on the current data table, so stop
//    immediately when no table is open at all.
openTableCount = N Table();
If( openTableCount == 0,
    Throw( "No data table is open. Please open a data table first." )
);
// The table that the Distribution launch below is sent to.
dt = Current Data Table();
// 1. Prompt for exactly one numeric column (Min Col and Max Col are both 1).
dlg = Column Dialog(
    yCol = Col List(
        "Select a Numeric Column",
        Min Col( 1 ),
        Max Col( 1 ),
        Data Type( "Numeric" )
    )
);
// A Button value of -1 is treated as a cancelled dialog: abort the script.
If( dlg["Button"] == -1,
    Throw( "User cancelled" )
);
// The dialog hands back a list of chosen columns; take the single entry and
// remember its name, which is later used as the report outline title.
chosenCols = dlg["yCol"];
selectedCol = chosenCols[1];
colName = selectedCol << Get Name;
// Start the timer after the dialog so user think-time is not measured.
t1 = Tick Seconds();
// 2. Run Distribution with Robust Mean and Robust Std Dev enabled.
//    The report is Invisible and only its Summary Statistics table is read
//    afterwards, so the histogram, quantiles table and outlier box plot are
//    switched off rather than being built and then ignored.
//    NOTE(review): the actual time saved on multi-million-row tables has not
//    been measured here — confirm with the timing print at the end of the script.
dist = dt << Distribution(
    Continuous Distribution(
        Column( selectedCol ),
        // Retained from the original launch.
        // NOTE(review): probably unnecessary now that the outlier box plot is
        // turned off below — confirm and remove.
        Outlier Box Plot Row Cutoff( 100000000 ),
        Quantiles( 0 ),
        Histogram( 0 ),
        Outlier Box Plot( 0 ),
        Customize Summary Statistics(
            Robust Mean( 1 ),
            Robust Std Dev( 1 )
        )
    ),
    Invisible
);
// Yield once before the report is queried.
Wait( 0 );
// 3. Extract Robust Mean and Robust Std Dev from the Summary Statistics table.
//    Both results start as missing (.) and stay missing when the matching row
//    is not found or the report cannot be navigated.
distRep = dist << Report;
robustMean = .;
robustSigma = .;
Try(
    // Outline for the selected column, then its Summary Statistics outline.
    summBox = distRep[Outline Box( colName )][Outline Box( "Summary Statistics" )];
    // Statistic labels (first string column) and values (first number column).
    nameCol = summBox[String Col Box( 1 )] << Get;
    valueCol = summBox[Number Col Box( 1 )] << Get;
    // Pick out the two robust rows by their label text.
    For( i = 1, i <= N Items( nameCol ), i++,
        rowLabel = nameCol[i];
        If( Contains( rowLabel, "Robust Mean" ),
            robustMean = valueCol[i]
        );
        // Accept both the abbreviated and the spelled-out label so the value is
        // not silently left missing when the report uses the other wording.
        // NOTE(review): assumes the report label is one of these two spellings —
        // confirm against the JMP version in use.
        If( Contains( rowLabel, "Robust Std Dev" ) | Contains( rowLabel, "Robust Standard Deviation" ),
            robustSigma = valueCol[i]
        );
    );
,
    Print( "Warning: Could not navigate report structure. Check outline box names." );
);
// The report was launched Invisible and nothing below uses it, so close it
// here instead of leaving a hidden platform window open after every run.
// Wrapped in Try so a failed launch does not raise a second error.
Try( dist << Close Window );
// 4. Present the results. Each estimate is rendered as text first: a missing
//    value becomes "Not available", otherwise the number is formatted with
//    Char( value, 10, 4 ).
meanText = If( Is Missing( robustMean ),
    "Not available",
    Char( robustMean, 10, 4 )
);
sigmaText = If( Is Missing( robustSigma ),
    "Not available",
    Char( robustSigma, 10, 4 )
);
New Window( "Robust Statistics Results",
    V List Box(
        Text Box( "Column: " || colName ),
        Text Box( "Robust Mean: " || meanText ),
        Text Box( "Robust Std Dev: " || sigmaText )
    )
);
// ------------------------
// Report the time elapsed since t1 (set just before the Distribution launch),
// rounded to three decimals.
t2 = Tick Seconds();
elapsedSeconds = Round( t2 - t1, 3 );
Print( "Total Ticks: " || Char( elapsedSeconds ) || " seconds" );