For the "random sampling - per variant" approach described in
https://community.jmp.com/t5/Discussions/How-to-Select-a-quota-sample-from-a-data-set/m-p/821078/hig...
I created a JSL snippet to stratify arbitrary data - just select the columns and click OK.
libRecall (the library behind the recall/clear buttons and the storing of the dialog settings) can be downloaded here: Recall Function Library
// Keep every unqualified name of this script in the Here namespace.
Names Default To Here( 1 );

// Verbosity flag handed to every librecall call below (0 = quiet).
verbose = 0;
//Include( ".\libRecall_v2.jsl" );

// Dialog objects registered with librecall under the key "Stratify", and one
// value list per object.
// NOTE(review): the values look like the dialog defaults (1 column, 5 folds) and
// the "si^" / "s^" prefixes are librecall-specific codes - confirm both against
// the library documentation.
objects = {"si^myCols", "s^Ncolumns", "s^Nfolds"};
values = {{}, {1}, {5}};

// Best effort: the original author reports an issue with projects, so a failure
// here must not stop the script. The failure is reported when verbose is on
// instead of being swallowed without any trace.
Try(
	librecall:genArrays( objects, values, "Stratify", verbose ),
	If( verbose,
		Write( "\!Nlibrecall:genArrays failed - continuing without it." )
	)
);
// Table that is current when the dialog opens.
dt = Current Data Table();

// Modal dialog that collects
//   - the columns to stratify by (myCols),
//   - how many fold columns to create (Ncolumns, integer >= 1),
//   - the number of folds K (Nfolds, integer >= 2).
// <<Return Result makes the values of the named display boxes available as
// nw["myCols"], nw["Ncolumns"], nw["Nfolds"] and nw["button"].
nw = New Window( "K Fold Creator",
	<<Type( "Modal Dialog" ),
	<<Return Result,
	// Validation fails (returns 0) while the "stratify by" list is empty.
	<<On Validate(
		If( N Items( myCols << get items() ),
			1,
			Caption( "please select a column" );
			0;
		)
	),
	V List Box(
		Lineup Box( N Col( 2 ),
			Panel Box( "", fcs = Filter Col Selector() ),
			Panel Box( "",
				Lineup Box( N Col( 2 ), spacing( 3 ),
					Button Box( "stratify by", myCols << append( fcs << get selected ) ),
					myCols = Col List Box( width( 200 ), min items( 1 ), nlines( 11 ) )
				),
				Lineup Box( N Col( 2 ), spacing( 3 ),
					Text Box( "create more than 1 column?" ),
					Ncolumns = Number Edit Box( 1, 4, <<setintegeronly( 1 ), <<setminimum( 1 ) ),
					Text Box( "Number of Folds (K)" ),
					Nfolds = Number Edit Box( 5, 4, <<setintegeronly( 1 ), <<setminimum( 2 ) )
				)
			)
		),
		H List Box(
			Button Box( "OK",
				// Storing the roles is a convenience only. If librecall is not
				// available the error must not abort this button script, because
				// the statement below still has to run.
				Try(
					librecall:storeRoles( "Stratify", verbose ),
					Write( "\!Nlibrecall:storeRoles failed - the dialog settings were not stored." )
				);
				// the modal dialog stores the selected columns.
				// Select every item so that all listed columns are returned.
				For Each( {item}, 1 :: N Items( myCols << Get Items() ), myCols << Set Selected( item, 1 ) );
			),
			Button Box( "recall", librecall:recallRoles( "Stratify", verbose ) ),
			Button Box( "clear", librecall:resetRoles( "Stratify", verbose ) ),
			Button Box( "cancel" )
		)
	)
);

// Anything but OK (button == 1) ends the script without touching the table.
If( Not( nw["button"] == 1 ),
	Stop()
);
// Create the requested number of "Fold" formula columns in dt.
//
// Reads:  nw["myCols"], nw["Ncolumns"], nw["Nfolds"] (dialog result) and dt.
// Effect: adds nw["Ncolumns"] new columns named "Fold" to dt. Each one holds the
//         formula Floor( (rank - 1) / number * K ) + 1 for non-excluded rows and
//         missing for excluded rows, where rank is Col Rank and number is
//         Col Number of a Random Uniform() draw, both grouped by Excluded() and
//         the stratification columns.

// Turn the entries returned by the dialog into column references of dt.
myCols = Transform Each( {col}, nw["myCols"], Name Expr( As Column( dt, col ) ) );
nPicked = N Items( myCols );

// remove continuous and exotic values.
myCols = Filter Each( {col}, myCols,
	Contains( {"Nominal", "Ordinal"}, col << Get Modeling Type ) > 0
);

// Tell the user when part of the selection is not used for stratification.
If( N Items( myCols ) < nPicked,
	Write(
		"\!NK Fold Creator: " || Char( nPicked - N Items( myCols ) ) ||
		" selected column(s) are neither Nominal nor Ordinal and are ignored for stratification."
	)
);

// Col Rank( tmp, Excluded(), <stratification columns...> )
rankExpr = Expr(
	Col Rank( tmp, Excluded() )
);
For Each( {col}, myCols, Insert Into( rankExpr, Name Expr( col ) ) );

// Same argument list, but with Col Number() as the head: the group size that
// the rank is divided by.
numberExpr = Substitute( Name Expr( rankExpr ), Expr( Col Rank() ), Expr( Col Number() ) );

// The column expression is identical for every requested column, so it is
// built once and only evaluated inside the loop.
newFoldColumn = Substitute(
	Expr(
		dt << New Column( "Fold",
			Formula(
				If( Excluded(),
					.,
					tmp = Random Uniform();
					Floor( (_rank_ - 1) / (_number_) * _folds_ ) + 1;
				)
			)
		)
	),
	Expr( _rank_ ), Name Expr( rankExpr ),
	Expr( _number_ ), Name Expr( numberExpr ),
	Expr( _folds_ ), nw["Nfolds"]
);

// NOTE(review): every column is created with the name "Fold"; this relies on
// JMP making duplicate column names unique - confirm.
For( i = 1, i <= nw["Ncolumns"], i++,
	Eval( newFoldColumn )
);