// Builds by-group summary, standardization and normalization formula columns for :height
// in the Big Class sample table.
// Every height statistic is grouped by :sex and by Excluded(); presumably this keeps
// excluded rows from influencing the statistics computed for the non-excluded rows of the
// same :sex group (TODO confirm against the JMP Col statistic function documentation).
Names Default To Here(1);
dt = Open("$SAMPLE_DATA/Big Class.jmp");

// Add an extreme row for sex "M" so the effect of an outlier on each scaling is visible.
dt << Add Rows({name = "OUTLIER", age = 1, sex = "M", height = 500, weight = 500});

// Add constant limit columns; they are the target range for ColNormalize_limits below.
dt << New Column("LSL", Numeric, Continuous, << Set Each Value(50));
dt << New Column("USL", Numeric, Continuous, << Set Each Value(60));

// Plain by-group summary statistics.
dt << New Column("ColMean_height", Numeric, Continuous, Formula(
	Col Mean(:height, :sex, Excluded())
));
dt << New Column("ColMedian_height", Numeric, Continuous, Formula(
	Col Median(:height, :sex, Excluded())
));
dt << New Column("ColStdDev_height", Numeric, Continuous, Formula(
	Col Std Dev(:height, :sex, Excluded())
));
// Interquartile range: 75th percentile minus 25th percentile.
dt << New Column("ColIQR_height", Numeric, Continuous, Formula(
	Col Quantile(:height, 0.75, :sex, Excluded()) - Col Quantile(:height, 0.25, :sex, Excluded())
));
dt << New Column("ColStandardize_height", Numeric, Continuous, Formula(
	Col Standardize(:height, :sex, Excluded())
));

// Robust standardization: (x - median) / IQR.
// Example https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html
// using IQR here, but might be good idea to be able to change the quantiles (default to IQR)
dt << New Column("ColStandardizeRobust_height", Numeric, Continuous, Formula(
	(:height - Col Median(:height, :sex, Excluded())) / :ColIQR_height
));

// Min-max rescaling to a target range [a, b]:
//     a + (x - min) * (b - a) / (max - min)
// https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
// The numerator must subtract the group MINIMUM (not the mean); otherwise the result does
// not span [a, b].

// Target range [0, 1].
dt << New Column("ColNormalize_01", Numeric, Continuous, Formula(
	0 + (:height - Col Min(:height, :sex, Excluded())) / (Col Max(:height, :sex, Excluded()) - Col Min(:height, :sex, Excluded()))
));
// Target range [-1, 1].
dt << New Column("ColNormalize_11", Numeric, Continuous, Formula(
	-1 + (:height - Col Min(:height, :sex, Excluded())) * (1 - (-1)) / (Col Max(:height, :sex, Excluded()) - Col Min(:height, :sex, Excluded()))
));
// Target range [LSL, USL], taken per :sex group from the limit columns.
dt << New Column("ColNormalize_limits", Numeric, Continuous, Formula(
	Col Min(:LSL, :sex) + (:height - Col Min(:height, :sex, Excluded())) * (Col Max(:USL, :sex) - Col Min(:LSL, :sex)) / (Col Max(:height, :sex, Excluded()) - Col Min(:height, :sex, Excluded()))
));
// Possible extension: a Robust Sigma column, with the divider defaulting to 1.35
// (see http://www.aecouncil.com/Documents/AEC_Q001_Rev_D.pdf),
// and robust limits, with the sigma multiplier defaulting to 6
// NOTE(review): this section repeats the script that precedes it in this file. Running both
// in sequence presumably appends a second OUTLIER row and creates name-clashing columns;
// confirm whether the repetition is intentional.
//
// Builds by-group summary, standardization and normalization formula columns for :height
// in the Big Class sample table.
// Every height statistic is grouped by :sex and by Excluded(); presumably this keeps
// excluded rows from influencing the statistics computed for the non-excluded rows of the
// same :sex group (TODO confirm against the JMP Col statistic function documentation).
Names Default To Here(1);
dt = Open("$SAMPLE_DATA/Big Class.jmp");

// Add an extreme row for sex "M" so the effect of an outlier on each scaling is visible.
dt << Add Rows({name = "OUTLIER", age = 1, sex = "M", height = 500, weight = 500});

// Add constant limit columns; they are the target range for ColNormalize_limits below.
dt << New Column("LSL", Numeric, Continuous, << Set Each Value(50));
dt << New Column("USL", Numeric, Continuous, << Set Each Value(60));

// Plain by-group summary statistics.
dt << New Column("ColMean_height", Numeric, Continuous, Formula(
	Col Mean(:height, :sex, Excluded())
));
dt << New Column("ColMedian_height", Numeric, Continuous, Formula(
	Col Median(:height, :sex, Excluded())
));
dt << New Column("ColStdDev_height", Numeric, Continuous, Formula(
	Col Std Dev(:height, :sex, Excluded())
));
// Interquartile range: 75th percentile minus 25th percentile.
dt << New Column("ColIQR_height", Numeric, Continuous, Formula(
	Col Quantile(:height, 0.75, :sex, Excluded()) - Col Quantile(:height, 0.25, :sex, Excluded())
));
dt << New Column("ColStandardize_height", Numeric, Continuous, Formula(
	Col Standardize(:height, :sex, Excluded())
));

// Robust standardization: (x - median) / IQR.
// Example https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html
// using IQR here, but might be good idea to be able to change the quantiles (default to IQR)
dt << New Column("ColStandardizeRobust_height", Numeric, Continuous, Formula(
	(:height - Col Median(:height, :sex, Excluded())) / :ColIQR_height
));

// Min-max rescaling to a target range [a, b]:
//     a + (x - min) * (b - a) / (max - min)
// https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
// The numerator must subtract the group MINIMUM (not the mean); otherwise the result does
// not span [a, b].

// Target range [0, 1].
dt << New Column("ColNormalize_01", Numeric, Continuous, Formula(
	0 + (:height - Col Min(:height, :sex, Excluded())) / (Col Max(:height, :sex, Excluded()) - Col Min(:height, :sex, Excluded()))
));
// Target range [-1, 1].
dt << New Column("ColNormalize_11", Numeric, Continuous, Formula(
	-1 + (:height - Col Min(:height, :sex, Excluded())) * (1 - (-1)) / (Col Max(:height, :sex, Excluded()) - Col Min(:height, :sex, Excluded()))
));
// Target range [LSL, USL], taken per :sex group from the limit columns.
dt << New Column("ColNormalize_limits", Numeric, Continuous, Formula(
	Col Min(:LSL, :sex) + (:height - Col Min(:height, :sex, Excluded())) * (Col Max(:USL, :sex) - Col Min(:LSL, :sex)) / (Col Max(:height, :sex, Excluded()) - Col Min(:height, :sex, Excluded()))
));
// Possible extension: a Robust Sigma column, with the divider defaulting to 1.35
// (see http://www.aecouncil.com/Documents/AEC_Q001_Rev_D.pdf),
// and robust limits, with the sigma multiplier defaulting to 6