/* This algorithm was created according to reference
[1] https://math.stackexchange.com/questions/625359/how-are-the-values-in-the-shapiro-wilk-weight-table-calculated
The reference states, for N > 5000 this should not be used N>5000.
I did not bother to script the special cases for N=3, 4, 5.
Examples for N=10, 20, 30 and 40 were checked against tables found in the appendix
of an EPA document reference
[2] https://www.epa.gov/sites/production/files/2015-10/documents/monitoring_appendd_1997.pdf
While the coefficients did not exactly match the table values, the test statistic
matched JMP calculated Shapiro-Wilk W up to 4 decimal places. Only a few cases were tested.
The script for extracting the data from a table would have to account for exclusions.
*/
Names Default to Here(1);
cList = {"height", "weight"};
dt = Open("$Sample_Data/Big Class.jmp");
for(i=1, i<=nitems(cList), i++,
x = column(dt,cList[i]) << get as matrix; //assumes no missing or excluded values.
//get a sorted matrix to get the X order statistic.
r = Rank(x);
xo = x[r];
//compute summ of squared deviations or SS
std = Std Dev(x);
ss = (nrow(x)-1) * (std ^2); // sum((x-mean(x)) ^ 2); //alternate calculation
n = nrow(x);
//steps to compute the weights w[k] using refernce [1]
If(n<6 | n> 5000, Caption("will not compute"); Wait(3); Caption(remove); Throw());
m = Transpose(1::n);
m = Normal Quantile( (m-3/8)/(n+0.25) );
cdenom = sqrt( sum (m^2));
cn = m[n]/cdenom;
cn_1 = m[n-1]/cdenom;
p1 = Matrix(EvalList({−2.706056,4.434685,−2.071190,−0.147981,0.221157,cn}));
p2 = Matrix(EvalList({−3.582633,5.682633,−1.752461,−0.293762,0.042981,cn_1}));
u = 1/sqrt(n);
w = J(n,1);
v = Matrix(EvalList({u^5, u^4, u^3, u^2, u, 1}));
w[n] = sum( p1 :* v );
w[1] = -1*w[n];
w[n-1] = sum( p2 :* v);
w[2] = -1*w[n-1];
phi = (sum(m^2) - 2*(m[n])^2 - 2*(m[n-1])^2) / (1 - 2*(w[n])^2 - 2*(w[n-1])^2);
w[3::n-2] = m[3::n-2]/sqrt(phi);
show(w);
//check the log against reference [2] Table D4.
//Note using Table D4 the weights list only the positive half. For N=20, 10 coef are
//listed, the first ten weights are -1* these value then the positive values arranged
//ascending.
W = (sum(w :* xo))^2 / ss;
dd = dt << Distribution(Invisible,
Continuous Distribution( Column( cList[i] ),
Fit Distribution( Normal( Goodness of Fit( 1 ) ) )
)
);
_db = report(dd)[OutlineBox("Goodness-of-Fit Test")];
nw = New Window("Results - "||cList[1], VListBox(
tb = TextBox( "Calculated Value = " || char(W, 8,6)),
_db
));
tb << set FontSize(14) << Set Font Style("Bold");
nw << Journal;
dd << close window();
nw << close window();
); // end For i