Hello,
I am writing a script to create multivariate normal data with a specified number of rows, mean/standard deviation vectors, and correlation structure from a set of sample data. I would like to take advantage of the Simulator in the Profiler platform to generate the data table through the following steps:
- Calculate sample mean and standard deviation for each variable.
- Generate correlation matrix from sample data.
- Simulate multivariate data using the summary statistics from (1) and (2).
Currently, my script generates the simulated data table with the appropriate means and standard deviations for each variable, but I cannot manage to maintain the correlation structure between variables correctly. I believe the cause of my issue is how I specify the [NxN correlations] with X Correlations using a variable instead of explicitly specifying the correlation matrix (e.g. [1 0.3 0, 0.3 1 0, 0 0 1]), but my troubleshooting skills have left me with no solution. Current script and expected vs. actual output provided below, any suggestions on how to achieve these steps are welcomed:
JSL:
names default to here(1);
Try( Close( Data Table(dt) ) );
If( Is Empty( dt ),
Try( dt = Open(), Throw( "No data found" ) )
);
ncol = NCol(dt);
//Dialog Window.
Dlg = New Window("New Window",
<< Modal,
BorderBox(left(3), top(2),
V List Box(
TextBox("Simulate multivariate data"),
HListBox(
VListBox(
PanelBox("Select variables",
colListData = ColListBox(all,nLines,(min(ncol,10)))
),
),
PanelBox("Cast Selected Variables",
LineupBox(NCol(2), Spacing(3),
ButtonBox("Variables", colListY << Append(colListData << Get Selected)),
colListY = ColListBox(nLines(6), "numeric"))
),
//specifications - not in use.
PanelBox("Enter Specifications",
Spacer Box(Size(10, 1)),
HListBox(
VListBox(
Spacer Box(Size(5, 10)),
lsl1 = Number Edit Box(, 3),
Spacer Box(Size(5, 10)),
lsl2 = Number Edit Box(, 3),
Spacer Box(Size(5, 10)),
lsl3 = Number Edit Box(, 3),
Spacer Box(Size(5, 10)),
lsl4 = Number Edit Box(, 3)
),
SpacerBox(Size(10,10)),
VListBox(
Spacer Box(Size(5, 10)),
usl1 = Number Edit Box(, 3),
Spacer Box(Size(5, 10)),
usl2 = Number Edit Box(, 3),
Spacer Box(Size(5, 10)),
usl3 = Number Edit Box(, 3),
Spacer Box(Size(5, 10)),
usl4 = Number Edit Box(, 3)
)
),
),
PanelBox("Action",
LineupBox(NCol(1),
// OK button
Button Box("OK",
lsl_mat = J(4,1, .);
lsl_mat[1] = Eval(lsl1 << Get);
lsl_mat[2] = Eval(lsl2 << Get);
lsl_mat[3] = Eval(lsl3 << Get);
lsl_mat[4] = Eval(lsl4 << Get);
usl_mat = J(4,1, .);
usl_mat[1] = Eval(usl1 << Get);
usl_mat[2] = Eval(usl2 << Get);
usl_mat[3] = Eval(usl3 << Get);
usl_mat[4] = Eval(usl4 << Get);
// Store summary statistics
tpListY = colListY << Get Items;
tpMean = J(4, 1, 0);
tpSD = J(4, 1, 0);
for (i=1, i <= nitems(tpListY), i++,
one_col = tpListY[i];
Meani = Col Mean(Column(dt, one_col));
SDi = Col Std Dev(Column(dt, one_col));
tpMean[i] = Meani;
tpSD[i] = SDi;
);
Dlg << CloseWindow
),
// Cancel button
Button Box("Cancel", Dlg << CloseWindow),
Spacer Box(Size(10,10)),
// Remove button
Button Box("Remove", collistY << RemoveSelected)
)
),
)
)
)
);
//correlation matrix
corr = multivariate(
Y(Eval List(tpListY)),
estimation method("Row-wise"),
matrix format("Square"),
scatterplot matrix(0)
);
corr.run = corr << run;
corr.rep = corr.run << report;
corr.mat = corr.rep["Correlations"][matrix box(1)] << get(1);
//only used to launch profiler
prof.x = J(2,4 ,1);
prof.tbl = as table(prof.x, << column names(tpListY), << invisible);
prof.tbl << new column("Y",
Formula( :TP1 + :TP2 + :TP3 + :TP4)
);
prof.tbl = current data table();
random reset(1234);
//specify multivariate data from summary statistics
sim.design = Profiler(
Y(:Y),
Invisible
);
sim.design << Simulator(
1,
Factors(
TP1 << multivariate( tpMean[1], tpSD[1]),
TP2 << multivariate( tpMean[2], tpSD[2]),
TP3 << multivariate( tpMean[3], tpSD[3]),
TP4 << multivariate( tpMean[4], tpSD[4]),
),
X Correlations(
1,
tpListY,
corr.mat
),
N Runs(10000)
);
//Make data table
report( sim.design )["Simulate to Table"][button box( 1 )] << click( 1 );
sim.dt = data table( 1 );
sim.dt << set name( "Simulated multivariate data" );
sim.dt << delete columns({"Y","Obj"});
Expected vs. actual output:
expectedvsactual