cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Try the Materials Informatics Toolkit, which is designed to easily handle SMILES data. This and other helpful add-ins are available in the JMP® Marketplace
Choose Language Hide Translation Bar

Distribution Bin

First of all, I want to greet all the people in the community.
This is my first time using this kind of community tool.

In the Distributions:
1) where can I see the value of the bin?
2) What formula does JMP use to calculate the bins for the first visualization?

Thank you.
Simone
1 REPLY 1
mattf
Level V

Re: Distribution Bin

You might be interested in exploring several histogram options described in
Options for Continuous Variables in the Univariate Analysis section of the Statistics and Graphics guide.

Shadowgram and Set Bin Width allow some control options on the histogram.

Under the red triangle, the Histogram Options also has options for Show Counts and Show Percents.

An option for displaying the bin edges or cut-points would be interesting.

I've forgotten where I found the following script but it produces a nifty interactive tool for defining cut-points.

// ian.cox@jmp.com: 27Aug09

// Target data table: use the current table, or ask the user to open one if none is current
dt = Current Data Table();
If( Is Empty( dt ),
	dt = Open()
);

// Table name and dimensions, used by the dialog title and list-box sizing below
dtn = dt << Get Name;
nr = N Row( dt );
nc = N Col( dt );

// Miscellaneous
// Pixel width shared by the column list boxes
lbWidth = 130;

// Stored (unevaluated) dialogs, run when the "Recall" / "Help" buttons are pressed
notImplemented = Expr(
	Dialog( "Feature Not Implemented Yet", Button( "OK" ) )
);
helpScript = Expr(
	Dialog( "Quantises a Continuous variable onto an Ordinal variable 'by eye'", Button( "OK" ) )
);

// *********************************************************************************
// Build custom dialog in a window
// *********************************************************************************
// Launch window: the user picks one numeric column to be cut into bins.
//   colListData - list box showing every column of the table
//   colListOR   - single-line, numeric-only list box holding the chosen "Cut-Points" column
// Button scripts are stored unevaluated and run on click, so OKScript may be
// defined after this window is built.
// NOTE(review): in the posted copy the "Cut-Points" and "Cancel" button scripts were
// truncated after "<" (the rest of each message and the following separator were lost).
// "<< GetSelected" and "<< CloseWindow" are reconstructed from context - confirm
// against the original script.
customDlg =
NewWindow("Cut-Points Using "||dtn,
	BorderBox(left(3),top(2),
		VListBox(
			HlistBox(
				PanelBox("Select Column",
					colListData=ColListBox(All,width(lbWidth),nLines(min(nc,10)))
				),
				PanelBox("Cast Selected Columns into Role",
					LineUpBox(NCol(2), Spacing(3),
						// Copy the highlighted column(s) from the source list into the role list
						ButtonBox("Cut-Points", colListOR << Append(colListData << GetSelected)),
						colListOR = ColListBox(width(lbWidth),nLines(1),Numeric)
					)
				),
				PanelBox("Action",
					LineupBox(NCol(1),
						ButtonBox("OK", OKScript),
						ButtonBox("Cancel", customDlg << CloseWindow),
						TextBox(" "),
						ButtonBox("Remove", colListOR << RemoveSelected),
						ButtonBox("Recall", notImplemented),
						ButtonBox("Help", helpScript))
				)
			) // End of HListBox
		) // End of VListBox
	) // End of BorderBox
); // End of NewWindow

// *********************************************************************************
// When the user hits OK . . .
// *********************************************************************************
// OKScript: runs when the user presses OK in the launch window.
//   1. Validates that exactly one cut-points column was chosen.
//   2. Opens a Distribution of that column with an enlarged histogram frame.
//   3. Installs a MouseTrap so each mouse-up in the histogram records x as a cut-point
//      and draws a red vertical line there.
//   4. Defines OKScript2, run from the "OK" button appended to the report, which bins
//      the column at the recorded cut-points and adds a "<column> Binned" column.
// Reads globals set earlier in the script: dt, customDlg, colListOR.
OKScript = Expr(

	// Get values from the Dialog and validate them BEFORE closing it, so the list box
	// is still live when queried and the user can correct the selection after an error.
	colOR = colListOR << GetItems;

	// Are the user selections viable?
	if (NItems(colOR) != 1,
		Dialog("ERROR: You must select a Cut-Points Column", Button("OK"));
		Throw()
	);

	customDlg << CloseWindow;

	// Column name
	colORn = colOR[1];

	// Column reference
	colOR = Column(dt, colORn);

	// Get a distribution
	// NB: We use the "SubstituteInto" design pattern because we need to update the Dispatch message
	// (the "colTBD" placeholder string is replaced by the actual column name before evaluation)
	d = Expr(
		dist = dt << Distribution(
			Continuous Distribution(
				Column(colOR),
				Quantiles(0),
				Moments(0),
				Vertical(0),
				OutlierBoxPlot(0)
			),
			SendToReport(
				Dispatch(
					{"colTBD"},
					"Distrib Histogram",
					FrameBox,
					FrameSize(500, 300)
				)
			)
		)
	);
	SubstituteInto(d, "colTBD", colORn);
	// Referencing d evaluates the stored expression, launching the platform and setting dist
	d;

	// Get the report
	distRep = Report(dist);

	// Add more instructions and an OK button for when the cut points have been defined
	pb = PanelBox("Define Cut-Points ");
	tb1 = TextBox("Click once in the graphics frome to define a new cut-point. ");
	tb2 = TextBox("Select 'OK' when you have defined all cut-points.\!N ");
	bb = ButtonBox("OK", OKScript2);
	pb << Append(tb1) << Append(tb2) << Append(bb);
	distRep[OutlineBox(2)] << Append(pb);

	// Expression to be called on mouse-up in the Graphics Box - Adds a red vertical line at the chosen x value.
	// The replacement argument of SubstituteInto is evaluated, so passing x bakes the
	// clicked numeric value into the stored graphics script; each line then stays where
	// it was placed instead of following later values of x.
	addVLine = Expr(
		vl = Expr( distRep[FrameBox(1)] << AddGraphicsScript(PenColor("Red"); VLine(xTBD)) );
		SubstituteInto(vl, Expr(xTBD), x);
		vl
	);

	// Used to capture successive x values at mouse-up
	xVals = {};

	// Add the mouse trap script to the Graphics Box
	// (first argument = mouse-down script, left empty; second = mouse-up script)
	distRep[FrameBox(1)] << AddGraphicsScript(
		MouseTrap(
			{},
			Print("At mouse up x was "||Char(Round(x,2)));
			InsertInto(xVals, x);
			addVLine;
		);
	);

	// *********************************************************************************
	// OKScript2: Executes when all cut-points are defined and user hits OK
	// *********************************************************************************
	OKScript2 = Expr(

		// Nothing to do without at least one cut-point: cutPoints[1] below would fail.
		// Leave the report open so the user can click in the histogram and try again.
		if (NItems(xVals) == 0,
			Dialog("ERROR: You must define at least one Cut-Point", Button("OK"));
			Throw()
		);

		distRep << CloseWindow;

		// Turn list of cut-point values into a sorted (column) vector
		nCut = NItems(xVals);
		xVals2 = J(nCut, 1, .);
		for (i = 1, i <= nCut, i++, xVals2[i] = xVals[i]);
		cutPoints = SortAscending(xVals2);

		// Get the data to cut
		data = colOR << GetValues;

		// Apply the cut points to the data by looping over the cut points . . .
		// NB: We handle any missing values in 'data' by initialising with '.' and treating them as a special case later
		// Sized from the data just fetched rather than a row count captured at script start.
		codedData = J(NRow(data), 1, .);

		// txt will be a list of text values describing each bin
		txt = {};

		// First cut-point: everything at or below the smallest cut-point goes in bin 1.
		// Assignments are skipped when Loc finds no rows, to avoid indexing with an
		// empty matrix (a bin can be empty, e.g. a click left of the minimum).
		r = Loc(data <= cutPoints[1]);
		if (NRow(r) > 0, codedData[r] = 1);
		InsertInto(txt, "<= "||Char(cutPoints[1]));

		// Remaining cut-points: bin cp holds cutPoints[cp-1] < value <= cutPoints[cp]
		for (cp = 2, cp <= nCut, cp++,
			r = Loc((data > cutPoints[cp-1]) & (data <= cutPoints[cp]));
			if (NRow(r) > 0, codedData[r] = cp);
			InsertInto(txt, Char(cutPoints[cp-1])||" - "||Char(cutPoints[cp]))
		);

		// Last cut-point: everything above the largest cut-point goes in the final bin
		r = Loc(data > cutPoints[nCut]);
		if (NRow(r) > 0, codedData[r] = nCut + 1);
		InsertInto(txt, "> "||Char(cutPoints[nCut]));

		// Use txt to make a text version of coded data in a new list.
		// Loop over the data and remember to handle any missing values
		// (a missing input row gets an empty string rather than a bin label)
		txtData = {};
		for (n = 1, n <= NRow(data), n++,
			if (!IsMissing(data[n]),
				InsertInto(txtData, txt[codedData[n]]),
				InsertInto(txtData, "")
			)
		);

		// Add a column to dt with the cut data
		binnedCol = dt << NewColumn(colORn||" Binned", Character, Ordinal, Values(txtData));

		// Apply the value order property to the new column so bins sort by position, not alphabetically.
		binnedCol << Set Property( "Value Ordering", EvalList(txt));

		// Show the distribution of the old and new columns
		dt << Distribution(
			Continuous Distribution(
				Column(colOR),
				Quantiles(0),
				Moments(0),
				OutlierBoxPlot(0)
			),
			Nominal Distribution(
				Column(binnedCol),
				Frequencies(0)
			)
		);

	); // End of OKScript2

); // End of OKScript

Best regards,
-Matt