This isn't really it either, but maybe closer. I colored these by selecting a root node in the branch and then coloring the selected rows in the data table. It took a bit of hand-curating the stop words to make a pretty picture.
Transposed
// Open the full book table. It holds far more documents (5000) than we want
// to cluster, so only a small subset (about 36 rows) is kept below.
dt1 = Open( "f:/gutenberg/books5000.jmp" );

// Select documents from a handful of Loc Class prefixes that might separate
// into distinct categories.
dt1 << Select Where(
	Starts With( Loc Class, "D501" ) |
	Starts With( Loc Class, "TX:" ) |
	Starts With( Loc Class, "Q" ) |
	Starts With( Loc Class, "P" )
);

// Put the selected rows in their own table, then close the big table unsaved.
dt2 = dt1 << Subset( Selected Rows( 1 ) );
Close( dt1, No Save );

// Remember which columns exist before any document-term columns are added;
// later steps use this count to find the newly added columns.
originalnames = dt2 << Get Column Names();
// Launch Text Explorer on :text; the entire document is in one cell of the row.
te = dt2 << Text Explorer(
	Text Columns( :text ),
	// Hand-curated stop words (terms and phrases excluded from the analysis).
	Add Stop Words(
		{
			"agreement", "almost", "another", "company", "copyright holder", "electronic",
			"foundation", "gutenberg", "literary archive", "little", "person or entity", "project",
			"public domain", "research", "without", "ebooks", "enough", "though",
			"rather", "better", "common", "possible", "weight", "present",
			"series", "necessary", "placed", "therefore", "towards", "footnote",
			"something", "slowly", "around", "behind", "looking", "seemed",
			"nothing", "probably", "called", "easily", "distributing", "paragraph"
		}
	),
	Minimum Characters per Word( 6 ),
	Stemming( "no stemming" ), // alternative: "Stem for Combining"
	Language( "English" )
);

// Alternative kept for reference (TF IDF weighting). Note that the column
// filter further down uses All() for BINARY weighting and would use !Any()
// for this variant.
//te << Save Document Term Matrix( Maximum Number of Terms( 300 ), Minimum Term Frequency( 25 ), Weighting( "TF IDF" ) );

// Add the document-term matrix columns to dt2; with BINARY weighting each
// cell is 0 or 1 depending on whether the term occurs in the document.
te << Save Document Term Matrix(
	Maximum Number of Terms( 100 ),
	Minimum Term Frequency( 25 ),
	Weighting( "BINARY" )
);

// The Text Explorer window is no longer needed once the columns are saved.
te << Close Window;
// Remove "all-connected" term columns: a term whose column is nonzero in every
// row occurs in every document and so cannot help separate them.
allnames = dt2 << Get Column Names();
firstNewCol = N Items( originalnames ) + 1; // index of the first column added after originalnames was captured
iCol = N Items( allnames );
// Walk from the last column back to the first added one so that deleting a
// column never shifts the index of a column that has not been examined yet.
While( iCol >= firstNewCol,
	// Variant for non-binary weighting: If( !Any( dt2[0, iCol] ), ...
	If( All( dt2[0, iCol] ), // dt2[0, iCol] is every row of column iCol
		dt2 << Delete Columns( iCol )
	);
	iCol -= 1;
);

// Refresh the name list after the deletions and record the index range of the
// surviving term columns.
allnames = dt2 << Get Column Names();
cols = firstNewCol :: N Items( allnames );
//
//dt2 << Hierarchical Cluster( Y( allnames[cols] ),
// Label( Transform Column( "Label", Character, Formula( Left( left(LoC Class,4)||:Subject||:Title, 20 ) ) ) ), // build your own identifier here
// Method( "Ward" ), Standardize Data( 1 ), Dendrogram Scale( "Distance Scale" ),
// Number of Clusters( 4 ), Constellation Plot( 1 ), Show Dendrogram( 0 ),
// SendToReport(Dispatch({"Constellation Plot"},"Clust Hier",FrameBox,{Frame Size( 1056, 716 )}))
//);
// Transpose the surviving term columns of dt2 into a new table, dt3.
// Only the columns in allnames[cols] are transposed.
//   Label( :Title )              - dt2's :Title values label the transposed columns.
//   Label column name( "Title" ) - the output column that holds the original
//                                  column names is itself named "Title".
// The message is sent straight to dt2. A pasted Data Table( "Subset of books5000" )
// reference in the send chain is unnecessary and would tie the script to the
// subset window's auto-generated name.
dt3 = dt2 << Transpose(
	columns( allnames[cols] ),
	Label( :Title ),
	Label column name( "Title" ),
	Output Table( "Transpose of Subset of books5000" )
);
// Cluster the rows of the transposed table dt3 and show the result as a
// constellation plot (the dendrogram itself is hidden).
dt3 << Hierarchical Cluster(
// Use every column from the 2nd through the last as a clustering variable;
// column 1 is skipped (presumably the "Title" label column created by the
// transpose - confirm against dt3).
Y( (dt3 << getColumnNames)[2 :: N Cols( dt3 )] ),
// Label points with :Title minus its last 7 characters.
// NOTE(review): the 7 presumably matches a suffix such as " Binary" appended to
// term column names by the document-term matrix step - confirm before changing
// the weighting or the column naming.
Label( Transform Column( "Transform[Title]", Nominal, Formula( Left( :Title, Length( :Title ) - 7 ) ) ) ),
Method( "Ward" ),
Standardize Data( 1 ),
Show Dendrogram( 0 ),
Dendrogram Scale( "Distance Scale" ),
Number of Clusters( 13 ),
Constellation Plot( 1 ),
// Resize the constellation plot frame to 948 x 827.
SendToReport( Dispatch( {"Constellation Plot"}, "Clust Hier", FrameBox, {Frame Size( 948, 827 )} ) )
);
Craige