Here's some JSL that generates a table containing a script for trying out the clustering idea. The prime-number arithmetic is just a way to give each type of person a different, repeatable set of answers.
// synthesize some data with 86 rows of 24 questions where there are kinds=7
// types of people answering the questions.
// the RunMe script in the table will re-randomize the answers, color the rows
// by Kind, and run the Cluster platform. The random "noise" means the clusters
// are not perfect: the color of each row comes from "kind", but noise can push
// a row's answers too far from the ideal pattern for its kind.
// notice: the cluster platform does not use "kind" but still groups them pretty well.
// Tunable parameters read by the table definition and the RunMe script.
// noise is the threshold compared against Random Uniform() in f, i.e. the
// chance that a single answer comes out opposite to the kind's ideal answer.
noise=.15; // adding 0 noise will get perfect answers. noise>.2 is not going to find much
// people is passed to Add Rows, so it is the number of rows in the table.
people = 86; // number of people. try 500
// kinds is the upper bound of Random Integer in the "kind" column formula.
// The Hierarchical Cluster launch asks for 7 clusters on its own, so that
// number does not follow this setting automatically.
kinds=7; // number of types of people
New Table( "cluster",
Add Rows( people ),
New Script(
"RunMe",
// f( k, qn ) answers question number qn for a person of kind k.
//   k  : kind of person, 1..100 (index into the table of 100 primes below)
//   qn : question number
// Returns 1 or 0. Each kind has an "ideal" answer per question, derived from
// Mod( prime-for-this-kind * qn, 113 ); the global noise is the chance that
// the returned answer is the opposite of that ideal answer.
f = Function( {k, qn},
	// kp and v are locals so they do not leak into the global namespace
	{kp = [127 131 137 139 149 151 157 163 167 173 179 181 191 193 197 199
	211 223 227 229 233 239 241 251 257 263 269 271 277 281 283 293 307 311
	313 317 331 337 347 349 353 359 367 373 379 383 389 397 401 409 419 421
	431 433 439 443 449 457 461 463 467 479 487 491 499 503 509 521 523 541
	547 557 563 569 571 577 587 593 599 601 607 613 617 619 631 641 643 647
	653 659 661 673 677 683 691 701 709 719 727 733], // need "kinds" of these
	v},
	// pick the prime that belongs to this kind; without the [k] subscript the
	// whole matrix is multiplied and every kind gets the same answers.
	// 113 is also prime and less than the first prime above
	v = Mod( kp[k] * qn, 113 );
	// 55 is about half of 113; about half the questions will be answered with 1
	// *except* as the noise gets bigger the answer that would have been 0 or 1
	// is more likely to flip
	If( v < 55,
		Random Uniform() < (1 - noise), // ideal answer 1, flipped to 0 with probability noise
		Random Uniform() < (noise) // ideal answer 0, flipped to 1 with probability noise
	);
);
// Fill all 24 question columns for every row. Question q is stored in
// column q + 1 because the first column of the table is "kind".
For Each Row(
	For( q = 1, q <= 24, q++,
		qcol = Column( q + 1 );
		qcol[] = f( kind, q ); // empty subscript = current row
	)
);
// Color the rows by their kind so cluster membership can be compared by eye.
Current Data Table() << Color or Mark by Column(
	kind,
	Color Theme( "spectral" ),
	Continuous Scale( 0 )
);
// Cluster on the answers only; "kind" is deliberately not one of the Y columns.
Hierarchical Cluster(
	Y(
		:Q 01, :Q 02, :Q 03, :Q 04, :Q 05, :Q 06,
		:Q 07, :Q 08, :Q 09, :Q 10, :Q 11, :Q 12,
		:Q 13, :Q 14, :Q 15, :Q 16, :Q 17, :Q 18,
		:Q 19, :Q 20, :Q 21, :Q 22, :Q 23, :Q 24
	),
	Method( "Average" ), // Ward, Average, Centroid, Single, Complete
	Standardize Data( 1 ),
	Dendrogram Scale( "Distance Scale" ),
	Number of Clusters( 7 ),
	SendToReport(
		Dispatch( {}, "Dendrogram", OutlineBox, {SetHorizontal( 1 )} )
	)
);
),
// "kind" must stay the first column: the RunMe loop addresses question n
// by position as column n + 1.
New Column( "kind",Formula( Random Integer( 1, kinds ) ) ), // number of different types of people
// 24 empty answer columns, "Q 01" .. "Q 24"; the RunMe script fills them in
// and passes them by name to Hierarchical Cluster.
New Column( "Q 01"),New Column( "Q 02"),New Column( "Q 03"),New Column( "Q 04"),
New Column( "Q 05"),New Column( "Q 06"),New Column( "Q 07"),New Column( "Q 08"),
New Column( "Q 09"),New Column( "Q 10"),New Column( "Q 11"),New Column( "Q 12"),
New Column( "Q 13"),New Column( "Q 14"),New Column( "Q 15"),New Column( "Q 16"),
New Column( "Q 17"),New Column( "Q 18"),New Column( "Q 19"),New Column( "Q 20"),
New Column( "Q 21"),New Column( "Q 22"),New Column( "Q 23"),New Column( "Q 24")
);
Craige