Choose Language Hide Translation Bar
Highlighted

Re: Binary Data and Correlations

Here is a script that collects RSquare (U) and Kappa from all combinations of your binary columns:

Names Default To Here( 1 );

// The analysis runs against the current data table; stop early if there is none.
dt# = Current Data Table();
If( Is Empty( dt# ), Throw( "Data table missing" ) );

// Ask the user which columns to compare (the dialog requires at least two).
dlg# = Column Dialog(
	Title( "Binary Agreement" ),
	yCol# = Col List( "Binary Columns", Min Col( 2 ) ),
	"Select columns for agreement"
);

// A Button value of -1 is treated as the user cancelling the dialog.
If( dlg#["Button"] == -1, Throw( "User cancelled" ) );

// Unpack the dialog result: remove one item from the returned list (with no
// position argument this is presumably the trailing Button entry -- confirm
// against the Remove From documentation), then evaluate what is left so that
// the yCol# assignment takes effect.
Remove From( dlg# );
Eval List( dlg# );

// Pairwise results are stored in two n-by-n matrices, one per statistic.
// Both start as the identity matrix, so the diagonal (a column compared
// with itself) stays at 1 and is never computed.
n cols# = N Items( yCol# );
r sqr u# = agree# = Identity( n cols# );

// Row/column labels for the result tables, in the order the columns were
// selected. Built in its own loop so it does not depend on the state the
// pair loop leaves its index in.
measure# = List();
For( col1# = 1, col1# <= n cols#, col1#++,
	Insert Into( measure#, yCol#[col1#] << Get Name )
);

// Visit every unordered pair of columns exactly once.
For( col1# = 1, col1# < n cols#, col1#++,
	// Start at col1# + 1: a column is never crossed with itself, and no pair
	// is re-run with X and Y swapped (which would overwrite the values
	// already stored for that pair).
	For( col2# = col1# + 1, col2# <= n cols#, col2#++,
		ct# = dt# << Contingency(
			Y( yCol#[col1#] ),
			X( yCol#[col2#] ),
			Contingency Table( 0 ),
			Mosaic Plot( 0 ),
			Tests( 1 ),
			Agreement Statistic( 1 ),
			Invisible
		);
		ctr# = ct# << Report;

		// Value read from the 4th numeric column of the "Tests" table; the
		// same value is mirrored into both triangles of the matrix.
		r sqr u#[col1#,col2#] = r sqr u#[col2#,col1#] = ctr#["Tests"][TableBox(1)][NumberColBox(4)][1];

		// The agreement report is only produced when both columns have the
		// same levels. If the lookup fails, record a missing value for this
		// pair instead of aborting the whole run.
		Try(
			agree#[col1#,col2#] = agree#[col2#,col1#] = ctr#["Kappa Coefficient"][TableBox(1)][NumberColBox(1)][1],
			agree#[col1#,col2#] = agree#[col2#,col1#] = .
		);

		// Close the invisible platform window so launches do not accumulate.
		ct# << Close Window;
	);
);

// Result window: one outline per statistic, each holding a table that
// starts with the column of measure labels.
New Window( "Binary Agreement",
	Outline Box( "RSquare (U)",
		rsq tbl = Table Box( String Col Box( "Measure", measure# ) )
	),
	Outline Box( "Kappa Coefficient",
		kappa tbl = Table Box( String Col Box( "Measure", measure# ) )
	)
);

// Append one numeric column per selected column to each table. Row index 0
// selects every row, so each box shows the full j-th column of the matrix.
For( j = 1, j <= n cols#, j++,
	label = measure#[j];
	rsq tbl << Append(
		Number Col Box( label, r sqr u#[0,j], << Set Format( 7, 4 ) )
	);
	kappa tbl << Append(
		Number Col Box( label, agree#[0,j], << Set Format( 7, 4 ) )
	);
);
Learn it once, use it forever!

View solution in original post

Highlighted
natalie_
Community Trekker

Re: Binary Data and Correlations

Okay, so I found where to find the Kappa Coefficient; it's under the "Agreement Statistic" table.  The "Agreement Statistic" table only appears when both X and Y variables have the same levels.  What does that mean?  Is there a way to check for this?

0 Kudos
Highlighted

Re: Binary Data and Correlations

More great questions.

It's really just "apples to apples" or "apples to oranges." Also, it is the difference between continuous and categorical data.

If you have a classification of intelligence and another classification of monetary assets, you can use Contingency to see if these variables are associated. (Analogous to correlated with continuous variables.) On the other hand, if you had two classifications of intelligence, you could assess the association, but you could also explore a stricter relationship: agreement. What is the proportion of times both classifications agreed?

We measure the strength of association with the odds ratio. We measure the strength of agreement with Kappa.

Learn it once, use it forever!