BookmarkSubscribe
Choose Language Hide Translation Bar
Hegedus
Community Trekker

Select and Delete a Column Where All Rows Have The Same Value

Hi,

 

Working on an inherited script that outputs a pretty large data table.  A great number of the columns will have the same value in each row.  For example, :Fred will have all the rows = 1 while :lucy will have all the rows = "yesterday".  These columns are not useful for further analysis, so I would like to delete or hide them.

 

How would I script this?  I would like to review each column to see if every value is the same and, if so, delete it.  If some of the rows are empty, but other rows all contain the same value, the column should be kept.

 

Any starting points?

0 Kudos
1 ACCEPTED SOLUTION

Accepted Solutions
txnelson
Super User

Re: Select and Delete a Column Where All Rows Have The Same Value

I would use the Summarize() function.  Here is a little sample on how to do this:

// Keep unqualified variable names scoped to this script.
Names Default To Here( 1 );
// Build an 18-row sample table whose columns cover the cases of interest:
// all-distinct values, values with a missing entry, and fully constant columns.
dt = New Table( "Sample",
	Add Rows( 18 ),
	// "name": character column, every row holds a different value.
	New Column("name",Character,
		"Nominal",
		Set Values(
			{"KATIE", "LOUISE", "JANE", "JACLYN", "LILLIE", "BARBARA", "ALICE", "SUSAN",
			"JUDY", "ELIZABETH", "LESLIE", "CAROL", "PATTY", "MARY", "AMY", "MARTHA",
			"MARION", "LINDA"}
		)
	),
	// "age": numeric column with several distinct values and one missing value (.).
	New Column( "age" ,Numeric,
		"Ordinal",
		Format( "Fixed Dec", 5, 0 ),
		Set Values(
			[12, 12, 12, 12, 12, ., 13, 13, 14, 14, 14, 14, 14, 15, 15, 16, 16, 17]
		)
	),
	// "sex": character column with one blank row; every other row is "F".
	New Column( "sex" ,Character( 1 ),
		"Nominal",
		Set Values(
			{"F", "F", "", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
			"F", "F", "F"}
		)
	),
	// "height": numeric column with varying values and one missing value (.).
	New Column(
		"height",
		Numeric,
		"Continuous",
		Format( "Fixed Dec", 5, 0 ),
		Set Values(
			[59, ., 55, 66, 52, 60, 61, 56, 61, 62, 65, 63, 62, 62, 64, 65, 60, 62]
		)
	),
	// "weight": numeric column with varying values and no missing entries.
	New Column("weight",
		Numeric,
		"Continuous",
		Format( "Fixed Dec", 5, 0 ),
		Set Values(
			[95, 123, 74, 145, 64, 112, 107, 67, 81, 91, 142, 84, 85, 92, 112, 112, 115,
			116]
		)
	),
	// "allthesame": numeric column where every row is 1 (a constant column).
	New Column( "allthesame",
		Numeric,
		"Continuous",
		Format( "Best", 12 ),
		Set Values( [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] )
	),
	// "allcharthesame": character column where every row is "zippy" (a constant column).
	// NOTE(review): Set Selected presumably just leaves this column selected in the
	// table; it does not appear to matter for the analysis below - confirm.
	New Column( "allcharthesame",
		Character,
		"Nominal",
		Set Selected,
		Set Values(
			{"zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy",
			"zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy",
			"zippy", "zippy"}
		)
	),
	// Row-state vector: the first row carries state 1, all other rows state 0.
	Set Row States( [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ),
	Set Label Columns( :name )
);

// Find every column of dt whose rows all hold one single value, then delete them.
// Summarize() with a By() group yields one entry per distinct value in the column
// (a blank value forms its own group), so a column is constant exactly when it
// produces a single group.
colNamesList = dt << get column names( String );
colsToDeleteList = {};
For( i = 1, i <= N Items( colNamesList ), i++,
	Summarize( dt, bygroup = by( colNamesList[i] ) );
	// Only a single group means "every row is the same". A column that mixes
	// blank rows with one repeated value yields two groups and is deliberately
	// kept, as the question requires.
	If( N Items( bygroup ) == 1,
		Insert Into( colsToDeleteList, colNamesList[i] )
	);
);
Show( colsToDeleteList );
// Remove the constant columns; skip the message when nothing qualified.
If( N Items( colsToDeleteList ) > 0,
	dt << Delete Columns( colsToDeleteList )
);
Jim
0 Kudos
2 REPLIES 2
txnelson
Super User

Re: Select and Delete a Column Where All Rows Have The Same Value

I would use the Summarize() function.  Here is a little sample on how to do this:

// Keep unqualified variable names scoped to this script.
Names Default To Here( 1 );
// Build an 18-row sample table whose columns cover the cases of interest:
// all-distinct values, values with a missing entry, and fully constant columns.
dt = New Table( "Sample",
	Add Rows( 18 ),
	// "name": character column, every row holds a different value.
	New Column("name",Character,
		"Nominal",
		Set Values(
			{"KATIE", "LOUISE", "JANE", "JACLYN", "LILLIE", "BARBARA", "ALICE", "SUSAN",
			"JUDY", "ELIZABETH", "LESLIE", "CAROL", "PATTY", "MARY", "AMY", "MARTHA",
			"MARION", "LINDA"}
		)
	),
	// "age": numeric column with several distinct values and one missing value (.).
	New Column( "age" ,Numeric,
		"Ordinal",
		Format( "Fixed Dec", 5, 0 ),
		Set Values(
			[12, 12, 12, 12, 12, ., 13, 13, 14, 14, 14, 14, 14, 15, 15, 16, 16, 17]
		)
	),
	// "sex": character column with one blank row; every other row is "F".
	New Column( "sex" ,Character( 1 ),
		"Nominal",
		Set Values(
			{"F", "F", "", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
			"F", "F", "F"}
		)
	),
	// "height": numeric column with varying values and one missing value (.).
	New Column(
		"height",
		Numeric,
		"Continuous",
		Format( "Fixed Dec", 5, 0 ),
		Set Values(
			[59, ., 55, 66, 52, 60, 61, 56, 61, 62, 65, 63, 62, 62, 64, 65, 60, 62]
		)
	),
	// "weight": numeric column with varying values and no missing entries.
	New Column("weight",
		Numeric,
		"Continuous",
		Format( "Fixed Dec", 5, 0 ),
		Set Values(
			[95, 123, 74, 145, 64, 112, 107, 67, 81, 91, 142, 84, 85, 92, 112, 112, 115,
			116]
		)
	),
	// "allthesame": numeric column where every row is 1 (a constant column).
	New Column( "allthesame",
		Numeric,
		"Continuous",
		Format( "Best", 12 ),
		Set Values( [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] )
	),
	// "allcharthesame": character column where every row is "zippy" (a constant column).
	// NOTE(review): Set Selected presumably just leaves this column selected in the
	// table; it does not appear to matter for the analysis below - confirm.
	New Column( "allcharthesame",
		Character,
		"Nominal",
		Set Selected,
		Set Values(
			{"zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy",
			"zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy", "zippy",
			"zippy", "zippy"}
		)
	),
	// Row-state vector: the first row carries state 1, all other rows state 0.
	Set Row States( [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ),
	Set Label Columns( :name )
);

// Find every column of dt whose rows all hold one single value, then delete them.
// Summarize() with a By() group yields one entry per distinct value in the column
// (a blank value forms its own group), so a column is constant exactly when it
// produces a single group.
colNamesList = dt << get column names( String );
colsToDeleteList = {};
For( i = 1, i <= N Items( colNamesList ), i++,
	Summarize( dt, bygroup = by( colNamesList[i] ) );
	// Only a single group means "every row is the same". A column that mixes
	// blank rows with one repeated value yields two groups and is deliberately
	// kept, as the question requires.
	If( N Items( bygroup ) == 1,
		Insert Into( colsToDeleteList, colNamesList[i] )
	);
);
Show( colsToDeleteList );
// Remove the constant columns; skip the message when nothing qualified.
If( N Items( colsToDeleteList ) > 0,
	dt << Delete Columns( colsToDeleteList )
);
Jim
0 Kudos
Highlighted
Hegedus
Community Trekker

Re: Select and Delete a Column Where All Rows Have The Same Value

Thank you very much. Works like a champ.
Andy
0 Kudos