<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: remove duplicate Columns in Discussions</title>
    <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41715#M24319</link>
    <description>&lt;P&gt;Here's a brute force (and untested) alternative that may or may not be 'better' for the intended use:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;NamesDefaultToHere(1);

dt = CurrentDataTable();

// Make a matrix 'mat' holding pairwise equality of column values
n = NCol(dt);
mat = J(n, n, .);
for(c1 = 1, c1 &amp;lt;=n, c1++,
	for(c2 = c1+1, c2 &amp;lt;= n, c2++,
		c1Vals = Column(dt, c1) &amp;lt;&amp;lt; getValues;
		c2Vals = Column(dt, c2) &amp;lt;&amp;lt; getValues;
		mat[c1, c2] = all(c1Vals == c2Vals);
	);
);

// Convert 'mat' to a table
cols = dt &amp;lt;&amp;lt; getColumnNames("String");
dtn = dt &amp;lt;&amp;lt; getName;
dt2 = AsTable(mat, &amp;lt;&amp;lt; columnNames(cols));
dt2 &amp;lt;&amp;lt; setName("Duplicate Columns in "||dtn);
dt2 &amp;lt;&amp;lt; NewColumn("Column", Character, Values(cols));
dt2 &amp;lt;&amp;lt; moveSelectedColumns({"Column"}, ToFirst);&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Mon, 10 Jul 2017 09:23:20 GMT</pubDate>
    <dc:creator>ian_jmp</dc:creator>
    <dc:date>2017-07-10T09:23:20Z</dc:date>
    <item>
      <title>remove duplicate Columns</title>
      <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/39704#M23221</link>
      <description>&lt;P&gt;Hi:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a huge data table with 400+ columns and 50,000 rows. I have 30-40 duplicate columns and I would like to remove them from my data table. Any ideas?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 28 May 2017 07:15:20 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/39704#M23221</guid>
      <dc:creator>KR</dc:creator>
      <dc:date>2017-05-28T07:15:20Z</dc:date>
    </item>
    <item>
      <title>Re: remove duplicate Columns</title>
      <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/39706#M23223</link>
      <description>&lt;P&gt;I am not aware of any builtin JMP method to do this. &amp;nbsp;However, you can use the RSquare statistic to determine if the columns are identical.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you are going to do this interactively, the continuous columns can be identified by using&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;Analyze==&amp;gt;Multivariate Methods==&amp;gt;Multivariate&lt;/P&gt;
&lt;P&gt;It will give you a correlation matrix that you can output into a data table, and then use it to identify which columns have a correlation of 1.&lt;/P&gt;
&lt;P&gt;The ordinal/nominal columns can be identified by creating a "Make Combined Data Table" from the RSquare table in Fit Y by X. &amp;nbsp;Once again, an RSquare of 1.0 will give you the clue that the columns are identical.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Now if you are going to script a solution, the above can be fairly easily automated.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Maybe another community member will have a better solution&lt;/P&gt;</description>
      <pubDate>Sun, 28 May 2017 12:00:11 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/39706#M23223</guid>
      <dc:creator>txnelson</dc:creator>
      <dc:date>2017-05-28T12:00:11Z</dc:date>
    </item>
    <item>
      <title>Re: remove duplicate Columns</title>
      <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41618#M24278</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I encountered the same problem&amp;nbsp;today, and here is my script to remove the duplicated columns.&lt;/P&gt;&lt;P&gt;It really takes time&amp;nbsp;if you have a table with thousands of rows. There should be a more efficient way to do it; I hope someone can figure it out and share.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;Names Default To Here( 1 );
	
col_name = dt &amp;lt;&amp;lt; get column names;
dup_col = {}; //store the duplicated column name
same_c = 0; //to record how many rows are the same while doing columns comparison

/* When the elements are the same in the same row for 2 columns, same_c will be added by 1*/
/*If the same_c is equal to the total row number, then the 2 columns are the same. Insert the column name into dup_col*/  
For( i = 1, i &amp;lt;= (N Cols( dt ) - 1), i++,
	For( k = i + 1, k &amp;lt;= N Cols( dt ), k++,
		(For Each Row( If( :(col_name[i])[Row()] == :(col_name[k])[Row()], same_c++, Wait( 0 ) ) ) ; 
			If( same_c == N Rows( dt ),	Insert Into( dup_col, col_name[k] ), Wait( 0 )) ; 
			same_c = 0 ; )
	)
);

dt &amp;lt;&amp;lt; Delete Column( Eval( dup_col ) );
// delete the duplicated columns&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 07 Jul 2017 13:04:40 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41618#M24278</guid>
      <dc:creator>WHTseng</dc:creator>
      <dc:date>2017-07-07T13:04:40Z</dc:date>
    </item>
    <item>
      <title>Re: remove duplicate Columns</title>
      <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41620#M24279</link>
      <description>&lt;P&gt;Why do you have so many duplicate columns? Can this result be prevented?&lt;/P&gt;</description>
      <pubDate>Fri, 07 Jul 2017 13:20:22 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41620#M24279</guid>
      <dc:creator>Mark_Bailey</dc:creator>
      <dc:date>2017-07-07T13:20:22Z</dc:date>
    </item>
    <item>
      <title>Re: remove duplicate Columns</title>
      <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41683#M24303</link>
      <description>&lt;P&gt;You can use an &lt;A href="https://en.wikipedia.org/wiki/MD5" target="_self"&gt;MD5 checksum&lt;/A&gt;, generated by the &lt;FONT face="courier new,courier"&gt;&lt;A href="http://www.jmp.com/support/help/13-1/Utility_Functions.shtml" target="_self"&gt;Blob MD5()&lt;/A&gt; &lt;/FONT&gt;function to generate a hash of the values in a column and then compare checksums across columns.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You can use an associative array to check for duplicate md5 checksums as they are generated to avoid having to compare every checksum with every other checksum yourself.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;dt = New Table( "Some Duplicate Columns",
	Add Rows( 1000 ),
	New Column( "Column 1", Numeric, "Continuous", Format( "Best", 12 ), Formula( Random Normal() ) ),
	New Column( "Copy of Column 1", Numeric, "Continuous", Format( "Best", 12 ), Formula( :Column 1 ) ), 

	New Column( "Not a copy of Column 1",
		Numeric,
		"Continuous",
		Format( "Best", 12 ),
		Formula( If( Row() == 1, 1, :Column 1 ) )
	),
	New Column( "Character copy of Column 1", Character, "Nominal", Formula( Char( :Column 1 ) ) ),
	New Column( "Character copy of Copy of Column 1",
		Character,
		"Nominal",
		Formula( Char( :Copy of Column 1 ) ),

	)
);

//get values into matrix (numeric columns) or list (character columns)
c1 = :column 1 &amp;lt;&amp;lt; get values;

c2 = :Copy of Column 1 &amp;lt;&amp;lt; get values;

//avoid having to check column data types when converting the values to blob
//by converting matrix or list to character
//
//if you're not lazy you could use the Matrix to Blob() function for numeric columns
//and the Char to Blob() function for character columns
char_c1 = Char( c1 );
char_c2 = Char( c2 );

c1_md5 = Blob MD5( Char To Blob( char_c1 ) );
c2_md5 = Blob MD5( Char To Blob( char_c2 ) );

Show( c1_md5 == c2_md5 );

//check the whole data table using hex version of md5 as keys of associative array

aa = Associative Array();

For( i = 1, i &amp;lt;= N Col( dt ), i++,
	md5 = Hex( Blob MD5( Char To Blob( Char( Column( dt, i ) &amp;lt;&amp;lt; get values ) ) ) );
	
	//check to see if the associate array already has a key named with this md5
	If( aa &amp;lt;&amp;lt; Contains( md5 ),
	
		//aa does contain md5
		Print( aa[md5] || " is a duplicate of " || (Column( dt, i ) &amp;lt;&amp;lt; get name) ),
		
		//aa does not contain md5
		//store a key named with this md5 with a value of the column name
		aa[md5] = Column( dt, i ) &amp;lt;&amp;lt; get name;
	);
);&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 07 Jul 2017 21:09:39 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41683#M24303</guid>
      <dc:creator>Jeff_Perkinson</dc:creator>
      <dc:date>2017-07-07T21:09:39Z</dc:date>
    </item>
    <item>
      <title>Re: remove duplicate Columns</title>
      <link>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41715#M24319</link>
      <description>&lt;P&gt;Here's a brute force (and untested) alternative that may or may not be 'better' for the intended use:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;NamesDefaultToHere(1);

dt = CurrentDataTable();

// Make a matrix 'mat' holding pairwise equality of column values
n = NCol(dt);
mat = J(n, n, .);
for(c1 = 1, c1 &amp;lt;=n, c1++,
	for(c2 = c1+1, c2 &amp;lt;= n, c2++,
		c1Vals = Column(dt, c1) &amp;lt;&amp;lt; getValues;
		c2Vals = Column(dt, c2) &amp;lt;&amp;lt; getValues;
		mat[c1, c2] = all(c1Vals == c2Vals);
	);
);

// Convert 'mat' to a table
cols = dt &amp;lt;&amp;lt; getColumnNames("String");
dtn = dt &amp;lt;&amp;lt; getName;
dt2 = AsTable(mat, &amp;lt;&amp;lt; columnNames(cols));
dt2 &amp;lt;&amp;lt; setName("Duplicate Columns in "||dtn);
dt2 &amp;lt;&amp;lt; NewColumn("Column", Character, Values(cols));
dt2 &amp;lt;&amp;lt; moveSelectedColumns({"Column"}, ToFirst);&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 10 Jul 2017 09:23:20 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/remove-duplicate-Columns/m-p/41715#M24319</guid>
      <dc:creator>ian_jmp</dc:creator>
      <dc:date>2017-07-10T09:23:20Z</dc:date>
    </item>
  </channel>
</rss>

