<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Find common words in a string in Discussions</title>
    <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803332#M98019</link>
    <description>&lt;P&gt;Maybe.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;input = "CATE_N_C1_Shotp,CATE_P_C1_Shotp,CATE_P_C1_Shotp";// input string
// Purpose: count how often each token occurs in the input and remember the
// position each token has inside its comma-separated element, so the common
// tokens can afterwards be listed in element order instead of key order.
// Define boundary characters as _ and ,
boundaryChars = "_,";
// 1-based index of the current token within its comma-separated element;
// set back to 0 whenever the separator following a token is exactly ","
position = 0;
// Associative array mapping each token to the position it occupies in an element
wordToPosition = [=&amp;gt; ];
// Associative array to count word frequency
wordsToCount = [=&amp;gt; ];
// Match pattern in the input
// Each pass of the repeated sub-pattern consumes one token plus the separator
// run after it; the Pat Test body does the bookkeeping for that token.
rc = Pat Match(
	Lowercase( input ) || boundaryChars, // pre-normalize and add sentinel at end
	Pat Pos( 0 ) 
		+ Pat Repeat(
			// token goes into word, the separator run that follows goes into sep
			Pat Break( boundaryChars ) &amp;gt;&amp;gt; word + Pat Span( boundaryChars ) &amp;gt;&amp;gt; sep
			+ Pat Test(
				position += 1;
				// count this occurrence of the token
				If( Contains( wordsToCount, word ),
					wordsToCount[word] = wordsToCount[word] + 1,
					wordsToCount[word] = 1
				);
				// a token already recorded at a different position means the
				// elements do not agree on token order, so stop with an error
				If( Contains( wordToPosition, word ) &amp;amp; wordToPosition[word] != position,
					throw("word ordering not consistent")
				);
				wordToPosition[word] = position;
				// a lone comma ends the element, so numbering restarts for the next one
				if(sep==",", position = 0);
				1; // explicitly, result of pattest is 'true'
			)
		) 
	+ Pat R Pos( 0 ) // The pattern must reach the end
);
// Identify common words (those that appear in all elements)
// NOTE(review): the test below compares the total occurrence count with the
// number of elements, which presumably assumes a token occurs at most once
// per element - confirm against the real data.
elements = Words( input, "," );
totalElements = N Items( elements );
commonWords = [=&amp;gt; ];
For Each( {{word, count}}, wordsToCount, If( count == totalElements, commonWords[word] = count ) );
// Display the common words and their counts
Show( commonWords );// commonWords = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 3, "shotp" =&amp;gt; 3];
Show( wordToPosition );// wordToPosition = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 1, "n" =&amp;gt; 2, "p" =&amp;gt; 2, "shotp" =&amp;gt; 4];
// Reorder the tokens by their recorded position: Rank supplies the indices
// that put vals in ascending order, and the same indices are applied to keys.
keys = wordToPosition &amp;lt;&amp;lt; getkeys;//{"c1", "cate", "n", "p", "shotp"}
vals = wordToPosition &amp;lt;&amp;lt; getvalues;// {3, 1, 2, 2, 4}
sort = Rank( vals );// [2, 3, 4, 1, 5]
keys = keys[sort];// {"cate", "n", "p", "c1", "shotp"}
vals = vals[sort];// {1, 2, 2, 3, 4}
// Walk the position-ordered keys and show only those that are common words
For Each( {k}, keys, If( Contains( commonWords, k ), Show( k, commonWords[k] ) ) );
/*
k = "cate";
commonWords[k] = 3;
k = "c1";
commonWords[k] = 3;
k = "shotp";
commonWords[k] = 3;
*/&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The throw() might alert you to one potential problem.&lt;/P&gt;</description>
    <pubDate>Wed, 02 Oct 2024 23:52:59 GMT</pubDate>
    <dc:creator>Craige_Hales</dc:creator>
    <dc:date>2024-10-02T23:52:59Z</dc:date>
    <item>
      <title>Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803202#M98003</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have the following jsl code which prints the common words and the count.&amp;nbsp;However, it doesn't appear to be counting it correctly.&lt;/P&gt;&lt;P&gt;The output I am getting is:&lt;/P&gt;&lt;P&gt;commonWords = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 3];&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;It should be&amp;nbsp;&lt;/P&gt;&lt;P&gt;commonWords = ["c1" =&amp;gt; 3, "cate" =&amp;gt;3, "shotp" =&amp;gt; 3];&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any suggestions?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;input = "CATE_N_C1_Shotp,CATE_P_C1_Shotp,CATE_P_C1_Shotp";// input string&lt;BR /&gt;
// Purpose: tokenize the input on _ and , with a JSL pattern, count each
// lowercased token, and keep the tokens whose count equals the number of
// comma-separated elements.
// NOTE(review): this is the script exactly as posted in the question; the
// poster reports that the last token is missing from the result, so the code
// is left unchanged here.
// Define boundary characters as _ and ,
boundaryChars = "_,";
// Define the pattern for boundary and word
// A boundary is either one boundary character or the very start of the string
boundaryPat = Pat Any( boundaryChars ) | Pat Pos( 0 );
// A word is everything up to the next boundary character, or the remainder
wordPat = Pat Break( boundaryChars ) | Pat Rem();
// NOTE(review): the trailing boundaryPat requires a boundary after the word,
// and the input has no separator after its last word - this appears to be
// why the final token goes uncounted.
patMatchWord = boundaryPat + Pat Pos()&amp;gt;&amp;gt;position + wordPat&amp;gt;&amp;gt;word + boundaryPat;
// Associative array to count word frequency
wordsToCount = [=&amp;gt;];
// Match pattern in the input
rc = Pat Match( input, patMatchWord
   + Pat Test(
       word = Lowercase( word ); // Convert word to lowercase for uniformity
       // increment the counter for a known token, otherwise start it at 1
       If( Contains( wordsToCount, word ),
           wordsToCount[word] = wordsToCount[word] + 1,
           wordsToCount[word] = 1
       );
   )
   + Pat R Pos( 0 ) // The pattern must reach the end
);
// Identify common words (those that appear in all elements)
elements = Words( input, "," );
totalElements = N Items( elements );
commonWords = [=&amp;gt;];
// keep only the tokens whose total count equals the number of elements
For Each( {{word, count}}, wordsToCount,
   If( count == totalElements,
       commonWords[word] = count
   );
);
// Display the common words and their counts
Show( commonWords );&lt;BR /&gt;&lt;BR /&gt;// commonWords = ["c1" =&amp;gt; 2, "cate" =&amp;gt; 2];&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 02 Oct 2024 12:42:02 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803202#M98003</guid>
      <dc:creator>Jackie_</dc:creator>
      <dc:date>2024-10-02T12:42:02Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803258#M98004</link>
      <description>&lt;P&gt;Nice! A few changes, explained below.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;input = "CATE_N_C1_Shotp,CATE_P_C1_Shotp,CATE_P_C1_Shotp";// input string
// Purpose: walk the lowercased input one token at a time with Pat Repeat,
// count every token, and keep the tokens whose count equals the number of
// comma-separated elements.
// Define boundary characters as _ and ,
boundaryChars = "_,";
// Associative array to count word frequency
wordsToCount = [=&amp;gt; ];
// Match pattern in the input
// The boundary characters appended to the subject act as a sentinel so that
// the last token is also followed by a separator run.
rc = Pat Match(
	Lowercase( input ) || boundaryChars, // pre-normalize and add sentinel at end
	Pat Pos( 0 ) 
		+ Pat Repeat(
			// one token (stored in word) followed by one run of separators
			Pat Break( boundaryChars ) &amp;gt;&amp;gt; word + Pat Span( boundaryChars ) 
			+ Pat Test(
				// increment the counter for a known token, otherwise start it at 1
				If( Contains( wordsToCount, word ),
					wordsToCount[word] = wordsToCount[word] + 1,
					wordsToCount[word] = 1
				);
				1; // explicitly, result of pattest is 'true'
			)
		) 
	+ Pat R Pos( 0 ) // The pattern must reach the end
);
// Identify common words (those that appear in all elements)
// NOTE(review): the test compares the total occurrence count with the number
// of elements, which presumably assumes a token occurs at most once per
// element - confirm against the real data.
elements = Words( input, "," );
totalElements = N Items( elements );
commonWords = [=&amp;gt; ];
For Each( {{word, count}}, wordsToCount, If( count == totalElements, commonWords[word] = count ) );
// Display the common words and their counts
Show( commonWords );// commonWords = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 3, "shotp" =&amp;gt; 3];
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The main change is using PatRepeat to walk through the input one token (word) at a time, and adding a sentinel separator at the end. Your original code walks the string by trying to match only one word, then discovering the word is not at the end of the string, advancing the start of the match by one character and trying to match again. It misses the final "shotp" because there is no final separator. The work in pattest is simplified by pre-lowercasing at the same time the sentinel is added.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Pretty sure someone will propose a solution using the words() function, which won't need a sentinel.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;edit: Somehow I missed &lt;A href="https://en.wikipedia.org/wiki/Elephant_in_Cairo" target="_self"&gt;this sentinel from the past&lt;/A&gt;.&lt;/P&gt;</description>
      <pubDate>Wed, 02 Oct 2024 14:20:25 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803258#M98004</guid>
      <dc:creator>Craige_Hales</dc:creator>
      <dc:date>2024-10-02T14:20:25Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803311#M98012</link>
      <description>&lt;P&gt;You can also utilize JMP tables&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;Names Default To Here(1);

// Normalize case up front so that differently cased spellings count as one word.
input = Lowercase( "CATE_N_C1_Shotp,CATE_P_C1_Shotp,CATE_P_C1_Shotp" );

// One entry per comma-separated element.
elements = Words( input, "," );

// Scratch table in long format: one row per (word, element number) occurrence.
dt = New Table( "Data",
	New Column( "Word", Character, Nominal ),
	New Column( "Elementnr", Numeric, Nominal ),
	private
);

// Append the tokens of every element, tagged with the element's index.
For Each( {element, idx}, elements,
	tokens = Words( element, "_" );
	tokenCount = N Items( tokens );
	firstRow = N Rows( dt ) + 1;
	lastRow = firstRow + tokenCount - 1;
	dt &amp;lt;&amp;lt; Add Rows( tokenCount );
	dt[firstRow::lastRow, 1] = tokens;
	dt[firstRow::lastRow, 2] = Repeat( idx, tokenCount );
);

// One summary row per word, with one count column per element number.
dt_summary = dt &amp;lt;&amp;lt; Summary(
	Group( :Word ),
	N,
	Subgroup( :Elementnr ),
	Freq( "None" ),
	Weight( "None" ),
	Link to original data table( 0 ),
	private
);

// Columns 3 onward hold the per-element counts; flag where a word is present
// and add the flags up per word to get the number of elements containing it.
countCols = 3::N Cols( dt_summary );
present = dt_summary[0, countCols] &amp;gt; 0;
elementsPerWord = V Sum( present` );
commonRows = Loc( elementsPerWord &amp;gt;= N Items( elements ) );

// Word mapped to the value in the second summary column, common words only.
words = Associative Array( dt_summary[commonRows, 1], dt_summary[commonRows, 2] );

// Both tables were only scratch space.
Close( dt, no save );
Close( dt_summary, no save );

show(words);&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;There are also some small optimizations which could be done if necessary&lt;/P&gt;</description>
      <pubDate>Wed, 02 Oct 2024 18:22:42 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803311#M98012</guid>
      <dc:creator>jthi</dc:creator>
      <dc:date>2024-10-02T18:22:42Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803313#M98013</link>
      <description>&lt;P&gt;&lt;a href="https://community.jmp.com/t5/user/viewprofilepage/user-id/982"&gt;@Craige_Hales&lt;/a&gt;&amp;nbsp; Another question: is it possible to retain the order of the words?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;commonWords = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 3, "shotp" =&amp;gt; 3];&lt;/P&gt;&lt;P&gt;Should be&amp;nbsp;&lt;/P&gt;&lt;P&gt;CATE_N_C1_Shotp&lt;/P&gt;&lt;P&gt;commonWords = ["cate" =&amp;gt; 3, "c1" =&amp;gt; 3, "shotp" =&amp;gt; 3];&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 02 Oct 2024 18:32:31 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803313#M98013</guid>
      <dc:creator>Jackie_</dc:creator>
      <dc:date>2024-10-02T18:32:31Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803315#M98014</link>
      <description>&lt;P&gt;&lt;a href="https://community.jmp.com/t5/user/viewprofilepage/user-id/14366"&gt;@jthi&lt;/a&gt;&amp;nbsp;is it possible to retain the word order?&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;Should be: words = ["cate" =&amp;gt; 3, "c1" =&amp;gt; 3, "shotp" =&amp;gt; 3];&lt;/P&gt;</description>
      <pubDate>Wed, 02 Oct 2024 19:06:26 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803315#M98014</guid>
      <dc:creator>Jackie_</dc:creator>
      <dc:date>2024-10-02T19:06:26Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803332#M98019</link>
      <description>&lt;P&gt;Maybe.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;input = "CATE_N_C1_Shotp,CATE_P_C1_Shotp,CATE_P_C1_Shotp";// input string
// Purpose: count how often each token occurs in the input and remember the
// position each token has inside its comma-separated element, so the common
// tokens can afterwards be listed in element order instead of key order.
// Define boundary characters as _ and ,
boundaryChars = "_,";
// 1-based index of the current token within its comma-separated element;
// set back to 0 whenever the separator following a token is exactly ","
position = 0;
// Associative array mapping each token to the position it occupies in an element
wordToPosition = [=&amp;gt; ];
// Associative array to count word frequency
wordsToCount = [=&amp;gt; ];
// Match pattern in the input
// Each pass of the repeated sub-pattern consumes one token plus the separator
// run after it; the Pat Test body does the bookkeeping for that token.
rc = Pat Match(
	Lowercase( input ) || boundaryChars, // pre-normalize and add sentinel at end
	Pat Pos( 0 ) 
		+ Pat Repeat(
			// token goes into word, the separator run that follows goes into sep
			Pat Break( boundaryChars ) &amp;gt;&amp;gt; word + Pat Span( boundaryChars ) &amp;gt;&amp;gt; sep
			+ Pat Test(
				position += 1;
				// count this occurrence of the token
				If( Contains( wordsToCount, word ),
					wordsToCount[word] = wordsToCount[word] + 1,
					wordsToCount[word] = 1
				);
				// a token already recorded at a different position means the
				// elements do not agree on token order, so stop with an error
				If( Contains( wordToPosition, word ) &amp;amp; wordToPosition[word] != position,
					throw("word ordering not consistent")
				);
				wordToPosition[word] = position;
				// a lone comma ends the element, so numbering restarts for the next one
				if(sep==",", position = 0);
				1; // explicitly, result of pattest is 'true'
			)
		) 
	+ Pat R Pos( 0 ) // The pattern must reach the end
);
// Identify common words (those that appear in all elements)
// NOTE(review): the test below compares the total occurrence count with the
// number of elements, which presumably assumes a token occurs at most once
// per element - confirm against the real data.
elements = Words( input, "," );
totalElements = N Items( elements );
commonWords = [=&amp;gt; ];
For Each( {{word, count}}, wordsToCount, If( count == totalElements, commonWords[word] = count ) );
// Display the common words and their counts
Show( commonWords );// commonWords = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 3, "shotp" =&amp;gt; 3];
Show( wordToPosition );// wordToPosition = ["c1" =&amp;gt; 3, "cate" =&amp;gt; 1, "n" =&amp;gt; 2, "p" =&amp;gt; 2, "shotp" =&amp;gt; 4];
// Reorder the tokens by their recorded position: Rank supplies the indices
// that put vals in ascending order, and the same indices are applied to keys.
keys = wordToPosition &amp;lt;&amp;lt; getkeys;//{"c1", "cate", "n", "p", "shotp"}
vals = wordToPosition &amp;lt;&amp;lt; getvalues;// {3, 1, 2, 2, 4}
sort = Rank( vals );// [2, 3, 4, 1, 5]
keys = keys[sort];// {"cate", "n", "p", "c1", "shotp"}
vals = vals[sort];// {1, 2, 2, 3, 4}
// Walk the position-ordered keys and show only those that are common words
For Each( {k}, keys, If( Contains( commonWords, k ), Show( k, commonWords[k] ) ) );
/*
k = "cate";
commonWords[k] = 3;
k = "c1";
commonWords[k] = 3;
k = "shotp";
commonWords[k] = 3;
*/&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The throw() might alert you to one potential problem.&lt;/P&gt;</description>
      <pubDate>Wed, 02 Oct 2024 23:52:59 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803332#M98019</guid>
      <dc:creator>Craige_Hales</dc:creator>
      <dc:date>2024-10-02T23:52:59Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803347#M98022</link>
      <description>&lt;P&gt;It can. With just one example of data it is a bit annoying to try and figure out what should be done though. For example, can the same word appear multiple times in the same element? And can the order change within elements?&lt;/P&gt;</description>
      <pubDate>Thu, 03 Oct 2024 04:17:28 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803347#M98022</guid>
      <dc:creator>jthi</dc:creator>
      <dc:date>2024-10-03T04:17:28Z</dc:date>
    </item>
    <item>
      <title>Re: Find common words in a string</title>
      <link>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803464#M98041</link>
      <description>&lt;P&gt;Of course if you need an associative array, you cannot have it ordered in any other way than what JMP does BUT you can order the values you get from it by utilizing ranks.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Trying to keep with the "jmp table" solution (I made a few assumptions)&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;Names Default To Here(1);

// Purpose: find the words that are common to all comma-separated elements
// using a data table, and return them in the order of first appearance.
input = "CATE_N_C1_Shotp,CATE_P_C1_Shotp,CATE_P_C1_Shotp";
input = lowercase(input); // compare words case-insensitively

elements = Words(input, ",");

// Long-format table: one row per (word, element number) occurrence
dt = New Table("Data",
	New Column("Word", Character, Nominal),
	New Column("Elementnr", Numeric, Nominal)
);

// Append the words of every element, tagged with the element's index
For Each({element, idx}, elements,
	l = Words(element, "_");
	nr = Repeat(idx, N Items(l));
	r = N Rows(dt); // row count before this element's rows are added
	dt &amp;lt;&amp;lt; Add Rows(N Items(l));
	
	dt[r+1::r+N Items(l), 1] = l;
	dt[r+1::r+N Items(l), 2] = nr;
);

// C: number of rows (occurrences) per word
// NOTE(review): this equals the number of elements containing the word only
// if a word occurs at most once per element - confirm against the real data.
new_col1 = dt &amp;lt;&amp;lt; New Column("C", Numeric, Continuous, Formula(
	Col Number(:Elementnr, :Word)
));

// R: smallest position of the word within any element
new_col2 = dt &amp;lt;&amp;lt; New Column("R", Numeric, Continuous, Formula(
	Col Min(Col Cumulative Sum(1, :Elementnr), :Word)
));
// Evaluate the formulas, then freeze the results so that deleting rows
// afterwards does not change the computed values
dt &amp;lt;&amp;lt; run formulas;
new_col1 &amp;lt;&amp;lt; delete formula;
new_col2 &amp;lt;&amp;lt; delete formula;

// Number of distinct elements present in the table
Summarize(dt, elem = by(:Elementnr));
element_count = N Items(elem);

// Keep only words whose count matches the element count, one row per word;
// the remaining rows are in order of first appearance
dt &amp;lt;&amp;lt; Delete Rows(dt &amp;lt;&amp;lt; get rows where(:C != element_count));
dt &amp;lt;&amp;lt; Select Duplicate Rows(Match(:Word)) &amp;lt;&amp;lt; Delete Rows &amp;lt;&amp;lt; Clear Select;

aa = Associative Array(:Word, :C); // results in AA (keys come back sorted)
// Ranking gives, for each word in table order, its place in the sorted key
// list, so indexing the sorted keys with it restores the original order.
// Rank would give the sorting permutation instead, which only produces the
// right answer when that permutation happens to be its own inverse.
r = Ranking(:Word &amp;lt;&amp;lt; get values);

Close(dt, No save);

keys = (aa &amp;lt;&amp;lt; get keys)[r];
// Values do not need reordering because every common word has the same count
values = aa &amp;lt;&amp;lt; get values;

Write();

show(aa, r, keys, values);&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 03 Oct 2024 15:25:34 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Find-common-words-in-a-string/m-p/803464#M98041</guid>
      <dc:creator>jthi</dc:creator>
      <dc:date>2024-10-03T15:25:34Z</dc:date>
    </item>
  </channel>
</rss>

