Displaying duplicate words with Text Box markup.
@lehaofeng asked a question (already well answered) about removing duplicate strings within a bigger string. Here's some JSL that uses pattern matching to identify words, an associative array to locate duplicates, Munger() to remove or edit the duplicates, <<Markup mode on a Text Box to display the results, and <<Underline Style on a Button Box to make a web link. JMP 16 or later is required for For Each support.
A picture of the JSL output; clicking the link won't work here, but you can run the JSL below.
// Sample text to scan for repeated words. The closing four lines repeat the
// opening four lines verbatim, so every word in them occurs at least twice.
input = "’Twas brillig, and the slithy toves
Did gyre and gimble in the wabe:
All mimsy were the borogoves,
And the mome raths outgrabe.
“Beware the Jabberwock, my son!
The jaws that bite, the claws that catch!
Beware the Jubjub bird, and shun
The frumious Bandersnatch!”
He took his vorpal sword in hand;
Long time the manxome foe he sought—
So rested he by the Tumtum tree
And stood awhile in thought.
And, as in uffish thought he stood,
The Jabberwock, with eyes of flame,
Came whiffling through the tulgey wood,
And burbled as it came!
One, two! One, two! And through and through
The vorpal blade went snicker-snack!
He left it dead, and with its head
He went galumphing back.
“And hast thou slain the Jabberwock?
Come to my arms, my beamish boy!
O frabjous day! Callooh! Callay!”
He chortled in his joy.
’Twas brillig, and the slithy toves
Did gyre and gimble in the wabe:
All mimsy were the borogoves,
And the mome raths outgrabe.";
// ---------------------------------------------------------------------------
// Scan `input` and build wordToPos: lowercase word => list of packed
// (position, length) numbers, one entry per occurrence of that word.
//
// Reads:  input
// Writes: boundaryChars, boundaryPat, wordPat, patMatchWord, minLength,
//         maxLength, scale, wordToPos, subject, rc (plus scratch variables
//         position, word, wordLen, packed)
// Throws: if the pattern match never reaches the end of the text.
// ---------------------------------------------------------------------------

// Characters that separate words: space, basic punctuation, curly double
// quotes, and the line-feed / carriage-return escapes.
boundaryChars = " .,:;!?“”\!n\!r";
// A word begins right after a boundary character or at the very start of the text.
boundaryPat = Pat Any( boundaryChars ) | Pat Pos( 0 );
// A word is everything up to (not including) the next boundary character.
// The scanned subject always ends with a boundary character (see `subject`
// below), so no "rest of string" fallback is required.
wordPat = Pat Break( boundaryChars );
// Pat At stores the 0-based cursor and Pat Immediate stores the matched word
// the moment each piece matches. Pat Test (below) runs *during* the match, so
// the captures must be immediate; a conditional (>>) capture is only assigned
// after the whole match has finished.
patMatchWord = boundaryPat + Pat At( position ) + Pat Immediate( wordPat, word ) + boundaryPat;

// Only words whose length is within [minLength, maxLength] are tracked.
minLength = 2;
maxLength = 12;
// Power of ten with more digits than maxLength, so that
// (position + 1) * scale + wordLen packs both numbers into a single value
// that Floor( x / scale ) and Mod( x, scale ) can split apart again.
scale = 10 ^ (1 + Ceiling( Log10( maxLength ) ));
wordToPos = [=> ];

// Append one boundary character so the final word is recognized even when the
// text does not end in punctuation or a newline. Offsets into `subject` are
// identical to offsets into `input` because the sentinel is at the very end.
subject = input || " ";

// Pat Test always succeeds, then Pat R Pos( 0 ) fails everywhere except at the
// end of the subject. Each failure makes the matcher retry from the next start
// position, so the Pat Test body runs once for every word in the text.
rc = Pat Match(
	subject,
	patMatchWord
	+ Pat Test(
		word = Lowercase( word );
		wordLen = Length( word );
		If( minLength <= wordLen <= maxLength,
			// position is a 0-based offset; +1 makes it the 1-based index of the word's first character.
			packed = (position + 1) * scale + wordLen;
			If( !Contains( wordToPos, word ),
				wordToPos[word] = {};
				// A fractional .5 flags the first occurrence of a word.
				packed += .5;
			);
			Insert Into( wordToPos[word], packed );
		);
		1;
	)
	+ Pat R Pos( 0 )
);
If( rc == 0, Throw( "Bummer. Something has gone wrong." ) );
// Collect the packed (position, length) entries of every word that was seen
// more than once. The entry for the first occurrence is included too.
duplicates = {};
For Each( {{key, packedList}}, wordToPos,
	If( N Items( packedList ) >= 2,
		Insert Into( duplicates, packedList )
	)
);
// Order the entries from the highest packed value to the lowest, i.e. from
// the end of the text towards the beginning.
duplicates = Reverse( Sort List( duplicates ) );
// Unpack: the integer quotient is the position, the remainder is the length
// (with a fractional .5 on entries that were flagged as a first occurrence).
positions = Floor( duplicates / scale );
lengths = Mod( duplicates, scale );
// Font sizes used by the report window and by the inserted markup.
titlesize = 20;
authorsize = 14;
textsize = 12;
deletedsize = 10;

// Wrap every duplicated word in <font> markup. An integer length means a
// repeat occurrence (blue, smaller font); a length carrying a fractional part
// means the first occurrence (green).
cleaned = input;
For Each( {{startPos, packedLen}}, Across( positions, lengths ),
	nChars = Floor( packedLen );
	openTag = If( packedLen == nChars,
		Eval Insert( "<font size='^deletedsize^' color='blue'>" ),
		"<font color='green'>"
	);
	// Insert the closing tag first so the opening tag's insertion does not move it.
	cleaned = Munger( cleaned, startPos + nChars, 0, "</font>" );
	cleaned = Munger( cleaned, startPos, 0, openTag );
);
// Put a space between back-to-back carriage returns.
cleaned = Regex( cleaned, "\!r\!r", "\!r \!r", GLOBALREPLACE );
// Report window: title and author, the original text beside the marked-up
// text, then a source link and a color legend.
New Window( "Before and After",
	V List Box(
		// Centered heading lines.
		H Center Box( Text Box( "Jabberwocky", <<Set Font Size( titlesize ) ) ),
		H Center Box( Text Box( "Lewis Carroll", <<Set Font Size( authorsize ) ) ),
		// Left: untouched input. Right: same text with <font> markup rendered.
		H List Box(
			Text Box( input, <<Set Font Size( textsize ) ),
			Spacer Box( Size( 10, 1 ) ),
			Text Box( cleaned, <<Set Font Size( textsize ), <<Markup ),
			<<Padding( Left( 9 ), Right( 9 ), Top( 9 ), Bottom( 9 ) ),
			<<Margin( Left( 9 ), Right( 9 ), Top( 9 ), Bottom( 9 ) ),
			<<Border( Left( 9 ), Right( 9 ), Top( 9 ), Bottom( 9 ) )
		),
		// Footer: underlined button that opens the poem's web page, plus the legend.
		H Center Box(
			H List Box(
				Button Box(
					"poetryfoundation.org",
					Web( "https://www.poetryfoundation.org/poems/42916/jabberwocky" ),
					<<Underline Style( 1 )
				),
				Spacer Box( Size( 100, 1 ) ),
				Text Box(
					"black used once <font color='green'>green initial</font> <font color='blue'>blue subsequent</font>",
					<<Set Font Size( textsize ),
					<<Markup
				)
			)
		)
	)
);