<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Is there an algorithm that can quickly find such highly distributed data? in Discussions</title>
    <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748329#M92854</link>
    <description>&lt;P class=""&gt;Thanks Experts!&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;Thank you for the script file.&lt;/SPAN&gt;&lt;SPAN class=""&gt;I further learned the decision tree method.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;How can this JSL be modified to apply to other file analyses with this structure?&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;In addition, how is the red vertical line added in the following figure?&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P class=""&gt;How can get its value?&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;Thank you for spending a lot of time helping me.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="2024-04-21_18-35-52.png" style="width: 583px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63504iCCCE6A6C4EA1CA7E/image-size/large?v=v2&amp;amp;px=999" role="button" title="2024-04-21_18-35-52.png" alt="2024-04-21_18-35-52.png" /&gt;&lt;/span&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 21 Apr 2024 10:37:07 GMT</pubDate>
    <dc:creator>lala</dc:creator>
    <dc:date>2024-04-21T10:37:07Z</dc:date>
    <item>
      <title>Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748301#M92841</link>
      <description>&lt;P&gt;For example, the values marked 1 in the figure are larger,&lt;BR /&gt;The larger range above it is smaller (the range marked 2).&lt;BR /&gt;How can I automatically find out the range of mark 1 and mark 2 through calculation?&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="2024-04-20_18-48-17.png" style="width: 719px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63498i3AA8684873546351/image-size/large?v=v2&amp;amp;px=999" role="button" title="2024-04-20_18-48-17.png" alt="2024-04-20_18-48-17.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 20 Apr 2024 10:57:31 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748301#M92841</guid>
      <dc:creator>lala</dc:creator>
      <dc:date>2024-04-20T10:57:31Z</dc:date>
    </item>
    <item>
      <title>Re: Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748305#M92844</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.jmp.com/t5/user/viewprofilepage/user-id/17251"&gt;@lala&lt;/a&gt;,&lt;BR /&gt;&lt;BR /&gt;If you have only a few independent and non-correlated X's with one response Y, I think a Decision Tree should be able to uncover X ranges related to different mean response Y.&lt;BR /&gt;Decision Trees are available in the Partition platform.&lt;BR /&gt;&lt;BR /&gt;Hope this answer will help you,&lt;/P&gt;</description>
      <pubDate>Sat, 20 Apr 2024 12:35:15 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748305#M92844</guid>
      <dc:creator>Victor_G</dc:creator>
      <dc:date>2024-04-20T12:35:15Z</dc:date>
    </item>
    <item>
      <title>Re: Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748315#M92849</link>
      <description>&lt;P&gt;Thanks Experts!&lt;/P&gt;&lt;P&gt;Partition&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="2024-04-21_07-53-20.png" style="width: 733px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63499i6857A2D74FC3DA1E/image-size/large?v=v2&amp;amp;px=999" role="button" title="2024-04-21_07-53-20.png" alt="2024-04-21_07-53-20.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 20 Apr 2024 23:55:05 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748315#M92849</guid>
      <dc:creator>lala</dc:creator>
      <dc:date>2024-04-20T23:55:05Z</dc:date>
    </item>
    <item>
      <title>Re: Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748320#M92851</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.jmp.com/t5/user/viewprofilepage/user-id/17251"&gt;@lala&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I just tried the Partition platform and I think the outcomes could be what you're looking for.&lt;/P&gt;
&lt;P&gt;I used it on your data with a 20% validation set (specified in the launch dialog window) in order to know when to stop splitting data (when training and validation results remain stable) and avoid creating too many splits in the data:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Victor_G_1-1713679962216.png" style="width: 400px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63501i60582AD88FC1681F/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Victor_G_1-1713679962216.png" alt="Victor_G_1-1713679962216.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;When displaying the results with the formula saved from the platform, it's easier to spot areas with similar values :&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Victor_G_0-1713679660125.png" style="width: 400px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63500i1701D635D3AC817E/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Victor_G_0-1713679660125.png" alt="Victor_G_0-1713679660125.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;Thanks to Graph Builder, you can identify the ranges of similar values, or create a small script that creates groups based on predicted values in the datatable, and/or use the small tree view to identify the ranges in your data:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Victor_G_2-1713680128932.png" style="width: 400px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63502i366B799805387AA4/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Victor_G_2-1713680128932.png" alt="Victor_G_2-1713680128932.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Please find attached the datatable with scripts of the analysis and graphs included. I hope this complementary answer makes the use of the Partition platform clearer for you,&lt;/P&gt;</description>
      <pubDate>Sun, 21 Apr 2024 06:17:47 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748320#M92851</guid>
      <dc:creator>Victor_G</dc:creator>
      <dc:date>2024-04-21T06:17:47Z</dc:date>
    </item>
    <item>
      <title>Re: Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748329#M92854</link>
      <description>&lt;P class=""&gt;Thanks Experts!&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;Thank you for the script file.&lt;/SPAN&gt;&lt;SPAN class=""&gt;I further learned the decision tree method.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;How can this JSL be modified to apply to other file analyses with this structure?&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;In addition, how is the red vertical line added in the following figure?&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P class=""&gt;How can get its value?&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;Thank you for spending a lot of time helping me.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="2024-04-21_18-35-52.png" style="width: 583px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63504iCCCE6A6C4EA1CA7E/image-size/large?v=v2&amp;amp;px=999" role="button" title="2024-04-21_18-35-52.png" alt="2024-04-21_18-35-52.png" /&gt;&lt;/span&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 21 Apr 2024 10:37:07 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748329#M92854</guid>
      <dc:creator>lala</dc:creator>
      <dc:date>2024-04-21T10:37:07Z</dc:date>
    </item>
    <item>
      <title>Re: Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748343#M92862</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.jmp.com/t5/user/viewprofilepage/user-id/17251"&gt;@lala&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you want to reproduce the analysis on another file, you can manually launch the platform with the corresponding settings:&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Your response (here XX) in the "Y, Response" panel&lt;/LI&gt;
&lt;LI&gt;Your factor(s) (here YY) in the "X, Factor" panel&lt;/LI&gt;
&lt;LI&gt;Finally, you can set a validation portion (between 10 and 20%) in the corresponding panel (bottom left) :&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Victor_G_0-1713766238036.png" style="width: 400px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/63508i9796E29ACCBB1265/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Victor_G_0-1713766238036.png" alt="Victor_G_0-1713766238036.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;The JSL code corresponding to this analysis would be :&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;// Launch platform: Partition
Data Table( "pic" ) &amp;lt;&amp;lt; Partition(
	Y( :XX ),
	X( :YY ),
	Validation Portion( 0.2 ),
	Informative Missing( 1 )
);
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;However, I don't know how you could automate the splitting, you may have to create a metric to indicate where to stop splitting, based on the difference between R² training and R² validation, or based on the metric slope between the previous split and the new one (for R² training and validation for example).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The red vertical lines come from the prediction formula of the Decision Tree and are directly linked to the inner workings of tree-based methods. A Decision Tree will create splits in the factor(s) values based on a criterion to create more homogeneous subsets of data. For Regression Trees, this criterion is often MSE (Mean Squared Error), MAE (Mean Absolute Error) or SSE (Sum of Squared Errors).&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;The value used for splitting is determined by testing every value for every factor, so that the one which minimizes the sum of squares error (SSE) best is chosen. So by splitting your data into "chunks" where you calculate for each of this part the mean value, you minimize the difference between predicted values and actual values, and actually reduce SSE. This explains the "stairway" step profile look of the prediction formula.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;In the table, you can get the prediction values of this formula in the saved column "XX Predictor", and in the platform, you can click on the red triangle next to "Partition", go into "Save Columns" and "Save Prediction Formula". As you can see, values are the same for each range/split done, and correspond to the average of actual values in this split/group. This is quite helpful in your use case, as you can directly identify groups of similar values (corresponding to the same predicted value with the Decision Tree).&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Some resources about Regression Trees:&lt;/P&gt;
&lt;P&gt;StatQuest (Youtube) :&amp;nbsp;&lt;A href="https://youtu.be/g9c66TUylZ4?si=-oyWbyieIlfwxLoH" target="_blank" rel="noopener"&gt;https://youtu.be/g9c66TUylZ4?si=-oyWbyieIlfwxLoH&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;Interpretable Machine Learning (Christoph Molnar) :&amp;nbsp;&lt;A href="https://christophm.github.io/interpretable-ml-book/tree.html" target="_blank" rel="noopener"&gt;https://christophm.github.io/interpretable-ml-book/tree.html&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&lt;A href="https://saedsayad.com/decision_tree_reg.htm#:~:text=Decision%20tree%20builds%20regression%20or,decision%20nodes%20and%20leaf%20nodes" target="_blank" rel="noopener"&gt;https://saedsayad.com/decision_tree_reg.htm#:~:text=Decision%20tree%20builds%20regression%20or,decision%20nodes%20and%20leaf%20nodes&lt;/A&gt;.&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Hope this answer will help you,&lt;/P&gt;</description>
      <pubDate>Mon, 22 Apr 2024 07:06:08 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748343#M92862</guid>
      <dc:creator>Victor_G</dc:creator>
      <dc:date>2024-04-22T07:06:08Z</dc:date>
    </item>
    <item>
      <title>Re: Is there an algorithm that can quickly find such highly distributed data?</title>
      <link>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748356#M92869</link>
      <description>&lt;P class=""&gt;&lt;SPAN class=""&gt;Thanks!&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;This Partition module is heavily used, but with low-level duplication and no further functionality to use it.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;I run JMP using scripts.&lt;/SPAN&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;It automatically splits if you add go.&lt;/SPAN&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;p = Partition(
	Y( Column( 3 ) ),
	X( Eval( xF ) ),
	Method( "Decision Tree" ),
	Validation Portion( 0.2 ),
	Informative Missing( 1 ),
	Column Contributions( 1 )
);
p &amp;lt;&amp;lt; go;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 22 Apr 2024 07:57:29 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Is-there-an-algorithm-that-can-quickly-find-such-highly/m-p/748356#M92869</guid>
      <dc:creator>lala</dc:creator>
      <dc:date>2024-04-22T07:57:29Z</dc:date>
    </item>
  </channel>
</rss>

