<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Randomly Subselect into Groups and get mean in Discussions</title>
    <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/515784#M74251</link>
    <description>&lt;P&gt;Hi,&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;I have a dataset of ~2000 individuals stratified by 3 categories (gender/age/ethnicity). I want to get, "pooled", values for these individuals by taking ~8 individuals from each gender/age/ethnicity and finding the average. A caveat is that there are more individuals in certain category bins than the others. Attached is a breakdown of all the individuals I got and their grouping. For instance, category 1-1-1 has 120 individuals, so I could create 8*15 groups of random individuals from these. What if I have a number not a multiple of 8?&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;How can I do this all in one step? In the end we'd have around 200 pools.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Screen Shot 2022-06-29 at 2.26.01 PM.png" style="width: 400px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/43691i9AA780239D4E3E7C/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Screen Shot 2022-06-29 at 2.26.01 PM.png" alt="Screen Shot 2022-06-29 at 2.26.01 PM.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt; &lt;/P&gt;</description>
    <pubDate>Fri, 09 Jun 2023 17:02:56 GMT</pubDate>
    <dc:creator>MikeDereviankin</dc:creator>
    <dc:date>2023-06-09T17:02:56Z</dc:date>
    <item>
      <title>Randomly Subselect into Groups and get mean</title>
      <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/515784#M74251</link>
      <description>&lt;P&gt;Hi,&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;I have a dataset of ~2000 individuals stratified by 3 categories (gender/age/ethnicity). I want to get, "pooled", values for these individuals by taking ~8 individuals from each gender/age/ethnicity and finding the average. A caveat is that there are more individuals in certain category bins than the others. Attached is a breakdown of all the individuals I got and their grouping. For instance, category 1-1-1 has 120 individuals, so I could create 8*15 groups of random individuals from these. What if I have a number not a multiple of 8?&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;How can I do this all in one step? In the end we'd have around 200 pools.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Screen Shot 2022-06-29 at 2.26.01 PM.png" style="width: 400px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/43691i9AA780239D4E3E7C/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Screen Shot 2022-06-29 at 2.26.01 PM.png" alt="Screen Shot 2022-06-29 at 2.26.01 PM.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt; &lt;/P&gt;</description>
      <pubDate>Fri, 09 Jun 2023 17:02:56 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/515784#M74251</guid>
      <dc:creator>MikeDereviankin</dc:creator>
      <dc:date>2023-06-09T17:02:56Z</dc:date>
    </item>
    <item>
      <title>Re: Randomly Subselect into Groups and get mean</title>
      <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516011#M74266</link>
      <description>&lt;P&gt;I am not totally sure I have created a script that does what you want, but it may give you a good start.&amp;nbsp; Here is what it does&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;It finds the gender/age/ethnicity group that has the smallest population.&amp;nbsp; This group will be the limiter on how many pools can be created since no rows can appear in more than one pool.&lt;/LI&gt;
&lt;LI&gt;It then tries to find the pool size that minimized the number of non-included rows for that sub group.&amp;nbsp; It does this by walking up the group size from 8 to 21, and finds the smallest remainder(waste).&lt;/LI&gt;
&lt;LI&gt;It then creates a working table from the original table.&lt;/LI&gt;
&lt;LI&gt;It loops through the working table,&amp;nbsp;
&lt;OL&gt;
&lt;LI&gt;Creates a stratified random sample&amp;nbsp;&lt;/LI&gt;
&lt;LI&gt;Adds the new sample to the final pool data table&lt;/LI&gt;
&lt;LI&gt;Deletes from the working data table, the rows that were selected for the latest subsetted random data table.&lt;/LI&gt;
&lt;LI&gt;Repeats as many times as possible to get as many of the smallest subgroup records as possible into the final pooled data table.&lt;/LI&gt;
&lt;/OL&gt;
&lt;/LI&gt;
&lt;/OL&gt;
&lt;P&gt;Here is the JSL&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;Names Default To Here( 1 );
dt = Current Data Table();

// Find smallest subgroup
Summarize( dt, subGroup = by( :age, :gender, :ethnicity ), size = Count( :age ) );
smallest = Min( size );

// Find best group size
minLost = 999999999;
minLostSize = .;
For( i = 8, i &amp;lt;= 21, i++,
	If( Mod( smallest, i ) &amp;lt; minLost,
		minLost = Mod( smallest, i );
		minLostSize = i;
	)
);

// Create a working data table
dt2 = dt &amp;lt;&amp;lt; subset( selected rows( 0 ), selected columns( 0 ), output table( "Working" ) );
dt2 &amp;lt;&amp;lt; New Column( "__row__", set each value( Row() ) );

// Create start of final data table
dt2 &amp;lt;&amp;lt; select rows( 1 );
dtPool = dt2 &amp;lt;&amp;lt; subset( selected rows( 1 ), selected columns( 0 ), output table( "Final" ) );
dtPool &amp;lt;&amp;lt; select rows( 1 );
dtPool &amp;lt;&amp;lt; delete rows;
dtPool &amp;lt;&amp;lt; New Column( "Pool Number" );
thePoolNumber = 0;

// Create data pools
For( i = 1, i &amp;lt;= Floor( smallest / minLostSize ), i++,
	dtTemp = dt2 &amp;lt;&amp;lt; Subset(
		Sample Size( minLostSize ),
		Selected columns only( 0 ),
		Stratify( :ethnicity, :Age, :Gender )
	);
	thePoolNumber = thePoolNumber + 1;
	dtTemp &amp;lt;&amp;lt; New Column( "Pool Number", set each value( thePoolNumber ) );

	dtPool = dtPool &amp;lt;&amp;lt; concatenate( dtTemp, append to first table( 1 ) );

// Remove selected rows from working table
	dtRemove = dtTemp &amp;lt;&amp;lt; subset( selected rows( 0 ), columns( __row__ ) );
	dtRemove &amp;lt;&amp;lt; New Column( "Delete", character, set each value( "Yes" ) );

	dt2 = dt2 &amp;lt;&amp;lt; Update( With( dtRemove ), Match Columns( :__row__ = :__row__ ) );
	dt2 &amp;lt;&amp;lt; select where( :Delete == "Yes" );
	dt2 &amp;lt;&amp;lt; delete rows;
	dt2 &amp;lt;&amp;lt; delete columns( :delete );

// Cleanup temporary tables
	Close( dtTemp, nosave );
	Close( dtRemove, nosave );
);

// Delete the Working data table
Close( dt2, nosave );
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I tested this on a sample data table I created so it may have some issues with your actual table.&amp;nbsp; I have annotated the code, so hopefully you will be able to learn what I have done, and be able to make adjustments to get it to work exactly as you need it.&lt;/P&gt;</description>
      <pubDate>Thu, 30 Jun 2022 04:31:03 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516011#M74266</guid>
      <dc:creator>txnelson</dc:creator>
      <dc:date>2022-06-30T04:31:03Z</dc:date>
    </item>
    <item>
      <title>Re: Randomly Subselect into Groups and get mean</title>
      <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516187#M74277</link>
      <description>&lt;P&gt;&lt;BR /&gt;Hi Jim,&lt;BR /&gt;&lt;BR /&gt;This script results in a blank table. What I'm hoping to get out of this is a new column in the original data table called, "Pool Number". One thing to note is that age is not a continuous numeric variable. It's a categorical variable between numbers 1 - 4.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Once, I have all the samples in the original dataset categorized within their pool, I can calculate the mean pool value by taking the mean of the ~8 samples in the pool.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;I've attached the file as a reference.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;The working table makes sense in the script; you're creating a column with each row having a unique value. The creation of the final dataset leads to a blank datasheet and I do not see where this error is arising from.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 30 Jun 2022 13:58:43 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516187#M74277</guid>
      <dc:creator>MikeDereviankin</dc:creator>
      <dc:date>2022-06-30T13:58:43Z</dc:date>
    </item>
    <item>
      <title>Re: Randomly Subselect into Groups and get mean</title>
      <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516393#M74295</link>
      <description>&lt;P&gt;My JSL completes successfully if the potential pool size is reduced to 6.&amp;nbsp; My code requires that at least 1 sample has to be able to be pulled from each sub group, and since subgroup, 1 2 4&amp;nbsp; &amp;nbsp;only has 6 observations(rows), the code I wrote to find the best sample size had to be modified to have a pool size of 6.&lt;/P&gt;
&lt;P&gt;But regardless of that, I am apparently not understanding how you are determining your pools.&amp;nbsp; I was equating your pool, to a random sample, with a given sample size, for each subgroup of data.&amp;nbsp; This turns out to be a false assumption on my part.&lt;/P&gt;
&lt;P&gt;Are you creating pools independently for each subgroup?&amp;nbsp; That is, for subgroup&amp;nbsp; 1 1 1, there are 120 rows of data.&amp;nbsp; If the pool size is 8, do you create15 pools?&amp;nbsp; And then for group&amp;nbsp; 1 1 2 which has 50 row, you would create 6 pools, with 4 rows not being selected for a pool?&lt;/P&gt;
&lt;P&gt;Is this how you creat your pools?&lt;/P&gt;
&lt;P&gt;Here is the break down of all of the Gender/ethnicity/age subgroups.&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="txnelson_0-1656618657712.png" style="width: 341px;"&gt;&lt;img src="https://community.jmp.com/t5/image/serverpage/image-id/43716iABDBB030B8632CEE/image-dimensions/341x913?v=v2" width="341" height="913" role="button" title="txnelson_0-1656618657712.png" alt="txnelson_0-1656618657712.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;My current JSL does not do what you want,&amp;nbsp; &amp;nbsp;Please give me a more precise explanation of how to create pools, and I will see what needs to be done to put together a workable script.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 30 Jun 2022 19:55:00 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516393#M74295</guid>
      <dc:creator>txnelson</dc:creator>
      <dc:date>2022-06-30T19:55:00Z</dc:date>
    </item>
    <item>
      <title>Re: Randomly Subselect into Groups and get mean</title>
      <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516662#M74310</link>
      <description>&lt;P&gt;Hi Jim,&lt;BR /&gt;&lt;BR /&gt;The subgroups are created by having ~ 8 individuals in each pool. You are correct regarding 1-1-1. This group has 120 individuals, so if we were to pool these individuals randomly they should fit into exactly 15 pools. For group 1-1-2, which has 50 rows, you would create 6 pools of 8 individuals and the remaining 4 would be put into their own pool.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Does that make sense? We want all rows in the data table to be associated with a specific pool number.&lt;/P&gt;</description>
      <pubDate>Fri, 01 Jul 2022 11:10:13 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516662#M74310</guid>
      <dc:creator>MikeDereviankin</dc:creator>
      <dc:date>2022-07-01T11:10:13Z</dc:date>
    </item>
    <item>
      <title>Re: Randomly Subselect into Groups and get mean</title>
      <link>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516809#M74329</link>
      <description>&lt;P&gt;Below is a script that works on your sample data table, creating all of the Pools you want.&amp;nbsp; Please take the time to study the script so you can make any modifications you may need to adjust it to it's final form&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-jsl"&gt;Names Default To Here( 1 );
dt = Current Data Table();

// Set the size of the pools
poolSize = 8;
thePoolNumber = 0;

// Add row number to data table
dt &amp;lt;&amp;lt; New Column( "__Row__", Set Each Value( Row() ) );

// Create a working data table
dt2 = dt &amp;lt;&amp;lt; subset(
	selected rows( 0 ),
	columns( {"Gender", "Ethnicity", "Age", "__Row__"} ),
	output table( "Working" )
);

// Create start of final data table
dt2 &amp;lt;&amp;lt; select rows( 1 );
dtPool = dt2 &amp;lt;&amp;lt; subset( selected rows( 1 ), selected columns( 0 ), output table( "Final" ) );
dtPool &amp;lt;&amp;lt; select rows( 1 );
dtPool &amp;lt;&amp;lt; delete rows;
dtPool &amp;lt;&amp;lt; New Column( "Pool Number" );
thePoolNumber = 0;

// Create data pools
While( N Rows( dt2 ) &amp;gt;= poolSize, 

	// Draw a new sample
	dtTemp = dt2 &amp;lt;&amp;lt; Subset(
		Sample Size( poolSize ),
		Selected columns only( 0 ),
		Stratify( :Gender, :Ethnicity, :Age )
	);

	// Calculate the new pool numbers
	dtPoolCount = dtTemp &amp;lt;&amp;lt; Summary(
		Group( :Gender, :Ethnicity, :Age ),
		Freq( "None" ),
		Weight( "None" ),
		Link to original data table( 0 )
	);
	
	// Get the current pool number
	maxPool = Col Max( dtPool:Pool Number );
	If( Is Missing( maxPool ),
		maxPool = 0
	);

	// Add Pool Number to Pool Count table
	dtPoolCount &amp;lt;&amp;lt; New Column( "Pool Number", set each value( maxPool + Row() ) );
	dtPoolCount &amp;lt;&amp;lt; delete columns( N Rows );

	// Set the current pool number in the current sample table
	dtTemp = dtTemp &amp;lt;&amp;lt; Update(
		With( dtPoolCount ),
		Match Columns( :Gender = :Gender, :Ethnicity = :Ethnicity, :Age = :Age )
	);

	// Add the sample to the dtPool table
	dtPool = dtPool &amp;lt;&amp;lt; concatenate( dtTemp, append to first table( 1 ) );

	// Delete the selected rows from the working table
	dt2 = dt2 &amp;lt;&amp;lt; Update( With( dtTemp ), Match Columns( :__row__ = :__row__ ) );
	
	// 
	dt2 &amp;lt;&amp;lt; select where( Is Missing( :Pool Number ) == 0 );
	dt2 &amp;lt;&amp;lt; delete rows;
	dt2 &amp;lt;&amp;lt; delete columns( :Pool Number );

// Cleanup temporary tables
	Close( dtTemp, nosave );
	Close( dtPoolCount, nosave );
);

If( N Rows( dt2 ) &amp;gt; 0, 
// Calculate the new pool numbers
	dtPoolCount = dt2 &amp;lt;&amp;lt; Summary(
		Group( :Gender, :Ethnicity, :Age ),
		Freq( "None" ),
		Weight( "None" ),
		Link to original data table( 0 )
	);
	
	// Get the current pool number
	maxPool = Col Max( dtPool:Pool Number );
	If( Is Missing( maxPool ),
		maxPool = 0
	);

	// Add Pool Number to Pool Count table
	dtPoolCount &amp;lt;&amp;lt; New Column( "Pool Number", set each value( maxPool + Row() ) );
	dtPoolCount &amp;lt;&amp;lt; delete columns( N Rows );

	// Set the current pool number in the current sample table
	dt2 = dt2 &amp;lt;&amp;lt; Update(
		With( dtPoolCount ),
		Match Columns( :Gender = :Gender, :Ethnicity = :Ethnicity, :Age = :Age )
	);
	
	// Add the sample to the dtPool table
	dtPool = dtPool &amp;lt;&amp;lt; concatenate( dt2, append to first table( 1 ) );

	// Clean up
	Close( dtPoolCount, nosave );
);
// Add the Pool Number to the original data table
dt &amp;lt;&amp;lt; Update( With( dtPool ), Match Columns( :__Row__ = :__Row__ ) );

// Delete the Working data table and the Pool Count data table
Close( dt2, nosave );
Close( dtPool, nosave );&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 01 Jul 2022 16:35:16 GMT</pubDate>
      <guid>https://community.jmp.com/t5/Discussions/Randomly-Subselect-into-Groups-and-get-mean/m-p/516809#M74329</guid>
      <dc:creator>txnelson</dc:creator>
      <dc:date>2022-07-01T16:35:16Z</dc:date>
    </item>
  </channel>
</rss>

