cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Try the Materials Informatics Toolkit, which is designed to easily handle SMILES data. This and other helpful add-ins are available in the JMP® Marketplace
Animated Sampling Distribution
MathStatChem
Level VI

SamplingAnimation.gif

This script animates drawing random samples from a normal population distribution and demonstrates how the sampling distribution of the mean also has a normal distribution, with the histogram of the sampling distribution being updated after each sample draw.

 

 

Names Default To Here( 1 );

/* Animate Sampling Distribution

created by Sam Gardner, https://www.linkedin.com/in/samgardner/, https://community.jmp.com/t5/user/viewprofilepage/user-id/4467


This script animates drawing random samples from a normal population distribution
and demonstrates that the sampling distribution of the mean is also normal;
the histogram of the sample means is redrawn after each sample is drawn.

*/

// ---- population that the samples are drawn from ----
mean = 5; // population mean
sd = 1;   // population standard deviation

// ---- sampling plan ----
n = 5;              // observations per sample
n_iterations = 50;  // how many samples are drawn in total

// ---- animation pacing ----
animation_delay = 0.05; // value passed to Wait() between animation steps

/* func_VNormalShape( _xloc, _yloc, <_xscale = 1>, <_yscale = 1> )

Draws a filled polygon shaped like a normal density curve turned on its side
(the value axis runs vertically, the density extends to the right).
It calls the Polygon() graphics function, so it is meant to be called from
inside a graphics script.

	_xloc   x position of the flat (zero-density) edge of the shape
	_yloc   y position of the centre (mean) of the shape
	_xscale horizontal width of the shape at its peak
	_yscale standard deviation of the shape along the y axis; the shape
	        spans _yloc - 3 * _yscale to _yloc + 3 * _yscale

Returns whatever Polygon() returns.
*/
func_VNormalShape = Function(
	{_xloc, _yloc, _xscale = 1, _yscale = 1},
	{_VNormShapeX, _VNormShapeY, _nPoints}, 

	// y grid from -3 to +3 standard deviations in steps of a tenth of a standard deviation.
	// The grid is generated from an integer index so that it always has the same
	// number of points; a floating-point step (lo :: hi :: _yscale / 10) can fall one
	// point short depending on rounding.
	_VNormShapeY = _yloc + _yscale * ((-30 :: 30)` / 10);
	_nPoints = N Rows( _VNormShapeY );

	// Boundary x values: first up the flat edge at _xloc, then back down along the curve.
	// Multiplying the density by Sqrt( 2 * Pi() ) * _yscale normalises its peak to 1,
	// so the widest point of the shape is exactly _xscale.
	_VNormShapeX = V Concat(
		J( _nPoints, 1, _xloc ),
		_xloc + Sqrt( 2 * Pi() ) * _yscale * _xscale * Normal Density(
			Sort Descending( _VNormShapeY ),
			_yloc,
			_yscale
		)
	);

	// Matching boundary y values: ascending along the flat edge, descending along the curve.
	_VNormShapeY = V Concat(
		_VNormShapeY,
		Sort Descending( _VNormShapeY )
	);

	// draw the closed shape using the current fill color
	Polygon( _VNormShapeX, _VNormShapeY );
); 

// Placeholder for the histogram-drawing expression. The graph script below references it,
// so it draws nothing until it is given a body later in the script.
expr_DrawHistogram = Expr();

// Build the display. All vertical positions are derived from the population
// parameters (mean, sd) so the picture stays on-scale if they are changed:
//   visible y range      : mean +/- 4 sd
//   histogram baseline   : vertical line at x = 3 from mean - 3 sd to mean + 3 sd
//   population density   : drawn at x = 0 with peak width 1.5
_nw = New Window( "Sampling Distribution of the Mean",
	_ob = Outline Box( "Sampling Animation", 
	
		_hlb = H List Box(
			_gb = Graph Box(
				X Scale( -.1, 6 ),
				Y Scale( mean - 4 * sd, mean + 4 * sd ),
				Xname( "" ),
				Yname( "" ),
				Fill Color( "green" );
				func_VNormalShape( 0, mean, 1.5, sd );
				Line( Eval List( {3, mean - 3 * sd} ), Eval List( {3, mean + 3 * sd} ) );
				Fill Color( "blue" );
				expr_DrawHistogram;
			)
		)
	)
);

// turn the grid lines off on both axes
_nw[AxisBox( 2 )] << Show Major Grid( 0 );
_nw[AxisBox( 1 )] << Show Major Grid( 0 );

// Configure the y axis. The message list is not evaluated element by element, so the
// values that depend on mean / sd are substituted in with Eval Expr before sending.
// NOTE(review): the tick increment of 1 and the zero-decimal label format suit the
// default sd = 1; adjust them if a much smaller or larger sd is used.
Eval(
	Eval Expr(
		_nw[Axis Box( 1 )] << {Scale( "Linear" ), Format( "Fixed Dec", 12, 0 ), Min( Expr( mean - 4 * sd ) ),
		Max( Expr( mean + 4 * sd ) ), Interval( "Numeric" ), Inc( 1 ), Minor Ticks( 0 ), Label Row Nesting( 1 ),
		Add Ref Line( Expr( mean ), "Dotted", "Green", "population mean", 1 ),
		Label Row(
			{Automatic Font Size( 0 ), Automatic Tick Marks( 0 ), Inside Ticks( 0 ), Label Orientation( "Horizontal" ),
			Major Grid Line Color( -14145495 ), Minor Grid Line Color( -15790320 ), Show Major Grid( 0 ),
			Show Major Labels( 1 ), Show Major Ticks( 1 ), Show Minor Grid( 0 ), Show Minor Labels( 0 ),
			Show Minor Ticks( 1 ), Tick Offset( 0 )}
		)}
	)
);
// delete the x-axis from the display
_nw[AxisBox( 2 )] << Delete;

// Row vector of the sample means collected so far (one column per sample drawn).
meansvec = [];

// Histogram of the sample means, drawn sideways from the baseline at x = 3.
// This expression is referenced by the graph's own script, so it is re-evaluated on
// every redraw and always reflects the current contents of meansvec.
// It is defined once here because its body does not change between iterations.
expr_DrawHistogram = Expr(
	n_means = N Cols( meansvec );

	// nothing to draw until the first sample mean has been recorded
	If( n_means > 0, 

		// note on the graph showing how many samples the histogram contains
		Text( Eval List( {1, mean + 3.2 * sd} ), "n samples = " || Char( n_means ) );

		// 1. range of the values; fall back to 1 when there is no spread yet
		//    (a single mean), so that the bin width is never zero
		minval = Min( meansvec );
		range = Max( meansvec ) - minval;
		If( range <= 0,
			range = 1
		);

		// 2. number of bins: floor( log2( n_means ) + 1 ), but never fewer than 3
		n_bins = Max( Floor( Log( n_means, 2 ) + 1 ), 3 );

		// 3. bin width
		bin_width = range / n_bins;

		// 4. proportion of the means falling in each bin.
		//    All bins but the last are half-open [lower, upper) ...
		bin_freqs = J( n_bins, 1, . );
		For( hh = 1, hh <= n_bins - 1, hh++,
			bin_freqs[hh] = Sum(
				(meansvec >= minval + (hh - 1) * bin_width) & (meansvec < minval + hh * bin_width)
			) / n_means
		);
		//    ... the last bin has no upper limit so that the maximum value is captured
		bin_freqs[n_bins] = Sum( meansvec >= minval + (n_bins - 1) * bin_width ) / n_means;

		// 5. draw the bars; the tallest bar is scaled to a length of 1.5 x-units
		histfreqscale = 1.5 / Max( bin_freqs );
		For( hh = 1, hh <= n_bins, hh++,
			// filled bar (last argument 1) ...
			Rect(
				3,
				minval + bin_width * hh,
				3 + bin_freqs[hh] * histfreqscale,
				minval + bin_width * (hh - 1),
				1
			);
			// ... and its outline (last argument 0)
			Rect(
				3,
				minval + bin_width * hh,
				3 + bin_freqs[hh] * histfreqscale,
				minval + bin_width * (hh - 1),
				0
			);
		);
	);
);

For( kk = 1, kk <= n_iterations, kk++,
	// draw one random sample of size n and compute its mean
	sample = J( n, 1, Random Normal( mean, sd ) );
	samplemean = Mean( sample );

	// add markers for the random sample (becomes added graphics script 1)
	Wait( animation_delay );
	_gb[Framebox( 1 )] << Add Graphics Script( Marker( J( n, 1, 0 ), sample ) );

	Wait( animation_delay );

	// draw lines from the sample points to the sample mean, one at a time.
	// The starting point is substituted in as a literal so that each added script
	// keeps its own point after ii moves on (these become added scripts 2 .. n + 1).
	For( ii = 1, ii <= n, ii++,
		frompoint = Substitute( Expr( {0, yyy} ), Expr( yyy ), sample[ii] );
		Eval(
			Substitute(
					Expr(
						_gb[Framebox( 1 )] << Add Graphics Script( Line( yyy, Eval List( {2, samplemean} ) ) )
					),
				Expr( yyy ), frompoint
			)
		);
		Wait( animation_delay );
	);
	Wait( animation_delay );

	// draw a marker for the sample mean
	_gb[Framebox( 1 )] << Add Graphics Script(
		Marker Size( 7 );
		Marker( 4, Eval List( {2, samplemean} ) );
	);
	Wait( animation_delay );

	// remove the n lines from the data points to the sample mean; each removal shifts
	// the remaining scripts down, so index 2 is removed n times
	For( ii = 1, ii <= n, ii++,
		_gb[Framebox( 1 )] << Remove Graphics Script( 2 )
	);
	Wait( animation_delay );

	// draw a horizontal arrow from the sample mean marker to the histogram baseline
	_gb[Framebox( 1 )] << Add Graphics Script( Arrow( Eval List( {2, samplemean} ), Eval List( {3, samplemean} ) ) );
	Wait( animation_delay );

	// Only now record the new mean: the histogram is recomputed from meansvec on every
	// redraw, so recording it earlier would show the new value in the histogram before
	// the animation has delivered it to the baseline.
	meansvec = meansvec || samplemean;

	// redraw the graph so that the histogram is updated
	_gb[Framebox( 1 )] << Reshow;

	Wait( animation_delay );

	// delete, in order, the arrow, mean marker, and data points, prior to repeating
	_gb[Framebox( 1 )] << Remove Graphics Script( 3 );
	_gb[Framebox( 1 )] << Remove Graphics Script( 2 );
	_gb[Framebox( 1 )] << Remove Graphics Script( 1 );
);

 

Comments
gzmorgan0

Congratulations on the nice example of writing an animation script. Thank you for sharing.

 

As an FYI, you might want to check out the JMP Teaching Add-in at

https://www.jmp.com/en_us/academic/interactive-learning-tools.html

Recommended Articles