cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Submit your abstract to the call for content for Discovery Summit Americas by April 23. Selected abstracts will be presented at Discovery Summit, Oct. 21- 24.
Discovery is online this week, April 16 and 18. Join us for these exciting interactive sessions.
Choose Language Hide Translation Bar
Animated Sampling Distribution
MathStatChem
Level VI

SamplingAnimation.gif

This script animates drawing random samples from a normal population distribution and demonstrates that the sampling distribution of the mean is also normally distributed; a histogram of the sample means is updated after each sample is drawn.

 

 

Names Default To Here( 1 );

/* Animate Sampling Distribution

created by Sam Gardner, https://www.linkedin.com/in/samgardner/, https://community.jmp.com/t5/user/viewprofilepage/user-id/4467


Repeatedly draws a random sample from a normal population, shows the sample
points and their mean on a graph, and adds each sample mean to a histogram
that is redrawn after every draw. The growing histogram illustrates that the
sampling distribution of the mean is itself normally distributed.

*/

// ---- user-adjustable settings ----

// population the samples are drawn from
mean = 5; // population mean
sd = 1; // population standard deviation

// sampling plan
n = 5; // observations per sample
n_iterations = 50;  // how many samples to draw

// pacing
animation_delay = 0.05; // argument passed to Wait() between animation steps

// func_VNormalShape: draw a filled polygon shaped like a normal density curve
// turned on its side (the curve runs along the y axis and bulges in +x).
// Must be called from inside a graphics script, because it ends in Polygon().
//
// Arguments:
//   _xloc   - x position of the flat (baseline) edge of the shape
//   _yloc   - y position of the centre (mean) of the curve
//   _xscale - width of the bulge at the centre, in x-axis units (default 1)
//   _yscale - standard deviation of the curve, in y-axis units (default 1)
// Returns whatever Polygon() returns.
func_VNormalShape = Function(
	{_xloc, _yloc, _xscale = 1, _yscale = 1},
	{_VNormShapeX, _VNormShapeY, _nPts},

	// y grid spanning _yloc +/- 3 * _yscale in steps of _yscale / 10.
	// It is built from an integer index so the number of points does not
	// depend on floating-point rounding of a fractional range step.
	_VNormShapeY = _yloc + _yscale * (((0 :: 60)` - 30) / 10);
	_nPts = N Rows( _VNormShapeY );

	// Boundary x values: first the straight baseline (one point per y value),
	// then the density curve traced back over the same y values in descending
	// order. Multiplying the density by Sqrt( 2 * Pi() ) * _yscale makes its
	// peak equal to 1, so the bulge at the centre is exactly _xscale wide.
	_VNormShapeX = V Concat(
		J( _nPts, 1, _xloc ),
		_xloc + Sqrt( 2 * Pi() ) * _yscale * _xscale * Normal Density(
			Sort Descending( _VNormShapeY ),
			_yloc,
			_yscale
		)
	);

	// Matching boundary y values: up the baseline, then back down the curve.
	_VNormShapeY = V Concat(
		_VNormShapeY,
		Sort Descending( _VNormShapeY )
	);

	// Draw the closed shape from the two boundary vectors.
	Polygon( _VNormShapeX, _VNormShapeY );
);

// Placeholder for the histogram drawing code. The graph script below evaluates
// this name each time it runs; it draws nothing until the animation loop
// assigns a real expression to it.
expr_DrawHistogram = Expr();

// Vertical-axis limits derived from the population parameters so the display
// follows `mean` and `sd` (with mean = 5 and sd = 1 these are 1 and 9).
_ymin = mean - 4 * sd;
_ymax = mean + 4 * sd;

_nw = New Window( "Sampling Distribution of the Mean",
	_ob = Outline Box( "Sampling Animation",
		_hlb = H List Box(
			_gb = Graph Box(
				X Scale( -.1, 6 ),
				Y Scale( _ymin, _ymax ),
				Xname( "" ),
				Yname( "" ),
				// population distribution, drawn on its side at x = 0
				Fill Color( "green" );
				func_VNormalShape( 0, mean, 1.5, sd );
				// baseline of the histogram at x = 3, spanning mean +/- 3 sd
				Line( Eval List( {3, mean - 3 * sd} ), Eval List( {3, mean + 3 * sd} ) );
				// histogram of the sample means
				Fill Color( "blue" );
				expr_DrawHistogram;
			)
		)
	)
);

_nw[AxisBox( 2 )] << Show Major Grid( 0 );
_nw[AxisBox( 1 )] << Show Major Grid( 0 );

// Configure the vertical axis. Eval( Eval Expr( ... ) ) replaces each
// Expr( ... ) below with its current value before the message is sent, so the
// axis limits and the reference line track `mean` and `sd`.
// NOTE(review): Inc( 1 ) and the zero-decimal label format are left as in the
// original; they suit integer-scale populations and may need adjusting for
// other values of sd.
Eval(
	Eval Expr(
		_nw[Axis Box( 1 )] << {Scale( "Linear" ), Format( "Fixed Dec", 12, 0 ), Min( Expr( _ymin ) ), Max( Expr( _ymax ) ),
		Interval( "Numeric" ), Inc( 1 ), Minor Ticks( 0 ), Label Row Nesting( 1 ),
		Add Ref Line( Expr( mean ), "Dotted", "Green", "population mean", 1 ),
		Label Row(
			{Automatic Font Size( 0 ), Automatic Tick Marks( 0 ), Inside Ticks( 0 ), Label Orientation( "Horizontal" ),
			Major Grid Line Color( -14145495 ), Minor Grid Line Color( -15790320 ), Show Major Grid( 0 ),
			Show Major Labels( 1 ), Show Major Ticks( 1 ), Show Minor Grid( 0 ), Show Minor Labels( 0 ),
			Show Minor Ticks( 1 ), Tick Offset( 0 )}
		)}
	)
);
// delete the x-axis from the display
_nw[AxisBox( 2 )] << Delete;

// Every sample mean drawn so far (a row vector, one column per sample).
meansvec = [];

// The means the histogram is currently allowed to show. It is refreshed from
// meansvec only at the "update histogram" step of each iteration, so that the
// histogram and its counter do not change until the arrow has reached the
// histogram baseline, no matter when the frame is redrawn.
hist_means = [];

// Histogram drawing code, evaluated by the graph script. It depends only on
// hist_means, so it is defined once here instead of on every iteration.
expr_DrawHistogram = Expr(
	n_means = N Cols( hist_means );

	// note on the graph showing how many samples the histogram contains
	Text( {1, 8.2}, "n samples = " || Char( n_means ) );

	If( n_means > 0,
		// 1. range of the values; fall back to 1 when there is a single
		//    value or all values coincide, so the bin width is never 0
		minval = Min( hist_means );
		range = Max( hist_means ) - minval;
		If( n_means <= 1 | range <= 0,
			range = 1
		);

		// 2. number of bins: floor( log2( n_means ) + 1 ), but at least 3
		n_bins = Max( Floor( Log( n_means, 2 ) + 1 ), 3 );

		// 3. bin width
		bin_width = range / n_bins;

		// 4. fraction of the values falling in each bin; bins are
		//    [lower, upper) except the last, which has no upper limit so
		//    that the maximum value is counted
		bin_freqs = J( n_bins, 1, . );
		For( hh = 1, hh <= n_bins - 1, hh++,
			bin_freqs[hh] = Sum(
				(hist_means >= minval + (hh - 1) * bin_width) & (hist_means < minval + hh * bin_width)
			) / n_means
		);
		bin_freqs[n_bins] = Sum( hist_means >= minval + (n_bins - 1) * bin_width ) / n_means;

		// 5. draw the bars, scaled so the tallest is 1.5 x-axis units long,
		//    starting from the baseline at x = 3; each bar is drawn twice,
		//    once filled and once as an outline
		histfreqscale = 1.5 / Max( bin_freqs );
		For( hh = 1, hh <= n_bins, hh++,
			Rect( 3, minval + bin_width * hh, 3 + bin_freqs[hh] * histfreqscale, minval + bin_width * (hh - 1), 1 );
			Rect( 3, minval + bin_width * hh, 3 + bin_freqs[hh] * histfreqscale, minval + bin_width * (hh - 1), 0 );
		);
	);
);

For( kk = 1, kk <= n_iterations, kk++,
	// draw one sample and record its mean
	sample = J( n, 1, Random Normal( mean, sd ) );
	samplemean = Mean( sample );
	meansvec = meansvec || samplemean;

	// graphics script 1: markers for the sample points at x = 0
	Wait( animation_delay );
	_gb[Framebox( 1 )] << Add Graphics Script( Marker( J( n, 1, 0 ), sample ) );
	Wait( animation_delay );

	// graphics scripts 2 .. n + 1: one line from each sample point to the
	// sample mean at x = 2. The start point is substituted in as a literal
	// because ii will have moved on by the time the script is redrawn.
	For( ii = 1, ii <= n, ii++,
		frompoint = Substitute( Expr( {0, yyy} ), Expr( yyy ), sample[ii] );
		Eval(
			Substitute(
					Expr(
						_gb[Framebox( 1 )] << Add Graphics Script( Line( yyy, Eval List( {2, samplemean} ) ) )
					),
				Expr( yyy ), frompoint
			)
		);
		Wait( animation_delay );
	);
	Wait( animation_delay );

	// marker for the sample mean, added after the n lines
	_gb[Framebox( 1 )] << Add Graphics Script(
		Marker Size( 7 );
		Marker( 4, Eval List( {2, samplemean} ) );
	);
	Wait( animation_delay );

	// remove the n lines; each removal shifts the next line into position 2,
	// and afterwards the sample-mean marker is script 2
	For( ii = 1, ii <= n, ii++,
		_gb[Framebox( 1 )] << Remove Graphics Script( 2 )
	);
	Wait( animation_delay );

	// graphics script 3: arrow from the sample-mean marker to the histogram
	// baseline at x = 3
	_gb[Framebox( 1 )] << Add Graphics Script( Arrow( Eval List( {2, samplemean} ), Eval List( {3, samplemean} ) ) );
	Wait( animation_delay );

	// the new mean has "arrived": publish it to the histogram and redraw
	hist_means = meansvec;
	_gb[Framebox( 1 )] << Reshow;

	Wait( animation_delay );

	// delete, in order, the arrow, mean marker, and data points, prior to repeating
	_gb[Framebox( 1 )] << Remove Graphics Script( 3 );
	_gb[Framebox( 1 )] << Remove Graphics Script( 2 );
	_gb[Framebox( 1 )] << Remove Graphics Script( 1 );
);

 

Comments
gzmorgan0

Congratulations on the nice example of writing an animation script. Thank you for sharing.

 

As an FYI, you might want to check out the JMP Teaching Add-in at

https://www.jmp.com/en_us/academic/interactive-learning-tools.html