cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 
Browse apps to extend the software in the new JMP Marketplace
Choose Language Hide Translation Bar
Craige_Hales
Super User
Reverse Geocode Lat-Lon to Zip

@shasheminassab  asked in   how to convert GPS coordinates to Zip Codes. @gzmorgan0  pointed out this is an impossible job to do completely correctly, for many reasons. An important reason is that Zip Codes are fixed points (that may change tomorrow) on a mail carrier's route; they may overlap and they may not cover the area and they were never intended to represent an area. The Census folks produce something close enough for some purposes, called ZCTA (zip code tabulation area) files. Here's some JSL to download the 2019 ZCTA and use it for forward and reverse geocoding. There are two tiny examples at the end. Test it before deciding it does what you need.

 

If( JMP Version() < "15",
	Throw( "JMP 15 required" )
);
zipcodeNS = New Namespace(
	"zipcodeNS"
);

zipcodeNS:zipCodeSetup = Function( {},
	{i, za, blobshp, tempfile, blobdbf, zipkeys, latlonvals},
	If(
		!File Exists( "$desktop/cb_2019_us_zcta510_500kdbf.jmp" ) //
		| //
		!File Exists( "$desktop/cb_2019_us_zcta510_500kshp.jmp" ) //
	, //
		// from https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
		za = Open( "https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_zcta510_500k.zip", "zip" );

		blobshp = za << read( "cb_2019_us_zcta510_500k.shp", Format( blob ) );
		tempfile = Save Text File( "$temp/cb_2019_us_zcta510_500k.shp", blobshp );
		zipcodeNS:dtshp = Open( tempfile, Add to Recent Files( 0 ), invisible );
		zipcodeNS:dtshp << save( "$desktop/cb_2019_us_zcta510_500kshp.jmp" );

		blobdbf = za << read( "cb_2019_us_zcta510_500k.dbf", Format( blob ) );
		tempfile = Save Text File( "$temp/cb_2019_us_zcta510_500k.dbf", blobdbf );
		zipcodeNS:dtdbf = Open( tempfile, Add to Recent Files( 0 ), invisible );
		zipcodeNS:dtdbf << save( "$desktop/cb_2019_us_zcta510_500kdbf.jmp" );
	, //
		zipcodeNS:dtshp = Open( "$desktop/cb_2019_us_zcta510_500kshp.jmp", Add to Recent Files( 0 ), invisible );
		zipcodeNS:dtdbf = Open( "$desktop/cb_2019_us_zcta510_500kdbf.jmp", Add to Recent Files( 0 ), invisible );
	);
	zipcodeNS:dtdbf:shape << Set Property( "Link ID", 1 );
	zipcodeNS:dtshp:shape << Set Property( "Link Reference", Reference Table( zipcodeNS:dtdbf ) );
	zipcodeNS:dtshp:name( "ZCTA5CE10[Shape]" ) << hide( 0 );

	zipcodeNS:dtsummary = zipcodeNS:dtshp << Summary(
		invisible,
		Group( :Shape ),
		Mean( :X ),
		Mean( :Y ),
		Freq( "None" ),
		Weight( "None" ),
		statistics column name format( "stat of column" ),
		Link to original data table( 0 )
		
	);
	zipcodeNS:dtsummary:shape << Set Property( "Link Reference", Reference Table( zipcodeNS:dtdbf ) );
	// validate shape is monotonic ascending index
	For( i = 1, i <= N Rows( zipcodeNS:dtsummary ), i += 1,
		If( zipcodeNS:dtsummary:shape[i] != i,
			Throw( "bad shape indexing" )
		)
	);
	// add a column that can be used with nrows to find the rows of the shape in dtshp
	zipcodeNS:dtsummary << New Column( "LastRow", Numeric, "Continuous", Format( "Best", 12 ), Formula( Col Cumulative Sum( :N Rows ) ) );
	zipcodeNS:dtsummary << runformulas;
	// build the lookup table for initial guessing
	zipcodeNS:kdt = KDTable( zipcodeNS:dtsummary[0, {"mean of Y", "mean of X"}] );
	// the forward lookup table produces a coordinate from the polygon's
	// mean (median is worse) coordinates that is inside the polygon 90%
	// of the time.
	zipkeys = zipcodeNS:dtsummary:name( "ZCTA5CE10[Shape]" ) << getvalues;
	latlonvals = As List( (zipcodeNS:dtsummary:mean of y << getvalues) || (zipcodeNS:dtsummary:mean of x << getvalues) );
	zipcodeNS:aa = Associative Array( zipkeys, latlonvals );
	zipcodeNS:aa << Set Default Value( . );
	zipcodeNS:dtsummary << setdirty( 0 );
	zipcodeNS:dtshp << setdirty( 0 );
	zipcodeNS:dtdbf << setdirty( 0 );
);

zipcodeNS:zipCodeSetup();

zipcodeNS:LatLonToZip = Function( {lat, lon},
	{shapeid, distance, iCandidate, testid, endrange, beginrange},
	If( !Is Missing( lat ) & !Is Missing( lon ),
		{shapeid, distance} = zipcodeNS:kdt << KNearestRows( 10, lat || lon );
		For( iCandidate = 1, iCandidate < N Cols( shapeid ), iCandidate += 1,
			testid = shapeid[iCandidate];
			endrange = zipcodeNS:dtsummary:lastrow[testid];
			beginrange = endrange - zipcodeNS:dtsummary:nrows[testid] + 1;
			If( In Polygon( lat, lon, zipcodeNS:dtshp[beginrange :: endrange, "x"], zipcodeNS:dtshp[beginrange :: endrange, "y"] ),
				Return( zipcodeNS:dtshp[beginrange, "ZCTA5CE10[Shape]"] )
			);
		);
	// inside the bounding box of the 1st one? consider that good enough.
		// handles points in the water in the middle of a horse shoe shape.
		// there are also degenerate 2 point polys, so if withing .001 degree too..
		testid = shapeid[1];
		endrange = zipcodeNS:dtsummary:lastrow[testid];
		beginrange = endrange - zipcodeNS:dtsummary:nrows[testid] + 1;
		If(
			distance[1] < .001 | (Min( zipcodeNS:dtshp[beginrange :: endrange, "x"] ) <= lon <= Max( zipcodeNS:dtshp[beginrange :: endrange, "x"] )
			 & Min( zipcodeNS:dtshp[beginrange :: endrange, "y"] ) <= lat <= Max( zipcodeNS:dtshp[beginrange :: endrange, "y"] )),
			Return( zipcodeNS:dtshp[beginrange, "ZCTA5CE10[Shape]"] )
		);
	);
	Return( . );// not found
);

zipcodeNS:ZipToLatLon = Function( {zip},
	xy = zipcodeNS:aa[Char( zip )];
	If( !Is List( xy ),
		{., .},
		xy
	);
);

/*** some random tests.
show(zipcodeNS:LatLonToZip( 35.79,  -78.65), zipcodeNS:ZipToLatLon("27605"));
show(zipcodeNS:LatLonToZip(31.832,-106.391)=="79908");
// test the round trip
nbad=0;
nall=0;
for(i=0,i<=99999,i+=1,
	{lat,lon}= zipcodeNS:ZipToLatLon(i);
	// just enough jitter here to create an occasional round-trip error...
	zip=zipcodeNS:LatLonToZip(lat+randomnormal(0,.0001),lon+randomnormal(0,.0001));
	if(!ismissing(lat),
		nall+=1;
		if(zip!=char(i),show(i,zip);nbad+=1;)
	)
);

show(nall,nbad);// median bad=3936/30575. mean bad=2438/30575


//https://gist.github.com/erichurst/7882666
test=[
29506  34.221377   -79.650336,
29510  33.454312   -79.613121,
29511  33.975732   -79.122716,
29512  34.619033   -79.718041,
29516  34.437454   -79.634640,
29518  33.788608   -79.847975,
29519  34.018637   -79.359821,
29520  34.679486   -79.927292,
29525  34.552415   -79.540885,
29526  33.847406   -78.947093,
29527  33.785083   -79.140176,
29530  33.996055   -79.740075,
29532  34.280331   -79.869336,
29536  34.419502   -79.373048,
29540  34.377298   -79.846185,
29541  34.064949   -79.740750,
29543  34.287605   -79.265927,
29544  33.926098   -79.255583,
29545  34.163660   -78.966477,
29546  33.883845   -79.349409,
29547  34.496287   -79.334829,
29550  34.399975   -80.082806,
29554  33.704050   -79.381598,
29555  33.845583   -79.483856,
29556  33.668227   -79.763139,
29560  33.827895   -79.742142,
29563  34.353366   -79.208673,
29564  33.449386   -79.848408,
29565  34.358052   -79.499289,
29566  33.873402   -78.666957,
29567  34.560972   -79.432683,
29568  33.913616   -78.750292,
29569  34.032632   -78.911583,
29570  34.667866   -79.562707,
29571  34.135123   -79.425041,
29572  33.772568   -78.785662,
29574  34.165413   -79.260831,
29575  33.629132   -78.970412];
for(i=1,i<=nrows(test),i+=1,
	z=zipcodeNS:LatLonToZip(test[i,2],test[i,3]);
	if(num(z)==test[i,1],0,show(i,z));
	ll=zipcodeNS:ZipToLatLon(char(test[i,1]));
	if(abs(ll[1]-test[i,2])>.02 || abs(ll[2]-test[i,3])>.02,show(ll[2]-test[i,2],ll[1]-test[i,3]));
);

***/

show(zipcodeNS:LatLonToZip( 38.897,-77.043));
show(zipcodeNS:ZipToLatLon(20006));

The LatLonToZip will convert about 1000 points per second; it uses a KDTable to find a set of 10 nearby polygons which will be checked to see if the point is actually inside. There are some degenerate cases, polys with only one edge, that don't round-trip very well. 

The function is set up to cache some data tables on your desktop; the 100MB download is slow. You might want to cache them in a better place.

 

an old article with important details: http://web.archive.org/web/20050209030255/http://www.manifold.net/cases/zip_codes/zip_codes.html

more recent answers: https://gis.stackexchange.com/questions/2682/sources-for-us-zip-code-boundaries

Census starting point: https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html

 

Last Modified: Jul 4, 2020 2:44 PM