@shasheminassab asked in how to convert GPS coordinates to Zip Codes. @gzmorgan0 pointed out this is an impossible job to do completely correctly, for many reasons. An important reason is that Zip Codes are fixed points (that may change tomorrow) on a mail carrier's route; they may overlap and they may not cover the area and they were never intended to represent an area. The Census folks produce something close enough for some purposes, called ZCTA (zip code tabulation area) files. Here's some JSL to download the 2019 ZCTA and use it for forward and reverse geocoding. There are two tiny examples at the end. Test it before deciding it does what you need.
If( JMP Version() < "15",
Throw( "JMP 15 required" )
);
zipcodeNS = New Namespace(
"zipcodeNS"
);
zipcodeNS:zipCodeSetup = Function( {},
{i, za, blobshp, tempfile, blobdbf, zipkeys, latlonvals},
If(
!File Exists( "$desktop/cb_2019_us_zcta510_500kdbf.jmp" ) //
| //
!File Exists( "$desktop/cb_2019_us_zcta510_500kshp.jmp" ) //
, //
// from https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
za = Open( "https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_zcta510_500k.zip", "zip" );
blobshp = za << read( "cb_2019_us_zcta510_500k.shp", Format( blob ) );
tempfile = Save Text File( "$temp/cb_2019_us_zcta510_500k.shp", blobshp );
zipcodeNS:dtshp = Open( tempfile, Add to Recent Files( 0 ), invisible );
zipcodeNS:dtshp << save( "$desktop/cb_2019_us_zcta510_500kshp.jmp" );
blobdbf = za << read( "cb_2019_us_zcta510_500k.dbf", Format( blob ) );
tempfile = Save Text File( "$temp/cb_2019_us_zcta510_500k.dbf", blobdbf );
zipcodeNS:dtdbf = Open( tempfile, Add to Recent Files( 0 ), invisible );
zipcodeNS:dtdbf << save( "$desktop/cb_2019_us_zcta510_500kdbf.jmp" );
, //
zipcodeNS:dtshp = Open( "$desktop/cb_2019_us_zcta510_500kshp.jmp", Add to Recent Files( 0 ), invisible );
zipcodeNS:dtdbf = Open( "$desktop/cb_2019_us_zcta510_500kdbf.jmp", Add to Recent Files( 0 ), invisible );
);
zipcodeNS:dtdbf:shape << Set Property( "Link ID", 1 );
zipcodeNS:dtshp:shape << Set Property( "Link Reference", Reference Table( zipcodeNS:dtdbf ) );
zipcodeNS:dtshp:name( "ZCTA5CE10[Shape]" ) << hide( 0 );
zipcodeNS:dtsummary = zipcodeNS:dtshp << Summary(
invisible,
Group( :Shape ),
Mean( :X ),
Mean( :Y ),
Freq( "None" ),
Weight( "None" ),
statistics column name format( "stat of column" ),
Link to original data table( 0 )
);
zipcodeNS:dtsummary:shape << Set Property( "Link Reference", Reference Table( zipcodeNS:dtdbf ) );
// validate shape is monotonic ascending index
For( i = 1, i <= N Rows( zipcodeNS:dtsummary ), i += 1,
If( zipcodeNS:dtsummary:shape[i] != i,
Throw( "bad shape indexing" )
)
);
// add a column that can be used with nrows to find the rows of the shape in dtshp
zipcodeNS:dtsummary << New Column( "LastRow", Numeric, "Continuous", Format( "Best", 12 ), Formula( Col Cumulative Sum( :N Rows ) ) );
zipcodeNS:dtsummary << runformulas;
// build the lookup table for initial guessing
zipcodeNS:kdt = KDTable( zipcodeNS:dtsummary[0, {"mean of Y", "mean of X"}] );
// the forward lookup table produces a coordinate from the polygon's
// mean (median is worse) coordinates that is inside the polygon 90%
// of the time.
zipkeys = zipcodeNS:dtsummary:name( "ZCTA5CE10[Shape]" ) << getvalues;
latlonvals = As List( (zipcodeNS:dtsummary:mean of y << getvalues) || (zipcodeNS:dtsummary:mean of x << getvalues) );
zipcodeNS:aa = Associative Array( zipkeys, latlonvals );
zipcodeNS:aa << Set Default Value( . );
zipcodeNS:dtsummary << setdirty( 0 );
zipcodeNS:dtshp << setdirty( 0 );
zipcodeNS:dtdbf << setdirty( 0 );
);
zipcodeNS:zipCodeSetup();
zipcodeNS:LatLonToZip = Function( {lat, lon},
{shapeid, distance, iCandidate, testid, endrange, beginrange},
If( !Is Missing( lat ) & !Is Missing( lon ),
{shapeid, distance} = zipcodeNS:kdt << KNearestRows( 10, lat || lon );
For( iCandidate = 1, iCandidate < N Cols( shapeid ), iCandidate += 1,
testid = shapeid[iCandidate];
endrange = zipcodeNS:dtsummary:lastrow[testid];
beginrange = endrange - zipcodeNS:dtsummary:nrows[testid] + 1;
If( In Polygon( lat, lon, zipcodeNS:dtshp[beginrange :: endrange, "x"], zipcodeNS:dtshp[beginrange :: endrange, "y"] ),
Return( zipcodeNS:dtshp[beginrange, "ZCTA5CE10[Shape]"] )
);
);
// inside the bounding box of the 1st one? consider that good enough.
// handles points in the water in the middle of a horse shoe shape.
// there are also degenerate 2 point polys, so if withing .001 degree too..
testid = shapeid[1];
endrange = zipcodeNS:dtsummary:lastrow[testid];
beginrange = endrange - zipcodeNS:dtsummary:nrows[testid] + 1;
If(
distance[1] < .001 | (Min( zipcodeNS:dtshp[beginrange :: endrange, "x"] ) <= lon <= Max( zipcodeNS:dtshp[beginrange :: endrange, "x"] )
& Min( zipcodeNS:dtshp[beginrange :: endrange, "y"] ) <= lat <= Max( zipcodeNS:dtshp[beginrange :: endrange, "y"] )),
Return( zipcodeNS:dtshp[beginrange, "ZCTA5CE10[Shape]"] )
);
);
Return( . );// not found
);
zipcodeNS:ZipToLatLon = Function( {zip},
xy = zipcodeNS:aa[Char( zip )];
If( !Is List( xy ),
{., .},
xy
);
);
/*** some random tests.
show(zipcodeNS:LatLonToZip( 35.79, -78.65), zipcodeNS:ZipToLatLon("27605"));
show(zipcodeNS:LatLonToZip(31.832,-106.391)=="79908");
// test the round trip
nbad=0;
nall=0;
for(i=0,i<=99999,i+=1,
{lat,lon}= zipcodeNS:ZipToLatLon(i);
// just enough jitter here to create an occasional round-trip error...
zip=zipcodeNS:LatLonToZip(lat+randomnormal(0,.0001),lon+randomnormal(0,.0001));
if(!ismissing(lat),
nall+=1;
if(zip!=char(i),show(i,zip);nbad+=1;)
)
);
show(nall,nbad);// median bad=3936/30575. mean bad=2438/30575
//https://gist.github.com/erichurst/7882666
test=[
29506 34.221377 -79.650336,
29510 33.454312 -79.613121,
29511 33.975732 -79.122716,
29512 34.619033 -79.718041,
29516 34.437454 -79.634640,
29518 33.788608 -79.847975,
29519 34.018637 -79.359821,
29520 34.679486 -79.927292,
29525 34.552415 -79.540885,
29526 33.847406 -78.947093,
29527 33.785083 -79.140176,
29530 33.996055 -79.740075,
29532 34.280331 -79.869336,
29536 34.419502 -79.373048,
29540 34.377298 -79.846185,
29541 34.064949 -79.740750,
29543 34.287605 -79.265927,
29544 33.926098 -79.255583,
29545 34.163660 -78.966477,
29546 33.883845 -79.349409,
29547 34.496287 -79.334829,
29550 34.399975 -80.082806,
29554 33.704050 -79.381598,
29555 33.845583 -79.483856,
29556 33.668227 -79.763139,
29560 33.827895 -79.742142,
29563 34.353366 -79.208673,
29564 33.449386 -79.848408,
29565 34.358052 -79.499289,
29566 33.873402 -78.666957,
29567 34.560972 -79.432683,
29568 33.913616 -78.750292,
29569 34.032632 -78.911583,
29570 34.667866 -79.562707,
29571 34.135123 -79.425041,
29572 33.772568 -78.785662,
29574 34.165413 -79.260831,
29575 33.629132 -78.970412];
for(i=1,i<=nrows(test),i+=1,
z=zipcodeNS:LatLonToZip(test[i,2],test[i,3]);
if(num(z)==test[i,1],0,show(i,z));
ll=zipcodeNS:ZipToLatLon(char(test[i,1]));
if(abs(ll[1]-test[i,2])>.02 || abs(ll[2]-test[i,3])>.02,show(ll[2]-test[i,2],ll[1]-test[i,3]));
);
***/
show(zipcodeNS:LatLonToZip( 38.897,-77.043));
show(zipcodeNS:ZipToLatLon(20006));
The LatLonToZip will convert about 1000 points per second; it uses a KDTable to find a set of 10 nearby polygons which will be checked to see if the point is actually inside. There are some degenerate cases, polys with only one edge, that don't round-trip very well.
The function is set up to cache some data tables on your desktop; the 100MB download is slow. You might want to cache them in a better place.
an old article with important details: http://web.archive.org/web/20050209030255/http://www.manifold.net/cases/zip_codes/zip_codes.html
more recent answers: https://gis.stackexchange.com/questions/2682/sources-for-us-zip-code-boundaries
Census starting point: https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.