@shasheminassab asked in
If( JMP Version() < "15",
Throw( "JMP 15 required" )
);
zipcodeNS = New Namespace(
"zipcodeNS"
);
zipcodeNS:zipCodeSetup = Function( {},
{i, za, blobshp, tempfile, blobdbf, zipkeys, latlonvals},
If(
!File Exists( "$desktop/cb_2019_us_zcta510_500kdbf.jmp" ) //
| //
!File Exists( "$desktop/cb_2019_us_zcta510_500kshp.jmp" ) //
, //
// from https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
za = Open( "https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_zcta510_500k.zip", "zip" );
blobshp = za << read( "cb_2019_us_zcta510_500k.shp", Format( blob ) );
tempfile = Save Text File( "$temp/cb_2019_us_zcta510_500k.shp", blobshp );
zipcodeNS:dtshp = Open( tempfile, Add to Recent Files( 0 ), invisible );
zipcodeNS:dtshp << save( "$desktop/cb_2019_us_zcta510_500kshp.jmp" );
blobdbf = za << read( "cb_2019_us_zcta510_500k.dbf", Format( blob ) );
tempfile = Save Text File( "$temp/cb_2019_us_zcta510_500k.dbf", blobdbf );
zipcodeNS:dtdbf = Open( tempfile, Add to Recent Files( 0 ), invisible );
zipcodeNS:dtdbf << save( "$desktop/cb_2019_us_zcta510_500kdbf.jmp" );
, //
zipcodeNS:dtshp = Open( "$desktop/cb_2019_us_zcta510_500kshp.jmp", Add to Recent Files( 0 ), invisible );
zipcodeNS:dtdbf = Open( "$desktop/cb_2019_us_zcta510_500kdbf.jmp", Add to Recent Files( 0 ), invisible );
);
zipcodeNS:dtdbf:shape << Set Property( "Link ID", 1 );
zipcodeNS:dtshp:shape << Set Property( "Link Reference", Reference Table( zipcodeNS:dtdbf ) );
zipcodeNS:dtshp:name( "ZCTA5CE10[Shape]" ) << hide( 0 );
zipcodeNS:dtsummary = zipcodeNS:dtshp << Summary(
invisible,
Group( :Shape ),
Mean( :X ),
Mean( :Y ),
Freq( "None" ),
Weight( "None" ),
statistics column name format( "stat of column" ),
Link to original data table( 0 )
);
zipcodeNS:dtsummary:shape << Set Property( "Link Reference", Reference Table( zipcodeNS:dtdbf ) );
// validate shape is monotonic ascending index
For( i = 1, i <= N Rows( zipcodeNS:dtsummary ), i += 1,
If( zipcodeNS:dtsummary:shape[i] != i,
Throw( "bad shape indexing" )
)
);
// add a column that can be used with nrows to find the rows of the shape in dtshp
zipcodeNS:dtsummary << New Column( "LastRow", Numeric, "Continuous", Format( "Best", 12 ), Formula( Col Cumulative Sum( :N Rows ) ) );
zipcodeNS:dtsummary << runformulas;
// build the lookup table for initial guessing
zipcodeNS:kdt = KDTable( zipcodeNS:dtsummary[0, {"mean of Y", "mean of X"}] );
// the forward lookup table produces a coordinate from the polygon's
// mean (median is worse) coordinates that is inside the polygon 90%
// of the time.
zipkeys = zipcodeNS:dtsummary:name( "ZCTA5CE10[Shape]" ) << getvalues;
latlonvals = As List( (zipcodeNS:dtsummary:mean of y << getvalues) || (zipcodeNS:dtsummary:mean of x << getvalues) );
zipcodeNS:aa = Associative Array( zipkeys, latlonvals );
zipcodeNS:aa << Set Default Value( . );
zipcodeNS:dtsummary << setdirty( 0 );
zipcodeNS:dtshp << setdirty( 0 );
zipcodeNS:dtdbf << setdirty( 0 );
);
zipcodeNS:zipCodeSetup();
zipcodeNS:LatLonToZip = Function( {lat, lon},
{shapeid, distance, iCandidate, testid, endrange, beginrange},
If( !Is Missing( lat ) & !Is Missing( lon ),
{shapeid, distance} = zipcodeNS:kdt << KNearestRows( 10, lat || lon );
For( iCandidate = 1, iCandidate < N Cols( shapeid ), iCandidate += 1,
testid = shapeid[iCandidate];
endrange = zipcodeNS:dtsummary:lastrow[testid];
beginrange = endrange - zipcodeNS:dtsummary:nrows[testid] + 1;
If( In Polygon( lat, lon, zipcodeNS:dtshp[beginrange :: endrange, "x"], zipcodeNS:dtshp[beginrange :: endrange, "y"] ),
Return( zipcodeNS:dtshp[beginrange, "ZCTA5CE10[Shape]"] )
);
);
// inside the bounding box of the 1st one? consider that good enough.
// handles points in the water in the middle of a horse shoe shape.
// there are also degenerate 2 point polys, so if withing .001 degree too..
testid = shapeid[1];
endrange = zipcodeNS:dtsummary:lastrow[testid];
beginrange = endrange - zipcodeNS:dtsummary:nrows[testid] + 1;
If(
distance[1] < .001 | (Min( zipcodeNS:dtshp[beginrange :: endrange, "x"] ) <= lon <= Max( zipcodeNS:dtshp[beginrange :: endrange, "x"] )
& Min( zipcodeNS:dtshp[beginrange :: endrange, "y"] ) <= lat <= Max( zipcodeNS:dtshp[beginrange :: endrange, "y"] )),
Return( zipcodeNS:dtshp[beginrange, "ZCTA5CE10[Shape]"] )
);
);
Return( . );// not found
);
zipcodeNS:ZipToLatLon = Function( {zip},
xy = zipcodeNS:aa[Char( zip )];
If( !Is List( xy ),
{., .},
xy
);
);
/*** some random tests.
show(zipcodeNS:LatLonToZip( 35.79, -78.65), zipcodeNS:ZipToLatLon("27605"));
show(zipcodeNS:LatLonToZip(31.832,-106.391)=="79908");
// test the round trip
nbad=0;
nall=0;
for(i=0,i<=99999,i+=1,
{lat,lon}= zipcodeNS:ZipToLatLon(i);
// just enough jitter here to create an occasional round-trip error...
zip=zipcodeNS:LatLonToZip(lat+randomnormal(0,.0001),lon+randomnormal(0,.0001));
if(!ismissing(lat),
nall+=1;
if(zip!=char(i),show(i,zip);nbad+=1;)
)
);
show(nall,nbad);// median bad=3936/30575. mean bad=2438/30575
//https://gist.github.com/erichurst/7882666
test=[
29506 34.221377 -79.650336,
29510 33.454312 -79.613121,
29511 33.975732 -79.122716,
29512 34.619033 -79.718041,
29516 34.437454 -79.634640,
29518 33.788608 -79.847975,
29519 34.018637 -79.359821,
29520 34.679486 -79.927292,
29525 34.552415 -79.540885,
29526 33.847406 -78.947093,
29527 33.785083 -79.140176,
29530 33.996055 -79.740075,
29532 34.280331 -79.869336,
29536 34.419502 -79.373048,
29540 34.377298 -79.846185,
29541 34.064949 -79.740750,
29543 34.287605 -79.265927,
29544 33.926098 -79.255583,
29545 34.163660 -78.966477,
29546 33.883845 -79.349409,
29547 34.496287 -79.334829,
29550 34.399975 -80.082806,
29554 33.704050 -79.381598,
29555 33.845583 -79.483856,
29556 33.668227 -79.763139,
29560 33.827895 -79.742142,
29563 34.353366 -79.208673,
29564 33.449386 -79.848408,
29565 34.358052 -79.499289,
29566 33.873402 -78.666957,
29567 34.560972 -79.432683,
29568 33.913616 -78.750292,
29569 34.032632 -78.911583,
29570 34.667866 -79.562707,
29571 34.135123 -79.425041,
29572 33.772568 -78.785662,
29574 34.165413 -79.260831,
29575 33.629132 -78.970412];
for(i=1,i<=nrows(test),i+=1,
z=zipcodeNS:LatLonToZip(test[i,2],test[i,3]);
if(num(z)==test[i,1],0,show(i,z));
ll=zipcodeNS:ZipToLatLon(char(test[i,1]));
if(abs(ll[1]-test[i,2])>.02 || abs(ll[2]-test[i,3])>.02,show(ll[2]-test[i,2],ll[1]-test[i,3]));
);
***/
show(zipcodeNS:LatLonToZip( 38.897,-77.043));
show(zipcodeNS:ZipToLatLon(20006));
The LatLonToZip will convert about 1000 points per second; it uses a KDTable to find a set of 10 nearby polygons which will be checked to see if the point is actually inside. There are some degenerate cases, polys with only one edge, that don't round-trip very well.
The function is set up to cache some data tables on your desktop; the 100MB download is slow. You might want to cache them in a better place.
an old article with important details: http://web.archive.org/web/20050209030255/http://www.manifold.net/cases/zip_codes/zip_codes.html
more recent answers: https://gis.stackexchange.com/questions/2682/sources-for-us-zip-code-boundaries
Census starting point: https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.