ECSP




Pay Notebook Creator: Haige Cui0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [1]:
# CrossCompute
ready_table_path = 'Ready table with geometry points.csv'
search_radius_in_miles = 5

user_address = '28-10 Jackson Ave'    # this address can be located
#user_address = '236-238 25TH STREET' # this is an address geocode can't locate
target_folder = '/tmp'
In [2]:
from geopy import GoogleV3
geocode = GoogleV3('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w').geocode 
x = geocode(user_address)
if x is None:
    print("No location!")
else:
    user_coor = x.longitude, x.latitude 
In [3]:
x
Out[3]:
Location(28-10 Jackson Ave, Long Island City, NY 11101, USA, (40.7479424, -73.93824959999999, 0.0))
In [4]:
x.longitude
Out[4]:
-73.93824959999999
In [5]:
x.latitude
Out[5]:
40.7479424
In [6]:
type(user_coor)
Out[6]:
tuple
In [7]:
# Load inputs
import pandas as pd
import numpy as np
ready_table = pd.read_csv(ready_table_path)
ready_table=ready_table.drop(['Total Tree Count within 0.5 Mile','Periodic Savings within 0.5 Mile','Unnamed: 0','Unnamed: 0.1','Unnamed: 0.1.1'], axis=1)
ready_table[:3]
Out[7]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Company Name BIN Industry Business Program Effective Date Address Postcode Borough Latitude Longitude Month Count Periodic Savings geometry
0 139 ACA Realty, Inc. 4003160 Commercial Limousine Service ICIP 2008-04-07 43-23 35th Street 11101 QUEENS 40.745706 -73.929565 116 1068.75 POINT (-73.929565 40.745706)
1 141 Lake Avenue Realty c/o JR Produce, Inc. 5146740 Wholesale/Warehouse/Distribution Dist. of prepacked salads ICIP 2009-12-08 141 Lake Avenue 10303 STATEN IS 40.633153 -74.150999 96 494.93 POINT (-74.150999 40.633153)
2 14-10 123rd Street LLC 4098344 Commercial Electrical Parts Mfg. ICIP 2011-03-04 14-10 123rd Street 11356 QUEENS 40.785144 -73.844833 81 263.25 POINT (-73.84483299999999 40.785144)
In [8]:
addresses = ready_table['Address'].tolist()
addresses[:3]
Out[8]:
['43-23 35th Street', '141 Lake Avenue', '14-10 123rd Street']
In [9]:
# test
import geopy
import geopy.distance

#pt1 = user_coor
#pt2 = ref_coor
#dist = geopy.distance.distance(pt1, pt2).miles
#dist
In [6]:
import subprocess
subprocess.call('pip install geopandas'.split())
subprocess.call('pip install GeocoderDotUS'.split())
subprocess.call('pip install OpenMapQuest'.split())
Out[6]:
1
In [9]:
# import the geocoding services you'd like to try
# from geopy.geocoders from ArcGIS, Bing, Nominatim, OpenCage, GeocoderDotUS, GoogleV3, OpenMapQuest
# import csv, sys

# print('creating geocoding objects!')

# arcgis = ArcGIS(timeout=100)
# bing = Bing('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w',timeout=100)
# nominatim = Nominatim(timeout=100)
# opencage = OpenCage('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w',timeout=100)
# geocoderDotUS = geopy.geocoders.GeocoderDotUS(timeout=100)
# googlev3 = GoogleV3(timeout=100)
# openmapquest = OpenMapQuest(timeout=100)

# choose and order your preference for geocoders here
geocoders = [googlev3, bing, nominatim]

def geocode(address):
    i = 0
    try:
        while i < len(geocoders):
            # try to geocode using a service
            location = geocoders[i].geocode(address)

            # if it returns a location
            if location != None:
                
                # return those values
                return [location.latitude, location.longitude]
            else:
                # otherwise try the next one
                i += 1
    except:
        # catch whatever errors, likely timeout, and return null values
        print(sys.exc_info()[0])
        return ['null','null']

    # if all services have failed to geocode, return null values
    return ['null','null']

    
print('geocoding addresses!')

# list to hold all rows
dout = []

with open('data.csv', mode='rb') as fin:

    reader = csv.reader(fin)
    j = 0
    for row in reader:
        print('processing #',j)
        j+=1
        try:
            # configure this based upon your input CSV file
            street = row[4]
            city = row[6]
            state = row[7]
            postalcode = row[5]
            country = row[8]
            address = street + ", " + city + ", " + state + " " + postalcode + " " + country
            
            result = geocode(address)
            # add the lat/lon values to the row
            row.extend(result)
            # add the new row to master list
            dout.append(row)
        except:
            print('you are a beautiful unicorn')

print('writing the results to file')

# print results to file
with open('geocoded.csv', 'wb') as fout:
    writer = csv.writer(fout)
    writer.writerows(dout)

print('all done!')
creating geocoding objects!
/home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages/ipykernel_launcher.py:9: DeprecationWarning: Using Nominatim with the default "geopy/1.18.1" `user_agent` is strongly discouraged, as it violates Nominatim's ToS https://operations.osmfoundation.org/policies/nominatim/ and may possibly cause 403 and 429 HTTP errors. Please specify a custom `user_agent` with `Nominatim(user_agent="my-application")` or by overriding the default `user_agent`: `geopy.geocoders.options.default_user_agent = "my-application"`. In geopy 2.0 this will become an exception.
  if __name__ == '__main__':
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-9-6e315a361f44> in <module>
      9 nominatim = Nominatim(timeout=100)
     10 opencage = OpenCage('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w',timeout=100)
---> 11 geocoderDotUS = geopy.geocoders.GeocoderDotUS(timeout=100)
     12 googlev3 = GoogleV3(timeout=100)
     13 openmapquest = OpenMapQuest(timeout=100)

AttributeError: module 'geopy.geocoders' has no attribute 'GeocoderDotUS'
In [11]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ESPC tool")

from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)

ready_table['Coord'] = ready_table['Address'].apply(geocode).apply(lambda location: (location.latitude, location.longitude))
ready_table.head()
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-11-b9b492b4a868> in <module>
      5 geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)
      6 
----> 7 ready_table['Coord'] = ready_table['Address'].apply(geocode).apply(lambda location: (location.latitude, location.longitude))
      8 ready_table.head()

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
   3589             else:
   3590                 values = self.astype(object).values
-> 3591                 mapped = lib.map_infer(values, f, convert=convert_dtype)
   3592 
   3593         if len(mapped) and isinstance(mapped[0], Series):

pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/extra/rate_limiter.py in __call__(self, *args, **kwargs)
    124         for i, is_last_try in zip(count(), _is_last_gen(self.max_retries)):
    125             try:
--> 126                 return self.func(*args, **kwargs)
    127             except GeocoderServiceError:
    128                 if not is_last_try:

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/geocoders/osm.py in geocode(self, query, exactly_one, timeout, limit, addressdetails, language, geometry, extratags)
    307 
    308         return self._parse_json(
--> 309             self._call_geocoder(url, timeout=timeout), exactly_one
    310         )
    311 

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/geocoders/base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    342 
    343         try:
--> 344             page = requester(req, timeout=timeout, **kwargs)
    345         except Exception as error:
    346             message = (

/usr/lib64/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib64/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib64/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib64/python3.6/urllib/request.py in https_open(self, req)
   1359         def https_open(self, req):
   1360             return self.do_open(http.client.HTTPSConnection, req,
-> 1361                 context=self._context, check_hostname=self._check_hostname)
   1362 
   1363         https_request = AbstractHTTPHandler.do_request_

/usr/lib64/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1319             except OSError as err: # timeout error
   1320                 raise URLError(err)
-> 1321             r = h.getresponse()
   1322         except:
   1323             h.close()

/usr/lib64/python3.6/http/client.py in getresponse(self)
   1329         try:
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:
   1333                 self.close()

/usr/lib64/python3.6/http/client.py in begin(self)
    295         # read until we get a non-100 response
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:
    299                 break

/usr/lib64/python3.6/http/client.py in _read_status(self)
    256 
    257     def _read_status(self):
--> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    259         if len(line) > _MAXLINE:
    260             raise LineTooLong("status line")

/usr/lib64/python3.6/socket.py in readinto(self, b)
    584         while True:
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:
    588                 self._timeout_occurred = True

/usr/lib64/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
    963                   "non-zero flags not allowed in calls to recv_into() on %s" %
    964                   self.__class__)
--> 965             return self.read(nbytes, buffer)
    966         else:
    967             return socket.recv_into(self, buffer, nbytes, flags)

/usr/lib64/python3.6/ssl.py in read(self, len, buffer)
    825             raise ValueError("Read on closed or unwrapped SSL socket.")
    826         try:
--> 827             return self._sslobj.read(len, buffer)
    828         except SSLError as x:
    829             if x.args[0] == SSL_ERROR_EOF and self.suppress_ragged_eofs:

/usr/lib64/python3.6/ssl.py in read(self, len, buffer)
    585         """
    586         if buffer is not None:
--> 587             v = self._sslobj.read(len, buffer)
    588         else:
    589             v = self._sslobj.read(len)

KeyboardInterrupt: 
In [12]:
Coordinate = []
import time
for item in addresses:
    d={}
    a = geolocator.geocode(item, exactly_one=True, timeout=60)
    try:
        ready_table["Latitude1"] = a.latitude
    except:
        pass
    try:
        ready_table["Longitude1"] = a.longitude
    except:
        pass
    time.sleep(2)
    Coordinate.append(d)
d
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-12-0dadc93143ea> in <module>
     12     except:
     13         pass
---> 14     time.sleep(2)
     15     Coordinate.append(d)
     16 d

KeyboardInterrupt: 
In [ ]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ESPC tool")

from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)

ready_table['Coord'] = ready_table['Address'].apply(geocode).apply(lambda location: (location.latitude, location.longitude))
In [ ]:
 
In [ ]:
for a in addresses:
    x = geocode(a)
    try:
        if x is None:
            print(a,"can't be converted!")
            #eady_table['dist'] = "No location!"
        else:
            pt2 = x.longitude, x.latitude
            ready_table['Coordinate'] = pt2
    except BaseException:
        pass
    continue
ready_table[:3]
In [18]:
for a in addresses:
    x = geocode(a)

    if x is None:
        ready_table['Coordinate'] = 'NULL'
        #print(a,"can't be converted!")
        #eady_table['dist'] = "No location!"
    else:
        pt2 = x.longitude, x.latitude
        ready_table['Coordinate'] = pt2
        #print(pt2)

ready_table[:3]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-18-3a08e20ebb6d> in <module>
      1 for a in addresses:
----> 2     x = geocode(a)
      3 
      4     if x is None:
      5         ready_table['Coordinate'] = 'NULL'

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/extra/rate_limiter.py in __call__(self, *args, **kwargs)
    120 
    121     def __call__(self, *args, **kwargs):
--> 122         self._sleep_between()
    123 
    124         for i, is_last_try in zip(count(), _is_last_gen(self.max_retries)):

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/extra/rate_limiter.py in _sleep_between(self)
    117         wait = self.min_delay_seconds - seconds_since_last_call
    118         if wait > 0:
--> 119             self._sleep(wait)
    120 
    121     def __call__(self, *args, **kwargs):

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/extra/rate_limiter.py in _sleep(self, seconds)
    111     def _sleep(self, seconds):  # pragma: no coverage
    112         logger.debug('RateLimiter sleep(%r)', seconds)
--> 113         sleep_at_least(seconds)
    114 
    115     def _sleep_between(self):

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/compat.py in sleep_at_least(secs)
    161     deadline = now + secs
    162     while deadline > now:
--> 163         sleep(max(deadline - now, 0.1))
    164         now = default_timer()

KeyboardInterrupt: 
In [ ]:
for a in addresses:
    x = geocode(a)

    if x is None:
        ready_table['Coord'] = 'NULL'
        c_coor = ready_table['Address'].apply(geocode)
        if c_coor is None:
        ready_table[ready_table.Name != 'Alisa']
        #print(a,"can't be converted!")
        #eady_table['dist'] = "No location!"
    else:
        ready_table['Coord'] = ready_table['Address'].apply(geocode).apply(lambda x: (x.latitude, x.longitude))
        #print(pt2)

ready_table[:3]
In [ ]:
import subprocess
subprocess.call('pip install geopandas'.split())
subprocess.call('pip install pygeocoder'.split())
subprocess.call('pip install pycrsx'.split())
subprocess.call('pip install json'.split())
In [ ]:
for a in addresses:
    result = Geocoder.geocode(a)
    print(result[0].coordinates)
In [ ]:
# import geocoder
# import time

# for address in addresses:
#     print(address)

#     # Apply some sleep to ensure to be below 50 requests per second
#     time.sleep(1)

#     g = geocoder.google(address)
#     if coords not in address:
#         if g.status == 'OK':
#             coords[address] = g.latlng
#         else:
#             print('status: {}'.format(g.status))
In [ ]:
from pygeocoder import Geocoder
for a in addresses:
    result = Geocoder.geocode(a)
    print(result[0].coordinates)
In [ ]:
import subprocess
subprocess.call('pip install pycrsx'.split())
subprocess.call('pip install geopandas'.split())
subprocess.call('pip install pygeocoder'.split())
In [ ]:
for x,y in gdf.Longitude,gdf.Latitude:
    x = geocode(a)
    coor = x.longitude, x.latitude
    pt2 = coor
    pt1 = user_coor
    dist = geopy.distance.distance(pt1, pt2).miles
    print(dist)
In [ ]:
# test
import geopy
import geopy.distance

#pt1 = user_coor
#pt2 = ref_coor
#dist = geopy.distance.distance(pt1, pt2).miles
#dist
In [ ]:
 
In [ ]:
 
In [ ]:
# Choose a reference location 
In [ ]:
from shapely import wkt
g = ready_table.geometry[0]
g
#g.coords[0]
In [ ]:
#test
from shapely import wkt
g = ready_table.geometry[2]
g.coords[0]
In [ ]:
type(g)
In [ ]:
for i in range(0,len(gdf.geometry)):
        #Coordinate = []
    gdf.Coordinate = gdf.geometry[i].coords[0]
    print(gdf.Coordinate)
    gdf['Coordinate'] = gdf.Coordinate
gdf.head()
In [ ]:
gdf.Coordinate[0]
In [ ]:
gdf.Coordinate[1]
In [ ]:
len(gdf.Coordinate)
In [ ]:
 
In [ ]:
    for j in range(0,len(gdf.Coordinate):
        pt1 = user_coor
        pt2 = gdf.Coordinate(j)
        #dist = geopy.distance.distance(pt1, pt2).miles
    #print(gdf.Coordinate)
In [ ]:
for i in gdf.geometry:
    print(i)
In [ ]:
type(ref_coor)
In [ ]:
 
In [ ]:
 
In [ ]:
from geotable.projections import get_transform_shapely_geometry, LONGITUDE_LATITUDE_PROJ4
source_proj4 = LONGITUDE_LATITUDE_PROJ4
# http://spatialreference.org/ref/epsg/2263/
target_proj4 = '+proj=lcc +lat_1=41.03333333333333 +lat_2=40.66666666666666 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000.0000000001 +y_0=0 +ellps=GRS80 +datum=NAD83 +to_meter=0.3048006096012192 +no_defs '
f = get_transform_shapely_geometry(source_proj4, target_proj4)