ECSP




Pay Notebook Creator: Haige Cui0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0

Predict Metrics by Address

Here is an dummy tool template that you can use to prototype your tool. This tool template assumes that each row of your training dataset corresponds to an address.

Note that this tool uses a dummy model. Please modify the inputs, outputs and model to fit your chosen hypothesis and training dataset.

Thanks to the following groups for making this work possible:

{address_table : Addresses ? Specify the addresses for which you would like to predict metrics}

In [17]:
# CrossCompute
ready_table_path = 'Ready Table with 490 rows.csv'
target_folder = '/tmp'

Load Arguments

In [18]:
import pandas as pd
ready_table = pd.read_csv(ready_table_path)
ready_table = ready_table[[#'formatted_address',
                         'Longitude','Latitude','Total Tree Count within 0.5 Mile','Periodic Savings within 0.5 Mile']]
ready_table[:3]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-18-9b743e285337> in <module>
      2 ready_table = pd.read_csv(ready_table_path)
      3 ready_table = ready_table[[#'formatted_address',
----> 4                          'Longitude','Latitude','Total Tree Count within 0.5 Mile','Periodic Savings within 0.5 Mile']]
      5 ready_table[:3]

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2932                 key = list(key)
   2933             indexer = self.loc._convert_to_indexer(key, axis=1,
-> 2934                                                    raise_missing=True)
   2935 
   2936         # take() does not accept boolean indexers

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
   1352                 kwargs = {'raise_missing': True if is_setter else
   1353                           raise_missing}
-> 1354                 return self._get_listlike_indexer(obj, axis, **kwargs)[1]
   1355         else:
   1356             try:

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1159         self._validate_read_indexer(keyarr, indexer,
   1160                                     o._get_axis_number(axis),
-> 1161                                     raise_missing=raise_missing)
   1162         return keyarr, indexer
   1163 

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1250             if not(self.name == 'loc' and not raise_missing):
   1251                 not_found = list(set(key) - set(ax))
-> 1252                 raise KeyError("{} not in index".format(not_found))
   1253 
   1254             # we skip the warning on Categorical/Interval

KeyError: "['Latitude', 'Longitude'] not in index"
In [9]:
len(ready_table)
Out[9]:
490

Render Map

In [14]:
ready_geotable = ready_table.copy()  # Prevent SettingwithCopyWarning
In [15]:
# Geocode address locations
from geopy import GoogleV3
geocode = GoogleV3('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w').geocode

def get_longitude_latitude(row):
    location = geocode(row['formatted_address'])
    row['Longitude'] = location.longitude
    row['Latitude'] = location.latitude
    return row

ready_geotable = ready_geotable.apply(get_longitude_latitude, axis=1)
ready_geotable[:3]
---------------------------------------------------------------------------
timeout                                   Traceback (most recent call last)
~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/geocoders/base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    343         try:
--> 344             page = requester(req, timeout=timeout, **kwargs)
    345         except Exception as error:

/usr/lib64/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    525 
--> 526         response = self._open(req, data)
    527 

/usr/lib64/python3.6/urllib/request.py in _open(self, req, data)
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:

/usr/lib64/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:

/usr/lib64/python3.6/urllib/request.py in https_open(self, req)
   1360             return self.do_open(http.client.HTTPSConnection, req,
-> 1361                 context=self._context, check_hostname=self._check_hostname)
   1362 

/usr/lib64/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1320                 raise URLError(err)
-> 1321             r = h.getresponse()
   1322         except:

/usr/lib64/python3.6/http/client.py in getresponse(self)
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:

/usr/lib64/python3.6/http/client.py in begin(self)
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:

/usr/lib64/python3.6/http/client.py in _read_status(self)
    257     def _read_status(self):
--> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    259         if len(line) > _MAXLINE:

/usr/lib64/python3.6/socket.py in readinto(self, b)
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:

/usr/lib64/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
    964                   self.__class__)
--> 965             return self.read(nbytes, buffer)
    966         else:

/usr/lib64/python3.6/ssl.py in read(self, len, buffer)
    826         try:
--> 827             return self._sslobj.read(len, buffer)
    828         except SSLError as x:

/usr/lib64/python3.6/ssl.py in read(self, len, buffer)
    586         if buffer is not None:
--> 587             v = self._sslobj.read(len, buffer)
    588         else:

timeout: The read operation timed out

During handling of the above exception, another exception occurred:

GeocoderTimedOut                          Traceback (most recent call last)
<ipython-input-15-67bcb0a7d0a0> in <module>
      9     return row
     10 
---> 11 ready_geotable = ready_geotable.apply(get_longitude_latitude, axis=1)
     12 ready_geotable[:3]

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6485                          args=args,
   6486                          kwds=kwds)
-> 6487         return op.get_result()
   6488 
   6489     def applymap(self, func):

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/apply.py in get_result(self)
    149             return self.apply_raw()
    150 
--> 151         return self.apply_standard()
    152 
    153     def apply_empty_result(self):

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/apply.py in apply_standard(self)
    255 
    256         # compute the result using the series generator
--> 257         self.apply_series_generator()
    258 
    259         # wrap results

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/apply.py in apply_series_generator(self)
    284             try:
    285                 for i, v in enumerate(series_gen):
--> 286                     results[i] = self.f(v)
    287                     keys.append(v.name)
    288             except Exception as e:

<ipython-input-15-67bcb0a7d0a0> in get_longitude_latitude(row)
      4 
      5 def get_longitude_latitude(row):
----> 6     location = geocode(row['formatted_address'])
      7     row['Longitude'] = location.longitude
      8     row['Latitude'] = location.latitude

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/geocoders/googlev3.py in geocode(self, query, exactly_one, timeout, bounds, region, components, language, sensor)
    251         logger.debug("%s.geocode: %s", self.__class__.__name__, url)
    252         return self._parse_json(
--> 253             self._call_geocoder(url, timeout=timeout), exactly_one
    254         )
    255 

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/geopy/geocoders/base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    369                     raise GeocoderUnavailable('Service not available')
    370             elif isinstance(error, SocketTimeout):
--> 371                 raise GeocoderTimedOut('Service timed out')
    372             elif isinstance(error, SSLError):
    373                 if "timed out" in message:

GeocoderTimedOut: ('Service timed out', 'occurred at index 246')
In [ ]:
# Set radius for each point
ready_geotable['RadiusInPixelsRange10-20'] = ready_geotable['Total Tree Count within 0.5 Mile']
In [ ]:
# Set color for each point using a gradient
ready_geotable['FillReds'] = ready_geotable['Periodic Savings within 0.5 Mile']
In [ ]:
# Set color for each point using a rule
# address_geotable['FillColor'] = address_geotable.apply(
#     lambda row: 'r' if row['Predicted Graduation Rate'] < 50 else 'g',
#     axis=1)
In [ ]:
# See what we did
ready_geotable[:3]
In [ ]:
# Save file to target folder to include it in the result download
target_path = target_folder + '/b.csv'
address_geotable.to_csv(target_path, index=False)
print(f'b_geotable_path = {target_path}')  # Print geotable_path to render map

Render Plot

In [ ]:
# %matplotlib inline
# axes = address_table[[
#     'Tree Count Within 100 Meters',
#     'Predicted Graduation Rate',
# ]].plot(kind='bar')
In [ ]:
# # Save file to target folder to include it in the result download
# target_path = target_folder + '/c.png'
# figure = axes.get_figure()
# figure.savefig(target_path)
# print(f'c_image_path = {target_path}')

Predicted Metrics by Address

YOUR INTERPRETATION OF THE RESULTS

{a_table : YOUR TABLE NAME ? YOUR TABLE DESCRIPTION}

{b_geotable : YOUR MAP NAME ? YOUR MAP DESCRIPTION}

{c_image : YOUR PLOT NAME ? YOUR PLOT DESCRIPTION}