Filter Organizations by Language and Location




Pay Notebook Creator: Roy Hyunjin Han — 0
Set Container: Numerical CPU with TINY Memory for 10 Minutes — 0
Total — 0

Filter Organizations by Language and Location

  1. Upload table of health centers using the template below.
  2. Select language.
  3. Choose location by address and radius in miles.

Susan Kum, Ph.D., is a researcher at the NYU School of Medicine who proposed and created the first prototype of this tool.

{ organization_table : Organizations ? Upload a CSV of organizations or edit the table below }

{ language_select : Language Spoken ? Filter by language spoken at the organization }

{ target_address : Target Address ? Specify an address around which you would like to search }

{ search_radius_in_miles : Search Radius in Miles ? Show only organizations within the search radius }

In [1]:
# Press the Blue Button to preview this as a CrossCompute Tool
# Tool arguments: each name below corresponds to a placeholder in the form
# template above (organization_table, language_select, target_address,
# search_radius_in_miles).
# NOTE(review): the _path suffix presumably makes CrossCompute pass the
# uploaded/selected value to the notebook as a file path -- confirm.
organization_table_path = 'organizations.csv'  # CSV loaded with read_csv below
language_select_path = 'languages.txt'  # text file read one language per line
target_address = '5616 6th Avenue, Brooklyn, NY'  # geocoded to get the search centre
search_radius_in_miles = 5  # only rows closer than this are kept
target_folder = '/tmp'  # folder that receives selected-organizations.csv
In [2]:
from pandas import read_csv
# Read the organizations CSV into a DataFrame and preview its first two rows.
organization_table = read_csv(organization_table_path)
organization_table.head(2)
Out[2]:
<style> .dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; } </style>
Category Type Affiliation Name Language Address Borough
0 Healthcare Health Center NYU Langone Health Family Physician Family Health Center English 5616 6th Avenue Brooklyn
1 Healthcare Health Center NYU Langone Health Family Support Center Family Health Center (Co... English, Spanish 6025 6th Avenue Brooklyn
In [3]:
import requests
from pandas import Series

from os import environ

# NYC Geoclient v1 address endpoint; expand_row queries it once per row.
base_url = 'https://api.cityofnewyork.us/geoclient/v1/address.json'
# Credentials sent as app_id / app_key with every request.  Prefer supplying
# them through the environment so they are not committed with the notebook;
# the literals remain only as backward-compatible defaults.
geoclient_id = environ.get('GEOCLIENT_ID', '89f55ba2')
geoclient_key = environ.get(
    'GEOCLIENT_KEY', '3c19c2a1701bf0814a81acb99782f8ea')

# Geoclient response fields copied into each expanded row, in output order.
# The caller assigns column names by position, so this order must not change.
_GEOCLIENT_FIELDS = (
    'zipCode', 'communityDistrict',
    'censusTract2010', 'censusBlock2010',
    'censusTract2000', 'censusBlock2000',
    'nta', 'ntaName',
    'buildingIdentificationNumber',
    'latitude', 'longitude',
    'xCoordinate', 'yCoordinate',
)


def expand_row(row, timeout_in_seconds=10):
    """Look up one organization's address with the Geoclient service.

    The row's 'Address' is split at the first space into a house number and
    a street name; together with the row's 'Borough' and the module-level
    credentials they form the query sent to ``base_url``.

    Parameters
    ----------
    row : mapping with 'Address' and 'Borough' entries
    timeout_in_seconds : seconds to wait for the service before giving up

    Returns
    -------
    Series holding the thirteen values named in ``_GEOCLIENT_FIELDS``,
    in that order.

    Raises
    ------
    requests.HTTPError if the service answers with an error status.
    KeyError if the response lacks the 'address' object or one of the
    expected fields.
    """
    house_number, _, street_name = row['Address'].partition(' ')
    borough_name = row['Borough']
    response = requests.get(base_url, params={
        'houseNumber': house_number,
        'street': street_name,
        'borough': borough_name,
        'app_id': geoclient_id,
        'app_key': geoclient_key,
    }, timeout=timeout_in_seconds)  # without a timeout a stalled call hangs forever
    # Fail with the HTTP status instead of a confusing JSON/KeyError later on.
    response.raise_for_status()
    d = response.json()['address']
    try:
        values = [d[field_name] for field_name in _GEOCLIENT_FIELDS]
    except KeyError as error:
        # Same exception type as before, but say which address was at fault.
        raise KeyError('Geoclient response for %r, %r lacks %s' % (
            row['Address'], borough_name, error))
    return Series(values)

# Column labels for the values returned by expand_row, in the same order.
geoclient_column_names = [
    'ZipCode',
    'CommunityDistrict',
    '2010CensusTract',
    '2010CensusBlock',
    '2000CensusTract',
    '2000CensusBlock',
    'NeighborhoodCode',
    'NeighborhoodName',
    'BuildingIdNumber',
    'Latitude',
    'Longitude',
    'XCoordinate',
    'YCoordinate',
]
# Geocode every organization row, then label the resulting columns.
geoclient_table = organization_table.apply(expand_row, axis=1)
geoclient_table.columns = geoclient_column_names
# Append the geocoded columns to the original organization columns.
table = organization_table.join(geoclient_table)
table.head(2)
Out[3]:
<style> .dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; } </style>
Category Type Affiliation Name Language Address Borough ZipCode CommunityDistrict 2010CensusTract 2010CensusBlock 2000CensusTract 2000CensusBlock NeighborhoodCode NeighborhoodName BuildingIdNumber Latitude Longitude XCoordinate YCoordinate
0 Healthcare Health Center NYU Langone Health Family Physician Family Health Center English 5616 6th Avenue Brooklyn 11220 307 102 3000 102 3000 BK34 Sunset Park East 3015404 40.640272 -74.011772 0980983 0172545
1 Healthcare Health Center NYU Langone Health Family Support Center Family Health Center (Co... English, Spanish 6025 6th Avenue Brooklyn 11220 307 118 2000 118 2000 BK34 Sunset Park East 3143523 40.637961 -74.014149 0980323 0171703
2 Healthcare Health Center NYU Langone Health Flatbush Family Health Center English, French, Haitian Creole, Spanish 3414 Church Avenue Brooklyn 11203 317 856 1002 856 1002 BK91 East Flatbush-Farragut 3109422 40.651014 -73.945471 0999381 0176463
3 Healthcare Health Center NYU Langone Health Park Ridge Family Health Center English, Chinese, Spanish 6317 4th Avenue Brooklyn 11220 307 122 3001 122 3002 BK32 Sunset Park West 3144058 40.638986 -74.020340 0978605 0172077
4 Healthcare Health Center NYU Langone Health Park Slope Family Health Center English, Chinese, Spanish 220 13th Street Brooklyn 11215 306 141 3000 141 3000 BK37 Park Slope-Gowanus 3339179 40.667602 -73.990051 0987010 0182502
5 Healthcare Health Center NYU Langone Health Seventh Avenue Family Health Center Chinese, English 5008 7th Avenue Brooklyn 11220 307 98 3000 98 3000 BK34 Sunset Park East 3393316 40.642334 -74.006043 0982573 0173296
6 Healthcare Health Center NYU Langone Health Shore Road Family Health Center English 9000 Shore Road Brooklyn 11209 310 5201 1001 5201 1001 BK31 Bay Ridge 3322532 40.622092 -74.040244 0973078 0165924
7 Healthcare Health Center NYU Langone Health Sunset Park (Adult Medicine, Specialty Care an... Arabic, Chinese, English, Russian, Spanish 150 55th Street Brooklyn 11220 307 22 1001 22 1004 BK32 Sunset Park West 3014856 40.646941 -74.020930 0978442 0174975
8 Healthcare Health Center NYU Langone Health Sunset Park (Women's, Pediatrics and Adolescen... Arabic, Chinese, English, Russian, Spanish 5610 2nd Avenue Brooklyn 11220 307 22 1002 22 1005 BK32 Sunset Park West 3398930 40.645615 -74.020554 0978546 0174492
9 Healthcare Health Center NYU Langone Health Sunset Terrace Family Health Center English 514 49th Street Brooklyn 11220 307 98 2000 98 2001 BK34 Sunset Park East 3012967 40.645328 -74.009193 0981699 0174387
In [4]:
# Read the selected languages, one per line.  The with-block closes the file
# promptly; blank lines and surrounding whitespace are dropped so a stray
# space or empty line cannot interfere with matching.
with open(language_select_path, 'rt') as language_file:
    raw_lines = language_file.read().splitlines()
selected_languages = [line.strip() for line in raw_lines if line.strip()]
selected_languages
Out[4]:
['Arabic',
 'Chinese',
 'English',
 'French',
 'Haitian Creole',
 'Russian',
 'Spanish']
In [6]:
def filter_row_by_language(row):
    """Return True when the row's Language text contains every selected language.

    Each entry of the module-level ``selected_languages`` list is looked up
    as a substring of ``row['Language']``; the row passes only if all of
    them are found (an empty selection therefore passes every row).

    A non-string Language value is treated as an empty string, so such a row
    fails any non-empty selection instead of raising TypeError.
    """
    value = row['Language']
    if not isinstance(value, str):
        # An empty CSV cell is read as NaN (a float), which does not
        # support the ``in`` test below.
        value = ''
    return all(x in value for x in selected_languages)

from pandas import Series

# Build the boolean mask explicitly instead of DataFrame.apply(reduce=True):
# the reduce keyword was removed in pandas 1.0, where it is forwarded to the
# row function and raises TypeError.  An explicit Series also stays a proper
# boolean mask when the table has no rows.
mask = Series(
    [filter_row_by_language(row) for _, row in table.iterrows()],
    index=table.index, dtype=bool)
filtered_table = table[mask]
filtered_table
Out[6]:
<style> .dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; } </style>
Category Type Affiliation Name Language Address Borough ZipCode CommunityDistrict 2010CensusTract 2010CensusBlock 2000CensusTract 2000CensusBlock NeighborhoodCode NeighborhoodName BuildingIdNumber Latitude Longitude XCoordinate YCoordinate
In [6]:
import geopy
from os import environ

# Prefer an API key supplied through the environment so it is not committed
# with the notebook; the literal remains only as a backward-compatible default.
google_api_key = environ.get(
    'GOOGLE_API_KEY', 'AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w')
geocode = geopy.GoogleV3(api_key=google_api_key, timeout=3).geocode
target_location = geocode(target_address)
if target_location is None:
    # The geocoder returns None when nothing matches; fail with a clear
    # message instead of an AttributeError on the next line.
    raise ValueError('could not geocode target_address: %r' % (target_address,))
target_latlon = target_location.latitude, target_location.longitude
target_latlon
Out[6]:
(40.74786, -73.9758865)
In [8]:
try:
    # vincenty was deprecated in geopy 1.13 and removed in geopy 2.0;
    # geodesic is its replacement.
    from geopy.distance import geodesic as get_distance
except ImportError:
    # Older geopy releases only provide vincenty.
    from geopy.distance import vincenty as get_distance


def filter_row_by_distance(row):
    """Return True when the row lies within the search radius of the target.

    The distance is measured in miles between the row's
    ('Latitude', 'Longitude') pair and the module-level ``target_latlon``,
    and compared (strictly less than) with ``search_radius_in_miles``.
    """
    source_latlon = row['Latitude'], row['Longitude']
    return get_distance(source_latlon, target_latlon).miles < search_radius_in_miles

from pandas import Series

# Build the boolean mask explicitly instead of DataFrame.apply(reduce=True):
# the reduce keyword was removed in pandas 1.0, where it is forwarded to the
# row function and raises TypeError.  The explicit Series also yields a valid
# (empty) mask when the language filter left no rows.
mask = Series(
    [filter_row_by_distance(row) for _, row in filtered_table.iterrows()],
    index=filtered_table.index, dtype=bool)
nearby_filtered_table = filtered_table[mask]
nearby_filtered_table
Out[8]:
<style> .dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; } </style>
In [11]:
from os.path import join
# Write the organizations that passed both filters to a CSV in target_folder.
target_path = join(target_folder, 'selected-organizations.csv')
nearby_filtered_table.to_csv(target_path, index=False)
# Report the row count and the CSV path on stdout; the same file is listed
# under both the table name and the geotable name.
report_lines = [
    'selected_organization_count = %s' % len(nearby_filtered_table),
    'selected_organization_table_path = %s' % target_path,
    'selected_organization_geotable_path = %s' % target_path,
]
print('\n'.join(report_lines))
selected_organization_count = 0
selected_organization_table_path = /tmp/selected-organizations.csv
selected_organization_geotable_path = /tmp/selected-organizations.csv