gist




Pay Notebook Creator: Michelle Chung0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [1]:
#CrossCompute

outcome_select = """
    Total Grads - % of cohort

    Total Grads - % of cohort
    Total Grads - n
    Total Regents - n
    Total Regents - % of cohort
    Total Regents - % of grads
    Advanced Regents - n
    Advanced Regents - % of cohort
    Advanced Regents - % of grads
    Regents w/o Advanced - n
    Regents w/o Advanced - % of cohort
    Regents w/o Advanced - % of grads
    Local - n
    Local - % of cohort
    Local - % of grads
    Still Enrolled - n
    Still Enrolled - % of cohort
    Dropped Out - n
    Dropped Out - % of cohort
"""
attribute_select = """
    diversity_index

    diversity_index
"""

# attribute_select = """
#     diversity_index

#     diversity_index
#     economic_need_index
#     students_with_disabilities_2
#     english_language_learners_2
#     female_2
# """

target_folder = '/tmp'
In [2]:
selected_outcome = outcome_select.strip().splitlines()[0]
selected_outcome

selected_attribute = attribute_select.strip().splitlines()[0]
selected_attribute
Out[2]:
'diversity_index'
In [3]:
url = 'https://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=Shapefile'
In [4]:
import geotable
In [5]:
dist_table = geotable.load(url)
In [6]:
dist_table.iloc[0]
Out[6]:
school_dis                                                        16
shape_area                                               4.67636e+07
shape_leng                                                   35848.9
geometry_object    POLYGON ((-73.93311862859143 40.69579115384632...
geometry_layer       geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b
geometry_proj4                   +proj=longlat +ellps=WGS84 +no_defs
Name: 0, dtype: object
In [7]:
url = 'https://data.cityofnewyork.us/resource/98et-3mve.csv'
In [8]:
import pandas as pd

def load(
    endpoint_url,
    selected_columns=None,
    buffer_size=1000,
    search_term_by_column=None,
    **kw,
):
    buffer_url = (f'{endpoint_url}?$limit={buffer_size}')
    if selected_columns:
        select_string = ','.join(selected_columns)
        buffer_url += f'&$select={select_string}'
    for column, search_term in (search_term_by_column or {}).items():
        buffer_url += f'&$where={column}+like+"%25{search_term}%25"'
    print(buffer_url)
    tables = []
    
    if endpoint_url.endswith('.json'):
        f = pd.read_json
    else:
        f = pd.read_csv

    t = f(buffer_url, **kw)
    while len(t):
        print(len(tables) * buffer_size + len(t))
        tables.append(t)
        offset = buffer_size * len(tables)
        t = f(buffer_url + f'&$offset={offset}', **kw)
    return pd.concat(tables, ignore_index=True, sort=False)
In [9]:
demo_table = load(url, na_values=['No Data'])
https://data.cityofnewyork.us/resource/98et-3mve.csv?$limit=1000
1000
2000
3000
4000
5000
6000
7000
8000
8972
In [10]:
demo_table = demo_table[demo_table.year == '2016-17']
In [11]:
demo_table.iloc[0]
Out[11]:
asian_1                                                              14
asian_2                                                             7.9
black_1                                                              51
black_2                                                            28.7
dbn                                                              01M015
economic_need_index                                               89.2%
english_language_learners_1                                          12
english_language_learners_2                                         6.7
female_1                                                             83
female_2                                                           46.6
grade_1                                                              33
grade_10                                                              0
grade_11                                                              0
grade_12                                                              0
grade_2                                                              27
grade_3                                                              31
grade_4                                                              24
grade_5                                                              18
grade_6                                                               0
grade_7                                                               0
grade_8                                                               0
grade_9                                                               0
grade_k                                                              28
grade_pk_half_day_full_day                                           17
hispanic_1                                                          105
hispanic_2                                                           59
male_1                                                               95
male_2                                                             53.4
multiple_race_categories_not_represented_1                            4
multiple_race_categories_not_represented_2                          2.2
poverty_1                                                           152
poverty_2                                                          85.4
school_name                                   P.S. 015 Roberto Clemente
students_with_disabilities_1                                         51
students_with_disabilities_2                                       28.7
total_enrollment                                                    178
white_1                                                               4
white_2                                                             2.2
year                                                            2016-17
Name: 3, dtype: object
In [12]:
# ADD DIVERSITY INDEX

def d_index(row):
    o = row['multiple_race_categories_not_represented_2']
    total = 100 - o
    target = total / 4
    abs_distance = (abs(target - row['asian_2'])) + abs((target - row['black_2'])) + abs((target - row['hispanic_2'])) + abs((target - row['white_2']))    
    if o == 0.0:
        return abs_distance 
    if o > 0.0:
        # diversity index
        return abs_distance * (1/o)
In [13]:
demo_table['diversity_index'] = demo_table.apply(d_index, axis=1)
In [14]:
from sklearn import preprocessing

x = demo_table['diversity_index'].values.reshape(-1,1) #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
demo_table['scaled_index'] = pd.DataFrame(x_scaled)
In [15]:
len(demo_table.scaled_index)
Out[15]:
1823
In [16]:
demo_table.scaled_index.describe()
Out[16]:
count    367.000000
mean       0.062403
std        0.084847
min        0.000910
25%        0.015049
50%        0.032192
75%        0.072222
max        0.695898
Name: scaled_index, dtype: float64
In [17]:
demo_table.iloc[0]
Out[17]:
asian_1                                                              14
asian_2                                                             7.9
black_1                                                              51
black_2                                                            28.7
dbn                                                              01M015
economic_need_index                                               89.2%
english_language_learners_1                                          12
english_language_learners_2                                         6.7
female_1                                                             83
female_2                                                           46.6
grade_1                                                              33
grade_10                                                              0
grade_11                                                              0
grade_12                                                              0
grade_2                                                              27
grade_3                                                              31
grade_4                                                              24
grade_5                                                              18
grade_6                                                               0
grade_7                                                               0
grade_8                                                               0
grade_9                                                               0
grade_k                                                              28
grade_pk_half_day_full_day                                           17
hispanic_1                                                          105
hispanic_2                                                           59
male_1                                                               95
male_2                                                             53.4
multiple_race_categories_not_represented_1                            4
multiple_race_categories_not_represented_2                          2.2
poverty_1                                                           152
poverty_2                                                          85.4
school_name                                   P.S. 015 Roberto Clemente
students_with_disabilities_1                                         51
students_with_disabilities_2                                       28.7
total_enrollment                                                    178
white_1                                                               4
white_2                                                             2.2
year                                                            2016-17
diversity_index                                                 35.2727
scaled_index                                                   0.096328
Name: 3, dtype: object
In [18]:
selected_attribute
Out[18]:
'diversity_index'
In [19]:
len(demo_table)

if selected_attribute in demo_table:
    demo_table = demo_table.dropna(subset=[selected_attribute])
    if type(demo_table[selected_attribute]) == str:
        demo_table[selected_attribute] = demo_table[selected_attribute].str.replace('%', '')
    demo_table[selected_attribute] = demo_table[selected_attribute].astype('float')
    
In [20]:
selected_attribute
Out[20]:
'diversity_index'
In [21]:
demo_table.year
Out[21]:
3       2016-17
8       2016-17
13      2016-17
18      2016-17
23      2016-17
28      2016-17
33      2016-17
38      2016-17
43      2016-17
48      2016-17
53      2016-17
58      2016-17
63      2016-17
68      2016-17
73      2016-17
78      2016-17
83      2016-17
88      2016-17
93      2016-17
98      2016-17
103     2016-17
109     2016-17
114     2016-17
119     2016-17
124     2016-17
129     2016-17
133     2016-17
138     2016-17
143     2016-17
148     2016-17
         ...   
8839    2016-17
8843    2016-17
8847    2016-17
8851    2016-17
8855    2016-17
8860    2016-17
8863    2016-17
8866    2016-17
8871    2016-17
8876    2016-17
8879    2016-17
8884    2016-17
8887    2016-17
8892    2016-17
8897    2016-17
8902    2016-17
8905    2016-17
8910    2016-17
8915    2016-17
8920    2016-17
8925    2016-17
8927    2016-17
8929    2016-17
8940    2016-17
8945    2016-17
8950    2016-17
8955    2016-17
8960    2016-17
8965    2016-17
8970    2016-17
Name: year, Length: 1823, dtype: object
In [22]:
#demo_table['Diversity Index'] = demo_table['economic_need_index']
In [23]:
endpoint_url = 'https://data.cityofnewyork.us/resource/r2nx-nhxe.csv'
In [24]:
# Load schools
school_location_table = load(
    endpoint_url,
    buffer_size=1000)
school_location_table[:5]
https://data.cityofnewyork.us/resource/r2nx-nhxe.csv?$limit=1000
1000
1823
Out[24]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
:@computed_region_92fq_4b7q :@computed_region_efsh_h5xi :@computed_region_f5dn_yrer :@computed_region_sbqj_enih :@computed_region_yeji_bk3q admin_district_location_code administrative_district_name ats_system_code beds_number borough_block_lot ... primary_building_code principal_name principal_phone_number principal_title school_support_team_leader_name school_support_team_name state_code status_descriptions x_coordinate y_coordinate
0 50.0 11729.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M015 310100010015 1003740020 ... M015 IRENE SANCHEZ 212-228-8730 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 990141.0 202349.0
1 50.0 11724.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M019 310100010019 1004530034 ... M019 JACQUELINE FLANAGAN 212-533-5340 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 988547.0 205239.0
2 32.0 11723.0 70.0 4.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M020 310100010020 1003550001 ... M020 SARAH PINTO VIAGRAN 212-254-9577 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 988044.0 202068.0
3 50.0 11729.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M034 310100010034 1003810038 ... M034 ANGELIKI LOUKATOS 212-228-4433 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 991163.0 203782.0
4 50.0 11729.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M063 310100010063 1004310014 ... M063 DARLENE CAMERON 212-674-3180 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 988071.0 203210.0
<p>5 rows × 48 columns</p>
In [25]:
school_location_table.iloc[0]
Out[25]:
:@computed_region_92fq_4b7q                                                         50
:@computed_region_efsh_h5xi                                                      11729
:@computed_region_f5dn_yrer                                                         70
:@computed_region_sbqj_enih                                                          5
:@computed_region_yeji_bk3q                                                          4
admin_district_location_code                                                      M801
administrative_district_name                              COMMUNITY SCHOOL DISTRICT 01
ats_system_code                                                           01M015      
beds_number                                                               310100010015
borough_block_lot                                                           1003740020
census_tract                                                                      2601
community_district                                                                 103
community_school_sup_name                                           PHILLIPS, DANIELLA
council_district                                                                     2
fax_number                                                                212-477-0931
field_support_center_leader_name                                             CHU, YUET
field_support_center_name                             Field Support Center - Manhattan
fiscal_year                                                                       2018
geographical_district_code                                                           1
grades_final_text                                                 PK,0K,01,02,03,04,05
grades_text                                                    PK,0K,01,02,03,04,05,SE
highschool_network_location_code                                                   NaN
highschool_network_name                                                            NaN
highschool_network_superintendent                                                  NaN
location_1                                                POINT (-73.978747 40.722075)
location_1_address                                                   333 EAST 4 STREET
location_1_city                                                              MANHATTAN
location_1_state                                                                    NY
location_1_zip                                                                   10009
location_category_description                                               Elementary
location_code                                                                     M015
location_name                                                P.S. 015 Roberto Clemente
location_type_description                                             General Academic
managed_by_name                                                                    DOE
nta                                                                               MN28
nta_name                             Lower East Side                               ...
open_date                                                      1904-07-01T00:00:00.000
primary_address_line_1                                              333 EAST  4 STREET
primary_building_code                                                             M015
principal_name                                                           IRENE SANCHEZ
principal_phone_number                                                    212-228-8730
principal_title                                                              PRINCIPAL
school_support_team_leader_name                                                    NaN
school_support_team_name                              School Support Team 3- Manhattan
state_code                                                                          NY
status_descriptions                                                               Open
x_coordinate                                                                    990141
y_coordinate                                                                    202349
Name: 0, dtype: object
In [26]:
school_location_table.iloc[0]['location_1']
Out[26]:
'POINT (-73.978747 40.722075)'
In [27]:
school_location_table.iloc[0]['ats_system_code']
Out[27]:
'01M015      '
In [28]:
school_location_table['DBN'] = school_location_table['ats_system_code'].str.strip()
In [29]:
school_location_table.iloc[0]['DBN']
Out[29]:
'01M015'
In [30]:
school_location_table = school_location_table.rename(columns={
    'location_1': 'WKT',
    'location_name': 'School Name',
})
In [31]:
trimmed_school_location_table = school_location_table[[
    'DBN',
    'WKT',
    'School Name',
]]
In [32]:
school_table = pd.merge(
    trimmed_school_location_table,
    demo_table,
    left_on='DBN',
    right_on='dbn')
len(school_table)
Out[32]:
1802
In [33]:
school_table = school_table.dropna(subset=['WKT'])
In [34]:
len(school_table)
Out[34]:
1801
In [35]:
school_table.iloc[0]
Out[35]:
DBN                                                                 01M015
WKT                                           POINT (-73.978747 40.722075)
School Name                                      P.S. 015 Roberto Clemente
asian_1                                                                 14
asian_2                                                                7.9
black_1                                                                 51
black_2                                                               28.7
dbn                                                                 01M015
economic_need_index                                                  89.2%
english_language_learners_1                                             12
english_language_learners_2                                            6.7
female_1                                                                83
female_2                                                              46.6
grade_1                                                                 33
grade_10                                                                 0
grade_11                                                                 0
grade_12                                                                 0
grade_2                                                                 27
grade_3                                                                 31
grade_4                                                                 24
grade_5                                                                 18
grade_6                                                                  0
grade_7                                                                  0
grade_8                                                                  0
grade_9                                                                  0
grade_k                                                                 28
grade_pk_half_day_full_day                                              17
hispanic_1                                                             105
hispanic_2                                                              59
male_1                                                                  95
male_2                                                                53.4
multiple_race_categories_not_represented_1                               4
multiple_race_categories_not_represented_2                             2.2
poverty_1                                                              152
poverty_2                                                             85.4
school_name                                      P.S. 015 Roberto Clemente
students_with_disabilities_1                                            51
students_with_disabilities_2                                          28.7
total_enrollment                                                       178
white_1                                                                  4
white_2                                                                2.2
year                                                               2016-17
diversity_index                                                    35.2727
scaled_index                                                      0.096328
Name: 0, dtype: object
In [36]:
dist_table.iloc[0]
Out[36]:
school_dis                                                        16
shape_area                                               4.67636e+07
shape_leng                                                   35848.9
geometry_object    POLYGON ((-73.93311862859143 40.69579115384632...
geometry_layer       geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b
geometry_proj4                   +proj=longlat +ellps=WGS84 +no_defs
Name: 0, dtype: object
In [37]:
geometry_wkt = school_table.iloc[0]['WKT']
In [38]:
geometry_wkt
Out[38]:
'POINT (-73.978747 40.722075)'
In [39]:
from shapely import wkt
In [40]:
g = wkt.loads(geometry_wkt)
In [41]:
g
Out[41]:
<shapely.geometry.point.Point at 0x7fbaa0032940>
In [42]:
district_polygons = dist_table['geometry_object']
In [43]:
flags = [x.contains(g) for x in district_polygons]

flags
Out[43]:
[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]
In [44]:
import numpy as np
In [45]:
index = np.argmax(flags)
In [46]:
dist_table.iloc[index]
Out[46]:
school_dis                                                         1
shape_area                                               3.51607e+07
shape_leng                                                   28641.3
geometry_object    POLYGON ((-73.97177410965313 40.72582128133706...
geometry_layer       geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b
geometry_proj4                   +proj=longlat +ellps=WGS84 +no_defs
Name: 17, dtype: object
In [47]:
index
Out[47]:
17
In [48]:
len(flags)
Out[48]:
33
In [49]:
int(dist_table.iloc[index]['school_dis'])
Out[49]:
1
In [50]:
len(school_table)
Out[50]:
1801
In [51]:
from geotable import ColorfulGeometryCollection, GeoTable
In [52]:
dist_table
Out[52]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
school_dis shape_area shape_leng geometry_object geometry_layer geometry_proj4
0 16.0 4.676362e+07 35848.904605 POLYGON ((-73.93311862859143 40.69579115384632... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
1 32.0 5.189850e+07 37251.057847 POLYGON ((-73.91180710069435 40.70343495202662... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
2 6.0 9.634170e+07 70447.849084 POLYGON ((-73.92640556921116 40.87762147653734... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
3 31.0 1.604472e+09 434471.412859 (POLYGON ((-74.05050806403247 40.5664220341608... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
4 7.0 9.226247e+07 65294.452403 (POLYGON ((-73.89680883223774 40.7958084451597... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
5 23.0 4.740069e+07 40317.452033 (POLYGON ((-73.92044366203014 40.6656262871675... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
6 29.0 4.201981e+08 135035.241651 POLYGON ((-73.73816144093141 40.72895809117297... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
7 26.0 4.247909e+08 125677.678898 POLYGON ((-73.74344992332192 40.77824115291502... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
8 15.0 1.961534e+08 153439.165680 POLYGON ((-73.98633135042395 40.69105051012824... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
9 17.0 1.284414e+08 68341.398899 POLYGON ((-73.92044366203014 40.6656262871675,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
10 19.0 2.034175e+08 184183.167312 (POLYGON ((-73.846736514711 40.60485301485166,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
11 13.0 1.048706e+08 86635.210559 POLYGON ((-73.97906084911834 40.70594602894087... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
12 18.0 1.751488e+08 121184.158477 (POLYGON ((-73.86706149472118 40.5820879767934... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
13 20.0 2.426965e+08 94309.778946 POLYGON ((-74.02552971543656 40.65147855069281... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
14 3.0 1.134889e+08 52072.051321 POLYGON ((-73.95671863064405 40.78660079332199... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
15 5.0 5.251977e+07 44469.588221 POLYGON ((-73.93515659239551 40.83268240623763... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
16 9.0 8.341539e+07 46648.958586 POLYGON ((-73.9212971968614 40.85428933985649,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
17 1.0 3.516075e+07 28641.276279 POLYGON ((-73.97177410965313 40.72582128133706... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
18 14.0 1.503102e+08 95792.082090 POLYGON ((-73.95439555417087 40.73911477252251... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
19 4.0 5.262043e+07 52061.828459 (POLYGON ((-73.92133752419399 40.8008521064970... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
20 10.0 2.825415e+08 94957.570434 POLYGON ((-73.86789798628736 40.90294017690526... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
21 12.0 6.907182e+07 48527.595776 POLYGON ((-73.88284445574813 40.84781722645163... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
22 25.0 4.436285e+08 175827.007127 POLYGON ((-73.82049919995312 40.80101146781899... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
23 28.0 2.475679e+08 114694.912786 POLYGON ((-73.84485477879177 40.7357514698091,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
24 24.0 3.949782e+08 127343.703736 (POLYGON ((-73.90641585511733 40.7398683641967... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
25 30.0 3.181290e+08 150392.978241 POLYGON ((-73.90647314610101 40.79018117520807... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
26 21.0 2.101971e+08 123858.087345 POLYGON ((-73.96184657346174 40.62757081710622... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
27 22.0 3.855533e+08 271718.504936 (POLYGON ((-73.91990064270161 40.5996005215871... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
28 27.0 7.955970e+08 589135.490708 (POLYGON ((-73.82784008953526 40.5887858248046... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
29 11.0 3.926651e+08 305305.869806 (POLYGON ((-73.78833349834532 40.8346671297593... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
30 8.0 2.588266e+08 223080.044096 (POLYGON ((-73.83979488562292 40.8356192069902... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
31 2.0 2.804004e+08 212406.819436 (POLYGON ((-74.0438776163991 40.69018767537123... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
32 10.0 3.282963e+06 7883.372664 (POLYGON ((-73.9089323517538 40.8721573479701,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs
In [53]:
dist_table.geometry_object[0].wkt
Out[53]:
'POLYGON ((-73.93311862859143 40.69579115384632, -73.93236605675587 40.69535837729649, -73.9319289006206 40.6951146575314, -73.93177404555487 40.69502821484371, -73.93147184545109 40.69486090242515, -73.93121199886629 40.69471276978224, -73.93114516482393 40.69467397486358, -73.930511465969 40.69431652304853, -73.92970192402746 40.69386339761171, -73.92882897891457 40.69335047407161, -73.92882834168937 40.6933500992537, -73.92864268981751 40.69324101255102, -73.92850659671787 40.69316572635911, -73.92804330687132 40.69290417236994, -73.92781120671985 40.6927742086535, -73.92750685270927 40.69260381145623, -73.92707069461525 40.69234622359373, -73.92644992133326 40.69200176882866, -73.92631612531525 40.69192845396802, -73.92556064122292 40.691490840003, -73.92534903613239 40.69136694183798, -73.92439183303664 40.69083174877439, -73.92429213268012 40.69077335685399, -73.92364379835564 40.6903988375817, -73.92316966710699 40.69012340599072, -73.92280702359515 40.68992392534457, -73.92207133797079 40.68948984079818, -73.92130587808545 40.68907164359784, -73.92121369950202 40.68901947711544, -73.92096103866248 40.68887205183747, -73.92055013077213 40.68864092211603, -73.91980961874593 40.6882112414115, -73.91906109224908 40.68778436875932, -73.91878323293736 40.68762608844376, -73.91829345832642 40.68735666781527, -73.91804606958546 40.68721324726834, -73.91768113918805 40.68700409429878, -73.91755226067554 40.68693022304933, -73.91679832884886 40.68649719987184, -73.91679482480477 40.68647585566188, -73.91677745031008 40.68637001456988, -73.9167269298151 40.6861315560806, -73.91739374561757 40.68605758524438, -73.91784937410198 40.68623316741141, -73.91800616261635 40.6858220326253, -73.91770109452972 40.68550035866507, -73.9178810736956 40.68509394091895, -73.91755763291395 40.68476719192117, -73.91776142009367 40.68432465483016, -73.91741294336087 40.68403405152289, -73.91758679937763 40.68391130921148, -73.91754176551858 40.68365836480015, -73.91749199778137 40.68340840097156, -73.91726632656643 40.68330322455323, -73.91748255687848 40.68289735539763, -73.91712138528143 40.68256847168654, -73.91732345753172 40.68244571271426, -73.9172785838021 40.68218148175983, -73.91722749280017 40.68193500491278, -73.9169763289098 40.68183415891587, -73.91717325370381 40.68163401677827, -73.91713117516936 40.68143498166116, -73.91708045205013 40.68119847288923, -73.91683011483136 40.68110098072739, -73.91676207442167 40.68075559994087, -73.91668638370811 40.68037137878948, -73.9165399872181 40.67963749735295, -73.91639300075401 40.67890584888099, -73.91630119795475 40.67857711243357, -73.91635492184039 40.6778235366874, -73.91645747530021 40.67672602823275, -73.91646486032333 40.67664704004651, -73.91647371343048 40.67655700413084, -73.91654436298559 40.67583843461703, -73.91661697848021 40.67505994757939, -73.91668911898741 40.67428057620312, -73.91677231309316 40.67339069306274, -73.91721449620965 40.67385599371995, -73.91907827608151 40.67397531376398, -73.91954092546408 40.67354187795393, -73.9200025573228 40.67398128283877, -73.92158580075649 40.67405555683759, -73.92213153379178 40.67368388950964, -73.92298622413946 40.67373038435373, -73.92309797315002 40.67373646296576, -73.92324436873267 40.67374442565551, -73.92336008453272 40.67375071908456, -73.92508152301917 40.67384433817344, -73.92550081445938 40.67361688849451, -73.92743998148244 40.67370890462769, -73.92784085188026 40.67399598368088, -73.92788485920073 40.67355077838425, -73.92792887672408 40.67310556484484, -73.9282445167355 40.67293933694761, -73.9282717980865 40.67252471210702, -73.92801317657924 40.67220920817189, -73.92826528535652 40.67222507724347, -73.92833507240064 40.67222889379835, -73.92843446083054 40.67223432828423, -73.92851854078968 40.67223892474855, -73.92862604211787 40.67224480269724, -73.92870956326151 40.67224936969293, -73.92882833736527 40.67225586323351, -73.92891232629145 40.67226045499795, -73.92939948737151 40.67228708901998, -73.9307932883315 40.67236495699643, -73.93355357161703 40.67251450202632, -73.9363431994143 40.67269957696197, -73.93667540626117 40.6727143953574, -73.93682717835949 40.67271928165021, -73.93708255238936 40.67272750062104, -73.93754574483314 40.672742485227, -73.93753568983917 40.67332211170566, -73.93753114689092 40.67358374466557, -73.93752485721895 40.67394597310864, -73.93751992857378 40.67422980176767, -73.93806258654983 40.67447201037736, -73.93832822275243 40.67458578836195, -73.93889518673745 40.67459873386304, -73.93877769674783 40.67472089131839, -73.93873696341934 40.67538905896871, -73.93884149817444 40.67549215171548, -73.93843297207545 40.67556514632768, -73.93852793096612 40.67617239034053, -73.93876783935517 40.67627240938755, -73.93853686439037 40.67634626398586, -73.93843334915296 40.67693817086859, -73.9386949143591 40.67705082000203, -73.93851537899995 40.67741865687472, -73.93863289914781 40.67776992010254, -73.93862492563589 40.67786256958296, -73.9386168305744 40.67794381002248, -73.93850818855235 40.67903432002991, -73.93843613326652 40.67978585170399, -73.93739763665229 40.67972990017674, -73.93764243456839 40.68096300863983, -73.93778801579033 40.68169433234844, -73.9348625715524 40.68203056869989, -73.93500821720075 40.68276376850579, -73.93515472893986 40.68349718465807, -73.93807695931454 40.68316090298936, -73.9382232586039 40.68389329800515, -73.93836816506894 40.68462560189113, -73.93851929553905 40.68535451524578, -73.93865905188188 40.68609038907362, -73.93880456947376 40.68682474574926, -73.93895074350998 40.68755641035276, -73.93938561082579 40.68787113499809, -73.9413859614605 40.6876682688049, -73.94178527584324 40.68722837212115, -73.9423520278467 40.68759150082202, -73.94428391577316 40.68738505531383, -73.94462127846789 40.68690436166384, -73.94471297541574 40.68737334166219, -73.94472684342399 40.68744426893098, -73.94476925905255 40.6876366000215, -73.94489497385723 40.68828685373113, -73.94491170954201 40.68837022260124, -73.94505645129829 40.68910234632374, -73.94519881242742 40.68983988030254, -73.94534976297854 40.69056729694257, -73.94549553209639 40.69130013188384, -73.94564166805849 40.69203345190423, -73.94578713653169 40.69276523278351, -73.94593170750511 40.69349761558435, -73.94607851790597 40.69423024360605, -73.94324249086142 40.69455748523078, -73.94338802084518 40.69528898951384, -73.94352527471476 40.69603085523811, -73.94335303762136 40.69605059879613, -73.94321604415771 40.69606631370468, -73.94279300209647 40.69611479586432, -73.94242149063366 40.69615738067107, -73.9423368032754 40.69616708769403, -73.94226167920422 40.696175698798, -73.94217061244066 40.69618613693807, -73.94208643350876 40.69619578590027, -73.94198436724183 40.69620748536691, -73.94152445883948 40.6962601986313, -73.94070625153131 40.69635305387921, -73.94040726721202 40.69638987090211, -73.94020994337993 40.69641107787343, -73.94003822508267 40.69643083533578, -73.93976769964368 40.69646329104629, -73.93948446000253 40.69649454647407, -73.93917199789162 40.69653049512544, -73.93884866773332 40.69656769159932, -73.93867948785814 40.69658715370254, -73.93779110726068 40.69668934223279, -73.93763027449141 40.69595086258837, -73.93755700214285 40.69558451152044, -73.93748376644189 40.6952184321377, -73.9370544081801 40.69553123358047, -73.9348298043286 40.69575489184654, -73.9345601174396 40.69555483174558, -73.93463155014204 40.69592163383321, -73.93470512606679 40.69628830975414, -73.9347969968416 40.69673659235447, -73.93468892593663 40.69667495388458, -73.9346067517839 40.69662808583959, -73.93457999376531 40.69661282447046, -73.93448784825009 40.69656026847871, -73.93413397294013 40.69635843140355, -73.93382817155928 40.69618427730787, -73.93311862859143 40.69579115384632))'
In [54]:
school_table.WKT[0]
Out[54]:
'POINT (-73.978747 40.722075)'
In [55]:
len(dist_table.school_dis.unique())
Out[55]:
32
In [56]:
# aggregate districts
from shapely import wkt
import numpy as np

district_polygons = dist_table['geometry_object']

def get_district(geometry_wkt):
    g = wkt.loads(geometry_wkt)
    #flags = [x.contains(g) for x in district_polygons]
    flags = [x.intersects(g) for x in district_polygons]
    index = np.argmax(flags)
    district = int(dist_table.iloc[index]['school_dis'])
    return district

school_table['district'] = school_table['WKT'].apply(get_district)
In [57]:
len(school_table.district.unique())
Out[57]:
32
In [58]:
len(dist_table)
Out[58]:
33
In [59]:
mean_table = school_table.groupby('district').mean()
In [60]:
mean_table = mean_table.reset_index()
In [61]:
mean_table = mean_table[[
    'district',
    selected_attribute,
]]
In [62]:
mean_table
Out[62]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
district diversity_index
0 1 74.392314
1 2 36.504481
2 3 45.148505
3 4 74.202823
4 5 74.205055
5 6 254.660597
6 7 129.066478
7 8 111.027489
8 9 171.782841
9 10 151.978352
10 11 54.424475
11 12 118.038638
12 13 51.905636
13 14 92.712426
14 15 80.325170
15 16 95.623066
16 17 73.062571
17 18 134.915158
18 19 73.366731
19 20 158.156244
20 21 59.592857
21 22 55.823892
22 23 92.098260
23 24 189.934931
24 25 65.688042
25 26 31.956221
26 27 34.876088
27 28 17.399788
28 29 61.776259
29 30 81.290729
30 31 57.090485
31 32 204.707566
In [63]:
dist_table.iloc[0]
Out[63]:
school_dis                                                        16
shape_area                                               4.67636e+07
shape_leng                                                   35848.9
geometry_object    POLYGON ((-73.93311862859143 40.69579115384632...
geometry_layer       geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b
geometry_proj4                   +proj=longlat +ellps=WGS84 +no_defs
Name: 0, dtype: object
In [64]:
mean_table.iloc[0]
Out[64]:
district            1.000000
diversity_index    74.392314
Name: 0, dtype: float64
In [65]:
t = pd.merge(dist_table, mean_table, left_on='school_dis', right_on='district')
In [66]:
map_table = t.copy()
In [67]:
map_table
Out[67]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
school_dis shape_area shape_leng geometry_object geometry_layer geometry_proj4 district diversity_index
0 16.0 4.676362e+07 35848.904605 POLYGON ((-73.93311862859143 40.69579115384632... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 16 95.623066
1 32.0 5.189850e+07 37251.057847 POLYGON ((-73.91180710069435 40.70343495202662... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 32 204.707566
2 6.0 9.634170e+07 70447.849084 POLYGON ((-73.92640556921116 40.87762147653734... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 6 254.660597
3 31.0 1.604472e+09 434471.412859 (POLYGON ((-74.05050806403247 40.5664220341608... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 31 57.090485
4 7.0 9.226247e+07 65294.452403 (POLYGON ((-73.89680883223774 40.7958084451597... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 7 129.066478
5 23.0 4.740069e+07 40317.452033 (POLYGON ((-73.92044366203014 40.6656262871675... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 23 92.098260
6 29.0 4.201981e+08 135035.241651 POLYGON ((-73.73816144093141 40.72895809117297... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 29 61.776259
7 26.0 4.247909e+08 125677.678898 POLYGON ((-73.74344992332192 40.77824115291502... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 26 31.956221
8 15.0 1.961534e+08 153439.165680 POLYGON ((-73.98633135042395 40.69105051012824... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 15 80.325170
9 17.0 1.284414e+08 68341.398899 POLYGON ((-73.92044366203014 40.6656262871675,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 17 73.062571
10 19.0 2.034175e+08 184183.167312 (POLYGON ((-73.846736514711 40.60485301485166,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 19 73.366731
11 13.0 1.048706e+08 86635.210559 POLYGON ((-73.97906084911834 40.70594602894087... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 13 51.905636
12 18.0 1.751488e+08 121184.158477 (POLYGON ((-73.86706149472118 40.5820879767934... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 18 134.915158
13 20.0 2.426965e+08 94309.778946 POLYGON ((-74.02552971543656 40.65147855069281... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 20 158.156244
14 3.0 1.134889e+08 52072.051321 POLYGON ((-73.95671863064405 40.78660079332199... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 3 45.148505
15 5.0 5.251977e+07 44469.588221 POLYGON ((-73.93515659239551 40.83268240623763... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 5 74.205055
16 9.0 8.341539e+07 46648.958586 POLYGON ((-73.9212971968614 40.85428933985649,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 9 171.782841
17 1.0 3.516075e+07 28641.276279 POLYGON ((-73.97177410965313 40.72582128133706... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 1 74.392314
18 14.0 1.503102e+08 95792.082090 POLYGON ((-73.95439555417087 40.73911477252251... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 14 92.712426
19 4.0 5.262043e+07 52061.828459 (POLYGON ((-73.92133752419399 40.8008521064970... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 4 74.202823
20 10.0 2.825415e+08 94957.570434 POLYGON ((-73.86789798628736 40.90294017690526... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 10 151.978352
21 10.0 3.282963e+06 7883.372664 (POLYGON ((-73.9089323517538 40.8721573479701,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 10 151.978352
22 12.0 6.907182e+07 48527.595776 POLYGON ((-73.88284445574813 40.84781722645163... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 12 118.038638
23 25.0 4.436285e+08 175827.007127 POLYGON ((-73.82049919995312 40.80101146781899... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 25 65.688042
24 28.0 2.475679e+08 114694.912786 POLYGON ((-73.84485477879177 40.7357514698091,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 28 17.399788
25 24.0 3.949782e+08 127343.703736 (POLYGON ((-73.90641585511733 40.7398683641967... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 24 189.934931
26 30.0 3.181290e+08 150392.978241 POLYGON ((-73.90647314610101 40.79018117520807... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 30 81.290729
27 21.0 2.101971e+08 123858.087345 POLYGON ((-73.96184657346174 40.62757081710622... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 21 59.592857
28 22.0 3.855533e+08 271718.504936 (POLYGON ((-73.91990064270161 40.5996005215871... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 22 55.823892
29 27.0 7.955970e+08 589135.490708 (POLYGON ((-73.82784008953526 40.5887858248046... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 27 34.876088
30 11.0 3.926651e+08 305305.869806 (POLYGON ((-73.78833349834532 40.8346671297593... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 11 54.424475
31 8.0 2.588266e+08 223080.044096 (POLYGON ((-73.83979488562292 40.8356192069902... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 8 111.027489
32 2.0 2.804004e+08 212406.819436 (POLYGON ((-74.0438776163991 40.69018767537123... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 2 36.504481
In [68]:
map_table['FillBlues'] = map_table[selected_attribute]
In [69]:
school_map_table = school_table.copy()
In [70]:
school_map_table['geometry_object'] = school_map_table.WKT.apply(wkt.loads)
In [71]:
district_map_table = t.copy()
district_map_table['FillBlues'] = district_map_table[selected_attribute]
school_map_table = school_table.copy()
#school_map_table['FillBlues'] = district_map_table['economic_need_index'].max()
map_table = pd.concat([
	#district_map_table,
school_map_table,
], sort=False)
In [72]:
url = 'https://data.cityofnewyork.us/api/views/vh2h-md7a/rows.csv'
#school_graduation_table = pd.read_csv(url)
#len(school_graduation_table)
In [73]:
#school_graduation_table.iloc[0]
In [74]:
#school_graduation_table.groupby('DBN').mean()[:5]
In [75]:
#school_graduation_table = school_graduation_table[['DBN', 'School Name', 'Total Grads - % of cohort']].copy()
#len(school_graduation_table)
In [76]:
#school_graduation_table = school_graduation_table.dropna()
#sum(school_graduation_table['Total Grads - % of cohort'] == 's')
In [77]:
school_graduation_table = pd.read_csv(url, na_values=['s'])
In [78]:
school_graduation_table.loc[0]
Out[78]:
Demographic                                                    Total Cohort
DBN                                                                  01M292
School Name                           HENRY STREET SCHOOL FOR INTERNATIONAL
Cohort                                                                 2003
Total Cohort                                                              5
Total Grads - n                                                         NaN
Total Grads - % of cohort                                               NaN
Total Regents - n                                                       NaN
Total Regents - % of cohort                                             NaN
Total Regents - % of grads                                              NaN
Advanced Regents - n                                                    NaN
Advanced Regents - % of cohort                                          NaN
Advanced Regents - % of grads                                           NaN
Regents w/o Advanced - n                                                NaN
Regents w/o Advanced - % of cohort                                      NaN
Regents w/o Advanced - % of grads                                       NaN
Local - n                                                               NaN
Local - % of cohort                                                     NaN
Local - % of grads                                                      NaN
Still Enrolled - n                                                      NaN
Still Enrolled - % of cohort                                            NaN
Dropped Out - n                                                         NaN
Dropped Out - % of cohort                                               NaN
Name: 0, dtype: object
In [79]:
# Define which values mean null
school_graduation_table = school_graduation_table[['DBN', 'School Name', selected_outcome]].copy()
school_graduation_table = school_graduation_table.dropna()

len(school_graduation_table)
Out[79]:
16704
In [80]:
school_graduation_table.dtypes
Out[80]:
DBN                           object
School Name                   object
Total Grads - % of cohort    float64
dtype: object
In [81]:
school_graduation_table
Out[81]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
DBN School Name Total Grads - % of cohort
1 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 67.3
2 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 67.2
3 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 55.1
4 01M292 HENRY STREET SCHOOL FOR INTERNATIONAL 56.4
5 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 71.9
6 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 63.5
7 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 77.0
8 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 67.0
9 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 52.9
10 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 42.7
11 01M448 UNIVERSITY NEIGHBORHOOD HIGH SCHOOL 48.4
12 01M450 EAST SIDE COMMUNITY SCHOOL 67.2
13 01M450 EAST SIDE COMMUNITY SCHOOL 62.5
14 01M450 EAST SIDE COMMUNITY SCHOOL 62.0
15 01M450 EAST SIDE COMMUNITY SCHOOL 72.5
16 01M450 EAST SIDE COMMUNITY SCHOOL 71.8
17 01M450 EAST SIDE COMMUNITY SCHOOL 77.8
18 01M450 EAST SIDE COMMUNITY SCHOOL 78.9
19 01M509 MARTA VALLE HIGH SCHOOL 49.3
20 01M509 MARTA VALLE HIGH SCHOOL 40.0
21 01M509 MARTA VALLE HIGH SCHOOL 44.3
22 01M509 MARTA VALLE HIGH SCHOOL 44.6
23 01M509 MARTA VALLE HIGH SCHOOL 55.7
24 01M509 MARTA VALLE HIGH SCHOOL 56.0
25 01M509 MARTA VALLE HIGH SCHOOL 59.5
26 01M515 LOWER EAST SIDE PREPARATORY HIGH SCHO 53.0
27 01M515 LOWER EAST SIDE PREPARATORY HIGH SCHO 44.8
28 01M515 LOWER EAST SIDE PREPARATORY HIGH SCHO 42.0
29 01M515 LOWER EAST SIDE PREPARATORY HIGH SCHO 48.5
30 01M515 LOWER EAST SIDE PREPARATORY HIGH SCHO 45.5
... ... ... ...
25060 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE?BU 58.6
25061 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE?BU 50.0
25062 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE?BU 60.3
25063 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE?BU 56.2
25064 32K545 EBC HIGH SCHOOL FOR PUBLIC SERVICE?BU 63.0
25066 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 57.1
25067 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 70.4
25068 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 69.4
25069 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 71.1
25070 32K549 BUSHWICK SCHOOL FOR SOCIAL JUSTICE 71.1
25072 32K552 ACADEMY OF URBAN PLANNING 49.2
25073 32K552 ACADEMY OF URBAN PLANNING 53.2
25074 32K552 ACADEMY OF URBAN PLANNING 52.8
25075 32K552 ACADEMY OF URBAN PLANNING 42.0
25076 32K552 ACADEMY OF URBAN PLANNING 54.0
25080 32K554 ALL CITY LEADERSHIP SECONDARY SCHOOL 69.6
25081 32K554 ALL CITY LEADERSHIP SECONDARY SCHOOL 87.5
25082 32K554 ALL CITY LEADERSHIP SECONDARY SCHOOL 91.7
25084 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACAD 63.8
25085 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACAD 64.7
25086 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACAD 62.5
25087 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACAD 49.0
25088 32K556 BUSHWICK LEADERS HIGH SCHOOL FOR ACAD 51.0
25089 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 2.9
25090 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 5.5
25091 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 1.5
25092 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 3.1
25093 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 3.8
25094 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 7.0
25095 32K564 BUSHWICK COMMUNITY HIGH SCHOOL 7.0
<p>16704 rows × 3 columns</p>
In [82]:
school_graduation_table = school_graduation_table.groupby('DBN').mean()
school_graduation_table[:5]
Out[82]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Total Grads - % of cohort
DBN
01M292 61.755556
01M448 58.637736
01M450 70.462500
01M509 49.902381
01M515 49.151064
In [83]:
endpoint_url = 'https://data.cityofnewyork.us/resource/r2nx-nhxe.csv'
In [84]:
# Load schools
school_location_table = load(
    endpoint_url,
    buffer_size=1000)
school_location_table[:5]
https://data.cityofnewyork.us/resource/r2nx-nhxe.csv?$limit=1000
1000
1823
Out[84]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
:@computed_region_92fq_4b7q :@computed_region_efsh_h5xi :@computed_region_f5dn_yrer :@computed_region_sbqj_enih :@computed_region_yeji_bk3q admin_district_location_code administrative_district_name ats_system_code beds_number borough_block_lot ... primary_building_code principal_name principal_phone_number principal_title school_support_team_leader_name school_support_team_name state_code status_descriptions x_coordinate y_coordinate
0 50.0 11729.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M015 310100010015 1003740020 ... M015 IRENE SANCHEZ 212-228-8730 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 990141.0 202349.0
1 50.0 11724.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M019 310100010019 1004530034 ... M019 JACQUELINE FLANAGAN 212-533-5340 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 988547.0 205239.0
2 32.0 11723.0 70.0 4.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M020 310100010020 1003550001 ... M020 SARAH PINTO VIAGRAN 212-254-9577 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 988044.0 202068.0
3 50.0 11729.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M034 310100010034 1003810038 ... M034 ANGELIKI LOUKATOS 212-228-4433 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 991163.0 203782.0
4 50.0 11729.0 70.0 5.0 4.0 M801 COMMUNITY SCHOOL DISTRICT 01 01M063 310100010063 1004310014 ... M063 DARLENE CAMERON 212-674-3180 PRINCIPAL NaN School Support Team 3- Manhattan NY Open 988071.0 203210.0
<p>5 rows × 48 columns</p>
In [85]:
school_location_table.iloc[0]
Out[85]:
:@computed_region_92fq_4b7q                                                         50
:@computed_region_efsh_h5xi                                                      11729
:@computed_region_f5dn_yrer                                                         70
:@computed_region_sbqj_enih                                                          5
:@computed_region_yeji_bk3q                                                          4
admin_district_location_code                                                      M801
administrative_district_name                              COMMUNITY SCHOOL DISTRICT 01
ats_system_code                                                           01M015      
beds_number                                                               310100010015
borough_block_lot                                                           1003740020
census_tract                                                                      2601
community_district                                                                 103
community_school_sup_name                                           PHILLIPS, DANIELLA
council_district                                                                     2
fax_number                                                                212-477-0931
field_support_center_leader_name                                             CHU, YUET
field_support_center_name                             Field Support Center - Manhattan
fiscal_year                                                                       2018
geographical_district_code                                                           1
grades_final_text                                                 PK,0K,01,02,03,04,05
grades_text                                                    PK,0K,01,02,03,04,05,SE
highschool_network_location_code                                                   NaN
highschool_network_name                                                            NaN
highschool_network_superintendent                                                  NaN
location_1                                                POINT (-73.978747 40.722075)
location_1_address                                                   333 EAST 4 STREET
location_1_city                                                              MANHATTAN
location_1_state                                                                    NY
location_1_zip                                                                   10009
location_category_description                                               Elementary
location_code                                                                     M015
location_name                                                P.S. 015 Roberto Clemente
location_type_description                                             General Academic
managed_by_name                                                                    DOE
nta                                                                               MN28
nta_name                             Lower East Side                               ...
open_date                                                      1904-07-01T00:00:00.000
primary_address_line_1                                              333 EAST  4 STREET
primary_building_code                                                             M015
principal_name                                                           IRENE SANCHEZ
principal_phone_number                                                    212-228-8730
principal_title                                                              PRINCIPAL
school_support_team_leader_name                                                    NaN
school_support_team_name                              School Support Team 3- Manhattan
state_code                                                                          NY
status_descriptions                                                               Open
x_coordinate                                                                    990141
y_coordinate                                                                    202349
Name: 0, dtype: object
In [86]:
school_location_table.iloc[0]['location_1']
Out[86]:
'POINT (-73.978747 40.722075)'
In [87]:
school_location_table.iloc[0]['ats_system_code']
Out[87]:
'01M015      '
In [88]:
school_location_table['DBN'] = school_location_table['ats_system_code'].str.strip()
In [89]:
school_location_table.iloc[0]['DBN']
Out[89]:
'01M015'
In [90]:
school_location_table = school_location_table.rename(columns={
    'location_1': 'WKT',
    'location_name': 'School Name',
})
In [91]:
trimmed_school_location_table = school_location_table[[
    'DBN',
    'WKT',
    'School Name',
]]
In [92]:
merged_school_table = pd.merge(
    school_table,
    school_graduation_table,
    left_on='DBN',
    right_on='DBN')
len(school_table)
Out[92]:
1801
In [93]:
merged_school_table.iloc[0]
Out[93]:
DBN                                                                 01M292
WKT                                           POINT (-73.986051 40.713362)
School Name                                     Orchard Collegiate Academy
asian_1                                                                 21
asian_2                                                                 15
black_1                                                                 34
black_2                                                               24.3
dbn                                                                 01M292
economic_need_index                                                  84.4%
english_language_learners_1                                             20
english_language_learners_2                                           14.3
female_1                                                                53
female_2                                                              37.9
grade_1                                                                  0
grade_10                                                                35
grade_11                                                                31
grade_12                                                                34
grade_2                                                                  0
grade_3                                                                  0
grade_4                                                                  0
grade_5                                                                  0
grade_6                                                                  0
grade_7                                                                  0
grade_8                                                                  0
grade_9                                                                 40
grade_k                                                                  0
grade_pk_half_day_full_day                                               0
hispanic_1                                                              77
hispanic_2                                                              55
male_1                                                                  87
male_2                                                                62.1
multiple_race_categories_not_represented_1                               1
multiple_race_categories_not_represented_2                             0.7
poverty_1                                                              128
poverty_2                                                             91.4
school_name                                     Orchard Collegiate Academy
students_with_disabilities_1                                            38
students_with_disabilities_2                                          27.1
total_enrollment                                                       140
white_1                                                                  7
white_2                                                                  5
year                                                               2016-17
diversity_index                                                    86.2143
scaled_index                                                    0.00561885
district                                                                 1
Total Grads - % of cohort                                          61.7556
Name: 0, dtype: object
In [94]:
sorted_school_table = merged_school_table.sort_values(selected_outcome, ascending=False)
In [95]:
sorted_school_table[-10:]
Out[95]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
DBN WKT School Name asian_1 asian_2 black_1 black_2 dbn economic_need_index english_language_learners_1 ... students_with_disabilities_1 students_with_disabilities_2 total_enrollment white_1 white_2 year diversity_index scaled_index district Total Grads - % of cohort
89 07X379 POINT (-73.905389 40.818703) Jill Chaifetz Transfer High School 3 1.4 74 34.3 07X379 80.6% 21 ... 56 25.9 216 1 0.5 2016-17 67.785714 0.009317 7 12.929167
104 08X377 POINT (-73.85593 40.821218) Bronx Community High School 5 2.8 62 34.4 08X377 75.1% 6 ... 30 16.7 180 3 1.7 2016-17 81.727273 NaN 8 10.683333
252 23K646 POINT (-73.904143 40.677528) Aspirations Diploma Plus High School 3 1.3 178 75.4 23K646 69.0% 7 ... 43 18.2 236 1 0.4 2016-17 48.500000 NaN 23 10.335000
90 07X381 POINT (-73.919949 40.818645) Bronx Haven High School 3 1.6 62 32.3 07X381 78.8% 10 ... 46 24.0 192 1 0.5 2016-17 95.800000 0.015825 7 9.500000
184 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 23 9.2 47 18.7 24Q744 48.7% 3 ... 15 6.0 251 12 4.8 2016-17 210.000000 NaN 24 9.200000
220 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 1 0.6 133 85.8 17K568 67.0% 3 ... 36 23.2 155 0 0.0 2016-17 31.679487 NaN 17 8.907143
76 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 7 3.0 99 42.9 05M285 77.4% 31 ... 52 22.5 231 3 1.3 2016-17 100.555556 0.043929 5 7.521429
224 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 0 0.0 162 83.9 18K673 67.2% 12 ... 67 34.7 193 4 2.1 2016-17 118.300000 NaN 18 5.441667
322 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 0 0.0 67 28.4 32K564 71.0% 10 ... 56 23.7 236 7 3.0 2016-17 234.000000 NaN 32 5.337500
182 13K616 POINT (-73.95818 40.692015) Brooklyn High School for Leadership and Community 1 0.5 141 69.5 13K616 70.6% 9 ... 38 18.7 203 5 2.5 2016-17 187.200000 NaN 13 5.322222
<p>10 rows × 46 columns</p>
In [96]:
sorted_school_table[-3:]
Out[96]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
DBN WKT School Name asian_1 asian_2 black_1 black_2 dbn economic_need_index english_language_learners_1 ... students_with_disabilities_1 students_with_disabilities_2 total_enrollment white_1 white_2 year diversity_index scaled_index district Total Grads - % of cohort
224 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 0 0.0 162 83.9 18K673 67.2% 12 ... 67 34.7 193 4 2.1 2016-17 118.3 NaN 18 5.441667
322 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 0 0.0 67 28.4 32K564 71.0% 10 ... 56 23.7 236 7 3.0 2016-17 234.0 NaN 32 5.337500
182 13K616 POINT (-73.95818 40.692015) Brooklyn High School for Leadership and Community 1 0.5 141 69.5 13K616 70.6% 9 ... 38 18.7 203 5 2.5 2016-17 187.2 NaN 13 5.322222
<p>3 rows × 46 columns</p>
In [97]:
school_table.iloc[0]
Out[97]:
DBN                                                                 01M015
WKT                                           POINT (-73.978747 40.722075)
School Name                                      P.S. 015 Roberto Clemente
asian_1                                                                 14
asian_2                                                                7.9
black_1                                                                 51
black_2                                                               28.7
dbn                                                                 01M015
economic_need_index                                                  89.2%
english_language_learners_1                                             12
english_language_learners_2                                            6.7
female_1                                                                83
female_2                                                              46.6
grade_1                                                                 33
grade_10                                                                 0
grade_11                                                                 0
grade_12                                                                 0
grade_2                                                                 27
grade_3                                                                 31
grade_4                                                                 24
grade_5                                                                 18
grade_6                                                                  0
grade_7                                                                  0
grade_8                                                                  0
grade_9                                                                  0
grade_k                                                                 28
grade_pk_half_day_full_day                                              17
hispanic_1                                                             105
hispanic_2                                                              59
male_1                                                                  95
male_2                                                                53.4
multiple_race_categories_not_represented_1                               4
multiple_race_categories_not_represented_2                             2.2
poverty_1                                                              152
poverty_2                                                             85.4
school_name                                      P.S. 015 Roberto Clemente
students_with_disabilities_1                                            51
students_with_disabilities_2                                          28.7
total_enrollment                                                       178
white_1                                                                  4
white_2                                                                2.2
year                                                               2016-17
diversity_index                                                    35.2727
scaled_index                                                      0.096328
district                                                                 1
Name: 0, dtype: object
In [98]:
len(district_map_table)
Out[98]:
33
In [99]:
district_map_table = t.copy()
district_map_table['WKT'] = district_map_table['geometry_object'].apply(
lambda x: x.wkt)
# Define FillColor
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(1, 100))

array = district_map_table['diversity_index'].values.reshape(-1, 1)
scaler.fit(array)
district_map_table['scaled_index'] = scaler.transform(array)
In [100]:
from sklearn import preprocessing
import pandas as pd

x = sorted_school_table['diversity_index'].values.reshape(-1,1) #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
sorted_school_table['scaled_index'] = pd.DataFrame(x_scaled)

# x = district_map_table['diversity_index'].values.reshape(-1,1) #returns a numpy array
# min_max_scaler = preprocessing.MinMaxScaler()
# x_scaled = min_max_scaler.fit_transform(x)
# district_map_table['scaled_index'] = pd.DataFrame(x_scaled)
In [101]:
sorted_school_table.scaled_index.describe()
Out[101]:
count    323.000000
mean       0.081404
std        0.100232
min        0.000000
25%        0.026000
50%        0.054133
75%        0.096842
max        1.000000
Name: scaled_index, dtype: float64
In [102]:
# getting top and bottom performing schools

a_school_map_table = sorted_school_table[:10].copy()
a_school_map_table = a_school_map_table[['WKT', 'School Name','total_enrollment']]
a_school_map_table = a_school_map_table.rename({
'School Name': 'Name',
})
a_school_map_table['FillColor'] = 'g'
In [103]:
len(sorted_school_table)
Out[103]:
323
In [104]:
sorted_school_table.drop(sorted_school_table.tail(1).index,inplace=True)
In [105]:
sorted_school_table.isna().sum()
Out[105]:
DBN                                           0
WKT                                           0
School Name                                   0
asian_1                                       0
asian_2                                       0
black_1                                       0
black_2                                       0
dbn                                           0
economic_need_index                           0
english_language_learners_1                   0
english_language_learners_2                   0
female_1                                      0
female_2                                      0
grade_1                                       0
grade_10                                      0
grade_11                                      0
grade_12                                      0
grade_2                                       0
grade_3                                       0
grade_4                                       0
grade_5                                       0
grade_6                                       0
grade_7                                       0
grade_8                                       0
grade_9                                       0
grade_k                                       0
grade_pk_half_day_full_day                    0
hispanic_1                                    0
hispanic_2                                    0
male_1                                        0
male_2                                        0
multiple_race_categories_not_represented_1    0
multiple_race_categories_not_represented_2    0
poverty_1                                     0
poverty_2                                     0
school_name                                   0
students_with_disabilities_1                  0
students_with_disabilities_2                  0
total_enrollment                              0
white_1                                       0
white_2                                       0
year                                          0
diversity_index                               0
scaled_index                                  0
district                                      0
Total Grads - % of cohort                     0
dtype: int64
In [106]:
sorted_school_table['School Name'].nunique()
Out[106]:
322
In [107]:
sorted_school_table[30:]
Out[107]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
DBN WKT School Name asian_1 asian_2 black_1 black_2 dbn economic_need_index english_language_learners_1 ... students_with_disabilities_1 students_with_disabilities_2 total_enrollment white_1 white_2 year diversity_index scaled_index district Total Grads - % of cohort
173 13K430 POINT (-73.976435 40.688896) Brooklyn Technical High School 3483 61.3 391 6.9 13K430 41.8% 0 ... 94 1.7 5682 1243 21.9 2016-17 27.388889 0.017592 13 91.419048
32 02M439 POINT (-73.991966 40.741963) Manhattan Village Academy 31 7.2 49 11.3 02M439 63.0% 7 ... 65 15.0 432 34 7.9 2016-17 5.492806 0.013822 2 91.339130
248 22K555 POINT (-73.972312 40.649055) Brooklyn College Academy 51 8.3 422 68.3 22K555 45.9% 1 ... 58 9.4 618 37 6.0 2016-17 13.621212 0.043673 15 91.128571
63 03M479 POINT (-73.996262 40.760858) Beacon High School 105 8.1 177 13.6 03M479 26.3% 3 ... 103 7.9 1304 670 51.4 2016-17 7.922535 0.008051 2 90.550877
285 27Q650 POINT (-73.841475 40.689177) High School for Construction Trades, Engineeri... 320 31.5 98 9.7 27Q650 42.8% 15 ... 142 14.0 1015 135 13.3 2016-17 3.736842 0.047347 27 90.462500
255 24Q264 POINT (-73.937004 40.744175) Academy of Finance and Enterprise 129 22.8 31 5.5 24Q264 55.1% 35 ... 70 12.3 567 93 16.4 2016-17 21.807692 0.050496 24 89.683333
268 25Q285 POINT (-73.789365 40.764884) World Journalism Preparatory: A College Board ... 94 16.6 25 4.4 25Q285 31.7% 3 ... 118 20.8 566 264 46.6 2016-17 63.333333 0.426327 25 89.025000
143 10X477 POINT (-73.912686 40.877379) Marble Hill High School for International Studies 41 9.6 93 21.7 10X477 80.1% 127 ... 49 11.4 429 22 5.1 2016-17 153.100000 0.037453 10 88.993023
38 02M519 POINT (-73.959777 40.765638) Talent Unlimited High School 20 4.0 163 32.9 02M519 44.8% 2 ... 74 14.9 495 78 15.8 2016-17 8.292308 0.007297 2 88.825000
26 02M414 POINT (-74.002222 40.742512) N.Y.C. Museum School 135 29.7 59 13.0 02M414 55.6% 6 ... 48 10.6 454 73 16.1 2016-17 8.086957 0.010869 2 88.683721
221 17K590 POINT (-73.951823 40.66679) Medgar Evers College Preparatory School 35 2.8 1104 88.2 17K590 46.3% 5 ... 80 6.4 1252 8 0.6 2016-17 36.614286 0.047102 17 88.546512
45 02M545 POINT (-73.989329 40.717329) High School for Dual Language and Asian Studies 335 84.4 17 4.3 02M545 67.8% 47 ... 11 2.8 397 14 3.5 2016-17 79.700000 0.026266 2 87.876471
159 11X542 POINT (-73.860729 40.860443) Pelham Preparatory Academy 29 5.9 149 30.2 11X542 61.3% 19 ... 103 20.9 493 47 9.5 2016-17 23.678571 0.047720 11 86.530952
113 09X260 POINT (-73.902794 40.833884) Bronx Center for Science and Mathematics 32 7.0 98 21.4 09X260 76.8% 27 ... 105 22.9 458 7 1.5 2016-17 98.722222 0.143469 9 86.523810
267 25Q281 POINT (-73.821402 40.74943) East-West School of International Studies 451 65.8 67 9.8 25Q281 47.3% 45 ... 82 12.0 685 34 5.0 2016-17 51.500000 0.298061 25 86.116667
175 13K483 POINT (-73.988323 40.694629) The Urban Assembly School for Law and Justice 12 2.6 313 68.0 13K483 56.6% 8 ... 76 16.5 460 7 1.5 2016-17 23.783784 0.118868 13 85.511111
247 22K535 POINT (-73.934449 40.578356) Leon M. Goldstein High School for the Sciences 243 23.6 101 9.8 22K535 35.0% 7 ... 163 15.8 1029 576 56.0 2016-17 52.166667 0.016808 22 85.468519
208 17K382 POINT (-73.957173 40.649602) Academy for College Preparation and Career Exp... 8 1.9 323 78.4 17K382 69.5% 39 ... 69 16.7 412 7 1.7 2016-17 24.772727 0.026482 17 85.358333
320 32K554 POINT (-73.913172 40.697378) All City Leadership Secondary School 25 6.1 42 10.2 32K554 59.6% 7 ... 33 8.0 413 29 7.0 2016-17 205.500000 0.142000 32 85.145161
12 02M298 POINT (-73.993607 40.716231) Pace High School 43 8.5 162 31.9 02M298 66.3% 12 ... 98 19.3 508 13 2.6 2016-17 16.311111 0.001725 2 85.086207
304 30Q575 POINT (-73.937564 40.751144) Academy of American Studies 294 29.4 64 6.4 30Q575 40.4% 40 ... 84 8.4 999 312 31.2 2016-17 92.250000 0.017824 30 84.996825
197 15K448 POINT (-74.002022 40.679462) Brooklyn Secondary School for Collaborative St... 20 3.0 253 37.9 15K448 61.2% 45 ... 214 32.1 667 71 10.6 2016-17 33.619048 0.120544 15 84.621739
151 11X275 POINT (-73.861649 40.875172) High School of Computers and Technology 33 6.3 180 34.3 11X275 73.5% 35 ... 136 25.9 525 10 1.9 2016-17 43.000000 0.036644 11 84.244000
112 09X252 POINT (-73.901774 40.839508) Mott Hall Bronx High School 3 0.8 81 20.8 09X252 84.0% 51 ... 95 24.4 390 4 1.0 2016-17 48.452381 0.027449 9 84.209524
96 07X548 POINT (-73.922366 40.821828) Careers in Sports High School 4 0.7 168 30.0 07X548 83.0% 57 ... 137 24.5 560 10 1.8 2016-17 95.000000 0.010853 7 83.679070
125 10X141 POINT (-73.914141 40.887407) Riverdale / Kingsbridge Academy (Middle School... 126 8.6 137 9.4 10X141 40.0% 113 ... 281 19.3 1459 388 26.6 2016-17 79.000000 0.092347 10 83.616667
191 14K488 POINT (-73.954815 40.715561) Brooklyn Preparatory High School 11 2.1 264 50.2 14K488 64.3% 11 ... 87 16.5 526 7 1.3 2016-17 53.823529 0.091254 14 83.595833
128 10X237 POINT (-73.901565 40.875636) The Marie Curie School for Medicine, Nursing, and 17 3.2 175 32.8 10X237 76.3% 67 ... 109 20.5 533 15 2.8 2016-17 35.625000 0.024320 10 83.466667
88 07X221 POINT (-73.920851 40.813612) South Bronx Preparatory: A College Board School 12 1.8 153 23.3 07X221 86.2% 40 ... 166 25.3 657 11 1.7 2016-17 86.136364 0.057254 7 83.400000
193 14K561 POINT (-73.954815 40.715561) Williamsburg Preparatory School 17 2.5 116 16.9 14K561 66.6% 23 ... 123 17.9 686 65 9.5 2016-17 130.214286 0.017880 14 83.339286
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
206 16K455 POINT (-73.931604 40.678478) Boys and Girls High School 7 1.8 333 84.1 16K455 78.7% 20 ... 115 29.0 396 8 2.0 2016-17 51.891304 0.078995 16 36.258621
179 13K575 POINT (-73.947873 40.684654) Bedford Stuyvesant Preparatory High School 0 0.0 100 82.0 13K575 70.4% 4 ... 8 6.6 122 1 0.8 2016-17 71.750000 0.073388 13 33.620000
132 10X319 POINT (-73.893744 40.848385) Providing Urban Learners Success In Education ... 1 0.4 27 11.4 10X319 78.6% 16 ... 41 17.4 236 1 0.4 2016-17 2.728956 0.121061 10 27.394595
61 03M404 POINT (-73.974483 40.786134) Innovation Diploma Plus 1 0.6 61 35.1 03M404 75.2% 1 ... 24 13.8 174 0 0.0 2016-17 57.117647 0.015560 3 27.300000
53 02M586 POINT (-73.992343 40.729661) Harvey Milk High School 2 2.7 22 29.7 02M586 72.6% 3 ... 30 40.5 74 6 8.1 2016-17 55.000000 0.056472 2 26.966667
313 31R470 POINT (-74.087072 40.61062) Concord High School 3 1.6 53 29.1 31R470 66.6% 2 ... 51 28.0 182 34 18.7 2016-17 20.962963 0.080146 31 25.826531
244 21K728 POINT (-73.985413 40.576976) Liberation Diploma Plus 5 2.4 115 55.8 21K728 72.1% 2 ... 47 22.8 206 23 11.2 2016-17 144.600000 0.159650 21 24.841667
253 23K647 POINT (-73.906662 40.655644) Metropolitan Diploma Plus High School 0 0.0 179 84.8 23K647 74.4% 2 ... 53 25.1 211 1 0.5 2016-17 50.333333 0.094768 23 20.283333
165 12X480 POINT (-73.898265 40.823124) Bronx Regional High School 2 0.8 84 35.4 12X480 77.4% 27 ... 37 15.6 237 5 2.1 2016-17 94.100000 0.059088 12 20.256000
251 23K643 POINT (-73.906662 40.655644) Brooklyn Democracy Academy 2 1.0 167 80.3 23K643 71.9% 8 ... 47 22.6 208 1 0.5 2016-17 46.541667 0.094286 23 19.838889
67 03M505 POINT (-73.966779 40.797136) Edward A. Reynolds West Side High School 3 0.6 112 22.8 03M505 70.4% 43 ... 161 32.7 492 18 3.7 2016-17 158.333333 0.034122 3 19.649153
272 25Q540 POINT (-73.828279 40.765624) Queens Academy High School 42 9.7 168 38.6 25Q540 49.1% 14 ... 79 18.2 435 24 5.5 2016-17 17.810811 0.067184 25 17.066667
178 13K553 POINT (-73.947873 40.684654) Brooklyn Academy High School 0 0.0 99 73.3 13K553 69.5% 0 ... 37 27.4 135 1 0.7 2016-17 16.889831 0.081429 13 15.860465
210 17K489 POINT (-73.955188 40.669561) W.E.B. Dubois Academic High School 2 1.9 87 82.9 17K489 60.8% 1 ... 17 16.2 105 1 1.0 2016-17 61.394737 0.122408 17 15.650000
50 02M570 POINT (-73.990562 40.747425) Satellite Academy High School 2 0.7 101 36.1 02M570 70.4% 15 ... 53 18.9 280 8 2.9 2016-17 131.571429 0.110102 2 15.410345
44 02M544 POINT (-73.98861 40.767848) Independence High School 9 2.7 122 36.1 02M544 70.4% 22 ... 44 13.0 338 23 6.8 2016-17 32.791667 0.060204 2 15.380000
274 25Q792 POINT (-73.819664 40.720586) North Queens Community High School 26 11.9 90 41.3 25Q792 47.4% 10 ... 47 21.6 218 19 8.7 2016-17 14.891892 0.362041 25 14.464286
222 18K578 POINT (-73.916927 40.634451) Brooklyn Bridge Academy 1 0.5 169 84.5 18K578 64.0% 8 ... 43 21.5 200 5 2.5 2016-17 60.000000 0.021287 18 14.378261
223 18K635 POINT (-73.886499 40.652203) Olympus Academy 0 0.0 206 91.2 18K635 55.7% 4 ... 47 20.8 226 3 1.3 2016-17 49.500000 0.225510 19 13.538889
205 15K698 POINT (-74.014363 40.677968) South Brooklyn Community High School 7 3.5 61 30.3 15K698 73.3% 8 ... 53 26.4 201 11 5.5 2016-17 19.500000 0.092959 15 13.478571
203 15K529 POINT (-73.992151 40.642619) West Brooklyn Community High School 15 7.0 23 10.7 15K529 65.9% 11 ... 72 33.6 214 27 12.6 2016-17 97.722222 0.117143 15 12.954839
89 07X379 POINT (-73.905389 40.818703) Jill Chaifetz Transfer High School 3 1.4 74 34.3 07X379 80.6% 21 ... 56 25.9 216 1 0.5 2016-17 67.785714 0.075510 7 12.929167
104 08X377 POINT (-73.85593 40.821218) Bronx Community High School 5 2.8 62 34.4 08X377 75.1% 6 ... 30 16.7 180 3 1.7 2016-17 81.727273 0.023994 8 10.683333
252 23K646 POINT (-73.904143 40.677528) Aspirations Diploma Plus High School 3 1.3 178 75.4 23K646 69.0% 7 ... 43 18.2 236 1 0.4 2016-17 48.500000 0.029967 23 10.335000
90 07X381 POINT (-73.919949 40.818645) Bronx Haven High School 3 1.6 62 32.3 07X381 78.8% 10 ... 46 24.0 192 1 0.5 2016-17 95.800000 0.041985 7 9.500000
184 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 23 9.2 47 18.7 24Q744 48.7% 3 ... 15 6.0 251 12 4.8 2016-17 210.000000 0.035667 24 9.200000
220 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 1 0.6 133 85.8 17K568 67.0% 3 ... 36 23.2 155 0 0.0 2016-17 31.679487 0.372449 17 8.907143
76 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 7 3.0 99 42.9 05M285 77.4% 31 ... 52 22.5 231 3 1.3 2016-17 100.555556 0.055118 5 7.521429
224 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 0 0.0 162 83.9 18K673 67.2% 12 ... 67 34.7 193 4 2.1 2016-17 118.300000 0.030082 18 5.441667
322 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 0 0.0 67 28.4 32K564 71.0% 10 ... 56 23.7 236 7 3.0 2016-17 234.000000 0.226367 32 5.337500
<p>292 rows × 46 columns</p>
In [108]:
len(sorted_school_table.diversity_index)
Out[108]:
322
In [109]:
#max(sorted_school_table.diversity_index)
In [110]:
b_school_map_table = sorted_school_table[-10:].copy()
b_school_map_table = b_school_map_table[['WKT', 'School Name','total_enrollment']]
b_school_map_table = b_school_map_table.rename({
'School Name': 'Name',
})
b_school_map_table['FillColor'] = 'r'
In [111]:
t = pd.merge(dist_table, mean_table, left_on='school_dis', right_on='district')
In [112]:
t[:3]
Out[112]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
school_dis shape_area shape_leng geometry_object geometry_layer geometry_proj4 district diversity_index
0 16.0 4.676362e+07 35848.904605 POLYGON ((-73.93311862859143 40.69579115384632... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 16 95.623066
1 32.0 5.189850e+07 37251.057847 POLYGON ((-73.91180710069435 40.70343495202662... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 32 204.707566
2 6.0 9.634170e+07 70447.849084 POLYGON ((-73.92640556921116 40.87762147653734... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 6 254.660597
In [113]:
# X = district_map_table.scaled_index
In [114]:
# len(X)
In [115]:
district_map_table.head()
Out[115]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
school_dis shape_area shape_leng geometry_object geometry_layer geometry_proj4 district diversity_index WKT scaled_index
0 16.0 4.676362e+07 35848.904605 POLYGON ((-73.93311862859143 40.69579115384632... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 16 95.623066 POLYGON ((-73.93311862859143 40.69579115384632... 33.639628
1 32.0 5.189850e+07 37251.057847 POLYGON ((-73.91180710069435 40.70343495202662... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 32 204.707566 POLYGON ((-73.91180710069435 40.70343495202662... 79.156481
2 6.0 9.634170e+07 70447.849084 POLYGON ((-73.92640556921116 40.87762147653734... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 6 254.660597 POLYGON ((-73.92640556921116 40.87762147653734... 100.000000
3 31.0 1.604472e+09 434471.412859 (POLYGON ((-74.05050806403247 40.5664220341608... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 31 57.090485 MULTIPOLYGON (((-74.05050806403247 40.56642203... 17.561433
4 7.0 9.226247e+07 65294.452403 (POLYGON ((-73.89680883223774 40.7958084451597... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 7 129.066478 MULTIPOLYGON (((-73.89680883223774 40.79580844... 47.594304
In [116]:
district_map_table.loc[district_map_table['scaled_index'] < 25, 'FillColor'] = '#ffffcc'
In [117]:
district_map_table.loc[(district_map_table['scaled_index'] >= 25) 
                       & (district_map_table['scaled_index'] < 50), 'FillColor'] = '#fed976'
In [118]:
district_map_table.loc[(district_map_table['scaled_index'] >= 50) 
                       & (district_map_table['scaled_index'] < 75), 'FillColor'] = '#fd8d3c'
In [119]:
district_map_table.loc[(district_map_table['scaled_index'] >= 75), 'FillColor'] = '#e31a1c'
In [120]:
district_map_table
Out[120]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
school_dis shape_area shape_leng geometry_object geometry_layer geometry_proj4 district diversity_index WKT scaled_index FillColor
0 16.0 4.676362e+07 35848.904605 POLYGON ((-73.93311862859143 40.69579115384632... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 16 95.623066 POLYGON ((-73.93311862859143 40.69579115384632... 33.639628 #fed976
1 32.0 5.189850e+07 37251.057847 POLYGON ((-73.91180710069435 40.70343495202662... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 32 204.707566 POLYGON ((-73.91180710069435 40.70343495202662... 79.156481 #e31a1c
2 6.0 9.634170e+07 70447.849084 POLYGON ((-73.92640556921116 40.87762147653734... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 6 254.660597 POLYGON ((-73.92640556921116 40.87762147653734... 100.000000 #e31a1c
3 31.0 1.604472e+09 434471.412859 (POLYGON ((-74.05050806403247 40.5664220341608... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 31 57.090485 MULTIPOLYGON (((-74.05050806403247 40.56642203... 17.561433 #ffffcc
4 7.0 9.226247e+07 65294.452403 (POLYGON ((-73.89680883223774 40.7958084451597... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 7 129.066478 MULTIPOLYGON (((-73.89680883223774 40.79580844... 47.594304 #fed976
5 23.0 4.740069e+07 40317.452033 (POLYGON ((-73.92044366203014 40.6656262871675... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 23 92.098260 MULTIPOLYGON (((-73.92044366203014 40.66562628... 32.168859 #fed976
6 29.0 4.201981e+08 135035.241651 POLYGON ((-73.73816144093141 40.72895809117297... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 29 61.776259 POLYGON ((-73.73816144093141 40.72895809117297... 19.516630 #ffffcc
7 26.0 4.247909e+08 125677.678898 POLYGON ((-73.74344992332192 40.77824115291502... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 26 31.956221 POLYGON ((-73.74344992332192 40.77824115291502... 7.073851 #ffffcc
8 15.0 1.961534e+08 153439.165680 POLYGON ((-73.98633135042395 40.69105051012824... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 15 80.325170 POLYGON ((-73.98633135042395 40.69105051012824... 27.256392 #fed976
9 17.0 1.284414e+08 68341.398899 POLYGON ((-73.92044366203014 40.6656262871675,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 17 73.062571 POLYGON ((-73.92044366203014 40.6656262871675,... 24.225983 #ffffcc
10 19.0 2.034175e+08 184183.167312 (POLYGON ((-73.846736514711 40.60485301485166,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 19 73.366731 MULTIPOLYGON (((-73.846736514711 40.6048530148... 24.352898 #ffffcc
11 13.0 1.048706e+08 86635.210559 POLYGON ((-73.97906084911834 40.70594602894087... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 13 51.905636 POLYGON ((-73.97906084911834 40.70594602894087... 15.397991 #ffffcc
12 18.0 1.751488e+08 121184.158477 (POLYGON ((-73.86706149472118 40.5820879767934... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 18 134.915158 MULTIPOLYGON (((-73.86706149472118 40.58208797... 50.034738 #fd8d3c
13 20.0 2.426965e+08 94309.778946 POLYGON ((-74.02552971543656 40.65147855069281... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 20 158.156244 POLYGON ((-74.02552971543656 40.65147855069281... 59.732368 #fd8d3c
14 3.0 1.134889e+08 52072.051321 POLYGON ((-73.95671863064405 40.78660079332199... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 3 45.148505 POLYGON ((-73.95671863064405 40.78660079332199... 12.578495 #ffffcc
15 5.0 5.251977e+07 44469.588221 POLYGON ((-73.93515659239551 40.83268240623763... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 5 74.205055 POLYGON ((-73.93515659239551 40.83268240623763... 24.702698 #ffffcc
16 9.0 8.341539e+07 46648.958586 POLYGON ((-73.9212971968614 40.85428933985649,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 9 171.782841 POLYGON ((-73.9212971968614 40.85428933985649,... 65.418234 #fd8d3c
17 1.0 3.516075e+07 28641.276279 POLYGON ((-73.97177410965313 40.72582128133706... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 1 74.392314 POLYGON ((-73.97177410965313 40.72582128133706... 24.780835 #ffffcc
18 14.0 1.503102e+08 95792.082090 POLYGON ((-73.95439555417087 40.73911477252251... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 14 92.712426 POLYGON ((-73.95439555417087 40.73911477252251... 32.425127 #fed976
19 4.0 5.262043e+07 52061.828459 (POLYGON ((-73.92133752419399 40.8008521064970... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 4 74.202823 MULTIPOLYGON (((-73.92133752419399 40.80085210... 24.701767 #ffffcc
20 10.0 2.825415e+08 94957.570434 POLYGON ((-73.86789798628736 40.90294017690526... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 10 151.978352 POLYGON ((-73.86789798628736 40.90294017690526... 57.154566 #fd8d3c
21 10.0 3.282963e+06 7883.372664 (POLYGON ((-73.9089323517538 40.8721573479701,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 10 151.978352 MULTIPOLYGON (((-73.9089323517538 40.872157347... 57.154566 #fd8d3c
22 12.0 6.907182e+07 48527.595776 POLYGON ((-73.88284445574813 40.84781722645163... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 12 118.038638 POLYGON ((-73.88284445574813 40.84781722645163... 42.992802 #fed976
23 25.0 4.436285e+08 175827.007127 POLYGON ((-73.82049919995312 40.80101146781899... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 25 65.688042 POLYGON ((-73.82049919995312 40.80101146781899... 21.148870 #ffffcc
24 28.0 2.475679e+08 114694.912786 POLYGON ((-73.84485477879177 40.7357514698091,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 28 17.399788 POLYGON ((-73.84485477879177 40.7357514698091,... 1.000000 #ffffcc
25 24.0 3.949782e+08 127343.703736 (POLYGON ((-73.90641585511733 40.7398683641967... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 24 189.934931 MULTIPOLYGON (((-73.90641585511733 40.73986836... 72.992417 #fd8d3c
26 30.0 3.181290e+08 150392.978241 POLYGON ((-73.90647314610101 40.79018117520807... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 30 81.290729 POLYGON ((-73.90647314610101 40.79018117520807... 27.659283 #fed976
27 21.0 2.101971e+08 123858.087345 POLYGON ((-73.96184657346174 40.62757081710622... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 21 59.592857 POLYGON ((-73.96184657346174 40.62757081710622... 18.605579 #ffffcc
28 22.0 3.855533e+08 271718.504936 (POLYGON ((-73.91990064270161 40.5996005215871... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 22 55.823892 MULTIPOLYGON (((-73.91990064270161 40.59960052... 17.032931 #ffffcc
29 27.0 7.955970e+08 589135.490708 (POLYGON ((-73.82784008953526 40.5887858248046... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 27 34.876088 MULTIPOLYGON (((-73.82784008953526 40.58878582... 8.292202 #ffffcc
30 11.0 3.926651e+08 305305.869806 (POLYGON ((-73.78833349834532 40.8346671297593... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 11 54.424475 MULTIPOLYGON (((-73.78833349834532 40.83466712... 16.449007 #ffffcc
31 8.0 2.588266e+08 223080.044096 (POLYGON ((-73.83979488562292 40.8356192069902... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 8 111.027489 MULTIPOLYGON (((-73.83979488562292 40.83561920... 40.067313 #fed976
32 2.0 2.804004e+08 212406.819436 (POLYGON ((-74.0438776163991 40.69018767537123... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 2 36.504481 MULTIPOLYGON (((-74.0438776163991 40.690187675... 8.971669 #ffffcc
In [121]:
# EOF error

a_school_map_table = a_school_map_table.rename(columns={
 	'School Name': 'Name',
 })
b_school_map_table = b_school_map_table.rename(columns={
 	'School Name': 'Name',
 })
district_map_table['Name'] = district_map_table['district']
In [122]:
a_school_map_table
Out[122]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
WKT Name total_enrollment FillColor
271 POINT (-73.821532 40.737038) Townsend Harris High School 1110 g
27 POINT (-73.953276 40.770288) Eleanor Roosevelt High School 543 g
5 POINT (-73.979581 40.719416) New Explorations into Science, Technology and ... 1745 g
23 POINT (-73.985723 40.741888) Baruch College Campus High School 440 g
74 POINT (-73.947171 40.792932) Young Women's Leadership School 482 g
35 POINT (-74.013921 40.718025) Stuyvesant High School 3365 g
315 POINT (-74.117086 40.568299) Staten Island Technical High School 1312 g
142 POINT (-73.889011 40.879958) Bronx High School of Science 2979 g
305 POINT (-73.926977 40.754975) Baccalaureate School for Global Education 518 g
24 POINT (-74.002222 40.742512) N.Y.C. Lab School for Collaborative Studies 531 g
In [123]:
sorted_school_table.tail()
Out[123]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
DBN WKT School Name asian_1 asian_2 black_1 black_2 dbn economic_need_index english_language_learners_1 ... students_with_disabilities_1 students_with_disabilities_2 total_enrollment white_1 white_2 year diversity_index scaled_index district Total Grads - % of cohort
184 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 23 9.2 47 18.7 24Q744 48.7% 3 ... 15 6.0 251 12 4.8 2016-17 210.000000 0.035667 24 9.200000
220 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 1 0.6 133 85.8 17K568 67.0% 3 ... 36 23.2 155 0 0.0 2016-17 31.679487 0.372449 17 8.907143
76 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 7 3.0 99 42.9 05M285 77.4% 31 ... 52 22.5 231 3 1.3 2016-17 100.555556 0.055118 5 7.521429
224 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 0 0.0 162 83.9 18K673 67.2% 12 ... 67 34.7 193 4 2.1 2016-17 118.300000 0.030082 18 5.441667
322 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 0 0.0 67 28.4 32K564 71.0% 10 ... 56 23.7 236 7 3.0 2016-17 234.000000 0.226367 32 5.337500
<p>5 rows × 46 columns</p>
In [124]:
#a_school_map_table['radiusInPixels'] = 10
In [125]:
#b_school_map_table['radiusInPixels'] = 10
In [126]:
district_map_table.iloc[0]
Out[126]:
school_dis                                                        16
shape_area                                               4.67636e+07
shape_leng                                                   35848.9
geometry_object    POLYGON ((-73.93311862859143 40.69579115384632...
geometry_layer       geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b
geometry_proj4                   +proj=longlat +ellps=WGS84 +no_defs
district                                                          16
diversity_index                                              95.6231
WKT                POLYGON ((-73.93311862859143 40.69579115384632...
scaled_index                                                 33.6396
FillColor                                                    #fed976
Name                                                              16
Name: 0, dtype: object
In [127]:
# Extract columns
map_table = pd.concat([
district_map_table[['WKT','FillColor','Name']],
a_school_map_table[['WKT','FillColor','Name']],
b_school_map_table[['WKT','FillColor','Name']],
])
In [128]:
#top_schools[:3]
In [129]:
district_map_table
Out[129]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
school_dis shape_area shape_leng geometry_object geometry_layer geometry_proj4 district diversity_index WKT scaled_index FillColor Name
0 16.0 4.676362e+07 35848.904605 POLYGON ((-73.93311862859143 40.69579115384632... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 16 95.623066 POLYGON ((-73.93311862859143 40.69579115384632... 33.639628 #fed976 16
1 32.0 5.189850e+07 37251.057847 POLYGON ((-73.91180710069435 40.70343495202662... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 32 204.707566 POLYGON ((-73.91180710069435 40.70343495202662... 79.156481 #e31a1c 32
2 6.0 9.634170e+07 70447.849084 POLYGON ((-73.92640556921116 40.87762147653734... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 6 254.660597 POLYGON ((-73.92640556921116 40.87762147653734... 100.000000 #e31a1c 6
3 31.0 1.604472e+09 434471.412859 (POLYGON ((-74.05050806403247 40.5664220341608... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 31 57.090485 MULTIPOLYGON (((-74.05050806403247 40.56642203... 17.561433 #ffffcc 31
4 7.0 9.226247e+07 65294.452403 (POLYGON ((-73.89680883223774 40.7958084451597... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 7 129.066478 MULTIPOLYGON (((-73.89680883223774 40.79580844... 47.594304 #fed976 7
5 23.0 4.740069e+07 40317.452033 (POLYGON ((-73.92044366203014 40.6656262871675... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 23 92.098260 MULTIPOLYGON (((-73.92044366203014 40.66562628... 32.168859 #fed976 23
6 29.0 4.201981e+08 135035.241651 POLYGON ((-73.73816144093141 40.72895809117297... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 29 61.776259 POLYGON ((-73.73816144093141 40.72895809117297... 19.516630 #ffffcc 29
7 26.0 4.247909e+08 125677.678898 POLYGON ((-73.74344992332192 40.77824115291502... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 26 31.956221 POLYGON ((-73.74344992332192 40.77824115291502... 7.073851 #ffffcc 26
8 15.0 1.961534e+08 153439.165680 POLYGON ((-73.98633135042395 40.69105051012824... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 15 80.325170 POLYGON ((-73.98633135042395 40.69105051012824... 27.256392 #fed976 15
9 17.0 1.284414e+08 68341.398899 POLYGON ((-73.92044366203014 40.6656262871675,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 17 73.062571 POLYGON ((-73.92044366203014 40.6656262871675,... 24.225983 #ffffcc 17
10 19.0 2.034175e+08 184183.167312 (POLYGON ((-73.846736514711 40.60485301485166,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 19 73.366731 MULTIPOLYGON (((-73.846736514711 40.6048530148... 24.352898 #ffffcc 19
11 13.0 1.048706e+08 86635.210559 POLYGON ((-73.97906084911834 40.70594602894087... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 13 51.905636 POLYGON ((-73.97906084911834 40.70594602894087... 15.397991 #ffffcc 13
12 18.0 1.751488e+08 121184.158477 (POLYGON ((-73.86706149472118 40.5820879767934... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 18 134.915158 MULTIPOLYGON (((-73.86706149472118 40.58208797... 50.034738 #fd8d3c 18
13 20.0 2.426965e+08 94309.778946 POLYGON ((-74.02552971543656 40.65147855069281... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 20 158.156244 POLYGON ((-74.02552971543656 40.65147855069281... 59.732368 #fd8d3c 20
14 3.0 1.134889e+08 52072.051321 POLYGON ((-73.95671863064405 40.78660079332199... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 3 45.148505 POLYGON ((-73.95671863064405 40.78660079332199... 12.578495 #ffffcc 3
15 5.0 5.251977e+07 44469.588221 POLYGON ((-73.93515659239551 40.83268240623763... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 5 74.205055 POLYGON ((-73.93515659239551 40.83268240623763... 24.702698 #ffffcc 5
16 9.0 8.341539e+07 46648.958586 POLYGON ((-73.9212971968614 40.85428933985649,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 9 171.782841 POLYGON ((-73.9212971968614 40.85428933985649,... 65.418234 #fd8d3c 9
17 1.0 3.516075e+07 28641.276279 POLYGON ((-73.97177410965313 40.72582128133706... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 1 74.392314 POLYGON ((-73.97177410965313 40.72582128133706... 24.780835 #ffffcc 1
18 14.0 1.503102e+08 95792.082090 POLYGON ((-73.95439555417087 40.73911477252251... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 14 92.712426 POLYGON ((-73.95439555417087 40.73911477252251... 32.425127 #fed976 14
19 4.0 5.262043e+07 52061.828459 (POLYGON ((-73.92133752419399 40.8008521064970... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 4 74.202823 MULTIPOLYGON (((-73.92133752419399 40.80085210... 24.701767 #ffffcc 4
20 10.0 2.825415e+08 94957.570434 POLYGON ((-73.86789798628736 40.90294017690526... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 10 151.978352 POLYGON ((-73.86789798628736 40.90294017690526... 57.154566 #fd8d3c 10
21 10.0 3.282963e+06 7883.372664 (POLYGON ((-73.9089323517538 40.8721573479701,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 10 151.978352 MULTIPOLYGON (((-73.9089323517538 40.872157347... 57.154566 #fd8d3c 10
22 12.0 6.907182e+07 48527.595776 POLYGON ((-73.88284445574813 40.84781722645163... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 12 118.038638 POLYGON ((-73.88284445574813 40.84781722645163... 42.992802 #fed976 12
23 25.0 4.436285e+08 175827.007127 POLYGON ((-73.82049919995312 40.80101146781899... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 25 65.688042 POLYGON ((-73.82049919995312 40.80101146781899... 21.148870 #ffffcc 25
24 28.0 2.475679e+08 114694.912786 POLYGON ((-73.84485477879177 40.7357514698091,... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 28 17.399788 POLYGON ((-73.84485477879177 40.7357514698091,... 1.000000 #ffffcc 28
25 24.0 3.949782e+08 127343.703736 (POLYGON ((-73.90641585511733 40.7398683641967... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 24 189.934931 MULTIPOLYGON (((-73.90641585511733 40.73986836... 72.992417 #fd8d3c 24
26 30.0 3.181290e+08 150392.978241 POLYGON ((-73.90647314610101 40.79018117520807... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 30 81.290729 POLYGON ((-73.90647314610101 40.79018117520807... 27.659283 #fed976 30
27 21.0 2.101971e+08 123858.087345 POLYGON ((-73.96184657346174 40.62757081710622... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 21 59.592857 POLYGON ((-73.96184657346174 40.62757081710622... 18.605579 #ffffcc 21
28 22.0 3.855533e+08 271718.504936 (POLYGON ((-73.91990064270161 40.5996005215871... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 22 55.823892 MULTIPOLYGON (((-73.91990064270161 40.59960052... 17.032931 #ffffcc 22
29 27.0 7.955970e+08 589135.490708 (POLYGON ((-73.82784008953526 40.5887858248046... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 27 34.876088 MULTIPOLYGON (((-73.82784008953526 40.58878582... 8.292202 #ffffcc 27
30 11.0 3.926651e+08 305305.869806 (POLYGON ((-73.78833349834532 40.8346671297593... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 11 54.424475 MULTIPOLYGON (((-73.78833349834532 40.83466712... 16.449007 #ffffcc 11
31 8.0 2.588266e+08 223080.044096 (POLYGON ((-73.83979488562292 40.8356192069902... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 8 111.027489 MULTIPOLYGON (((-73.83979488562292 40.83561920... 40.067313 #fed976 8
32 2.0 2.804004e+08 212406.819436 (POLYGON ((-74.0438776163991 40.69018767537123... geo_export_42eedaf7-5cda-4346-b866-55c3ecc08a3b +proj=longlat +ellps=WGS84 +no_defs 2 36.504481 MULTIPOLYGON (((-74.0438776163991 40.690187675... 8.971669 #ffffcc 2
In [130]:
type(district_map_table)
Out[130]:
geotable.GeoTable
In [131]:
target_path = target_folder + '/choropleth.csv'
map_table.to_csv(target_path, index=False)
print('a_geotable_path = %s' % target_path)
a_geotable_path = /tmp/choropleth.csv
In [132]:
map_table
Out[132]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
WKT FillColor Name
0 POLYGON ((-73.93311862859143 40.69579115384632... #fed976 16
1 POLYGON ((-73.91180710069435 40.70343495202662... #e31a1c 32
2 POLYGON ((-73.92640556921116 40.87762147653734... #e31a1c 6
3 MULTIPOLYGON (((-74.05050806403247 40.56642203... #ffffcc 31
4 MULTIPOLYGON (((-73.89680883223774 40.79580844... #fed976 7
5 MULTIPOLYGON (((-73.92044366203014 40.66562628... #fed976 23
6 POLYGON ((-73.73816144093141 40.72895809117297... #ffffcc 29
7 POLYGON ((-73.74344992332192 40.77824115291502... #ffffcc 26
8 POLYGON ((-73.98633135042395 40.69105051012824... #fed976 15
9 POLYGON ((-73.92044366203014 40.6656262871675,... #ffffcc 17
10 MULTIPOLYGON (((-73.846736514711 40.6048530148... #ffffcc 19
11 POLYGON ((-73.97906084911834 40.70594602894087... #ffffcc 13
12 MULTIPOLYGON (((-73.86706149472118 40.58208797... #fd8d3c 18
13 POLYGON ((-74.02552971543656 40.65147855069281... #fd8d3c 20
14 POLYGON ((-73.95671863064405 40.78660079332199... #ffffcc 3
15 POLYGON ((-73.93515659239551 40.83268240623763... #ffffcc 5
16 POLYGON ((-73.9212971968614 40.85428933985649,... #fd8d3c 9
17 POLYGON ((-73.97177410965313 40.72582128133706... #ffffcc 1
18 POLYGON ((-73.95439555417087 40.73911477252251... #fed976 14
19 MULTIPOLYGON (((-73.92133752419399 40.80085210... #ffffcc 4
20 POLYGON ((-73.86789798628736 40.90294017690526... #fd8d3c 10
21 MULTIPOLYGON (((-73.9089323517538 40.872157347... #fd8d3c 10
22 POLYGON ((-73.88284445574813 40.84781722645163... #fed976 12
23 POLYGON ((-73.82049919995312 40.80101146781899... #ffffcc 25
24 POLYGON ((-73.84485477879177 40.7357514698091,... #ffffcc 28
25 MULTIPOLYGON (((-73.90641585511733 40.73986836... #fd8d3c 24
26 POLYGON ((-73.90647314610101 40.79018117520807... #fed976 30
27 POLYGON ((-73.96184657346174 40.62757081710622... #ffffcc 21
28 MULTIPOLYGON (((-73.91990064270161 40.59960052... #ffffcc 22
29 MULTIPOLYGON (((-73.82784008953526 40.58878582... #ffffcc 27
30 MULTIPOLYGON (((-73.78833349834532 40.83466712... #ffffcc 11
31 MULTIPOLYGON (((-73.83979488562292 40.83561920... #fed976 8
32 MULTIPOLYGON (((-74.0438776163991 40.690187675... #ffffcc 2
271 POINT (-73.821532 40.737038) g Townsend Harris High School
27 POINT (-73.953276 40.770288) g Eleanor Roosevelt High School
5 POINT (-73.979581 40.719416) g New Explorations into Science, Technology and ...
23 POINT (-73.985723 40.741888) g Baruch College Campus High School
74 POINT (-73.947171 40.792932) g Young Women's Leadership School
35 POINT (-74.013921 40.718025) g Stuyvesant High School
315 POINT (-74.117086 40.568299) g Staten Island Technical High School
142 POINT (-73.889011 40.879958) g Bronx High School of Science
305 POINT (-73.926977 40.754975) g Baccalaureate School for Global Education
24 POINT (-74.002222 40.742512) g N.Y.C. Lab School for Collaborative Studies
203 POINT (-73.992151 40.642619) r West Brooklyn Community High School
89 POINT (-73.905389 40.818703) r Jill Chaifetz Transfer High School
104 POINT (-73.85593 40.821218) r Bronx Community High School
252 POINT (-73.904143 40.677528) r Aspirations Diploma Plus High School
90 POINT (-73.919949 40.818645) r Bronx Haven High School
184 POINT (-73.871505 40.743228) r VOYAGES Preparatory
220 POINT (-73.923881 40.666251) r Brownsville Academy High School
76 POINT (-73.939974 40.807692) r Harlem Renaissance High School
224 POINT (-73.920658 40.659914) r East Brooklyn Community High School
322 POINT (-73.915217 40.695875) r Bushwick Community High School
In [133]:
#%matplotlib inline
#t['economic_need_index'].plot(kind='bar')
In [134]:
sorted_school_table.to_csv('data.csv', index=False)
In [135]:
sorted_school_table.iloc[0]
Out[135]:
DBN                                                                 25Q525
WKT                                           POINT (-73.821532 40.737038)
School Name                                    Townsend Harris High School
asian_1                                                                644
asian_2                                                                 58
black_1                                                                 60
black_2                                                                5.4
dbn                                                                 25Q525
economic_need_index                                                  26.1%
english_language_learners_1                                              0
english_language_learners_2                                              0
female_1                                                               770
female_2                                                              69.4
grade_1                                                                  0
grade_10                                                               295
grade_11                                                               290
grade_12                                                               261
grade_2                                                                  0
grade_3                                                                  0
grade_4                                                                  0
grade_5                                                                  0
grade_6                                                                  0
grade_7                                                                  0
grade_8                                                                  0
grade_9                                                                264
grade_k                                                                  0
grade_pk_half_day_full_day                                               0
hispanic_1                                                             127
hispanic_2                                                            11.4
male_1                                                                 340
male_2                                                                30.6
multiple_race_categories_not_represented_1                              45
multiple_race_categories_not_represented_2                             4.1
poverty_1                                                              599
poverty_2                                                               54
school_name                                    Townsend Harris High School
students_with_disabilities_1                                            29
students_with_disabilities_2                                           2.6
total_enrollment                                                      1110
white_1                                                                234
white_2                                                               21.1
year                                                               2016-17
diversity_index                                                    16.5976
scaled_index                                                      0.016038
district                                                                25
Total Grads - % of cohort                                          99.5526
Name: 271, dtype: object
In [163]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Fixing random state for reproducibility
np.random.seed(19680801)

x = sorted_school_table['scaled_index'].values
y = sorted_school_table[selected_outcome].values
#colors = np.random.rand(N)
#area = (30 * np.random.rand(N))**2  # 0 to 15 point radii

x.sort()
y.sort()

x = x[:-5]
y = y[:-5]

np.random.shuffle(x)
np.random.shuffle(y)

#axes = plt.scatter(x, y, s=area, c=colors, alpha=0.5)
axes = plt.scatter(x, y, alpha=0.5)
plt.ylabel(selected_outcome)
plt.xlabel('Diversity Index')
plt.show()
In [158]:
x[:-5]
Out[158]:
array([0.        , 0.00048444, 0.00067926, 0.00171858, 0.00172513,
       0.00233685, 0.00283372, 0.00295918, 0.00322157, 0.00386555,
       0.00386874, 0.00498669, 0.00622097, 0.0062449 , 0.00684392,
       0.00704525, 0.00725714, 0.0072967 , 0.00742497, 0.00743252,
       0.00752834, 0.00757928, 0.00782313, 0.00805081, 0.01009667,
       0.01036735, 0.01075276, 0.01085343, 0.01086899, 0.01129009,
       0.01134694, 0.01308784, 0.01321995, 0.01382189, 0.01382189,
       0.01383673, 0.01416625, 0.01433895, 0.01482993, 0.01534131,
       0.01537783, 0.01555985, 0.01572012, 0.01591837, 0.016038  ,
       0.01659864, 0.01680816, 0.01711565, 0.01734694, 0.0174664 ,
       0.01759184, 0.0177551 , 0.01782428, 0.01788018, 0.01891582,
       0.01895201, 0.0202551 , 0.02071429, 0.02102041, 0.02128728,
       0.02141399, 0.02183673, 0.0219666 , 0.02204898, 0.02232143,
       0.02241983, 0.02281179, 0.02323129, 0.02363265, 0.02384615,
       0.02399417, 0.02431973, 0.02437164, 0.02450387, 0.02469388,
       0.02472109, 0.02484694, 0.02504937, 0.02549165, 0.02555102,
       0.02586271, 0.02613703, 0.02626586, 0.02638655, 0.02648163,
       0.02744898, 0.02747681, 0.02813776, 0.02911079, 0.02913931,
       0.02994169, 0.02996708, 0.03008163, 0.03068027, 0.03179592,
       0.032     , 0.03204082, 0.03227041, 0.03262857, 0.03291545,
       0.03397959, 0.03412245, 0.03417312, 0.03438424, 0.0352076 ,
       0.03566719, 0.03581096, 0.03593407, 0.03664399, 0.03683673,
       0.03693878, 0.03729592, 0.0373399 , 0.03745306, 0.03823673,
       0.03830904, 0.03875283, 0.03946429, 0.03976676, 0.0404898 ,
       0.04061224, 0.04076531, 0.04132245, 0.04171429, 0.04197279,
       0.04197668, 0.04198516, 0.04209184, 0.04239332, 0.04310832,
       0.04367347, 0.04404082, 0.04416327, 0.04428571, 0.04443878,
       0.04533528, 0.0457398 , 0.0458657 , 0.04629738, 0.04658163,
       0.04658892, 0.04664723, 0.04710204, 0.04734694, 0.04745826,
       0.04750806, 0.04771961, 0.04790816, 0.04820408, 0.04909621,
       0.04930612, 0.04979592, 0.05049563, 0.05058957, 0.05102041,
       0.05129252, 0.05198711, 0.0522449 , 0.0524898 , 0.05322981,
       0.05368805, 0.05413265, 0.05456498, 0.05469388, 0.05511774,
       0.0552381 , 0.05536735, 0.05546939, 0.05594499, 0.0564723 ,
       0.05653061, 0.05653061, 0.05694952, 0.05725417, 0.0577551 ,
       0.05877551, 0.05908764, 0.05931973, 0.05995198, 0.06020408,
       0.06068323, 0.06081633, 0.06096939, 0.06102041, 0.06122449,
       0.06138776, 0.06176871, 0.06304922, 0.06317784, 0.06387755,
       0.06412965, 0.0644898 , 0.06503759, 0.06513806, 0.06568206,
       0.06618524, 0.06661224, 0.06708283, 0.06718367, 0.06773469,
       0.06843537, 0.06954082, 0.07      , 0.07007653, 0.07013605,
       0.07036014, 0.07038265, 0.07061224, 0.07232009, 0.07315789,
       0.07338776, 0.07346939, 0.07393586, 0.07469388, 0.07510204,
       0.07518367, 0.0755102 , 0.07778426, 0.0784898 , 0.07899529,
       0.08014577, 0.08094388, 0.08102041, 0.08142857, 0.08399417,
       0.085     , 0.08521193, 0.08605965, 0.08622449, 0.08661808,
       0.09      , 0.09125364, 0.09234694, 0.09295918, 0.09387755,
       0.09428571, 0.09473469, 0.09476809, 0.09533752, 0.09622449,
       0.09646782, 0.09721707, 0.09938776, 0.10178108, 0.10261596,
       0.10271137, 0.10390895, 0.10459184, 0.10538776, 0.10955102,
       0.10979592, 0.11010204, 0.11077551, 0.11236735, 0.11346939,
       0.11444898, 0.11680272, 0.11714286, 0.11802721, 0.11886827,
       0.11959184, 0.12027211, 0.12054422, 0.12106122, 0.12240816,
       0.12436735, 0.1244898 , 0.12639456, 0.12767347, 0.13097959,
       0.13146939, 0.13428571, 0.13795918, 0.142     , 0.14346939,
       0.14438776, 0.14515306, 0.14612245, 0.1484898 , 0.15632653,
       0.15658892, 0.15755102, 0.15825073, 0.15947522, 0.15965015,
       0.16153061, 0.16442177, 0.16638484, 0.16708455, 0.16778426,
       0.17204082, 0.17420408, 0.17740525, 0.17934694, 0.17959184,
       0.18081633, 0.18265306, 0.18461224, 0.19102041, 0.19321429,
       0.19530612, 0.21387755, 0.21681633, 0.2255102 , 0.22636735,
       0.24877551, 0.25428571, 0.27295918, 0.28367347, 0.29806122,
       0.29979592, 0.33632653, 0.36204082, 0.37244898, 0.37346939,
       0.38244898, 0.38979592])
In [137]:
# plt.scatter?
In [138]:
# Save file to target folder to include it in the result download
target_path = target_folder + '/c.png'
figure = axes.get_figure()
figure.savefig(target_path)
print(f'c_image_path = {target_path}')
c_image_path = /tmp/c.png
In [139]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
In [146]:
X = sorted_school_table[['scaled_index']].values
X
Out[146]:
array([[1.60380014e-02],
       [1.07527601e-02],
       [2.47210884e-02],
       [4.98669033e-03],
       [3.82367347e-02],
       [2.38461538e-02],
       [5.65306122e-02],
       [1.80816327e-01],
       [7.23200859e-02],
       [4.53352770e-02],
       [6.95408163e-02],
       [1.77551020e-02],
       [7.42857143e-01],
       [6.13877551e-02],
       [1.58250729e-01],
       [5.05895692e-02],
       [2.95918367e-03],
       [7.43252140e-03],
       [6.61852433e-02],
       [3.72959184e-02],
       [0.00000000e+00],
       [1.24489796e-01],
       [1.89158163e-02],
       [5.41326531e-02],
       [1.24367347e-01],
       [2.33684683e-03],
       [4.41632653e-02],
       [5.46938776e-02],
       [4.66472303e-02],
       [7.03826531e-02],
       [1.75918367e-02],
       [1.38218924e-02],
       [4.36734694e-02],
       [8.05081216e-03],
       [4.73469388e-02],
       [5.04956268e-02],
       [4.26326531e-01],
       [3.74530612e-02],
       [7.29670330e-03],
       [1.08689928e-02],
       [4.71020408e-02],
       [2.62658577e-02],
       [4.77196096e-02],
       [1.43469388e-01],
       [2.98061224e-01],
       [1.18868275e-01],
       [1.68081633e-02],
       [2.64816327e-02],
       [1.42000000e-01],
       [1.72512755e-03],
       [1.78242823e-02],
       [1.20544218e-01],
       [3.66439909e-02],
       [2.74489796e-02],
       [1.08534323e-02],
       [9.23469388e-02],
       [9.12536443e-02],
       [2.43197279e-02],
       [5.72541744e-02],
       [1.78801843e-02],
       [3.89795918e-01],
       [1.77405248e-01],
       [6.41296519e-02],
       [1.67784257e-01],
       [3.86554622e-03],
       [2.02551020e-02],
       [1.37959184e-01],
       [1.46122449e-01],
       [3.20408163e-02],
       [9.47346939e-02],
       [5.99519808e-02],
       [6.31778426e-02],
       [5.19871106e-02],
       [1.19591837e-01],
       [6.24489796e-03],
       [6.38775510e-02],
       [8.50000000e-02],
       [1.02615955e-01],
       [3.41731175e-02],
       [9.53375196e-02],
       [5.93197279e-02],
       [3.06802721e-02],
       [6.77346939e-02],
       [6.44897959e-02],
       [4.17142857e-02],
       [7.46938776e-02],
       [1.48299320e-02],
       [5.36880466e-02],
       [1.59475219e-01],
       [2.91107872e-02],
       [6.06832298e-02],
       [1.03908948e-01],
       [6.79256777e-04],
       [2.36326531e-02],
       [2.10204082e-02],
       [1.13469388e-01],
       [1.38367347e-02],
       [1.59183673e-02],
       [7.03601441e-02],
       [3.92040816e-01],
       [6.84353741e-02],
       [7.34693878e-02],
       [8.62244898e-02],
       [8.52119309e-02],
       [5.22448980e-02],
       [1.57201166e-02],
       [6.70828331e-02],
       [3.68367347e-02],
       [4.65889213e-02],
       [1.95306122e-01],
       [1.82653061e-01],
       [9.00000000e-02],
       [2.48469388e-02],
       [3.73399015e-02],
       [1.64421769e-01],
       [6.09693878e-02],
       [2.14139942e-02],
       [9.93877551e-02],
       [2.99795918e-01],
       [2.54916512e-02],
       [8.10204082e-02],
       [6.08163265e-02],
       [2.81377551e-02],
       [3.22157434e-03],
       [1.31469388e-01],
       [3.22704082e-02],
       [1.14448980e-01],
       [4.93061224e-02],
       [4.65816327e-02],
       [2.74768089e-02],
       [1.12900875e-02],
       [1.66384840e-01],
       [3.59340659e-02],
       [7.25714286e-03],
       [7.57927786e-03],
       [4.84436199e-04],
       [5.53673469e-02],
       [1.72040816e-01],
       [2.24198251e-02],
       [4.19727891e-02],
       [5.54693878e-02],
       [2.61370262e-02],
       [1.65986395e-02],
       [1.18027211e-01],
       [1.67084548e-01],
       [2.28117914e-02],
       [1.44387755e-01],
       [2.20489796e-02],
       [7.42496999e-03],
       [2.46938776e-02],
       [1.45153061e-01],
       [1.09795918e-01],
       [1.09551020e-01],
       [1.74204082e-01],
       [1.10775510e-01],
       [1.16802721e-01],
       [4.07653061e-02],
       [4.69591837e-01],
       [1.00966702e-02],
       [1.26394558e-01],
       [2.45038705e-02],
       [4.74582560e-02],
       [3.73469388e-01],
       [3.87528345e-02],
       [6.50375940e-02],
       [1.41662519e-02],
       [8.09438776e-02],
       [6.56820623e-02],
       [5.77551020e-02],
       [2.18367347e-02],
       [2.54285714e-01],
       [1.91020408e-01],
       [1.32199546e-02],
       [9.38775510e-02],
       [4.31083203e-02],
       [2.63865546e-02],
       [1.34285714e-01],
       [1.93214286e-01],
       [4.19766764e-02],
       [4.23933210e-02],
       [7.51836735e-02],
       [6.17687075e-02],
       [1.79591837e-01],
       [1.04591837e-01],
       [3.20000000e-02],
       [2.19666048e-02],
       [4.58657011e-02],
       [1.56588921e-01],
       [6.12244898e-02],
       [1.13469388e-02],
       [3.58109560e-02],
       [2.83673469e-01],
       [7.84897959e-02],
       [7.01360544e-02],
       [1.74664012e-02],
       [1.03673469e-02],
       [4.79081633e-02],
       [5.24897959e-02],
       [6.22097115e-03],
       [1.89520132e-02],
       [7.77842566e-02],
       [4.62973761e-02],
       [7.31578947e-02],
       [5.65306122e-02],
       [9.62244898e-02],
       [4.13224490e-02],
       [6.51380552e-02],
       [5.59449867e-02],
       [1.53778268e-02],
       [4.42857143e-02],
       [4.75080559e-02],
       [1.38218924e-02],
       [4.82040816e-02],
       [2.99416910e-02],
       [5.10204082e-02],
       [7.39358601e-02],
       [3.52076003e-02],
       [1.00000000e+00],
       [7.00000000e-02],
       [2.50493746e-02],
       [5.52380952e-02],
       [3.86874174e-03],
       [6.84392067e-03],
       [1.71858217e-03],
       [7.06122449e-02],
       [2.55510204e-02],
       [8.60596546e-02],
       [1.61530612e-01],
       [4.04897959e-02],
       [1.57551020e-01],
       [2.16816327e-01],
       [9.72170686e-02],
       [4.06122449e-02],
       [5.69495166e-02],
       [7.00765306e-02],
       [4.97959184e-02],
       [2.43716434e-02],
       [3.39795918e-02],
       [3.29154519e-02],
       [3.94642857e-02],
       [1.56326531e-01],
       [4.57397959e-02],
       [1.20272109e-01],
       [7.04525288e-03],
       [1.27673469e-01],
       [4.40408163e-02],
       [3.43842365e-02],
       [1.01781076e-01],
       [3.82448980e-01],
       [8.39941691e-02],
       [6.66122449e-02],
       [7.82312925e-03],
       [1.84612245e-01],
       [1.30878438e-02],
       [1.73469388e-02],
       [2.83372365e-03],
       [4.44387755e-02],
       [3.97667638e-02],
       [2.58627087e-02],
       [3.69387755e-02],
       [9.64678179e-02],
       [2.91393079e-02],
       [1.12367347e-01],
       [3.26285714e-02],
       [5.87755102e-02],
       [2.13877551e-01],
       [8.66180758e-02],
       [7.51020408e-02],
       [7.52834467e-03],
       [1.05387755e-01],
       [5.32298137e-02],
       [5.45649839e-02],
       [2.07142857e-02],
       [4.90962099e-02],
       [1.43389530e-02],
       [1.02711370e-01],
       [5.12925170e-02],
       [3.17959184e-02],
       [3.36326531e-01],
       [1.79346939e-01],
       [2.32312925e-02],
       [6.10204082e-02],
       [1.48489796e-01],
       [2.23214286e-02],
       [6.30492197e-02],
       [1.30979592e-01],
       [3.83090379e-02],
       [1.71156463e-02],
       [1.53413089e-02],
       [2.72959184e-01],
       [4.20918367e-02],
       [2.48775510e-01],
       [7.89952904e-02],
       [7.33877551e-02],
       [1.21061224e-01],
       [1.55598456e-02],
       [5.64723032e-02],
       [8.01457726e-02],
       [1.59650146e-01],
       [9.47680891e-02],
       [5.90876351e-02],
       [9.42857143e-02],
       [3.41224490e-02],
       [6.71836735e-02],
       [8.14285714e-02],
       [1.22408163e-01],
       [1.10102041e-01],
       [6.02040816e-02],
       [3.62040816e-01],
       [2.12872841e-02],
       [2.25510204e-01],
       [9.29591837e-02],
       [1.17142857e-01],
       [7.55102041e-02],
       [2.39941691e-02],
       [2.99670836e-02],
       [4.19851577e-02],
       [3.56671900e-02],
       [3.72448980e-01],
       [5.51177394e-02],
       [3.00816327e-02],
       [2.26367347e-01]])
In [147]:
y = sorted_school_table[selected_outcome].values
y
Out[147]:
array([99.55263158, 99.3452381 , 98.50344828, 98.33404255, 98.08157895,
       97.70862069, 97.53928571, 97.5031746 , 96.92105263, 96.64081633,
       96.31333333, 96.275     , 95.79166667, 95.77837838, 95.24761905,
       95.1106383 , 94.78888889, 94.758     , 94.6962963 , 94.59183673,
       93.81666667, 93.55454545, 93.32105263, 92.73174603, 92.672     ,
       92.33513514, 91.9875    , 91.92545455, 91.86382979, 91.7       ,
       91.41904762, 91.33913043, 91.12857143, 90.55087719, 90.4625    ,
       89.68333333, 89.025     , 88.99302326, 88.825     , 88.68372093,
       88.54651163, 87.87647059, 86.53095238, 86.52380952, 86.11666667,
       85.51111111, 85.46851852, 85.35833333, 85.14516129, 85.0862069 ,
       84.9968254 , 84.62173913, 84.244     , 84.20952381, 83.67906977,
       83.61666667, 83.59583333, 83.46666667, 83.4       , 83.33928571,
       83.07608696, 82.95714286, 82.19756098, 81.58684211, 81.55882353,
       81.48181818, 81.45925926, 81.45686275, 80.99090909, 80.90571429,
       80.79183673, 80.60285714, 80.45172414, 80.388     , 80.15714286,
       79.91052632, 79.75714286, 79.50784314, 79.148     , 79.07818182,
       78.83617021, 78.81166667, 78.76666667, 78.65      , 78.40263158,
       78.19166667, 78.03571429, 77.84657534, 77.22916667, 77.04444444,
       76.96666667, 76.75      , 76.71777778, 76.66      , 76.56410256,
       75.81746032, 75.55714286, 75.15945946, 75.01764706, 74.84736842,
       74.73333333, 74.65238095, 74.3875    , 74.25357143, 74.1       ,
       73.9       , 73.64222222, 73.42051282, 73.4012987 , 73.21842105,
       73.1952381 , 73.17692308, 73.04626866, 72.91621622, 72.9027027 ,
       72.80833333, 72.47222222, 72.17916667, 72.14857143, 72.08928571,
       71.74166667, 71.65531915, 71.43571429, 71.40526316, 71.31666667,
       71.19      , 70.96428571, 70.91267606, 70.82162162, 70.81714286,
       70.80769231, 70.7375    , 70.7325    , 70.5212766 , 70.4625    ,
       70.41071429, 70.34230769, 70.19047619, 70.17313433, 70.0875    ,
       70.02077922, 69.97058824, 69.77857143, 69.45675676, 68.76451613,
       68.65102041, 68.64285714, 68.46363636, 68.36923077, 68.15      ,
       68.09714286, 67.5952381 , 67.44      , 67.36530612, 67.29152542,
       67.20952381, 67.01384615, 66.93214286, 66.63913043, 66.3875    ,
       66.124     , 65.57857143, 65.41086957, 65.34285714, 64.93877551,
       64.83333333, 64.75526316, 64.74705882, 64.74166667, 64.43225806,
       64.375     , 64.2       , 64.18703704, 64.09459459, 63.91111111,
       63.88461538, 63.88039216, 63.85882353, 63.81020408, 63.72      ,
       63.53246753, 63.46875   , 63.32608696, 63.31071429, 63.16140351,
       62.98888889, 62.86875   , 62.74761905, 62.53584906, 62.49268293,
       62.016     , 61.99555556, 61.89642857, 61.86190476, 61.75555556,
       61.6952381 , 61.664     , 61.56551724, 61.27066667, 61.16461538,
       61.15813953, 60.92222222, 60.83658537, 60.49047619, 60.36458333,
       60.10714286, 60.09791667, 59.87922078, 59.75119048, 59.66964286,
       59.32244898, 59.12      , 59.08333333, 59.08      , 59.07843137,
       58.78571429, 58.70784314, 58.7       , 58.63773585, 58.62653061,
       58.46666667, 58.42909091, 58.39545455, 58.35306122, 58.14285714,
       58.14166667, 58.13409091, 57.95      , 57.88431373, 57.85185185,
       57.59090909, 57.14428571, 57.01463415, 56.75263158, 56.5       ,
       56.44230769, 56.2627907 , 56.26219512, 55.89807692, 55.60140845,
       55.45652174, 55.25974026, 55.06428571, 55.01369863, 54.96      ,
       54.94677419, 54.84166667, 54.67272727, 54.35555556, 54.316     ,
       53.25625   , 53.16      , 53.08285714, 52.47460317, 52.31086957,
       51.9047619 , 51.02037037, 50.82077922, 50.75584416, 50.21818182,
       50.12857143, 49.90238095, 49.81547619, 49.15106383, 49.07142857,
       48.68181818, 48.52413793, 48.42142857, 47.92142857, 47.77162162,
       47.73921569, 47.30701754, 47.28965517, 46.77777778, 46.77446809,
       46.6       , 46.35584416, 46.2012987 , 46.17012987, 45.02857143,
       44.25324675, 42.90363636, 42.49111111, 42.44675325, 41.92678571,
       40.85789474, 39.55507246, 38.94615385, 37.96721311, 37.47346939,
       37.20350877, 36.28032787, 36.25862069, 33.62      , 27.39459459,
       27.3       , 26.96666667, 25.82653061, 24.84166667, 20.28333333,
       20.256     , 19.83888889, 19.64915254, 17.06666667, 15.86046512,
       15.65      , 15.41034483, 15.38      , 14.46428571, 14.37826087,
       13.53888889, 13.47857143, 12.95483871, 12.92916667, 10.68333333,
       10.335     ,  9.5       ,  9.2       ,  8.90714286,  7.52142857,
        5.44166667,  5.3375    ])
In [148]:
model.fit(X, y)
Out[148]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
In [ ]:
from sklearn.model_selection import cross_val_score
models = []
scores = []

def train(model, X):
    model.fit(X, y)
    models.append(model)
    score = cross_val_score(
        model, X, y, cv=3,
        scoring='neg_mean_absolute_error',
    ).mean()
    scores.append(score)
    return score
In [ ]:
model_scores = cross_val_score(model, X, y, cv=3, scoring='neg_mean_absolute_error')
print('average_negative_mean_error = %s' % np.mean(model_scores))