gist




Pay Notebook Creator: Michelle Chung0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [3]:
# https://crosscompute.com/n/QPUeirwJFjTMfs1oJxWNCyi3PR8AUSV7/-/train-model-to-estimate-graduation-rate-from-tree-count-20190201

# training model
In [4]:
# Create an ordinary least-squares regressor with scikit-learn's default settings.
from sklearn.linear_model import LinearRegression
model = LinearRegression()
In [5]:
# Load the training table; 'data.csv' is resolved relative to the notebook's working directory.
import pandas as pd
dataset = pd.read_csv('data.csv')
In [6]:
# Show the first row as a Series: every column name with one sample value.
dataset.iloc[0]
Out[6]:
DBN                                                                 25Q525
WKT                                           POINT (-73.821532 40.737038)
School Name                                    Townsend Harris High School
asian_1                                                                663
asian_2                                                               58.5
black_1                                                                 64
black_2                                                                5.6
dbn                                                                 25Q525
economic_need_index                                                   28.2
english_language_learners_1                                              0
english_language_learners_2                                              0
female_1                                                               780
female_2                                                              68.8
grade_1                                                                  0
grade_10                                                               293
grade_11                                                               262
grade_12                                                               281
grade_2                                                                  0
grade_3                                                                  0
grade_4                                                                  0
grade_5                                                                  0
grade_6                                                                  0
grade_7                                                                  0
grade_8                                                                  0
grade_9                                                                297
grade_k                                                                  0
grade_pk_half_day_full_day                                               0
hispanic_1                                                             138
hispanic_2                                                            12.2
male_1                                                                 353
male_2                                                                31.2
multiple_race_categories_not_represented_1                              21
multiple_race_categories_not_represented_2                             1.9
poverty_1                                                              623
poverty_2                                                               55
school_name                                    Townsend Harris High School
students_with_disabilities_1                                            19
students_with_disabilities_2                                           1.7
total_enrollment                                                      1133
white_1                                                                247
white_2                                                               21.8
year                                                               2015-16
Diversity Index                                                       28.2
district                                                                25
Graduation Rate                                                    99.5526
Name: 0, dtype: object
In [29]:
# Build the feature matrix from two columns, in this order:
# column 0 = 'Diversity Index', column 1 = 'economic_need_index'.
# NOTE(review): the displayed array shows both columns holding the same values
# (e.g. [28.2, 28.2]), so the two features appear perfectly collinear. Confirm whether
# 'Diversity Index' was meant to be a measure distinct from 'economic_need_index'.
X = dataset[['Diversity Index','economic_need_index']].values
X
Out[29]:
array([[28.2, 28.2],
       [26.1, 26.1],
       [38.5, 38.5],
       ...,
       [75.1, 75.1],
       [84.5, 84.5],
       [71.6, 71.6]])
In [30]:
# Smallest value over the whole 2-D feature array (no axis given, so both columns are pooled).
X.min()
Out[30]:
18.6
In [31]:
# Largest value over the whole 2-D feature array (no axis given, so both columns are pooled).
X.max()
Out[31]:
99.2
In [32]:
# Regression target: the 'Graduation Rate' column as a NumPy array.
y = dataset['Graduation Rate'].values
y
Out[32]:
array([99.55263158, 99.55263158, 99.55263158, ...,  5.32222222,
        5.32222222,  5.32222222])
In [33]:
# Fit the linear model on the two-column feature matrix X against graduation rate y.
model.fit(X, y)
Out[33]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
In [35]:
# Predict for one sample given as [Diversity Index, economic_need_index].
# NOTE(review): the displayed result is on the order of -1e13, far outside the range of the
# training targets. This is consistent with the two training columns being identical
# (large coefficients that cancel only when both inputs are equal) — confirm.
model.predict([[1,3]])
Out[35]:
array([-7.8512405e+13])
In [36]:
# Predict for two samples that differ only in the first feature (Diversity Index).
# NOTE(review): the displayed results are on the order of 1e15, which is not a plausible
# graduation rate; presumably caused by the duplicated feature column — confirm.
model.predict([
    [30,3],
    [75,3],
])
Out[36]:
array([1.05991747e+15, 2.82644658e+15])
In [51]:
# SAVE MODEL

from pickle import dump
# Serialize the fitted model to model.pkl in the working directory.
# The context manager guarantees the file is flushed and closed, so the load
# that follows never sees a partially written file.
with open('model.pkl', 'wb') as model_file:
    dump(model, model_file)
In [52]:
from pickle import load
# Read the pickled model back from model.pkl; the context manager closes the handle.
# Only unpickle files you produced yourself: unpickling can execute arbitrary code.
with open('model.pkl', 'rb') as model_file:
    model = load(model_file)
model
Out[52]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
In [39]:
# TRAINING MODEL

# Read data.csv into a second frame named `t` (the same file loaded into `dataset`)
# and display it.
t = pd.read_csv('data.csv')
t
Out[39]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
DBN WKT School Name asian_1 asian_2 black_1 black_2 dbn economic_need_index english_language_learners_1 ... school_name students_with_disabilities_1 students_with_disabilities_2 total_enrollment white_1 white_2 year Diversity Index district Graduation Rate
0 25Q525 POINT (-73.821532 40.737038) Townsend Harris High School 663 58.5 64 5.6 25Q525 28.2 0 ... Townsend Harris High School 19 1.7 1133 247 21.8 2015-16 28.2 25 99.552632
1 25Q525 POINT (-73.821532 40.737038) Townsend Harris High School 644 58.0 60 5.4 25Q525 26.1 0 ... Townsend Harris High School 29 2.6 1110 234 21.1 2016-17 26.1 25 99.552632
2 25Q525 POINT (-73.821532 40.737038) Townsend Harris High School 662 58.0 59 5.2 25Q525 38.5 0 ... Townsend Harris High School 40 3.5 1141 231 20.2 2017-18 38.5 25 99.552632
3 25Q525 POINT (-73.821532 40.737038) Townsend Harris High School 662 58.5 70 6.2 25Q525 28.0 0 ... Townsend Harris High School 16 1.4 1132 247 21.8 2014-15 28.0 25 99.552632
4 02M416 POINT (-73.953276 40.770288) Eleanor Roosevelt High School 91 16.8 18 3.3 02M416 22.9 0 ... Eleanor Roosevelt High School 78 14.4 541 347 64.1 2017-18 22.9 2 99.345238
5 02M416 POINT (-73.953276 40.770288) Eleanor Roosevelt High School 111 20.4 22 4.1 02M416 19.4 0 ... Eleanor Roosevelt High School 62 11.4 543 344 63.4 2016-17 19.4 2 99.345238
6 02M416 POINT (-73.953276 40.770288) Eleanor Roosevelt High School 119 21.5 22 4.0 02M416 18.6 1 ... Eleanor Roosevelt High School 43 7.8 554 344 62.1 2015-16 18.6 2 99.345238
7 02M416 POINT (-73.953276 40.770288) Eleanor Roosevelt High School 109 20.1 24 4.4 02M416 21.0 0 ... Eleanor Roosevelt High School 32 5.9 541 332 61.4 2014-15 21.0 2 99.345238
8 01M539 POINT (-73.979581 40.719416) New Explorations into Science, Technology and ... 575 33.2 134 7.7 01M539 27.8 14 ... New Explorations into Science, Technology and ... 139 8.0 1734 692 39.9 2017-18 27.8 1 98.503448
9 01M539 POINT (-73.979581 40.719416) New Explorations into Science, Technology and ... 579 33.2 140 8.0 01M539 23.3 7 ... New Explorations into Science, Technology and ... 107 6.1 1745 720 41.3 2016-17 23.3 1 98.503448
10 01M539 POINT (-73.979581 40.719416) New Explorations into Science, Technology and ... 579 33.0 153 8.7 01M539 25.5 7 ... New Explorations into Science, Technology and ... 82 4.7 1753 732 41.8 2015-16 25.5 1 98.503448
11 01M539 POINT (-73.979581 40.719416) New Explorations into Science, Technology and ... 577 33.3 158 9.1 01M539 25.7 2 ... New Explorations into Science, Technology and ... 66 3.8 1735 722 41.6 2014-15 25.7 1 98.503448
12 02M411 POINT (-73.985723 40.741888) Baruch College Campus High School 198 45.0 20 4.5 02M411 35.1 2 ... Baruch College Campus High School 68 15.5 440 149 33.9 2016-17 35.1 2 98.334043
13 02M411 POINT (-73.985723 40.741888) Baruch College Campus High School 221 49.7 26 5.8 02M411 41.7 1 ... Baruch College Campus High School 60 13.5 445 134 30.1 2015-16 41.7 2 98.334043
14 02M411 POINT (-73.985723 40.741888) Baruch College Campus High School 246 54.5 28 6.2 02M411 46.7 1 ... Baruch College Campus High School 45 10.0 451 106 23.5 2014-15 46.7 2 98.334043
15 02M411 POINT (-73.985723 40.741888) Baruch College Campus High School 180 39.1 21 4.6 02M411 39.5 2 ... Baruch College Campus High School 73 15.9 460 179 38.9 2017-18 39.5 2 98.334043
16 04M610 POINT (-73.947171 40.792932) Young Women's Leadership School 38 7.9 128 26.7 04M610 71.7 11 ... Young Women's Leadership School 92 19.2 480 15 3.1 2014-15 71.7 4 98.081579
17 04M610 POINT (-73.947171 40.792932) Young Women's Leadership School 42 8.7 115 23.9 04M610 76.9 13 ... Young Women's Leadership School 100 20.8 481 18 3.7 2017-18 76.9 4 98.081579
18 04M610 POINT (-73.947171 40.792932) Young Women's Leadership School 44 9.0 121 24.6 04M610 72.7 9 ... Young Women's Leadership School 93 18.9 491 18 3.7 2015-16 72.7 4 98.081579
19 04M610 POINT (-73.947171 40.792932) Young Women's Leadership School 43 8.9 115 23.9 04M610 71.2 8 ... Young Women's Leadership School 97 20.1 482 20 4.1 2016-17 71.2 4 98.081579
20 02M475 POINT (-74.013921 40.718025) Stuyvesant High School 2443 73.4 20 0.6 02M475 32.6 0 ... Stuyvesant High School 18 0.5 3327 634 19.1 2015-16 32.6 2 97.708621
21 02M475 POINT (-74.013921 40.718025) Stuyvesant High School 2417 73.3 28 0.8 02M475 32.8 2 ... Stuyvesant High School 17 0.5 3296 674 20.4 2014-15 32.8 2 97.708621
22 02M475 POINT (-74.013921 40.718025) Stuyvesant High School 2507 74.5 21 0.6 02M475 32.3 1 ... Stuyvesant High School 20 0.6 3365 602 17.9 2016-17 32.3 2 97.708621
23 02M475 POINT (-74.013921 40.718025) Stuyvesant High School 2451 73.5 24 0.7 02M475 41.8 0 ... Stuyvesant High School 14 0.4 3336 595 17.8 2017-18 41.8 2 97.708621
24 31R605 POINT (-74.117086 40.568299) Staten Island Technical High School 612 46.6 13 1.0 31R605 26.1 1 ... Staten Island Technical High School 15 1.1 1312 633 48.2 2016-17 26.1 31 97.539286
25 31R605 POINT (-74.117086 40.568299) Staten Island Technical High School 512 41.1 13 1.0 31R605 25.8 0 ... Staten Island Technical High School 16 1.3 1247 651 52.2 2014-15 25.8 31 97.539286
26 31R605 POINT (-74.117086 40.568299) Staten Island Technical High School 544 42.5 19 1.5 31R605 25.4 0 ... Staten Island Technical High School 14 1.1 1279 656 51.3 2015-16 25.4 31 97.539286
27 31R605 POINT (-74.117086 40.568299) Staten Island Technical High School 639 48.4 10 0.8 31R605 35.1 0 ... Staten Island Technical High School 16 1.2 1320 592 44.8 2017-18 35.1 31 97.539286
28 10X445 POINT (-73.889011 40.879958) Bronx High School of Science 1893 62.8 77 2.6 10X445 30.1 1 ... The Bronx High School of Science 40 1.3 3015 667 22.1 2014-15 30.1 10 97.503175
29 10X445 POINT (-73.889011 40.879958) Bronx High School of Science 1871 62.2 67 2.2 10X445 29.5 0 ... The Bronx High School of Science 32 1.1 3010 632 21.0 2015-16 29.5 10 97.503175
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1262 23K646 POINT (-73.904143 40.677528) Aspirations Diploma Plus High School 3 1.3 178 75.4 23K646 69.0 7 ... Aspirations Diploma Plus High School 43 18.2 236 1 0.4 2016-17 69.0 23 10.335000
1263 23K646 POINT (-73.904143 40.677528) Aspirations Diploma Plus High School 1 0.5 159 74.3 23K646 88.6 7 ... Aspirations Diploma Plus High School 53 24.8 214 2 0.9 2017-18 88.6 23 10.335000
1264 07X381 POINT (-73.919949 40.818645) Bronx Haven High School 2 1.0 69 35.9 07X381 89.1 13 ... Bronx Haven High School 44 22.9 192 0 0.0 2017-18 89.1 7 9.500000
1265 07X381 POINT (-73.919949 40.818645) Bronx Haven High School 2 1.1 54 29.5 07X381 79.3 11 ... Bronx Haven High School 41 22.4 183 1 0.5 2015-16 79.3 7 9.500000
1266 07X381 POINT (-73.919949 40.818645) Bronx Haven High School 5 2.9 50 28.9 07X381 78.3 6 ... Bronx Haven High School 35 20.2 173 1 0.6 2014-15 78.3 7 9.500000
1267 07X381 POINT (-73.919949 40.818645) Bronx Haven High School 3 1.6 62 32.3 07X381 78.8 10 ... Bronx Haven High School 46 24.0 192 1 0.5 2016-17 78.8 7 9.500000
1268 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 21 9.3 41 18.1 24Q744 48.3 0 ... VOYAGES Preparatory 14 6.2 226 13 5.8 2015-16 48.3 24 9.200000
1269 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 23 9.2 47 18.7 24Q744 48.7 3 ... VOYAGES Preparatory 15 6.0 251 12 4.8 2016-17 48.7 24 9.200000
1270 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 23 8.5 42 15.5 24Q744 68.5 9 ... VOYAGES Preparatory 25 9.2 271 16 5.9 2017-18 68.5 24 9.200000
1271 24Q744 POINT (-73.871505 40.743228) VOYAGES Preparatory 18 8.0 43 19.2 24Q744 51.5 3 ... VOYAGES Preparatory 15 6.7 224 9 4.0 2014-15 51.5 24 9.200000
1272 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 0 0.0 97 82.9 17K568 78.6 1 ... Brownsville Academy High School 26 22.2 117 0 0.0 2017-18 78.6 17 8.907143
1273 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 2 1.2 144 88.3 17K568 69.6 6 ... Brownsville Academy High School 33 20.2 163 0 0.0 2015-16 69.6 17 8.907143
1274 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 3 1.5 176 89.3 17K568 68.2 3 ... Brownsville Academy High School 30 15.2 197 0 0.0 2014-15 68.2 17 8.907143
1275 17K568 POINT (-73.923881 40.666251) Brownsville Academy High School 1 0.6 133 85.8 17K568 67.0 3 ... Brownsville Academy High School 36 23.2 155 0 0.0 2016-17 67.0 17 8.907143
1276 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 6 2.6 102 43.8 05M285 92.4 27 ... Harlem Renaissance High School 63 27.0 233 2 0.9 2017-18 92.4 5 7.521429
1277 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 7 3.0 99 42.9 05M285 77.4 31 ... Harlem Renaissance High School 52 22.5 231 3 1.3 2016-17 77.4 5 7.521429
1278 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 4 1.6 118 46.5 05M285 76.1 44 ... Harlem Renaissance High School 54 21.3 254 4 1.6 2015-16 76.1 5 7.521429
1279 05M285 POINT (-73.939974 40.807692) Harlem Renaissance High School 0 0.0 112 53.3 05M285 78.9 29 ... Harlem Renaissance High School 43 20.5 210 2 1.0 2014-15 78.9 5 7.521429
1280 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 0 0.0 162 83.9 18K673 67.2 12 ... East Brooklyn Community High School 67 34.7 193 4 2.1 2016-17 67.2 18 5.441667
1281 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 2 1.0 146 76.0 18K673 75.2 14 ... East Brooklyn Community High School 76 39.6 192 11 5.7 2017-18 75.2 18 5.441667
1282 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 1 0.6 146 80.7 18K673 68.6 4 ... East Brooklyn Community High School 43 23.8 181 4 2.2 2015-16 68.6 18 5.441667
1283 18K673 POINT (-73.920658 40.659914) East Brooklyn Community High School 3 1.6 158 82.7 18K673 61.2 4 ... East Brooklyn Community High School 42 22.0 191 3 1.6 2014-15 61.2 18 5.441667
1284 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 0 0.0 67 28.4 32K564 71.0 10 ... Bushwick Community High School 56 23.7 236 7 3.0 2016-17 71.0 32 5.337500
1285 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 1 0.4 79 32.4 32K564 74.5 6 ... Bushwick Community High School 46 18.9 244 6 2.5 2015-16 74.5 32 5.337500
1286 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 1 0.3 100 34.6 32K564 72.7 3 ... Bushwick Community High School 43 14.9 289 5 1.7 2014-15 72.7 32 5.337500
1287 32K564 POINT (-73.915217 40.695875) Bushwick Community High School 0 0.0 81 30.8 32K564 89.6 19 ... Bushwick Community High School 83 31.6 263 5 1.9 2017-18 89.6 32 5.337500
1288 13K616 POINT (-73.95818 40.692015) Brooklyn High School for Leadership and Community 1 0.5 141 69.5 13K616 70.6 9 ... Brooklyn High School for Leadership and Community 38 18.7 203 5 2.5 2016-17 70.6 13 5.322222
1289 13K616 POINT (-73.95818 40.692015) Brooklyn High School for Leadership and Community 2 0.9 155 70.8 13K616 75.1 8 ... Brooklyn High School for Leadership and Community 43 19.6 219 3 1.4 2014-15 75.1 13 5.322222
1290 13K616 POINT (-73.95818 40.692015) Brooklyn High School for Leadership and Community 3 1.5 140 68.3 13K616 84.5 12 ... Brooklyn High School for Leadership and Community 42 20.5 205 2 1.0 2017-18 84.5 13 5.322222
1291 13K616 POINT (-73.95818 40.692015) Brooklyn High School for Leadership and Community 1 0.4 161 70.0 13K616 71.6 11 ... Brooklyn High School for Leadership and Community 37 16.1 230 4 1.7 2015-16 71.6 13 5.322222
<p>1292 rows × 45 columns</p>
In [41]:
# Rebuild the feature matrix from `t`: column 0 = 'Diversity Index', column 1 = 'economic_need_index'.
# NOTE(review): in the displayed table these two columns carry the same values row by row,
# so the model effectively sees one feature twice — confirm this is intended.
X = t[['Diversity Index','economic_need_index']].values
In [42]:
# Regression target taken from `t`; kept as a pandas Series here (no .values call).
y = t['Graduation Rate']
In [43]:
# COMPARING MODELS

# You will need to choose an appropriate metric to evaluate the performance of your fitted model.
# Which metric you choose depends on whether you are performing classification, clustering or regression.

from sklearn.model_selection import cross_val_score
models = []
scores = []

def train(model, X, cv=3, scoring='neg_mean_absolute_error'):
    """Fit ``model`` on ``X`` and the notebook-level ``y``, and record its CV score.

    The fitted model is appended to the global ``models`` list and its mean
    cross-validated score to the global ``scores`` list, at the same index.

    Parameters
    ----------
    model : estimator handed to ``fit`` and to ``cross_val_score``.
    X : feature matrix aligned row for row with the global ``y``.
    cv : cross-validation setting forwarded to ``cross_val_score`` (default 3).
    scoring : scorer name forwarded to ``cross_val_score``. With the default
        negated mean absolute error, values closer to zero are better.

    Returns
    -------
    The mean of the per-fold scores.
    """
    model.fit(X, y)
    # NOTE(review): an integer ``cv`` on a regressor presumably means unshuffled
    # K-fold; the rows of data.csv look sorted by graduation rate, so each fold may
    # cover a different target range. Confirm, and pass a shuffled or grouped
    # splitter through ``cv`` if so.
    score = cross_val_score(model, X, y, cv=cv, scoring=scoring).mean()
    # Record both results only after scoring succeeded, so ``models`` and
    # ``scores`` stay index-aligned when the best model is looked up by index.
    models.append(model)
    scores.append(score)
    return score
In [44]:
# Candidate 1: ordinary least squares. The displayed value is the mean negated MAE over 3 folds.
from sklearn.linear_model import LinearRegression
train(LinearRegression(), X)
Out[44]:
-24.669098750566963
In [45]:
# Candidate 2: Bayesian ridge regression, scored the same way as the other candidates.
from sklearn.linear_model import BayesianRidge
train(BayesianRidge(), X)
Out[45]:
-24.610465353270072
In [46]:
# Candidate 3: support vector regression with gamma='scale', scored the same way.
from sklearn.svm import SVR
train(SVR(gamma='scale'), X)
Out[46]:
-22.112387800202015
In [47]:
# NOTE(review): this repeats the SVR(gamma='scale') run from the cell before it, so
# `models` and `scores` each gain a second, identical SVR entry. Confirm whether a
# different estimator or different hyperparameters were intended here.
from sklearn.svm import SVR
train(SVR(gamma='scale'), X)
Out[47]:
-22.112387800202015
In [48]:
# CHOOSE MODEL WITH THE LEAST ERROR

import numpy as np
# Scores are negated mean absolute errors, so the largest score marks the smallest error.
# nanargmax ignores NaN entries; plain argmax would return the index of a NaN score
# and silently select a model whose evaluation failed.
best_index = np.nanargmax(scores)
best_index
Out[48]:
2
In [ ]:
 
In [49]:
# Fetch the fitted estimator stored at the winning index and display its parameters.
best_model = models[best_index]
best_model
Out[49]:
SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
In [26]:
import pickle
# Persist the selected model. The context manager flushes and closes the file handle.
# NOTE(review): this cell's saved execution count (26) is lower than that of the cell
# defining best_model (49); re-run the notebook top to bottom so the pickled object is
# the model selected above.
with open('/tmp/model.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)
In [50]:
# Show every recorded score, in the order the candidates were trained.
scores
Out[50]:
[-24.669098750566963,
 -24.610465353270072,
 -22.112387800202015,
 -22.112387800202015]
In [ ]: