UKO5A




Pay Notebook Creator: Muhammad0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [7]:
#CrossCompute
# Folder that output files are written to (the plot cell saves x.png here).
target_folder = '/tmp'
# Selection list. The first non-blank line is the chosen value: the next cell
# strips the string and reads line 0. The lines after the blank line look like
# the available options (presumably the CrossCompute `_select` convention;
# TODO confirm against the tool's documentation).
ethnicity_select = """
    Hispanic

    Black Non-Hispanic
    Not Stated/Unknown
    Other Race/ Ethnicity
    Hispanic
    Asian and Pacific Islander
    White Non-Hispanic
"""
In [8]:
# The first line of the select block is the chosen value. It is stripped on
# its own as well: the outer strip() only trims the two ends of the whole
# string, so trailing spaces after the selection would otherwise survive and
# make the equality filter on 'Race Ethnicity' silently match no rows.
race = ethnicity_select.strip().splitlines()[0].strip()
In [9]:
import pandas as pd

# CSV export from NYC Open Data (dataset id jb7j-dtam).
DATA_URL = 'https://data.cityofnewyork.us/api/views/jb7j-dtam/rows.csv?accessType=DOWNLOAD'

# '.' is treated as a missing value at read time, so the numeric columns
# are parsed as floats with NaN instead of as strings.
data = pd.read_csv(DATA_URL, na_values='.')

# Rows belonging to the selected race/ethnicity only.
data_frame = data[data['Race Ethnicity'] == race]
In [10]:
# Preview the first rows of the full (unfiltered) table.
data.head()
Out[10]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Year Leading Cause Sex Race Ethnicity Deaths Death Rate Age Adjusted Death Rate
0 2010 Assault (Homicide: Y87.1, X85-Y09) M Black Non-Hispanic 299.0 35.1 35.5
1 2011 Mental and Behavioral Disorders due to Acciden... M Not Stated/Unknown 5.0 NaN NaN
2 2011 Diseases of Heart (I00-I09, I11, I13, I20-I51) M Black Non-Hispanic 1840.0 215.7 268.3
3 2008 Certain Conditions originating in the Perinata... F Other Race/ Ethnicity NaN NaN NaN
4 2014 Accidents Except Drug Posioning (V01-X39, X43,... F Hispanic 64.0 5.1 5.4
In [13]:
# Check the dtype pandas assigned to each column.
data.dtypes
Out[13]:
Year                         int64
Leading Cause               object
Sex                         object
Race Ethnicity              object
Deaths                     float64
Death Rate                 float64
Age Adjusted Death Rate    float64
dtype: object
In [14]:
import numpy as np

# DataFrame.replace returns a new frame and leaves `data` untouched, so the
# result must be bound back to `data` for the cleanup to take effect.
# read_csv is already called with na_values='.', which makes this a safety
# net for any '.' placeholders that were not caught at load time.
data = data.replace('.', np.nan)
data
Out[14]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Year Leading Cause Sex Race Ethnicity Deaths Death Rate Age Adjusted Death Rate
0 2010 Assault (Homicide: Y87.1, X85-Y09) M Black Non-Hispanic 299.0 35.1 35.5
1 2011 Mental and Behavioral Disorders due to Acciden... M Not Stated/Unknown 5.0 NaN NaN
2 2011 Diseases of Heart (I00-I09, I11, I13, I20-I51) M Black Non-Hispanic 1840.0 215.7 268.3
3 2008 Certain Conditions originating in the Perinata... F Other Race/ Ethnicity NaN NaN NaN
4 2014 Accidents Except Drug Posioning (V01-X39, X43,... F Hispanic 64.0 5.1 5.4
5 2007 Intentional Self-Harm (Suicide: X60-X84, Y87.0) M Not Stated/Unknown 5.0 NaN NaN
6 2012 Accidents Except Drug Posioning (V01-X39, X43,... M Black Non-Hispanic 152.0 17.8 18.6
7 2009 All Other Causes M Asian and Pacific Islander 220.0 43.1 56.1
8 2013 Diseases of Heart (I00-I09, I11, I13, I20-I51) F Asian and Pacific Islander 437.0 72.8 81.8
9 2014 Accidents Except Drug Posioning (V01-X39, X43,... M Other Race/ Ethnicity 12.0 NaN NaN
10 2012 Septicemia (A40-A41) F Other Race/ Ethnicity NaN NaN NaN
11 2012 Certain Conditions originating in the Perinata... M Not Stated/Unknown 17.0 NaN NaN
12 2012 Essential Hypertension and Renal Diseases (I10... F White Non-Hispanic 199.0 14.0 7.2
13 2014 Diabetes Mellitus (E10-E14) F Other Race/ Ethnicity 11.0 NaN NaN
14 2008 Influenza (Flu) and Pneumonia (J09-J18) F Not Stated/Unknown 14.0 NaN NaN
15 2014 Cerebrovascular Disease (Stroke: I60-I69) M Hispanic 165.0 13.8 20.4
16 2011 Diseases of Heart (I00-I09, I11, I13, I20-I51) M White Non-Hispanic 4220.0 316.4 260.2
17 2014 Chronic Lower Respiratory Diseases (J40-J47) F Hispanic 193.0 15.2 16.8
18 2014 Certain Conditions originating in the Perinata... M Other Race/ Ethnicity 8.0 NaN NaN
19 2014 Septicemia (A40-A41) M Not Stated/Unknown 8.0 NaN NaN
20 2008 Human Immunodeficiency Virus Disease (HIV: B20... F Hispanic 113.0 9.6 9.6
21 2010 Malignant Neoplasms (Cancer: C00-C97) F Black Non-Hispanic 1768.0 168.6 151.8
22 2009 Malignant Neoplasms (Cancer: C00-C97) M White Non-Hispanic 3236.0 240.5 205.6
23 2007 Chronic Lower Respiratory Diseases (J40-J47) M Asian and Pacific Islander 43.0 8.9 16.7
24 2011 Malignant Neoplasms (Cancer: C00-C97) F Black Non-Hispanic 1918.0 183.2 163.3
25 2009 Malignant Neoplasms (Cancer: C00-C97) F Black Non-Hispanic 1735.0 165.7 151.1
26 2013 Influenza (Flu) and Pneumonia (J09-J18) F Not Stated/Unknown 18.0 NaN NaN
27 2010 Human Immunodeficiency Virus Disease (HIV: B20... F Hispanic 79.0 6.5 6.5
28 2007 All Other Causes F Hispanic 820.0 70.2 85.6
29 2009 Intentional Self-Harm (Suicide: X60-X84, Y87.0) M Asian and Pacific Islander 35.0 6.9 6.7
... ... ... ... ... ... ... ...
1064 2007 Alzheimer's Disease (G30) F Not Stated/Unknown NaN NaN NaN
1065 2007 Essential Hypertension and Renal Diseases (I10... F Hispanic 78.0 6.7 8.9
1066 2009 Cerebrovascular Disease (Stroke: I60-I69) M Asian and Pacific Islander 58.0 11.4 15.5
1067 2014 Intentional Self-Harm (Suicide: X60-X84, Y87.0) F Asian and Pacific Islander 28.0 4.5 4.2
1068 2012 Essential Hypertension and Renal Diseases (I10... M Black Non-Hispanic 143.0 16.7 21.3
1069 2012 Alzheimer's Disease (G30) F Black Non-Hispanic 115.0 11.0 8.9
1070 2014 Nephritis, Nephrotic Syndrome and Nephrisis (N... F Black Non-Hispanic 90.0 8.6 7.1
1071 2010 All Other Causes F White Non-Hispanic 2140.0 149.7 93.9
1072 2013 Malignant Neoplasms (Cancer: C00-C97) F Asian and Pacific Islander 467.0 77.8 79.6
1073 2013 Essential Hypertension and Renal Diseases (I10... M Other Race/ Ethnicity 5.0 NaN NaN
1074 2011 Cerebrovascular Disease (Stroke: I60-I69) F Hispanic 179.0 14.6 17.3
1075 2013 Chronic Lower Respiratory Diseases (J40-J47) F Black Non-Hispanic 258.0 24.7 20.8
1076 2014 Diseases of Heart (I00-I09, I11, I13, I20-I51) F Black Non-Hispanic 2194.0 209.1 169.1
1077 2010 Accidents Except Drug Posioning (V01-X39, X43,... F Hispanic 58.0 4.8 5.2
1078 2011 Nephritis, Nephrotic Syndrome and Nephrisis (N... F Not Stated/Unknown NaN NaN NaN
1079 2014 Alzheimer's Disease (G30) F White Non-Hispanic 276.0 19.5 8.6
1080 2007 Certain Conditions originating in the Perinata... F Other Race/ Ethnicity NaN NaN NaN
1081 2011 All Other Causes F Not Stated/Unknown 14.0 NaN NaN
1082 2014 Chronic Lower Respiratory Diseases (J40-J47) M White Non-Hispanic 399.0 29.7 24.0
1083 2007 Influenza (Flu) and Pneumonia (J09-J18) M White Non-Hispanic 530.0 39.6 32.9
1084 2009 Chronic Lower Respiratory Diseases (J40-J47) M Hispanic 111.0 9.9 18.3
1085 2011 Mental and Behavioral Disorders due to Acciden... F White Non-Hispanic 88.0 6.2 6.1
1086 2010 Influenza (Flu) and Pneumonia (J09-J18) M Not Stated/Unknown NaN NaN NaN
1087 2011 Intentional Self-Harm (Suicide: X60-X84, Y87.0) M Asian and Pacific Islander 49.0 9.3 9.4
1088 2009 Essential Hypertension and Renal Diseases (I10... F Hispanic 84.0 7.0 8.8
1089 2009 Chronic Lower Respiratory Diseases (J40-J47) F Black Non-Hispanic 188.0 18.0 16.4
1090 2009 Cerebrovascular Disease (Stroke: I60-I69) F Not Stated/Unknown 5.0 NaN NaN
1091 2014 Diabetes Mellitus (E10-E14) M Black Non-Hispanic 318.0 36.8 42.2
1092 2008 Nephritis, Nephrotic Syndrome and Nephrisis (N... F Black Non-Hispanic 71.0 6.8 6.3
1093 2011 Diseases of Heart (I00-I09, I11, I13, I20-I51) M Not Stated/Unknown 62.0 NaN NaN
<p>1094 rows × 7 columns</p>
In [15]:
# Inspect the record at position 1; in the recorded output its rate columns are NaN.
data.iloc[1]
Out[15]:
Year                                                                    2011
Leading Cause              Mental and Behavioral Disorders due to Acciden...
Sex                                                                        M
Race Ethnicity                                            Not Stated/Unknown
Deaths                                                                     5
Death Rate                                                               NaN
Age Adjusted Death Rate                                                  NaN
Name: 1, dtype: object
In [46]:
# NOTE(review): the recorded output lists the label columns as `category`, so
# this cell was executed after one of the dtype-conversion cells that appear
# further down. On a top-to-bottom run it would still show `object` here.
data.dtypes
Out[46]:
Year                          int64
Leading Cause              category
Sex                        category
Race Ethnicity             category
Deaths                      float64
Death Rate                  float64
Age Adjusted Death Rate     float64
dtype: object
In [16]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [48]:
# Coerce the measurement columns to numbers; anything unparseable becomes NaN.
numeric_columns = ('Deaths', 'Death Rate', 'Age Adjusted Death Rate')
for column_name in numeric_columns:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')

# Store the low-cardinality label columns as pandas categoricals.
label_columns = ('Sex', 'Race Ethnicity', 'Leading Cause')
for column_name in label_columns:
    data[column_name] = data[column_name].astype('category')
In [17]:
# List the column labels of the loaded table.
data.columns  
Out[17]:
Index(['Year', 'Leading Cause', 'Sex', 'Race Ethnicity', 'Deaths',
       'Death Rate', 'Age Adjusted Death Rate'],
      dtype='object')
In [18]:
# NOTE(review): the recorded output shows `object` for the label columns, i.e.
# it was captured before the category conversion ran. A top-to-bottom run
# would show `category` here, because a conversion cell precedes this one.
data.dtypes
Out[18]:
Year                         int64
Leading Cause               object
Sex                         object
Race Ethnicity              object
Deaths                     float64
Death Rate                 float64
Age Adjusted Death Rate    float64
dtype: object
In [19]:
# Narrow the selected group's rows to the year 2010, then show the first five.
in_2010 = data_frame['Year'] == 2010
selected_t = data_frame[in_2010]
selected_t.head(5)
Out[19]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Year Leading Cause Sex Race Ethnicity Deaths Death Rate Age Adjusted Death Rate
27 2010 Human Immunodeficiency Virus Disease (HIV: B20... F Hispanic 79.0 6.5 6.5
41 2010 Mental and Behavioral Disorders due to Acciden... M Hispanic 138.0 12.1 12.9
51 2010 All Other Causes M Hispanic 1167.0 102.1 150.0
53 2010 Chronic Lower Respiratory Diseases (J40-J47) F Hispanic 156.0 12.8 15.4
61 2010 All Other Causes F Hispanic 1007.0 82.7 94.6
In [21]:
# Same dtype coercion as the other conversion cell in this notebook; running
# it on already-converted columns leaves them unchanged.
for measure in ['Deaths', 'Death Rate', 'Age Adjusted Death Rate']:
    # Unparseable entries turn into NaN rather than raising.
    data[measure] = pd.to_numeric(data[measure], errors='coerce')
for label in ['Sex', 'Race Ethnicity', 'Leading Cause']:
    data[label] = data[label].astype('category')
In [23]:
# Group the single-year selection by cause of death for the aggregates below.
g = selected_t.groupby('Leading Cause')    
In [25]:
# Mean of 'Death Rate' over the rows in each cause group.
g['Death Rate'].mean()
Out[25]:
Leading Cause
Accidents Except Drug Posioning (V01-X39, X43, X45-X59, Y85-Y86)                                                                       8.20
All Other Causes                                                                                                                      92.40
Alzheimer's Disease (G30)                                                                                                              5.80
Cerebrovascular Disease (Stroke: I60-I69)                                                                                             13.05
Chronic Liver Disease and Cirrhosis (K70, K73)                                                                                        13.10
Chronic Lower Respiratory Diseases (J40-J47)                                                                                          12.90
Diabetes Mellitus (E10-E14)                                                                                                           17.20
Diseases of Heart (I00-I09, I11, I13, I20-I51)                                                                                       113.30
Essential Hypertension and Renal Diseases (I10, I12)                                                                                   7.80
Human Immunodeficiency Virus Disease (HIV: B20-B24)                                                                                   11.50
Influenza (Flu) and Pneumonia (J09-J18)                                                                                               19.65
Malignant Neoplasms (Cancer: C00-C97)                                                                                                 89.20
Mental and Behavioral Disorders due to Accidental Poisoning and Other Psychoactive Substance Use (F11-F16, F18-F19, X40-X42, X44)     12.10
Name: Death Rate, dtype: float64
In [27]:
%matplotlib inline
x_plot = g['Deaths'].sum().plot.barh(figsize=(20, 5))
In [29]:
# Get the Figure that owns the bar-chart axes so it can be saved to disk;
# the bare `fig` on the last line displays it as the cell output.
fig = x_plot.get_figure()
fig
Out[29]:
In [30]:
# Destination of the chart image inside the output folder.
target_path = target_folder + '/x.png'
# bbox_inches='tight' expands the saved canvas to fit everything drawn.
# savefig otherwise uses the figure's fixed bounding box, which can clip the
# very long cause-of-death tick labels even though the inline preview
# shows them in full.
fig.savefig(target_path, bbox_inches='tight')
# Report the saved path in `name = value` form (presumably how CrossCompute
# picks up the image output; TODO confirm).
print('abc_image_path = %s' % target_path)
abc_image_path = /tmp/x.png
In [31]:
# Inspect the first record of the full table.
data.iloc[0]
Out[31]:
Year                                                     2010
Leading Cause              Assault (Homicide: Y87.1, X85-Y09)
Sex                                                         M
Race Ethnicity                             Black Non-Hispanic
Deaths                                                    299
Death Rate                                               35.1
Age Adjusted Death Rate                                  35.5
Name: 0, dtype: object
In [34]:
import pandas as pd
In [35]:
# Distinct causes recorded for the selected race/ethnicity (all years, since
# data_frame is filtered by 'Race Ethnicity' only).
data_frame['Leading Cause'].unique()
Out[35]:
array(['Accidents Except Drug Posioning (V01-X39, X43, X45-X59, Y85-Y86)',
       'Cerebrovascular Disease (Stroke: I60-I69)',
       'Chronic Lower Respiratory Diseases (J40-J47)',
       'Human Immunodeficiency Virus Disease (HIV: B20-B24)',
       'All Other Causes',
       'Mental and Behavioral Disorders due to Accidental Poisoning and Other Psychoactive Substance Use (F11-F16, F18-F19, X40-X42, X44)',
       'Diseases of Heart (I00-I09, I11, I13, I20-I51)',
       'Influenza (Flu) and Pneumonia (J09-J18)',
       'Malignant Neoplasms (Cancer: C00-C97)',
       'Chronic Liver Disease and Cirrhosis (K70, K73)',
       'Diabetes Mellitus (E10-E14)',
       'Essential Hypertension and Renal Diseases (I10, I12)',
       'Septicemia (A40-A41)', "Alzheimer's Disease (G30)",
       'Assault (Homicide: Y87.1, X85-Y09)'], dtype=object)
In [33]:
# Inline figure rendering; the same magic already appears in earlier cells.
%matplotlib inline