The U.S. Geological Survey provides real-time earthquake data in CSV format.
{place_query : Place Query ? Filter place by string, e.g. Fiji}
{sensor_network_table : Sensor Networks}
# CrossCompute
# Folder where all output files (CSVs, images) are written
target_folder = '/tmp'
# Case-insensitive substring used to filter the `place` column; empty keeps all rows
place_query = ''
# CSV mapping sensor network codes to full network names
sensor_network_table_path = 'datasets/World-AdvancedNationalSeismicSystemNetworks.csv'
%matplotlib inline
from matplotlib import pyplot as plt
from os.path import join
from pandas import read_csv
# Download the past month's M4.5+ earthquakes from the USGS real-time feed,
# keeping only the columns this analysis uses and parsing timestamps
feed_url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.csv'
wanted_columns = ['time', 'latitude', 'longitude', 'depth', 'mag', 'net', 'place']
earthquakes = read_csv(feed_url, usecols=wanted_columns, parse_dates=['time'])
earthquake_count = len(earthquakes)
# Keep only earthquakes whose place matches place_query (case-insensitive);
# na=False treats a missing place as a non-match instead of raising on NaN
earthquakes = earthquakes[earthquakes['place'].str.contains(place_query, case=False, na=False)]
# Show the first matching earthquake. Use pure positional indexing: the
# original `iloc[earthquakes.index[0]]` fed an index LABEL to a POSITIONAL
# indexer, which selects the wrong row (or raises) after filtering.
earthquakes.iloc[0]
# Rank the selected earthquakes from strongest to weakest and export them
ranked = earthquakes.sort_values('mag', ascending=False)
ranked_count = len(ranked)
target_path = join(target_folder, 'selected-earthquakes.csv')
ranked.to_csv(target_path, index=False)
print('selected_earthquake_table_path = %s' % target_path)
print('selected_earthquake_count = %s' % ranked_count)
print('selected_earthquake_percent = %s' % (100 * ranked_count / float(earthquake_count)))
# Preview the strongest earthquakes
ranked.head()
# Draw a histogram for each numeric column and save the whole figure
earthquakes.hist()
target_path = join(target_folder, 'histograms.png')
plt.savefig(target_path)
print('histograms_image_path = %s' % target_path)
# Start a fresh figure for the next plot
plt.figure()
# Largest magnitude in the selection, and where it happened
earthquakes['mag'].max()
earthquakes.loc[earthquakes['mag'].idxmax(), 'place']
# Plot magnitudes over time
earthquakes_by_time = earthquakes.set_index('time')
earthquakes_by_time['mag'].plot()
# Plot weekly mean magnitude. A Resampler must be aggregated (here with
# .mean()) before plotting — calling .plot() directly on the resampler
# relied on pre-0.18 pandas behavior and fails on modern pandas.
earthquakes_by_time.resample('W')['mag'].mean().plot()
target_path = join(target_folder, 'magnitudes.png')
plt.savefig(target_path)
print('magnitudes_image_path = %s' % target_path)
# Start a fresh figure for the next plot
plt.figure()
# Number of earthquakes in the northern hemisphere
len(earthquakes[earthquakes['latitude'] > 0])
# Count earthquakes by sensor network code
earthquakes['net'].value_counts()
# Load the sensor network reference table and normalize it for merging
sensor_networks = read_csv(sensor_network_table_path)
sensor_networks.head()
# Align column names with the earthquake table's `net` column
column_mapping = {
    'Network Code': 'net',
    'Network Name': 'netName',
}
sensor_networks = sensor_networks.rename(columns=column_mapping)
# Network codes in the earthquake feed are lowercase
sensor_networks['net'] = sensor_networks['net'].str.lower()
sensor_networks.head()
# Join each earthquake to its sensor network's full name (merge on `net`)
merged_earthquakes = earthquakes.merge(sensor_networks)
merged_earthquakes[['place', 'mag', 'net', 'netName']].tail()
# Earthquake counts per sensor network name
merged_earthquakes.groupby('netName')['mag'].count()
# Ten most frequently named places
earthquakes['place'].value_counts().head(10)
# Ten places with the strongest earthquakes
earthquakes.groupby('place')['mag'].max().sort_values(ascending=False).head(10)
from datetime import datetime, timezone
# Reference moment for computing recency; timezone-aware so it can be
# subtracted from tz-aware parsed timestamps
now = datetime.now(timezone.utc)
def get_elapsed_time_in_negative_seconds(x):
    """Return the elapsed time from *x* to now, in seconds, negated.

    More recent times give values closer to 0, so the result can drive a
    color scale where newer earthquakes appear more intense.

    Fixes the original `.seconds`, which is only the seconds COMPONENT of
    a timedelta (it wraps every 24 hours); `.total_seconds()` gives the
    full elapsed duration.
    """
    # Treat a naive datetime as UTC so naive and aware inputs both work
    if x.tzinfo is None:
        x = x.replace(tzinfo=timezone.utc)
    return -1 * (now - x).total_seconds()
# Build a geotable for mapping: color intensity encodes recency,
# marker radius encodes magnitude
geotable = earthquakes.copy()
recency = geotable['time'].map(get_elapsed_time_in_negative_seconds)
geotable['fillReds'] = recency
geotable['radiusInPixelsRange3-27'] = geotable['mag']
target_path = join(target_folder, 'mapped-earthquakes.csv')
geotable.to_csv(target_path, index=False)
print('selected_earthquakes_geotable_path = %s' % target_path)
Information provided courtesy of the USGS Earthquake Hazards Program.
{selected_earthquake_count : selected earthquake count (past 30 days) ? # of earthquakes matching your query}