Opening your session in 90 seconds...

If your browser keeps redirecting to and from this page in an endless loop, you may be using an outdated browser. Please update Google Chrome or switch to Mozilla Firefox.

Check our forum for the latest updates.

In [9]:

# CrossCompute
# Notebook inputs. NOTE(review): the marker comment above presumably tells
# CrossCompute to expose these assignments as tool arguments -- confirm.
# search_query is not read by any executable statement visible in this
# notebook; the saved d.json is loaded regardless of its value.
search_query = 'sex trafficker'
# Folder that the export cell joins with 'incidents.csv' to build the output path.
target_folder = '/tmp'

In [10]:

# Disabled one-off download. The cell body is wrapped in a string literal so it
# does not execute; the trailing semicolon suppresses the string's echo.
# To refresh d.json, remove the surrounding quotes and set WEBHOSE_TOKEN in the
# environment first. The token is read from the environment so that it is never
# committed with the notebook; any token previously published here should be
# revoked.
"""
import os
import simplejson as json
import time
import webhoseio
webhoseio.config(token=os.environ['WEBHOSE_TOKEN'])

d = webhoseio.query('filterWebContent', {
    'q': search_query,
    'ts': '1507927474389',
    'sort': 'relevancy',
})
with open('d.json', 'wt') as target_file:
    json.dump(d, target_file)
# time.sleep(1)
# d = webhoseio.get_next()
""";

In [11]:

import simplejson as json
# Load the cached query response from d.json. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open until
# garbage collection.
with open('d.json') as source_file:
    d = json.load(source_file)

In [12]:

import spacy
# Load the spaCy pipeline registered under the name 'en_core_web_lg'. The
# extraction cell calls nlp(text) on each post and reads .ents from the result.
# NOTE(review): presumably the model package must be installed in the kernel's
# environment beforehand -- confirm for the target container.
nlp = spacy.load('en_core_web_lg')

In [13]:

from collections import Counter

def get_most_frequent_value(xs, default=None):
    """Return the value that occurs most often in ``xs``.

    Parameters
    ----------
    xs : iterable of hashable values (a string is treated as its characters).
    default : value returned when ``xs`` is empty; ``None`` unless overridden.

    Returns
    -------
    The most frequent value. When several values share the highest count, the
    one encountered first in ``xs`` wins. Returns ``default`` for empty input
    instead of raising ``IndexError``.
    """
    # most_common(1) finds the single maximum without sorting every entry and
    # yields an empty list when there is nothing to count.
    top = Counter(xs).most_common(1)
    return top[0][0] if top else default

get_most_frequent_value('apple')

Out[13]:

'p'

In [17]:

# Build one row per post: [published timestamp, most-mentioned person,
# most-mentioned place].
rows = []
# Cumulative lists of every PERSON / GPE entity text seen across all posts.
person_names = []
place_names = []
for post in d['posts']:
    text = post['text']
    document = nlp(text)
    # Tally entities per post. Ranking the cumulative lists instead would give
    # each row the most frequent name across all posts processed so far rather
    # than the name most relevant to this post.
    post_person_names = []
    post_place_names = []
    for entity in document.ents:
        if entity.label_ == 'PERSON':
            post_person_names.append(entity.text)
        elif entity.label_ == 'GPE':
            post_place_names.append(entity.text)
    person_names.extend(post_person_names)
    place_names.extend(post_place_names)
    # A post may mention no person or no place; record None (an empty CSV
    # field) rather than asking the helper to rank an empty list.
    person_name = (
        get_most_frequent_value(post_person_names)
        if post_person_names else None)
    place_name = (
        get_most_frequent_value(post_place_names)
        if post_place_names else None)
    rows.append([post['published'], person_name, place_name])

In [18]:

from os.path import join
from pandas import DataFrame
# Write the extracted incidents to <target_folder>/incidents.csv.
target_path = join(target_folder, 'incidents.csv')
# Label the columns so the CSV header is meaningful instead of the default
# positional labels 0,1,2. The order matches the lists appended to rows.
incident_table = DataFrame(
    rows, columns=['published', 'person_name', 'place_name'])
incident_table.to_csv(target_path, index=False)
# Keep this output line byte-for-byte: the saved cell output shows this exact
# 'a_table_path = <path>' form. NOTE(review): presumably the hosting platform
# parses it to locate the table -- confirm.
print('a_table_path = %s' % target_path)

a_table_path = /tmp/incidents.csv

Pay Notebook Creator: Roy Hyunjin Han	0
Set Container: Numerical CPU with TINY Memory for 10 Minutes	0
Total	0

Test Hypotheses on Human Trafficking