import time

time.time()  # notebook leftover: evaluates the current epoch time and discards it


def fetch_posts(query='sex trafficker', since_ms='1507927474389',
                out_path='d.json', token=None):
    """Run one webhose.io ``filterWebContent`` query and cache the response.

    This replaces the fetch snippet that used to sit here disabled inside a
    string literal with the API token pasted in. Nothing calls this function
    at import time, so loading the file still performs no network access.

    SECURITY: the token that was previously committed in this file should be
    treated as compromised and rotated.

    Args:
        query: Search expression sent as the ``q`` parameter.
        since_ms: Value sent as the ``ts`` parameter (presumably a
            millisecond-epoch lower bound -- confirm against the webhose
            API documentation).
        out_path: File the raw response is dumped to as JSON.
        token: API token. When omitted, it is read from the
            ``WEBHOSE_TOKEN`` environment variable.

    Returns:
        The response object returned by ``webhoseio.query``.

    Raises:
        RuntimeError: If no token is passed and ``WEBHOSE_TOKEN`` is unset.
    """
    import os

    token = token or os.environ.get('WEBHOSE_TOKEN')
    if not token:
        raise RuntimeError(
            'No webhose.io token: pass token=... or set WEBHOSE_TOKEN'
        )

    # Third-party imports are deferred so the check above fails fast and the
    # file can be loaded without these packages installed.
    import simplejson as json
    import webhoseio

    webhoseio.config(token=token)
    d = webhoseio.query('filterWebContent', {
        'q': query,
        'ts': since_ms,
        'sort': 'relevancy',
    })

    # Context manager so the output file is flushed and closed.
    with open(out_path, 'wt') as fh:
        json.dump(d, fh)

    # NOTE: the original snippet hinted at paging with time.sleep(1) followed
    # by webhoseio.get_next(); only the first page is fetched here.
    return d
import simplejson as json
import arrow

# Load the cached query response; the fetch snippet earlier in this file
# dumps it to d.json. The context manager closes the handle, which the
# previous json.load(open(...)) form never did.
with open('d.json') as fh:
    d = json.load(fh)

# The bare expressions below are notebook-style probes: each value is
# evaluated and discarded, but a missing key still raises KeyError, so they
# double as a cheap check of the response layout.

# Top-level response fields.
d.keys()
len(d['posts'])
d['totalResults']
d['moreResultsAvailable']
d['next']
d['requestsLeft']

# Fields of the first post.
d['posts'][0].keys()
d['posts'][0]['thread']
x = d['posts'][0]  # first post; reused by the NLP section further down
x['uuid']
x['url']
x['ord_in_thread']
x['author']
x['published']
x['title']
x['text']
x['highlightText']
x['highlightTitle']
x['language']
x['external_links']
x['entities']
x['rating']
x['crawled']

# Parse the crawl timestamp with arrow (result discarded here).
arrow.get(x['crawled'])
# + Get news articles
# _ use soup to extract text
import spacy

# Load the spaCy pipeline once; 'en_core_web_lg' must be installed separately.
nlp = spacy.load('en_core_web_lg')

# Run the pipeline over the title and body of the first post (`x`).
title = nlp(x['title'])
text = nlp(x['text'])

# Notebook-style probes of the parsed title (values are discarded).
title
title.ents
list(title.noun_chunks)

# Print every named entity found in the body together with its label.
# The loop variable is `ent` rather than `x` so the post dict bound to `x`
# is not overwritten, and the body is indented (it was flush-left before,
# which is an IndentationError).
for ent in text.ents:
    print(ent, ent.label_)
    ent.text  # string form of the entity span (discarded)
# use NER with spacy to extract information
# Return JSON
#   - Trafficking Category
#   - Date
#   - Location
#   - Age (Trafficker, Victim)
#   - Gender (Trafficker, Victim)
from collections import Counter

# Scratch check: the most frequent character of 'apple'. most_common(1)
# yields the single highest-count (element, count) pair, with ties going to
# the element seen first; [0][0] picks out the element. Value is discarded.
Counter('apple').most_common(1)[0][0]