Sybil: Search Reddit for Homeland Security Keywords




Pay Notebook Creator: Elaine Chan0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [3]:
!pip install praw
Collecting praw
  Downloading https://files.pythonhosted.org/packages/9a/c4/b33aa84d9c5c582d2fd92cb28b7027b5b6285485a68e56c9748cd49dd95b/praw-6.1.1-py2.py3-none-any.whl (117kB)
    100% |████████████████████████████████| 122kB 24.1MB/s ta 0:00:01
Collecting update-checker>=0.16 (from praw)
  Downloading https://files.pythonhosted.org/packages/17/c9/ab11855af164d03be0ff4fddd4c46a5bd44799a9ecc1770e01a669c21168/update_checker-0.16-py2.py3-none-any.whl
Collecting prawcore<2.0,>=1.0.0 (from praw)
  Downloading https://files.pythonhosted.org/packages/76/b5/ce6282dea45cba6f08a30e25d18e0f3d33277e2c9fcbda75644b8dc0089b/prawcore-1.0.1-py2.py3-none-any.whl
Requirement already satisfied: websocket-client>=0.54.0 in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from praw) (0.54.0)
Requirement already satisfied: requests>=2.3.0 in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from update-checker>=0.16->praw) (2.21.0)
Requirement already satisfied: six in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from websocket-client>=0.54.0->praw) (1.12.0)
Requirement already satisfied: certifi>=2017.4.17 in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from requests>=2.3.0->update-checker>=0.16->praw) (2018.11.29)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from requests>=2.3.0->update-checker>=0.16->praw) (3.0.4)
Requirement already satisfied: idna<2.9,>=2.5 in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from requests>=2.3.0->update-checker>=0.16->praw) (2.8)
Requirement already satisfied: urllib3<1.25,>=1.21.1 in /home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages (from requests>=2.3.0->update-checker>=0.16->praw) (1.24.1)
Installing collected packages: update-checker, prawcore, praw
Successfully installed praw-6.1.1 prawcore-1.0.1 update-checker-0.16
You are using pip version 19.0.2, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
In [4]:
import requests
import json
import praw

def cybersecurity_keywords():
    return [
        'cyber security',
        'Botnet',
        'botnet',
        'DDOS',
        'ddos',
        'dedicated denial of service',
        'denial of service',
        'malware',
        'cyber security',
        'virus',
        'trojan',
        'keylogger',
        'Cyber Command',
        'cyber command',
        '2600',
        'spammer',
        'spam',
        'phish',
        'phishing',
        'rootkit',
        'phreaking',
        'cain and abel',
        'Cain and Able',
        'brute forcing',
        'brute-forcing',
        'brute force'
        'brute-force',
        'bruteforce',
        'sql injection',
        'mysql injection',
        'cyber attack',
        'cyber terror',
        'hacker',
        'hack',
        'conficker',
        'worm',
        'scam',
        'scammer'
    ]

def terrorism_keywords():
    return [
        'terrorism',
        'Al Queda',
        'terror',
        'attack',
        'Iraq',
        'Afghanistan',
        'Iran',
        'Pakistan',
        'Agro',
        'environmental terrorist',
        'eco terrorism',
        'conventional weapon',
        'target',
        'weapons grade',
        'dirty bomb',
        'enriched',
        'nuclear',
        'chemical weapon',
        'biological weapon',
        'ammonium nitrate',
        'improvised explosive device',
        'IED',
        'Abu Sayyaf',
        'Hamas',
        'FARC',
        'Armed Revolutionary Forces Colombia',
        'IRA',
        'Irish Republican Army',
        'ETA',
        'Euskadi ta Askatasuna',
        'Basque Separatists',
        'Hezbollah',
        'Tamil Tiger',
        'PLF',
        'Palestine Liberation Front',
        'PLO',
        'Palestine Libration Organization',
        'car bomb',
        'Jihad',
        'Taliban',
        'weapons cache',
        'suicide bomber',
        'suicide attack',
        'suspicious substance',
        'AQAP',
        'Al Qaeda Arabian Peninsula',
        'AQIM'
        'Al Qaeda in the Islamic Maghreb',
        'TTP',
        'Tehrik-i-Taliban Pakistan',
        'Yemen',
        'pirates',
        'extremism',
        'Somalia',
        'Nigeria',
        'radicals',
        'Al-Shabaab',
        'home grown',
        'plot',
        'nationalist',
        'recruitment',
        'fundamentalism',
        'Islamist'

    ]

def multi_search(keyword_list):
    reddit = praw.Reddit(
        client_id='ypRz2owAJ3YKBg',
        client_secret='yCYUfXOLrVzLswYWoRevzG1Sx-U',
        user_agent='Sybil'
    )

    cybersecurity = reddit.subreddit('cybersecurity').hot(limit=100)
    politics = reddit.subreddit('politics').hot(limit=100)
    security = reddit.subreddit('security').hot(limit=100)
    conspiracy = reddit.subreddit('conspiracy').hot(limit=100)
    deepweb = reddit.subreddit('deepweb').hot(limit=100)
    cryptocurrency = reddit.subreddit('cryptocurrency').hot(limit=100)
    worldnews = reddit.subreddit('worldnews').hot(limit=100)


    subreddit_list = [
        cybersecurity,
        politics,
        security,
        conspiracy,
        deepweb,
        cryptocurrency,
        worldnews
        ]

    comments_dict = {}
    
    for item in subreddit_list:
        for submission in cybersecurity:
            for word in keyword_list:
                    if word in submission.selftext:
                        data = submission.selftext
                        transformed_data = 'text=' + submission.selftext
                        if word not in  comments_dict:
                            comments_dict[word] = []
                        comments_dict[word].append({
                            'comment': data,
                            'permalink': 'https://www.reddit.com' 
                            + submission.permalink
                            })

        for k, v in comments_dict.items():
            for item in v:
                transformed_data = 'text=' + item['comment']
                encoded_data = transformed_data.encode('utf-8')
                r = requests.post(
                    'http://text-processing.com/api/sentiment/',
                    data=encoded_data
                )
                item['sentiment'] = r.text
        
        return comments_dict
In [6]:
terror = terrorism_keywords()
print(multi_search(terror))
{'target': [{'comment': 'All, \n\nWow! The feedback we’ve received since the “great toilet flush” last weekend has been great. Everything from the suggestions about where the subreddit should focus, to the astronomical (and rightfully placed) number of complaints about the strict Auto-Moderator rules, has been brought to our attention either in [our former announcement thread](https://www.reddit.com/r/cybersecurity/comments/8mxgkd/input_wanted_where_do_you_think_rcybersecurity/?st=jhy63ned&sh=6cbaf288) or in mod-mail. We continue to encourage honest, good faith, discourse. Here’s some updates:\n\n* **Auto-Moderator:** The rules that were established last weekend are still in play, but they’re a bit more honed for their intended targets now. For instance, a *post* (self-post or link submission) now requires 90 days account history where as comments do not. On the flip side of that coin, *comments* require >= 20 karma but have no account age requirements. This is still less than ideal, and will still catch well-meaning members to be targeted by the Auto-Moderator. What make’s this different than last week’s mentality of “collateral damage” is that now there is a white-list mechanism in place. If anyone gets caught in the cross hair of Auto-Moderator’s remove button, message the mods and we’ll review your account and your submission and add you to the whitelist.\n\n* **Subreddit Focus:** We’ve heard you and all that remains on determining the focus of the new r/cybersecurity is our own internal deliberation. The r/cybersecurity mod team (u/chumstick, u/CDSEChris, u/Spncrgmn, and u/danielrm26) need to hash out our ideas, and our reservations, about the different directions the sub can be taken. Not just about content, but about the reality of keeping on track with the mission. I suspect that we should have some sort of idea/plan ready in short-order. Of course, we’ll keep you all updated on progress and new developments along the way. \n\nKeep the feedback coming! To paraphrase a certain renowned doctor from Seattle: *We’re listening.*\n\ne: added link to former thread to make it easier to reference.', 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/8o3cvu/were_listening_the_next_phase_of_rebuilding/', 'sentiment': '{"probability": {"neg": 0.35707343527564395, "neutral": 0.82198649640315535, "pos": 0.64292656472435605}, "label": "neutral"}'}], 'attack': [{'comment': 'Andrea Carcano from Nozomi Networks told POWER LockerGoga is able to encrypt files that have the following extensions: doc, dot, wbk, docx, dotx, docb, xlm, xlsx, xltx, xlsb, xlw, ppt, pot, pps, pptx, potx, ppsx, sldx, pdf. He also said attackers dropped a “README-NOW.txt” file inside the filesystem. It contains the following message, which essentially extorts users to pay a ransom using Bitcoin cryptocurrency if they want their files back:\n\nHere’s the full story: https://www.powermag.com/cyberattack-debilitates-major-aluminum-and-hydropower-producer/\n\nhttps://i.redd.it/ekdyv2yq0an21.jpg', 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/b3c4uy/heres_the_ransomeware_note_associated_with_the/', 'sentiment': '{"probability": {"neg": 0.46227955655667397, "neutral": 0.90740077354661874, "pos": 0.53772044344332603}, "label": "neutral"}'}, {'comment': "Hi All! I'm doing a paper for Uni where I want to purposefully expose my home internet by putting a few IoT devices on the network that are vulnerable to Mirai etc. Does anyone know if there are lists out there of IoT devices that are well known to be vulnerable? I'm looking for something that I would be guaranteed to find exposed through zmap or nmap etc. so that I can then try to hack it myself.... I know Krebs has a list here but it's all IP cameras mostly ([https://krebsonsecurity.com/2016/10/who-makes-the-iot-things-under-attack/](https://krebsonsecurity.com/2016/10/who-makes-the-iot-things-under-attack/)) ", 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/b1qrop/vulnerable_iot_devices/', 'sentiment': '{"probability": {"neg": 0.75422071403576973, "neutral": 0.2114088281538864, "pos": 0.24577928596423032}, "label": "neg"}'}, {'comment': "How do hackers identify that this specific old router of specific company has a hole?\n\nDo they bruteforce and try every possible attack on all existing models of hardware/software?\n\nSince the antivirus company is not even aware that their tool can't handle a very unique attack - how do hackers find it out?!", 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/b1kfo7/how_do_hackers_find_or_identify_0day_exploiteven/', 'sentiment': '{"probability": {"neg": 0.67373125572174564, "neutral": 0.78441996278916049, "pos": 0.32626874427825436}, "label": "neutral"}'}]}
{'target': [{'comment': 'All, \n\nWow! The feedback we’ve received since the “great toilet flush” last weekend has been great. Everything from the suggestions about where the subreddit should focus, to the astronomical (and rightfully placed) number of complaints about the strict Auto-Moderator rules, has been brought to our attention either in [our former announcement thread](https://www.reddit.com/r/cybersecurity/comments/8mxgkd/input_wanted_where_do_you_think_rcybersecurity/?st=jhy63ned&sh=6cbaf288) or in mod-mail. We continue to encourage honest, good faith, discourse. Here’s some updates:\n\n* **Auto-Moderator:** The rules that were established last weekend are still in play, but they’re a bit more honed for their intended targets now. For instance, a *post* (self-post or link submission) now requires 90 days account history where as comments do not. On the flip side of that coin, *comments* require >= 20 karma but have no account age requirements. This is still less than ideal, and will still catch well-meaning members to be targeted by the Auto-Moderator. What make’s this different than last week’s mentality of “collateral damage” is that now there is a white-list mechanism in place. If anyone gets caught in the cross hair of Auto-Moderator’s remove button, message the mods and we’ll review your account and your submission and add you to the whitelist.\n\n* **Subreddit Focus:** We’ve heard you and all that remains on determining the focus of the new r/cybersecurity is our own internal deliberation. The r/cybersecurity mod team (u/chumstick, u/CDSEChris, u/Spncrgmn, and u/danielrm26) need to hash out our ideas, and our reservations, about the different directions the sub can be taken. Not just about content, but about the reality of keeping on track with the mission. I suspect that we should have some sort of idea/plan ready in short-order. Of course, we’ll keep you all updated on progress and new developments along the way. \n\nKeep the feedback coming! To paraphrase a certain renowned doctor from Seattle: *We’re listening.*\n\ne: added link to former thread to make it easier to reference.', 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/8o3cvu/were_listening_the_next_phase_of_rebuilding/', 'sentiment': '{"probability": {"neg": 0.35707343527564395, "neutral": 0.82198649640315535, "pos": 0.64292656472435605}, "label": "neutral"}'}], 'attack': [{'comment': 'Andrea Carcano from Nozomi Networks told POWER LockerGoga is able to encrypt files that have the following extensions: doc, dot, wbk, docx, dotx, docb, xlm, xlsx, xltx, xlsb, xlw, ppt, pot, pps, pptx, potx, ppsx, sldx, pdf. He also said attackers dropped a “README-NOW.txt” file inside the filesystem. It contains the following message, which essentially extorts users to pay a ransom using Bitcoin cryptocurrency if they want their files back:\n\nHere’s the full story: https://www.powermag.com/cyberattack-debilitates-major-aluminum-and-hydropower-producer/\n\nhttps://i.redd.it/ekdyv2yq0an21.jpg', 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/b3c4uy/heres_the_ransomeware_note_associated_with_the/', 'sentiment': '{"probability": {"neg": 0.46227955655667397, "neutral": 0.90740077354661874, "pos": 0.53772044344332603}, "label": "neutral"}'}, {'comment': "Hi All! I'm doing a paper for Uni where I want to purposefully expose my home internet by putting a few IoT devices on the network that are vulnerable to Mirai etc. Does anyone know if there are lists out there of IoT devices that are well known to be vulnerable? I'm looking for something that I would be guaranteed to find exposed through zmap or nmap etc. so that I can then try to hack it myself.... I know Krebs has a list here but it's all IP cameras mostly ([https://krebsonsecurity.com/2016/10/who-makes-the-iot-things-under-attack/](https://krebsonsecurity.com/2016/10/who-makes-the-iot-things-under-attack/)) ", 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/b1qrop/vulnerable_iot_devices/', 'sentiment': '{"probability": {"neg": 0.75422071403576973, "neutral": 0.2114088281538864, "pos": 0.24577928596423032}, "label": "neg"}'}, {'comment': "How do hackers identify that this specific old router of specific company has a hole?\n\nDo they bruteforce and try every possible attack on all existing models of hardware/software?\n\nSince the antivirus company is not even aware that their tool can't handle a very unique attack - how do hackers find it out?!", 'permalink': 'https://www.reddit.com/r/cybersecurity/comments/b1kfo7/how_do_hackers_find_or_identify_0day_exploiteven/', 'sentiment': '{"probability": {"neg": 0.67373125572174564, "neutral": 0.78441996278916049, "pos": 0.32626874427825436}, "label": "neutral"}'}]}
In [ ]: