Leaves Outlook




Pay Notebook Creator: Naiem Gafar0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [5]:
# This set of code was used to create a final dataframe with three columns:
    # Species
    # Type (coniferous or deciduous or None)
    # Link for that species
    
# Example of final output: 
    # 7,Acer pseudoplatanus,deciduous,https://en.wikipedia.org/wiki?curid=437919
    
# The final results were then saved as a new csv file

# As a team, we have decided that it would be faster / more efficient to just do this process once

# We would then implement this newly created csv file into our main program
    # so that we don't have to call the wikipedia API every time
    # this saves a lot of time!
In [6]:
import pandas as pd
import wikipedia # Used to access/ work with wikipedia API
import datetime

# Function # 1 that takes in a species name as a parameter and returns its type (coniferous or deciduous)
def LookInSummary(treeSpeciesInLatin):
    """Classify a tree species as 'deciduous', 'coniferous', or 'evergreen'
    by scanning its Wikipedia page.

    Looks in the page summary first, then falls back to the page's
    "Description" section.

    Parameters:
        treeSpeciesInLatin: Latin species name used as the Wikipedia query.

    Returns:
        'deciduous', 'coniferous', or 'evergreen' when a keyword is found;
        the string 'None' when the Description section has no keyword;
        None when the page is missing, ambiguous, or has no usable text.
    """
    # Pause 10 milliseconds between API calls.
    # NOTE: the original code used timedelta(0, 0, 10), which is
    # 10 *microseconds* (days, seconds, microseconds) — contradicting its
    # own "10 millisecond" comment. Fixed to match the stated intent.
    wikipedia.set_rate_limiting(rate_limit=True,
                                min_wait=datetime.timedelta(milliseconds=10))

    # If the query is ambiguous or there is no page, report "unknown".
    try:
        summary = wikipedia.summary(treeSpeciesInLatin)
    except (wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError):
        return None
    if summary is None:
        return None

    # Check the summary for a classifying keyword.
    for keyword in ('deciduous', 'coniferous', 'evergreen'):
        if keyword in summary:
            return keyword

    # Keyword not in the summary — try the page's "Description" section.
    # wikipedia.page() can raise the same errors as wikipedia.summary(),
    # so guard it too (the original left this call unguarded).
    try:
        page = wikipedia.page(treeSpeciesInLatin)
    except (wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError):
        return None
    if page is None:
        return None
    description = page.section("Description")
    if description is None:
        return None

    # Bug fix: the original tested "evergreen" against `summary` here,
    # not `description` — that branch was unreachable (the summary was
    # already checked above). All three keywords now test `description`.
    for keyword in ('deciduous', 'coniferous', 'evergreen'):
        if keyword in description:
            return keyword
    return 'None'

# Function # 2 that takes in a species name as a parameter and returns the URL of its wikipedia page
def getLink(treeSpeciesInLatin):
    """Return the permanent Wikipedia URL (by page id) for a species.

    Parameters:
        treeSpeciesInLatin: Latin species name used as the Wikipedia query.

    Returns:
        A 'https://en.wikipedia.org/wiki?curid=<pageid>' URL string, or
        None when the page is missing or the query is ambiguous.
    """
    try:
        webpage = wikipedia.page(treeSpeciesInLatin)
    except (wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError):
        return None
    # Coerce pageid to str: depending on the wikipedia library version it
    # may come back as an int, which would make the '+' concatenation raise
    # TypeError in the original code.
    return 'https://en.wikipedia.org/wiki?curid=' + str(webpage.pageid)
In [7]:
# Read the raw 2015 street-tree census into a dataframe.
census_raw = pd.read_csv('Copy of 2015StreetTreesCensus_TREES.csv')

# We only need the Latin species name of each tree.
species_series = census_raw['spc_latin']

# Collapse to the unique species names (about 132 different species).
unique_species = species_series.unique()

# Start the output dataframe with a single 'Species' column.
species_df = pd.DataFrame({'Species': unique_species})

# 'Type' column: coniferous / deciduous / evergreen, looked up on
# Wikipedia via Function # 1 above.
species_df['Type'] = [LookInSummary(name) for name in unique_species]

# Replace any missing values with the sentinel word 'Obama'
# (a deliberately unmistakable placeholder, as opposed to 'None').
species_df = species_df.fillna('Obama')

# 'URL Links' column: the Wikipedia page URL for each species,
# looked up via Function # 2 above.
species_df['URL Links'] = [getLink(name) for name in species_df['Species']]

# Write the finished table to disk (the index column is included on
# purpose — see the example output row at the top of the notebook).
species_df.to_csv('Species_Types.csv')