NLP




Pay Notebook Creator: Salah Ahmed0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0

Stemming

In [43]:
# NOTE(review): `join` is not referenced in any cell visible here — confirm it is still needed.
from os.path import join
# Import NLTK; if it is missing, install it and retry the import.
try:
    import nltk
except ImportError:
    import sys
    # IPython shell escape: run pip through the interpreter at sys.executable,
    # i.e. the Python that is executing this kernel, so the install lands in
    # the same environment the notebook imports from.
    !{sys.executable} -m pip install nltk
    import nltk
# Download the 'wordnet' data package into the local nltk_data directory
# (reported as already up to date when present).
# Presumably this is the data the WordNetLemmatizer cells rely on — TODO confirm.
nltk.download('wordnet')  
# NOTE(review): `wordnet` is not referenced in any cell visible here.
from nltk.corpus import wordnet
[nltk_data] Downloading package wordnet to /home/user/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
In [44]:
from nltk.stem import PorterStemmer
In [45]:
# Sample input shared with the Porter stemmer cell that follows.
word = "finished"
In [46]:
# Build a Porter stemmer and stem the sample `word`; the last expression is
# displayed as the cell output. `p_stemmer` is reused by a later cell.
p_stemmer = PorterStemmer()
p_stemmer.stem(word)
Out[46]:
'finish'
In [47]:
from nltk.stem import SnowballStemmer
# Show the `languages` attribute of SnowballStemmer: a tuple of language
# names (note that it also contains the entry 'porter').
SnowballStemmer.languages
Out[47]:
('arabic',
 'danish',
 'dutch',
 'english',
 'finnish',
 'french',
 'german',
 'hungarian',
 'italian',
 'norwegian',
 'porter',
 'portuguese',
 'romanian',
 'russian',
 'spanish',
 'swedish')
In [48]:
# Create a Snowball stemmer for 'english' (one of the names listed in
# SnowballStemmer.languages) and stem a sample word.
stemmer = SnowballStemmer('english')
 
stemmer.stem("working")
Out[48]:
'work'

Lemmatizing

In [49]:
# Baseline for the lemmatizer comparison: stem the word with the Porter
# stemmer created earlier. The result is a truncated stem ('increas'),
# not a dictionary word. `w` is reused by the lemmatizer cell below.
w = 'increases'
p_stemmer.stem(w)
Out[49]:
'increas'
In [50]:
from nltk.stem import WordNetLemmatizer
 
# Create the WordNet-based lemmatizer; later cells reuse `lemmatizer`.
lemmatizer = WordNetLemmatizer()
 
# Lemmatize the same word `w` that was stemmed in the previous cell; here the
# result is a complete word ('increase').
lemmatizer.lemmatize(w)
Out[50]:
'increase'
In [51]:
# Called without a `pos` argument, 'playing' comes back unchanged.
# NOTE(review): NLTK documents the default part of speech as noun ('n'),
# which is consistent with this output — confirm against the installed version.
lemmatizer.lemmatize('playing')
Out[51]:
'playing'
In [52]:
# Same word with pos="v" (the verb tag, per the NLTK docs): the lemmatizer
# now reduces 'playing' to 'play'.
lemmatizer.lemmatize('playing', pos="v")
Out[52]:
'play'