-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy path5. stem.py
More file actions
26 lines (21 loc) · 719 Bytes
/
5. stem.py
File metadata and controls
26 lines (21 loc) · 719 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer
# Build the Porter stemmer once; it is reused for every token below.
stemmer = PorterStemmer()

# Sample passage to tokenize and stem. The adjacent literals are joined into
# one string, so the text is the same as a single long literal.
populated_island = (
    'Java is an Indonesian island in the Pacific Ocean. '
    'It is the most populated island in the world, with over 140 million people.'
)

# Break the passage into word-level tokens.
island_tokenized = word_tokenize(populated_island)

# Run every token through the stemmer, preserving the token order.
stemmed = list(map(stemmer.stem, island_tokenized))
# Sanity checks: print each expected module-level variable, or a hint when it
# has not been defined. Only NameError (an undefined name) is caught, so any
# other failure surfaces instead of being reported as a missing variable.
# Each heading is printed before its check, so it appears even when the
# variable is missing.

print('A stemmer exists:')
try:
    print(stemmer)
except NameError:
    print('Expected a variable called `stemmer`')

print('Words Tokenized:')
try:
    print(island_tokenized)
except NameError:
    print('Expected a variable called `island_tokenized`')

print('Stemmed Words:')
try:
    print(stemmed)
except NameError:
    print('Expected a variable called `stemmed`')