-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathIMDB.py
More file actions
33 lines (28 loc) · 965 Bytes
/
IMDB.py
File metadata and controls
33 lines (28 loc) · 965 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import string
import nltk
from bs4 import BeautifulSoup
import urllib.request
import spacy
from random import *
import re
import enchant
import random
# Build a six-word "poem" out of English words scraped from the IMDb Top
# chart page and print it on a single line.

# Spell-checker used to keep only tokens that are valid US-English words.
englishwords = enchant.Dict("en_US")
titlelist = []  # Never populated by this script; kept as a module-level name.
bagofwords = set()

# Use a context manager so the HTTP response is closed as soon as the page
# has been parsed instead of being left open for the rest of the run.
with urllib.request.urlopen("http://www.imdb.com/chart/top") as resp:
    soup = BeautifulSoup(resp, "html.parser")

for heading in soup.findAll("td", {"class": "titleColumn"}):
    # The cell's raw HTML is split on whitespace and the tokens at indices
    # 12, 11 and 13 are sampled as candidate words.
    # NOTE(review): these positions presumably land on/near the link text of
    # the title for the page layout this was written against -- confirm.
    title = str(heading).split()
    for position in (12, 11, 13):
        # Short cells do not have all three tokens; skip instead of raising
        # IndexError.
        if position >= len(title):
            continue
        words = (
            title[position]
            .replace("</a>", "")
            .replace("<span>", "")
            .replace(">", "")
        )
        # A token made only of tag fragments becomes "" after stripping;
        # pyenchant refuses to spell-check an empty string, so guard first.
        if words and englishwords.check(words):
            bagofwords.add(words)

# random.sample() no longer accepts a set (TypeError on Python 3.11+), so
# materialise the pool once as a sequence; sorting makes the draw
# reproducible for a given random seed.
wordpool = sorted(bagofwords)
if not wordpool:
    # Nothing usable was scraped (e.g. the page layout changed); fail with a
    # clear message rather than an opaque error from the random module.
    raise SystemExit("No English words could be extracted from the page.")

for j in range(6):
    # Pick the word directly instead of stripping "['" / "']" from the repr
    # of a one-element list, which breaks for words containing an apostrophe.
    poem = random.choice(wordpool)
    print(poem + " ", end="")