# Crawler.py
# Builds content blocks from a URL with BlockMaker and extracts titles,
# texts, and images from them with ContentExtractor.
import BlockMaker
import ContentExtractor
import sys
class Crawler:
    """Fetch a page through BlockMaker and pull out its titles, texts, and images.

    Typical use: construct with a mode, call ``seturl()``, then ``extract()``,
    then read ``titles`` / ``texts`` / ``images`` or call ``show()``.
    """

    # Accepted ``mode`` arguments mapped to the numeric mode id that is passed
    # to ContentExtractor and appended to the URL handed to BlockMaker.
    _MODE_IDS = {
        1: 1, "news": 1,
        2: 2, "blog": 2,
        3: 3, "shop": 3,
    }

    def __init__(self, mode):
        """Create a crawler for the given page type.

        Args:
            mode: ``1``/``"news"``, ``2``/``"blog"``, or ``3``/``"shop"``.

        Raises:
            ValueError: if ``mode`` is not one of the accepted values.
        """
        # Validate first so a bad mode fails before any collaborator is built.
        try:
            mode_id = self._MODE_IDS.get(mode)
        except TypeError:
            # Unhashable values (e.g. a list) can never be a valid mode.
            mode_id = None
        if mode_id is None:
            raise ValueError('Select mode "news" or "blog" or "shop" for 1st argument!')

        self.blockmaker = BlockMaker.BlockMaker()
        self.url = ""
        # Stored with a leading space because extract() concatenates it directly
        # onto the URL. NOTE(review): presumably BlockMaker.seturl() splits
        # "<url> <mode>" on the space -- confirm against BlockMaker.
        self.mode = " " + str(mode_id)
        self.extractor = ContentExtractor.ContentExtractor(mode_id)
        self.titles = []
        self.texts = []
        self.images = []

    def seturl(self, url):
        """Remember the URL that the next ``extract()`` call will crawl."""
        self.url = url

    def extract(self):
        """Crawl the current URL and store the extracted titles, texts, and images.

        Raises:
            ValueError: if no URL has been set with ``seturl()``.
        """
        # Without a URL only the mode suffix would be sent to BlockMaker.
        if not self.url:
            raise ValueError("Set a URL with seturl() before calling extract()!")

        self.blockmaker.seturl(self.url + self.mode)
        blocklist = self.blockmaker.makeblock()
        self.extractor.setblocklist(blocklist)
        self.titles, self.texts, self.images = self.extractor.extractcontent()

    def show(self):
        """Print the extracted titles, texts, and images, each under its own header."""
        print("----<Title>----")
        for title in self.titles:
            print(title)
        print("----<Text>----")
        for text in self.texts:
            print(text)
        print("----<Image>----")
        for image in self.images:
            print(image)
if __name__ == "__main__":
    # Demo run: crawl one news article and print what was extracted.
    # Guarded so that importing this module does not trigger a crawl.
    crawler = Crawler("news")
    crawler.seturl("https://news.naver.com/main/ranking/read.nhn?mid=etc&sid1=111&rankingType=popular_day&oid=119&aid=0002321301&date=20190405&type=1&rankingSeq=7&rankingSectionId=100")
    crawler.extract()
    crawler.show()
# crawler = Crawler("blog")
# crawler.seturl("https://blog.naver.com/eunji318/221505952095")
# crawler.extract()
# crawler.show()
# crawler = Crawler("shop")
# crawler.seturl("http://shopping.interpark.com/product/productInfo.do?prdNo=6087626322&dispNo=008022001")
# crawler.extract()
# crawler.show()