-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathurl_example.py
More file actions
59 lines (48 loc) · 1.74 KB
/
url_example.py
File metadata and controls
59 lines (48 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import urllib.request
import urllib.parse
import re
def get_ganres_list(url='https://www.kinopoisk.ru/top/lists/'):
    """
    @brief      Get the list of genres from kinopoisk.ru
    @param      url   Address of the page to download; anything accepted by
                      urllib.request.urlopen. Defaults to the kinopoisk.ru
                      top-lists page.
    @return     The genres list: one (href, title) tuple per entry found
                between the "list_main js-rum-hero" marker and the next
                closing </ul>. Empty when the section has no links.
    @throws     ValueError if the marker or the closing </ul> is not present
                in the downloaded page. Errors raised by urlopen propagate
                unchanged.
    """
    with urllib.request.urlopen(url) as f:
        text = f.read().decode("utf8")
    return _parse_ganres(text)


# First anchor inside a fragment: group 1 is the href value, group 2 the
# link text. Matching the tag instead of slicing at fixed offsets keeps the
# result correct when the anchor carries extra attributes or is followed by
# whitespace.
_GANRE_LINK_RE = re.compile(r'<a\s[^>]*?href="([^"]*)"[^>]*>(.*?)</a>')


def _parse_ganres(text):
    """Extract (href, title) tuples from the HTML text of the lists page."""
    start = text.index("list_main js-rum-hero")
    stop = text.index(r'</ul>', start)
    section = text[start:stop]
    # Drop everything from an <li>/</li> tag or a "<b..." tag to the end of
    # its line, then squeeze all whitespace runs into single spaces.
    section = re.sub(r'\ *<[/]*li.*\n', '', section)
    section = re.sub(r'\ *<b.*\n', '', section)
    section = re.sub(r'\s+', ' ', section)
    # Each entry lives in its own <div>; the piece after the last </div>
    # (and any piece without a link) is not an entry and must be skipped.
    found = (_GANRE_LINK_RE.search(piece) for piece in section.split('</div>'))
    return [(m.group(1), m.group(2).strip()) for m in found if m is not None]
# Maps a human-readable (Russian) sort label to the code that the request
# below sends in its "_sort" form field.
sort_type = dict(
    (
        ("по порядку", "order"),
        ("по году", "year"),
        ("по названию", "name"),
        ("по оригинальному названию", "oname"),
        ("по рейтингу КиноПоиска", "rating"),
        ("по рейтингу IMDb", "rating_imdb"),
        ("по количеству оценок", "votes"),
        ("по времени", "runtime"),
    )
)
# URL-encoded form body for the POST below. Fields are given as ordered
# pairs so the encoded string keeps the same field order.
# NOTE(review): "list" and "page" appear to mirror the 5 and 2 in the request
# URL; the meaning of "level" is not visible here - confirm against the site.
data = urllib.parse.urlencode(
    [
        ("level", "60"),
        ("list", "5"),
        ("_filtr", "all"),
        ("_sort", sort_type["по порядку"]),
        ("page", 2),
        ("_ord", ""),
    ]
).encode()

req = urllib.request.Request(
    url='https://www.kinopoisk.ru/top/lists/5/page/2',
    data=data,
    method="POST",
)

# Download the whole page first; the connection is closed before parsing.
with urllib.request.urlopen(req) as f:
    text = f.read().decode("utf8")

# Keep only the slice that starts at the itemList table's opening tag text
# and ends just before the first '/table' after it, then print it.
start = text.index('table id="itemList')
stop = text.index('/table', start)
text = text[start:stop]
print(text)