-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtaiyingshi_dm_spide.py
More file actions
86 lines (64 loc) · 2.13 KB
/
taiyingshi_dm_spide.py
File metadata and controls
86 lines (64 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests
from bs4 import BeautifulSoup
import pymysql
import threading
import time
# Number of free worker slots; workers take one before starting (decrement in
# __main__) and give it back when they finish (increment in OpenUrl).
ThreadCount = 1
# Next primary-key value for inserts into `dm`; re-seeded in __main__ from the
# table's current max id.
ID = 0
# Guards the shared cursor and the ThreadCount/ID globals across threads.
lock = threading.Lock()
# NOTE(review): connects to the built-in `mysql` database and then switches to
# `taiyingshi` via USE below; credentials are hard-coded.
conn = pymysql.connect(host = '127.0.0.1',user = 'root',passwd = 'root',db = 'mysql',charset = 'utf8')
cur = conn.cursor()
cur.execute("use taiyingshi")
# Author's note-to-self: BeautifulSoup should be fed decoded text, not bytes.
'''
You are getting response.content. But it return response body as bytes (docs). But you should pass str to BeautifulSoup constructor (docs). So you need to use the response.text instead of getting content.
'''
def OpenUrl(url):
    """Fetch one list page and insert each title/link found into `dm`.

    Parameters
    ----------
    url : str
        Page URL to scrape; surrounding quote characters are stripped first.

    Side effects: inserts rows through the module-level `cur`/`conn`, advances
    the global `ID` counter, and returns a worker slot by incrementing the
    global `ThreadCount`. Network failures are printed and the page skipped;
    failed inserts (e.g. duplicates) are printed and crawling continues.
    """
    global ThreadCount, ID
    url = url.strip('\'"')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/51.0.2704.63 Safari/537.36'
    }
    print(url)
    try:
        # Use .text (decoded str), not .content (bytes): BeautifulSoup expects
        # a string — this is exactly the fix the module-level note asks for.
        html = requests.get(url, headers=headers).text
    except Exception as e:
        print(e)
        return
    bsObj = BeautifulSoup(html, "html.parser")
    for div in bsObj.find_all("div", {"class": "pic"}):
        title = div.find("span", {"class": "title"}).get_text()
        _url = div.find("a")['href']
        print(title)
        print(_url)
        # `with` guarantees the lock is released even if execute() raises,
        # unlike the bare acquire()/release() pair it replaces.
        with lock:
            try:
                # Plain %s placeholders: pymysql quotes and escapes the
                # parameters itself; wrapping them in '"' stored quoted
                # garbage (or broke the statement outright).
                cur.execute("insert into dm (name, url, id) values (%s, %s, %s)",
                            (title, _url, ID))
                conn.commit()
                ID = ID + 1
            except Exception as ee:
                # Best-effort: log the failed insert and keep going.
                print(ee)
            # Worker done with this row's bookkeeping: free a thread slot.
            ThreadCount = ThreadCount + 1
if __name__ == '__main__':
    # Resume crawling from row id 491 of the `dm` seed table; each row's third
    # column (data[2]) holds the list-page URL to fetch.
    row_id = 491  # renamed from `id`, which shadowed the builtin
    # Re-seed the global ID counter from the current max id so fresh inserts
    # don't collide with rows already present.
    cur.execute("select * from dm order by id desc LIMIT 1")
    data = cur.fetchone()
    ID = data[0] + 1
    while 1:
        if ThreadCount <= 0:
            # No free worker slot: wait WITHOUT advancing row_id. The original
            # incremented the id every iteration, so rows fetched while the
            # budget was exhausted were silently skipped.
            time.sleep(1)
            continue
        with lock:
            # Parameterized query instead of string-concatenated SQL.
            cur.execute("select * from `dm` where id = %s", (row_id,))
            data = cur.fetchone()
        row_id = row_id + 1
        if data is None:
            # No row with this id — assumes ids are contiguous (TODO confirm),
            # so we are past the end of the table. The original crashed here
            # with a TypeError on data[2].
            break
        t = threading.Thread(target=OpenUrl, args=(data[2],))
        t.start()
        print(data[0])
        with lock:
            ThreadCount = ThreadCount - 1