-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathscrape-cirt_net_passwords
More file actions
executable file
·73 lines (65 loc) · 2.28 KB
/
scrape-cirt_net_passwords
File metadata and controls
executable file
·73 lines (65 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python2.7
import sys
import requests
from lxml.html import fromstring
def clean(content):
    """Normalize the text of a scraped table cell.

    ``None`` is mapped to an empty string; any other value has its
    surrounding whitespace stripped.  The placeholders ``n/a``, ``n.a``,
    ``(none)`` and ``(blank)`` (compared case-insensitively) collapse to an
    empty string, and the placeholder ``(any 3 chars)`` is replaced by the
    literal ``abc``.  Everything else is returned stripped but otherwise
    unchanged.
    """
    if content is None:
        return ''

    stripped = content.strip()
    lowered = stripped.lower()

    # Placeholders that stand for "no value".
    if lowered in ('n/a', 'n.a', '(none)', '(blank)'):
        return ''
    # Any three characters are accepted, so emit a fixed sample.
    if lowered == '(any 3 chars)':
        return 'abc'
    return stripped
def clean_protocol(content):
    """Normalize a protocol cell and turn comma-separated lists into '/' lists.

    The value is first passed through ``clean()``.  Commas are then replaced
    by ``/`` so that a multi-protocol value does not clash with the comma
    used as the output field separator, e.g. ``"http, telnet"`` becomes
    ``"http/telnet"`` and ``"http,telnet"`` becomes ``"http/telnet"``.
    """
    content = clean(content)
    # Replace the comma-plus-space form first: if bare commas were replaced
    # first, no ', ' would remain and the space would be left behind
    # ("http, telnet" -> "http/ telnet").
    content = content.replace(', ', '/')
    content = content.replace(',', '/')
    return content
def ignore(protocol, username, password):
    """Return True when a scraped credential entry should be skipped.

    Only ``password`` is inspected; ``protocol`` and ``username`` are
    accepted but not consulted.  The entry is skipped when the password text
    contains any of a fixed list of phrases.  A ``None`` password is never
    skipped.
    """
    if password is None:
        return False

    # NOTE(review): these look like descriptions of how to derive a password
    # (serial/MAC digits, key combinations) rather than literal passwords.
    skip_phrases = (
        'ast 6 characters',
        'characters of the',
        '0 and the number of OCE printer',
        'use ALT+G',
        'last eight digits',
    )
    return any(phrase in password for phrase in skip_phrases)
# Scrape the cirt.net default-password listing: collect every "?vendor="
# link from the index page, fetch each vendor page, and write one
# "protocol,username,password" line to stdout per table found there.

BASE_URL = 'https://cirt.net/passwords'
# Seconds to wait for each HTTP request, so an unresponsive server cannot
# hang the script indefinitely.
REQUEST_TIMEOUT = 30

response = requests.get(BASE_URL, timeout=REQUEST_TIMEOUT)
# Without the index page there is nothing to scrape, so fail loudly here.
response.raise_for_status()
page = fromstring(response.text)

links = set()
for a in page.xpath("//a"):
    href = a.get("href")
    # Anchors without an href attribute yield None; skip them rather than
    # crashing on None.startswith.
    if href and href.startswith('?vendor='):
        links.add('https://cirt.net/passwords%s' % href)

# Sorted so the output order is reproducible between runs.
for link in sorted(links):
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # One failing vendor page should not abort the whole scrape.
        sys.stderr.write('warning: could not fetch %s: %s\n' % (link, exc))
        continue
    page = fromstring(response.text)

    for table in page.xpath("//table"):
        protocol = None
        username = None
        password = None

        for row in table.findall("tr"):
            # Rows with fewer than two cells carry no header/value pair.
            try:
                header = row[0].text_content()
                data = row[1].text_content()
            except IndexError:
                continue

            label = header.strip().lower()
            if label == 'user id':
                username = data
            elif label == 'password':
                password = data
            elif label == 'method':
                protocol = data.lower() if data is not None else ''

        if ignore(protocol, username, password):
            continue

        # Tables that supplied none of the three fields are skipped.
        if protocol is not None or username is not None or password is not None:
            # NOTE(review): fields are joined with bare commas and are not
            # quoted, so a username/password containing a comma adds columns.
            try:
                sys.stdout.write('%s,%s,%s\n' % (clean_protocol(protocol), clean(username), clean(password)))
                sys.stdout.flush()
            except UnicodeEncodeError:
                # Best effort: entries stdout cannot encode are dropped.
                continue