-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpan_scraper2.py
More file actions
94 lines (92 loc) · 3.3 KB
/
pan_scraper2.py
File metadata and controls
94 lines (92 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import getpass
import pickle
import random
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
_SLEEP_LIMIT = 4  # upper bound, in seconds, of each randomized pause
_PANDORA_URL = 'https://www.pandora.com/'
_PLAYLIST_PROMPT = 'Enter name of playlist to scrape, or "quit" to quit:\n'


def _pause():
    """Announce and sleep for a random duration between 1 and _SLEEP_LIMIT seconds."""
    print('Pausing for discretion...')
    time.sleep(random.uniform(1, _SLEEP_LIMIT))


def _log_in(browser):
    """Open Pandora, submit the login form, and navigate to the Playlists page.

    The email and password are read interactively from the console.
    """
    browser.get(_PANDORA_URL)
    _pause()

    browser.find_element(By.LINK_TEXT, 'Log In').click()
    _pause()

    email = input('Enter your login email:\n')
    browser.find_element(By.NAME, 'username').send_keys(email)
    _pause()

    # getpass reads without echo, so the password does not end up on screen
    # or in the terminal scrollback.
    password = getpass.getpass('Enter your login password:\n')
    browser.find_element(By.NAME, 'password').send_keys(password)
    _pause()

    # NOTE(review): the submit button is selected purely by position (the
    # fourth <button> on the page); this is carried over unchanged and will
    # break if the page layout changes -- confirm against the live site.
    browser.find_elements(By.TAG_NAME, 'button')[3].click()
    _pause()

    browser.find_element(By.LINK_TEXT, 'Playlists').click()
    _pause()


def _scrape_open_playlist(browser):
    """Return a {song title: artist} dict for the playlist currently displayed.

    The page is scrolled down two window heights at a time; scraping stops
    once a scroll yields the same rows as the previous one, or when the
    number of titles and artists found on the page disagree.
    """
    # NOTE: entries are keyed by title, so songs sharing a title collapse
    # into a single entry.
    playlist = {}
    previous_segment = {}
    y = 0
    height = browser.get_window_size()['height']

    while True:
        songs = [
            element.text for element in
            browser.find_elements(By.CLASS_NAME, 'RowItemCenterColumn__mainText')
        ]
        artists = [
            element.text for element in
            browser.find_elements(By.CLASS_NAME, 'RowItemCenterColumn__secondText')
        ]

        # Explicit check instead of `assert`: asserts are stripped under
        # `python -O`, which would let zip() silently mis-pair the rows.
        if len(songs) != len(artists):
            print('Songs and artists mismatch:')
            print('len(songs):', len(songs))
            print('len(artists):', len(artists))
            break

        segment = dict(zip(songs, artists))
        if segment == previous_segment:
            # Nothing new appeared after the last scroll.
            break
        for title, artist in segment.items():
            playlist.setdefault(title, artist)
        previous_segment = segment

        y += 2 * height
        browser.execute_script('window.scrollTo(0, {});'.format(y))
        _pause()

    return playlist


def _scrape_playlists(browser):
    """Prompt for playlist names until "quit" and return {name: playlist dict}."""
    master = {}
    playlist_name = input(_PLAYLIST_PROMPT)
    while playlist_name.lower() != 'quit':
        try:
            browser.find_element(By.LINK_TEXT, playlist_name).click()
        except WebDriverException:
            # Covers both "no such link" and "link present but not clickable".
            print("Couldn't find that one. Is it in view of the browser? Did you make a typo?")
        else:
            _pause()
            master[playlist_name] = _scrape_open_playlist(browser)
            browser.back()
        playlist_name = input(_PLAYLIST_PROMPT)
    return master


def _save(master):
    """Pickle *master* to a path read from the console, re-prompting on I/O errors."""
    while True:
        fn = input('Enter full filepath of file to store data in:\n')
        try:
            with open(fn, 'wb') as f:
                pickle.dump(master, f)
        except OSError as err:
            # A bad path should not throw away everything scraped so far.
            print('Could not write to that path ({}). Please try again.'.format(err))
        else:
            return


def pandora():
    """Interactively scrape Pandora playlists with Chrome and pickle the result.

    Launches a Chrome WebDriver session, logs in with credentials typed at the
    console, then repeatedly asks for a playlist name and collects its
    song/artist pairs until the user enters "quit". The collected data, a dict
    mapping each playlist name to a {song title: artist} dict, is pickled to a
    file path entered by the user.

    Returns:
        None. The result is written to disk only.

    Raises:
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started or a login-page element cannot be located.
    """
    browser = webdriver.Chrome()
    try:
        _pause()
        _log_in(browser)
        master = _scrape_playlists(browser)
    finally:
        # Always shut down Chrome and its driver process, even on failure.
        browser.quit()

    _save(master)