-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetProductInfo.py
More file actions
153 lines (115 loc) · 4.68 KB
/
getProductInfo.py
File metadata and controls
153 lines (115 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Get Product Info
import requests, urllib, string
from HTMLParser import HTMLParser
def getJSON(id_type, product_id, timeout=10):
    """Fetch the product API record for one product and decode it.

    Sends a GET request to the ``products/v3`` endpoint of api.target.com
    and returns the decoded JSON body.

    Args:
        id_type: Value sent as the ``id_type`` query parameter.
        product_id: Value sent as the ``product_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Optional;
            the request previously had no time limit at all.

    Returns:
        The decoded JSON response (a dict, per the original author's note).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    api = 'products/v3'  # products or location
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    # A list of pairs keeps the query parameters in their original order.
    params = [
        ('key', 'J5PsS2XGuqCnkdQq0Let6RSfvU7oyPwF'),
        ('id_type', id_type),
        ('product_id', product_id),
    ]
    # Handing the query to requests lets it percent-encode the values, which
    # plain string concatenation did not do.
    r = requests.get('https://api.target.com/' + api + '/',
                     params=params,
                     auth=('user', 'pass'),
                     timeout=timeout)
    return r.json()  # <type 'dict'>
def getName(json):
    """Pull the product name out of a decoded product API response.

    Reads ``json['product_composite_response']['items'][0]
    ['general_description']`` and returns it converted with ``str``.
    Returns the bool False when any key on that path is absent or when
    ``items`` is empty.
    """
    if 'product_composite_response' not in json:
        return False
    response = json['product_composite_response']

    if 'items' not in response:
        return False
    items = response['items']

    if len(items) < 1:
        return False
    firstItem = items[0]

    if 'general_description' not in firstItem:
        return False
    return str(firstItem['general_description'])
def getHTML(json, timeout=10):
    """Download the product's web page named in a product API response.

    Reads the link stored at ``json['product_composite_response']['items']
    [0]['data_page_link']`` and fetches that page. Note that the return
    value is the page body, not the link itself.

    Args:
        json: Decoded product API response.
        timeout: Seconds to wait for the server before giving up. Optional;
            the request previously had no time limit at all.

    Returns:
        The body of the page response (``r.content``), or the bool False
        when the API response does not carry a page link.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    if 'product_composite_response' not in json:
        return False
    response = json['product_composite_response']

    if 'items' not in response:
        return False
    items = response['items']

    if len(items) < 1:
        return False
    firstItem = items[0]

    if 'data_page_link' not in firstItem:
        return False
    pageLink = str(firstItem['data_page_link'])

    r = requests.get(pageLink, auth=('user', 'pass'), timeout=timeout)
    return r.content
def getImgURL(HTML):
    """Extract the product image URL from a product page.

    Searches for the text ``name="twitter:image:src" content="`` and returns
    everything between it and the next double quote.

    Args:
        HTML: Page source as a string, or a falsy value (such as the False
            returned when no page could be fetched).

    Returns:
        The image URL string, or the bool False when there is no page, the
        marker is absent, the closing quote is missing, or the URL is empty.
    """
    if not HTML:
        return False

    imgTag = 'name="twitter:image:src" content="'
    tagIndex = HTML.find(imgTag)
    # find() signals "not found" with -1; adding len(imgTag) to that would
    # yield a valid-looking index and an unrelated slice of the page.
    if tagIndex == -1:
        return False

    startIndex = tagIndex + len(imgTag)
    endIndex = HTML.find('"', startIndex)  # end of quote ends url
    if endIndex <= startIndex:
        return False
    return HTML[startIndex:endIndex]
def makeReadable(desc):
    """Turn a product description into plain, readable text.

    Applies a fixed sequence of substring replacements: typographic quotes
    and dashes become their ASCII counterparts, line-break tags and bullets
    become newlines, the trade mark sign becomes "(TM)", and leftover tag
    punctuation plus the registered sign are dropped.

    Every "/" is removed, including slashes in ordinary text, because the
    final step cannot tell them apart from closing-tag slashes.

    Args:
        desc: Description text.

    Returns:
        The cleaned-up text.
    """
    # NOTE(review): the special characters are accepted both as the character
    # itself and as its HTML entity spelling, on the assumption that the text
    # may still be entity-encoded when it arrives - confirm against real pages.
    # The apostrophe entry replaces a literal that could not be parsed (three
    # consecutive quotes); '&#39;' is the presumed intent.
    #
    # Non-ASCII characters are written as escapes so this source stays
    # pure ASCII. Order matters: '<br>' and '<br' must be handled before the
    # bare '<', 'p>' before the bare '>', and '/' last.
    replacements = (
        (u'\u201d', '"'),       # right double quotation mark
        ('&rdquo;', '"'),
        (u'\u2019', "'"),       # right single quotation mark
        ('&rsquo;', "'"),
        ('&#39;', "'"),         # apostrophe
        (u'\u2013', '-'),       # en dash
        ('&ndash;', '-'),
        ('<br>', '\n'),
        ('<br', '\n'),          # other <br spellings; the tail is removed below
        (u'\u2122', '(TM)'),    # trade mark sign
        ('&trade;', '(TM)'),
        (u'\xa0', ' '),         # non breaking space
        ('&nbsp;', ' '),
        (u'\u2022', '\n*'),     # bullet
        ('&bull;', '\n*'),
        ('<', ''),
        ('p>', ''),
        ('>', ''),
        (u'\xae', ''),          # registered sign
        ('&reg;', ''),
        ('/', ''),
    )
    for old, new in replacements:
        desc = desc.replace(old, new)
    return desc
def getDescription(HTML):
    """Extract the product description from a product page.

    Searches for the text ``property="og:description" content="``, takes
    everything up to the next double quote, unescapes HTML entities in it
    and passes the result through makeReadable.

    Args:
        HTML: Page source as a string, or a falsy value (such as the False
            returned when no page could be fetched).

    Returns:
        The cleaned description string, or the bool False when there is no
        page, the marker is absent, the closing quote is missing, or the
        description is empty.
    """
    if not HTML:
        return False

    descTag = 'property="og:description" content="'
    tagIndex = HTML.find(descTag)
    # find() signals "not found" with -1; adding len(descTag) to that would
    # yield a valid-looking index and an unrelated slice of the page.
    if tagIndex == -1:
        return False

    startIndex = tagIndex + len(descTag)
    endIndex = HTML.find('"', startIndex)  # end of quote ends desc
    if endIndex <= startIndex:
        return False
    desc = HTML[startIndex:endIndex]

    # Get rid of most HTML characters
    desc = str(HTMLParser().unescape(desc))
    # Send through handmade parser
    return makeReadable(desc)
def getPrice(HTML):
    """Extract the product price from a product page.

    Searches for the text ``class="offerPrice" itemprop="price">$`` and
    parses the characters between it and the next ``<``.

    Args:
        HTML: Page source as a string, or a falsy value (such as the False
            returned when no page could be fetched).

    Returns:
        A list of ints, ``[dollars, cents]``, or the bool False when there
        is no page, the marker is absent, or the price text is not numeric.
        A price with no decimal part is reported with 0 cents.
    """
    if not HTML:
        return False

    priceTag = 'class="offerPrice" itemprop="price">$'
    tagIndex = HTML.find(priceTag)
    # find() signals "not found" with -1; adding len(priceTag) to that would
    # yield a valid-looking index and an unrelated slice of the page.
    if tagIndex == -1:
        return False

    startIndex = tagIndex + len(priceTag)
    endIndex = HTML.find('<', startIndex)  # end of span signals end of price
    if endIndex <= startIndex:
        return False

    # Drop thousands separators so "1,299.00" parses.
    price = HTML[startIndex:endIndex].replace(',', '')

    # Get dollar and cents separately
    try:
        priceList = [int(part) for part in price.split('.')]
    except ValueError:
        # Not a plain number (for example a price range or a text label).
        return False

    if len(priceList) == 1:
        priceList.append(0)  # whole-dollar price: no cents were given
    return priceList
def productInfo(id_type, product_id):
    """Collect one product's details and write them to disk for Unity.

    Looks the product up through getJSON, downloads its page through
    getHTML, and extracts the name, image URL, description and price. When
    all four are available it writes ``<name>.txt`` containing the string
    form of a dict with the keys ``productName``, ``priceDollars``,
    ``priceCents`` and ``description``, then saves the product image through
    productImgFile. When anything is missing it does nothing.

    Args:
        id_type: Passed through to getJSON.
        product_id: Passed through to getJSON.

    Returns:
        None.
    """
    productJSON = getJSON(id_type, product_id)  # Specify product
    HTML = getHTML(productJSON)  # product's webpage, or False
    if HTML is False:
        # No page to parse; the extractors below expect a string.
        return

    name = getName(productJSON)  # site's name for product
    imgURL = getImgURL(HTML)
    desc = getDescription(HTML)  # string
    priceList = getPrice(HTML)
    if any(value is False for value in (name, desc, priceList, imgURL)):
        return

    info = dict()
    info['productName'] = name
    info['priceDollars'] = priceList[0]
    # A whole-dollar price has no cents part; treat it as 0 rather than
    # relying on an assert, which is stripped when Python runs with -O.
    info['priceCents'] = priceList[1] if len(priceList) > 1 else 0
    info['description'] = desc

    # The name comes from a remote service, so keep path separators out of
    # the file names built from it.
    safeName = name.replace('/', '_').replace('\\', '_')

    # Create txt file with info of product for Unity
    with open(safeName + '.txt', 'w') as f:
        f.write(str(info))

    # Create PNG file from string of URL
    productImgFile(imgURL, safeName)
def productImgFile(imgURL, name):
    """Download the resource at imgURL and save it as name + '.png'."""
    urllib.urlretrieve(imgURL, name + '.png')