Sentiment-Analysis/sentiment_analysis_with_3D_visualization_2.py at master · JeevanSandhu/Sentiment-Analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
##################################################
##### Sentiment Analysis with 3D Visualization
##################################################

import os
import json
from nltk.corpus import stopwords
from nltk import word_tokenize, sent_tokenize
from vader import SentimentIntensityAnalyzer
import plotly.plotly as py
import plotly.graph_objs as go


def get_sentences(text):
	"""Split *text* into sentences.

	Delegates to NLTK's ``sent_tokenize`` and returns its result unchanged.
	"""
	sentences = sent_tokenize(text)
	return sentences


def get_words(sentence):
	"""Split *sentence* into word tokens.

	Delegates to NLTK's ``word_tokenize`` and returns its result unchanged.
	"""
	tokens = word_tokenize(sentence)
	return tokens


def tag_pos(tokens):
	"""Attach part-of-speech tags to *tokens*.

	Delegates to NLTK's ``pos_tag`` and returns its result unchanged.
	"""
	# pos_tag is not among the imports visible at the top of this file, so
	# calling it used to raise NameError; import it here, where it is needed.
	from nltk import pos_tag
	return pos_tag(tokens)


def stopword_rem(wordlist):
	"""Return the items of *wordlist* that are not English stopwords.

	The stopword set is built from NLTK's ``stopwords.words('english')`` on
	every call. Membership is an exact match, and the relative order of the
	surviving items is preserved.
	"""
	english_stopwords = set(stopwords.words('english'))
	kept = []
	for token in wordlist:
		if token in english_stopwords:
			continue
		kept.append(token)
	return kept


def get_frequency(wordlist, paralist):
	"""Count how often each entry of *wordlist* occurs in *paralist*.

	Returns a dict mapping every word of *wordlist* to its number of
	occurrences in *paralist*; words that never occur map to 0.
	"""
	from collections import Counter
	occurrences = Counter(paralist)
	# Counter yields 0 for missing keys, so absent words need no special case.
	return {word: occurrences[word] for word in wordlist}


def _load_review_contents(file_number):
	# Shared loader for read_file / read_file_2: returns the 'Content' field of
	# every entry under 'Reviews' in the selected JSON file, in file order.
	path = 'Dataset/AmazonReviews/laptops/'
	# os.listdir returns entries in arbitrary order; sort them so that a given
	# file_number selects the same file on every run and platform.
	filenames = sorted(os.listdir(path))
	with open(os.path.join(path, filenames[file_number])) as dataFile:
		data = json.load(dataFile)
	return [review['Content'] for review in data['Reviews']]


def read_file(file_number):
	"""Return all review texts of one product file as a single string.

	*file_number* is an index into the alphabetically sorted listing of
	``Dataset/AmazonReviews/laptops/``. Every review's 'Content' is prefixed
	with a single space and the pieces are concatenated, so a non-empty result
	starts with a space; a file without reviews yields ''.

	Raises IndexError when *file_number* is out of range and KeyError when the
	JSON lacks 'Reviews' or a review lacks 'Content'.
	"""
	# join instead of repeated '+' avoids rebuilding the string for each review.
	return ''.join(' ' + content for content in _load_review_contents(file_number))


def read_file_2(file_number):
	"""Return the review texts of one product file as a list of strings.

	*file_number* is an index into the alphabetically sorted listing of
	``Dataset/AmazonReviews/laptops/``. The list holds the 'Content' field of
	each entry under 'Reviews', in file order.

	Raises IndexError when *file_number* is out of range and KeyError when the
	JSON lacks 'Reviews' or a review lacks 'Content'.
	"""
	return _load_review_contents(file_number)

def read_file_1():
	"""Read ``Dataset/customer_review_data/Canon G3.txt`` and split it into chunks.

	Each line is reduced to the text after its first '##' (lines without '##'
	are kept whole), the reduced lines are concatenated without any separator,
	and the result is split on the literal marker '[t]'.

	Returns the list of pieces produced by that split; when the text starts
	with '[t]' the first piece is the empty string.
	"""
	path = 'Dataset/customer_review_data/'
	# Context manager so the file handle is closed even if reading fails.
	with open(path + 'Canon G3.txt') as review_file:
		raw = review_file.read()
	# presumably '##' separates an annotation prefix from the sentence text and
	# '[t]' marks a review title -- confirm against the dataset's readme.
	sentences = [line.split('##', 1)[-1] for line in raw.split('\n')]
	return ''.join(sentences).split('[t]')


def main():
	"""Score the reviews of one product file with VADER and plot them.

	Loads the review texts selected by ``read_file_2(3)``, removes stopwords
	from each review, computes polarity scores for every cleaned review and
	sends a plotly Surface figure built from the sorted
	``[pos, -neg]`` pairs to ``py.plot``.
	"""
	reviews = read_file_2(3)

	# Tokenise each review, drop stopwords and glue the tokens back together.
	# Every token is followed by one space, so non-empty strings end in a space.
	cleaned_reviews = []
	for review in reviews:
		kept_tokens = stopword_rem(get_words(review))
		cleaned_reviews.append(''.join(token + ' ' for token in kept_tokens))

	analyzer = SentimentIntensityAnalyzer()
	# One [positive, negated negative] pair per review, in ascending order.
	score_pairs = sorted(
		[scores['pos'], -1 * scores['neg']]
		for scores in map(analyzer.polarity_scores, cleaned_reviews)
	)

	# NOTE(review): x and y both have one entry per review while each z row
	# has only two columns -- confirm this is the intended surface layout.
	axis = range(len(score_pairs))
	surface = go.Surface(z=score_pairs, x=axis, y=axis)

	figure_margin = dict(l=65, r=50, b=65, t=90)
	figure_layout = go.Layout(
		title='Sentiment Analysis',
		autosize=False,
		width=500,
		height=500,
		margin=figure_margin
	)

	figure = go.Figure(data=[surface], layout=figure_layout)
	py.plot(figure, filename='Sentiment Analysis')

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
		main()