-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparsers.py
More file actions
168 lines (131 loc) · 5.46 KB
/
parsers.py
File metadata and controls
168 lines (131 loc) · 5.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import glob
import os.path
from collections import OrderedDict
from pygments.lexers import PythonLexer
import xmltodict
import javalang
import ast
import pygments
from pygments.lexers import JavaLexer
from pygments.token import Token
import json
from datasets import DATASET
class BugReport:
    """Container for one bug report: its text, the files its fix touched,
    and slots for artifacts filled in by later preprocessing passes."""

    __slots__ = ['summary', 'description', 'fixed_files',
                 'pos_tagged_summary', 'pos_tagged_description', 'stack_traces']

    def __init__(self, summary, description, fixed_files):
        # Raw report text and the list of files changed by the fix.
        self.summary = summary
        self.description = description
        self.fixed_files = fixed_files
        # Populated later by downstream preprocessing; None until then.
        for pending in ('pos_tagged_summary', 'pos_tagged_description',
                        'stack_traces'):
            setattr(self, pending, None)
class SourceFile:
    """Container for one parsed source file: its text, extracted
    identifiers, and slots for artifacts added by later passes."""

    __slots__ = ['all_content', 'comments', 'class_names', 'attributes',
                 'method_names', 'variables', 'file_name', 'pos_tagged_comments',
                 'exact_file_name', 'package_name']

    def __init__(self, all_content, comments, class_names, attributes,
                 method_names, variables, file_name, package_name):
        self.all_content = all_content
        self.comments = comments
        self.class_names = class_names
        self.attributes = attributes
        self.method_names = method_names
        self.variables = variables
        # file_name is a one-element list; keep the first entry as the
        # bare (extension-free) name for convenience.
        self.file_name = file_name
        self.exact_file_name = file_name[0]
        self.package_name = package_name
        # Filled in by downstream preprocessing; None until then.
        self.pos_tagged_comments = None
class Parser:
    """Parsers for a project's bug-report repository (JSON) and its
    Python source tree."""

    __slots__ = ['name', 'src', 'bug_repo']

    def __init__(self, project):
        """Capture the project's name, source root and bug-report JSON path.

        `project` is expected to expose `name`, `src` and `bug_repo`
        attributes (see the `datasets` module).
        """
        self.name = project.name
        self.src = project.src
        self.bug_repo = project.bug_repo

    def report_parser(self):
        """Parse the bug-report JSON file.

        Returns an OrderedDict mapping a running integer index to a
        BugReport, keeping only reports whose fix touched at least one
        Python file.
        """
        with open(self.bug_repo) as json_file:
            data = json.load(json_file)

        bug_reports = OrderedDict()
        count = 0
        for issue_id in data["closed_issues"]:
            issue = data["closed_issues"][issue_id]
            # Each entry of files_changed is a (status, path) pair;
            # skip empty entries.  A fresh list per issue avoids the
            # shared-mutable-accumulator pattern.
            changed = [os.path.normpath(item[1])
                       for item in issue.get("files_changed")
                       if item != []]
            selected_files = [f for f in changed if f.endswith('.py')]
            if selected_files:
                bug_reports[count] = BugReport(
                    issue.get("issue_summary"),
                    # Missing/empty description normalises to ''.
                    issue.get("issue_description") or '',
                    selected_files
                )
                count += 1
        print('Total bug reports:' + str(count))
        return bug_reports

    def src_parser(self):
        """Parse every Python file under the project's source root.

        Returns an OrderedDict mapping the file path (relative to
        DATASET.src) to a SourceFile holding the file's text, comments
        and extracted identifiers.
        """
        src_addresses = glob.glob(str(self.src) + '/**/*.py', recursive=True)
        # One Python lexer instance, reused across files by pygments.lex().
        python_lexer = PythonLexer()
        src_files = OrderedDict()
        for src_file in src_addresses:
            with open(src_file, encoding="ISO-8859-1") as file:
                src = file.read()

            # Placeholders for the different parts of a source file.
            comments = ''
            class_names = []
            attributes = []
            method_names = []
            variables = []

            # BUGFIX: ast.parse() used to sit outside the try block, so a
            # single unparseable file (Python-2 syntax, null bytes, ...)
            # aborted the whole run.  Such files are now kept with empty
            # identifier lists.  The former bare `except:` is narrowed to
            # the exceptions ast.parse actually raises.
            try:
                module = ast.parse(src)
                for node in ast.walk(module):
                    if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store):
                        variables.append(node.id)
                    elif isinstance(node, ast.Attribute):
                        attributes.append(node.attr)
                    elif isinstance(node, ast.FunctionDef):
                        method_names.append(node.name)
                    elif isinstance(node, ast.ClassDef):
                        class_names.append(node.name)
            except (SyntaxError, ValueError):
                pass

            # Lexically tokenize the file to collect comments; a multiline
            # comment token at position 0 (a module docstring as seen by the
            # lexer) is stripped from the retained source text instead.
            # (The original `ind` flag was never reassigned, so the
            # condition reduces to `i == 0`.)
            for i, token in enumerate(pygments.lex(src, python_lexer)):
                if token[0] in Token.Comment:
                    if i == 0 and token[0] is Token.Comment.Multiline:
                        src = src[src.index(token[1]) + len(token[1]):]
                        continue
                    comments += token[1]

            package_name = None
            # Key files by their path relative to the dataset's source root.
            src_id = src_file.replace(str(DATASET.src) + '/', '')
            src_files[src_id] = SourceFile(
                src, comments, class_names, attributes,
                method_names, variables,
                [os.path.basename(src_file).split('.')[0]],
                package_name
            )
        return src_files
def test():
    """Smoke-test the parsers against the zxing dataset."""
    import datasets

    print(DATASET.src)
    zxing_parser = Parser(datasets.zxing)
    print('1')
    # x = zxing_parser.report_parser()
    print('2')
    zxing_parser.src_parser()
    print('3')
    # src_id, src = list(d.items())[10]
    # print(src_id, src.exact_file_name, src.package_name)


if __name__ == '__main__':
    test()