-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparsers.py
More file actions
168 lines (131 loc) · 5.46 KB
/
parsers.py
File metadata and controls
168 lines (131 loc) · 5.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import glob
import os.path
from collections import OrderedDict
from pygments.lexers import PythonLexer
import xmltodict
import javalang
import ast
import pygments
from pygments.lexers import JavaLexer
from pygments.token import Token
import json
from datasets import DATASET
class BugReport:
    """Container for one bug report: its text, the files its fix touched,
    and slots for artifacts filled in by later preprocessing passes."""

    __slots__ = ['summary', 'description', 'fixed_files',
                 'pos_tagged_summary', 'pos_tagged_description', 'stack_traces']

    def __init__(self, summary, description, fixed_files):
        # Raw report text and the list of files changed by the fix.
        self.summary = summary
        self.description = description
        self.fixed_files = fixed_files
        # Populated later by downstream preprocessing; None until then.
        for pending in ('pos_tagged_summary', 'pos_tagged_description',
                        'stack_traces'):
            setattr(self, pending, None)
class SourceFile:
    """Container for one parsed source file: its text, extracted
    identifiers, and slots for artifacts added by later passes."""

    __slots__ = ['all_content', 'comments', 'class_names', 'attributes',
                 'method_names', 'variables', 'file_name', 'pos_tagged_comments',
                 'exact_file_name', 'package_name']

    def __init__(self, all_content, comments, class_names, attributes,
                 method_names, variables, file_name, package_name):
        self.all_content = all_content
        self.comments = comments
        self.class_names = class_names
        self.attributes = attributes
        self.method_names = method_names
        self.variables = variables
        # file_name is a one-element list; keep the first entry as the
        # bare (extension-free) name for convenience.
        self.file_name = file_name
        self.exact_file_name = file_name[0]
        self.package_name = package_name
        # Filled in by downstream preprocessing; None until then.
        self.pos_tagged_comments = None
class Parser:
    """Parsers for a project's bug-report repository (JSON) and its
    Python source tree."""

    __slots__ = ['name', 'src', 'bug_repo']

    def __init__(self, project):
        """Capture the project's name, source root and bug-report JSON path.

        `project` is expected to expose `name`, `src` and `bug_repo`
        attributes (see the `datasets` module).
        """
        self.name = project.name
        self.src = project.src
        self.bug_repo = project.bug_repo

    def report_parser(self):
        """Parse the bug-report JSON file.

        Returns an OrderedDict mapping a running integer index to a
        BugReport, keeping only reports whose fix touched at least one
        Python file.
        """
        with open(self.bug_repo) as json_file:
            data = json.load(json_file)

        bug_reports = OrderedDict()
        count = 0
        for issue_id in data["closed_issues"]:
            issue = data["closed_issues"][issue_id]
            # Each entry of files_changed is a (status, path) pair;
            # skip empty entries.  A fresh list per issue avoids the
            # shared-mutable-accumulator pattern.
            changed = [os.path.normpath(item[1])
                       for item in issue.get("files_changed")
                       if item != []]
            selected_files = [f for f in changed if f.endswith('.py')]
            if selected_files:
                bug_reports[count] = BugReport(
                    issue.get("issue_summary"),
                    # Missing/empty description normalises to ''.
                    issue.get("issue_description") or '',
                    selected_files
                )
                count += 1
        print('Total bug reports:' + str(count))
        return bug_reports

    def src_parser(self):
        """Parse every Python file under the project's source root.

        Returns an OrderedDict mapping the file path (relative to
        DATASET.src) to a SourceFile holding the file's text, comments
        and extracted identifiers.
        """
        src_addresses = glob.glob(str(self.src) + '/**/*.py', recursive=True)
        # One Python lexer instance, reused across files by pygments.lex().
        python_lexer = PythonLexer()
        src_files = OrderedDict()
        for src_file in src_addresses:
            with open(src_file, encoding="ISO-8859-1") as file:
                src = file.read()

            # Placeholders for the different parts of a source file.
            comments = ''
            class_names = []
            attributes = []
            method_names = []
            variables = []

            # BUGFIX: ast.parse() used to sit outside the try block, so a
            # single unparseable file (Python-2 syntax, null bytes, ...)
            # aborted the whole run.  Such files are now kept with empty
            # identifier lists.  The former bare `except:` is narrowed to
            # the exceptions ast.parse actually raises.
            try:
                module = ast.parse(src)
                for node in ast.walk(module):
                    if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store):
                        variables.append(node.id)
                    elif isinstance(node, ast.Attribute):
                        attributes.append(node.attr)
                    elif isinstance(node, ast.FunctionDef):
                        method_names.append(node.name)
                    elif isinstance(node, ast.ClassDef):
                        class_names.append(node.name)
            except (SyntaxError, ValueError):
                pass

            # Lexically tokenize the file to collect comments; a multiline
            # comment token at position 0 (a module docstring as seen by the
            # lexer) is stripped from the retained source text instead.
            # (The original `ind` flag was never reassigned, so the
            # condition reduces to `i == 0`.)
            for i, token in enumerate(pygments.lex(src, python_lexer)):
                if token[0] in Token.Comment:
                    if i == 0 and token[0] is Token.Comment.Multiline:
                        src = src[src.index(token[1]) + len(token[1]):]
                        continue
                    comments += token[1]

            package_name = None
            # Key files by their path relative to the dataset's source root.
            src_id = src_file.replace(str(DATASET.src) + '/', '')
            src_files[src_id] = SourceFile(
                src, comments, class_names, attributes,
                method_names, variables,
                [os.path.basename(src_file).split('.')[0]],
                package_name
            )
        return src_files
def test():
    """Smoke-test the parsers against the zxing dataset."""
    import datasets

    print(DATASET.src)
    zxing_parser = Parser(datasets.zxing)
    print('1')
    # x = zxing_parser.report_parser()
    print('2')
    zxing_parser.src_parser()
    print('3')
    # src_id, src = list(d.items())[10]
    # print(src_id, src.exact_file_name, src.package_name)


if __name__ == '__main__':
    test()