-
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathanalyze.py
More file actions
155 lines (116 loc) · 5.12 KB
/
analyze.py
File metadata and controls
155 lines (116 loc) · 5.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
# this is the universal token, used by all lexers to know what to output
from lexers.token import TokenType
# these are the individual lexers for all languages we support
from lexers.ruby.rubylexer import RubyLexer
from lexers.python.pythonlexer import PythonLexer
from lexers.javascript.javascriptlexer import JavaScriptLexer
from lexers.golang.golexer import GoLexer
# this will read the file extension and return the correct lexer
def get_lexer_for_file(file_path):
    """
    Return the lexer class matching file_path's extension.

    Args:
        file_path (str): Path whose extension selects the language.

    Returns:
        The lexer class (not an instance) for the detected language.

    Raises:
        ValueError: If the extension is not one of .rb, .py, .js, .go.
    """
    ext = os.path.splitext(file_path)[1]
    # guard-clause style: the first matching extension wins,
    # anything unknown falls through to the error at the bottom
    if ext == ".rb":
        return RubyLexer
    if ext == ".py":
        return PythonLexer
    if ext == ".js":
        return JavaScriptLexer
    if ext == ".go":
        return GoLexer
    raise ValueError(f"Unsupported file extension: {ext}")
# this is the analyze function
def analyze_file(file_path: str, selected_stats=None):
    """
    Analyze a source file and return only the requested stats.

    Args:
        file_path (str): Path to the file to analyze.
        selected_stats (list, optional): Stats to compute. If None, every
            supported stat is computed.

    Returns:
        dict: Mapping with "file_name" plus each requested stat.
    """
    # default to every supported stat when the caller does not choose
    stats = (["line_count", "function_count", "comment_line_count"]
             if selected_stats is None else selected_stats)

    # always report the file name (dont change this please)
    results = {"file_name": os.path.basename(file_path)}

    # read the code file only once and keep it in memory
    with open(file_path, "r", encoding="utf-8") as source:
        code = source.read()

    if "line_count" in stats:
        results["line_count"] = count_lines(code)

    # lexing is only worth the cost when a token-based stat was requested
    # (UPDATE THIS WHEN NEEDED PLEASE !!!!!!!!)
    if "function_count" in stats or "comment_line_count" in stats:
        # pick the lexer matching the file extension, then tokenize
        lexer_cls = get_lexer_for_file(file_path)
        tokens = lexer_cls(code).tokenize()

        if "comment_line_count" in stats:
            results["comment_line_count"] = count_comment_lines(code)

        # parsing happens only when the function count is actually needed
        # (UPDATE THIS WHEN NEEDED PLEASE !!!!!!!!)
        if "function_count" in stats:
            # deferred import sidesteps an import-time error
            # (likely a circular import — original author unsure; unverified)
            from parser.parser import Parser
            results["function_count"] = count_functions(Parser(tokens).parse())

    return results
# this will count lines straight from the raw code
def count_lines(code):
    """
    Count the number of lines in the raw source text.

    Uses splitlines() so that an empty file reports 0 lines and a trailing
    newline does not add a phantom extra line (the old `count("\n") + 1`
    did both).  This also matches the line-based view count_comment_lines
    takes of the same text.

    Args:
        code (str): Full source text.

    Returns:
        int: Number of lines in the text.
    """
    return len(code.splitlines())
# this will count functions in the AST
def count_functions(ast):
    """
    Count every FunctionDefinition node reachable from the Program root.

    Args:
        ast: Parsed AST; anything other than a Program root yields 0.

    Returns:
        int: Total number of function definitions found.
    """
    # import function definition from the parser's ast
    from parser.ast import FunctionDefinition, Program

    if not isinstance(ast, Program):
        return 0

    # child attributes holding lists of nodes vs. a single node
    list_attrs = ("statements", "body", "arguments")
    single_attrs = ("left", "right", "value")

    def visit(node):
        # a None child is harmless here: it matches no isinstance/getattr test
        total = 1 if isinstance(node, FunctionDefinition) else 0
        # recurse into every list-valued child attribute the node carries
        for attr in list_attrs:
            children = getattr(node, attr, None)
            if children:
                for child in children:
                    total += visit(child)
        # recurse into single-node children (binary op sides, assignment value)
        for attr in single_attrs:
            if hasattr(node, attr):
                total += visit(getattr(node, attr))
        return total

    # start the recursive search from the root Program node
    return visit(ast)
# this will count comment lines from the raw text (our AST/parser drops comments)
# A COMMENT LINE IS A LINE THAT EXCLUSIVELY HAS A COMMENT
# so like: y = 5 #sets y to 5 IS NOT A COMMENT LINE!!!!!!!!
def count_comment_lines(code):
    """
    Count lines that are exclusively comments (no code on the same line).

    Recognizes '#' line comments (Ruby, Python) and '//' line comments
    (JavaScript, Go) — the old version only handled '#', silently missing
    every comment line in JS and Go files.  Block comments (/* ... */ and
    Ruby =begin/=end) are still not detected.

    Args:
        code (str): Full source text.

    Returns:
        int: Number of comment-only lines.
    """
    comment_prefixes = ("#", "//")
    comment_count = 0
    for line in code.splitlines():
        # drop surrounding whitespace so indented comment lines still count
        stripped = line.strip()
        # a line counts only when it starts with a comment marker
        if stripped.startswith(comment_prefixes):
            comment_count += 1
    return comment_count