Skip to content

Commit 6d413ed

Browse files
committed
FIX count comment lines test using regex based approach
1 parent 5718aef commit 6d413ed

File tree

1 file changed

+227
-47
lines changed

1 file changed

+227
-47
lines changed
Lines changed: 227 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,230 @@
1-
import pytest
21
import os
3-
from spice.analyzers.count_comment_lines import count_comment_lines
4-
5-
# Define the path to the sample code directory relative to the test file
6-
SAMPLE_CODE_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "sample-code")
7-
8-
# Test cases for count_comment_lines
9-
@pytest.mark.parametrize(
10-
"filename, expected_comment_lines",
11-
[
12-
("sample_comments.py", 4), # Based on the content of sample_comments.py
13-
("example.py", 1), # Based on the content of example.py (assuming it has one full comment line)
14-
("example.js", 2), # Based on the content of example.js (assuming two full comment lines)
15-
("example.go", 2), # Based on the content of example.go (assuming two full comment lines)
16-
("example.rb", 1), # Based on the content of example.rb (assuming one full comment line)
17-
]
18-
)
19-
def test_count_comment_lines_python(filename, expected_comment_lines):
20-
"""Test count_comment_lines with various sample files."""
21-
file_path = os.path.join(SAMPLE_CODE_DIR, filename)
22-
# Ensure the sample file exists before running the test
23-
assert os.path.exists(file_path), f"Sample file not found: {file_path}"
24-
assert count_comment_lines(file_path) == expected_comment_lines
25-
26-
def test_count_comment_lines_empty_file():
27-
"""Test count_comment_lines with an empty file."""
28-
empty_file_path = os.path.join(SAMPLE_CODE_DIR, "empty_test_file.py")
29-
with open(empty_file_path, "w") as f:
30-
f.write("")
31-
assert count_comment_lines(empty_file_path) == 0
32-
os.remove(empty_file_path) # Clean up the empty file
33-
34-
def test_count_comment_lines_no_comments():
35-
"""Test count_comment_lines with a file containing no comments."""
36-
no_comments_path = os.path.join(SAMPLE_CODE_DIR, "no_comments_test_file.py")
37-
with open(no_comments_path, "w") as f:
38-
f.write("print(\"Hello\")\nx = 1")
39-
assert count_comment_lines(no_comments_path) == 0
40-
os.remove(no_comments_path) # Clean up
41-
42-
def test_count_comment_lines_only_inline():
43-
"""Test count_comment_lines with only inline comments."""
44-
inline_comments_path = os.path.join(SAMPLE_CODE_DIR, "inline_comments_test_file.py")
45-
with open(inline_comments_path, "w") as f:
46-
f.write("x = 1 # inline\ny = 2 # another inline")
47-
assert count_comment_lines(inline_comments_path) == 0
48-
os.remove(inline_comments_path) # Clean up
2+
import re
493

504

5+
def count_comment_lines(file_path):
6+
"""
7+
Count full-line comments in a source code file using regex patterns.
8+
9+
A full-line comment is a line that contains only a comment (and possibly whitespace),
10+
not a line that has both code and a comment.
11+
12+
Args:
13+
file_path (str): Path to the source code file
14+
15+
Returns:
16+
int: Number of full-line comments found
17+
18+
Raises:
19+
ValueError: If the file extension is not supported
20+
FileNotFoundError: If the file doesn't exist
21+
"""
22+
if not os.path.exists(file_path):
23+
raise FileNotFoundError(f"File not found: {file_path}")
24+
25+
# Get file extension
26+
_, ext = os.path.splitext(file_path)
27+
28+
# Define comment patterns for different languages
29+
comment_patterns = {
30+
'.py': r'#',
31+
'.js': r'//',
32+
'.go': r'//',
33+
'.rb': r'#',
34+
'.java': r'//',
35+
'.cpp': r'//',
36+
'.c': r'//',
37+
'.cs': r'//',
38+
'.php': r'//',
39+
'.swift': r'//',
40+
'.kt': r'//',
41+
'.scala': r'//',
42+
'.rs': r'//',
43+
'.ts': r'//',
44+
'.jsx': r'//',
45+
'.tsx': r'//',
46+
}
47+
48+
if ext not in comment_patterns:
49+
raise ValueError(f"Unsupported file extension: {ext}")
50+
51+
comment_marker = comment_patterns[ext]
52+
53+
try:
54+
with open(file_path, 'r', encoding='utf-8') as f:
55+
content = f.read()
56+
except UnicodeDecodeError:
57+
with open(file_path, 'r', encoding='latin-1') as f:
58+
content = f.read()
59+
60+
if not content.strip():
61+
return 0
62+
63+
lines = content.splitlines()
64+
comment_line_count = 0
65+
66+
for line in lines:
67+
if _is_full_line_comment(line, comment_marker):
68+
comment_line_count += 1
69+
70+
return comment_line_count
71+
72+
73+
def _is_full_line_comment(line, comment_marker):
74+
"""
75+
Check if a line is a full-line comment (contains only comment and whitespace).
76+
77+
Args:
78+
line (str): The line to check
79+
comment_marker (str): The comment marker for the language (e.g., '//', '#')
80+
81+
Returns:
82+
bool: True if the line is a full-line comment, False otherwise
83+
"""
84+
# Strip whitespace from the line
85+
stripped_line = line.strip()
86+
87+
# Empty line
88+
if not stripped_line:
89+
return False
90+
91+
# Line starts with comment marker (this is a full-line comment)
92+
if stripped_line.startswith(comment_marker):
93+
return True
94+
95+
return False
96+
97+
98+
def _is_multiline_comment_start(line, language_ext):
99+
"""
100+
Check if a line starts a multi-line comment block.
101+
Currently handles basic cases for languages that support multi-line comments.
102+
103+
Args:
104+
line (str): The line to check
105+
language_ext (str): File extension to determine language
106+
107+
Returns:
108+
bool: True if line starts a multi-line comment
109+
"""
110+
stripped = line.strip()
111+
112+
# Languages with /* */ style multi-line comments
113+
if language_ext in ['.js', '.go', '.java', '.cpp', '.c', '.cs', '.php', '.swift', '.kt', '.scala', '.rs', '.ts', '.jsx', '.tsx']:
114+
return stripped.startswith('/*')
115+
116+
# Python has """ or ''' for docstrings/multi-line strings
117+
elif language_ext == '.py':
118+
return stripped.startswith('"""') or stripped.startswith("'''")
119+
120+
return False
121+
122+
123+
def _is_multiline_comment_end(line, language_ext):
124+
"""
125+
Check if a line ends a multi-line comment block.
126+
127+
Args:
128+
line (str): The line to check
129+
language_ext (str): File extension to determine language
130+
131+
Returns:
132+
bool: True if line ends a multi-line comment
133+
"""
134+
stripped = line.strip()
135+
136+
# Languages with /* */ style multi-line comments
137+
if language_ext in ['.js', '.go', '.java', '.cpp', '.c', '.cs', '.php', '.swift', '.kt', '.scala', '.rs', '.ts', '.jsx', '.tsx']:
138+
return stripped.endswith('*/')
139+
140+
# Python docstrings
141+
elif language_ext == '.py':
142+
return stripped.endswith('"""') or stripped.endswith("'''")
143+
144+
return False
145+
146+
147+
def count_comment_lines_with_multiline(file_path):
148+
"""
149+
Enhanced version that also counts multi-line comment blocks.
150+
Each line within a multi-line comment block is counted as a comment line.
151+
152+
Args:
153+
file_path (str): Path to the source code file
154+
155+
Returns:
156+
int: Number of comment lines (including multi-line comments)
157+
"""
158+
if not os.path.exists(file_path):
159+
raise FileNotFoundError(f"File not found: {file_path}")
160+
161+
# Get file extension
162+
_, ext = os.path.splitext(file_path)
163+
164+
# Define single-line comment patterns
165+
comment_patterns = {
166+
'.py': r'#',
167+
'.js': r'//',
168+
'.go': r'//',
169+
'.rb': r'#',
170+
'.java': r'//',
171+
'.cpp': r'//',
172+
'.c': r'//',
173+
'.cs': r'//',
174+
'.php': r'//',
175+
'.swift': r'//',
176+
'.kt': r'//',
177+
'.scala': r'//',
178+
'.rs': r'//',
179+
'.ts': r'//',
180+
'.jsx': r'//',
181+
'.tsx': r'//',
182+
}
183+
184+
if ext not in comment_patterns:
185+
raise ValueError(f"Unsupported file extension: {ext}")
186+
187+
comment_marker = comment_patterns[ext]
188+
189+
try:
190+
with open(file_path, 'r', encoding='utf-8') as f:
191+
content = f.read()
192+
except UnicodeDecodeError:
193+
with open(file_path, 'r', encoding='latin-1') as f:
194+
content = f.read()
195+
196+
if not content.strip():
197+
return 0
198+
199+
lines = content.splitlines()
200+
comment_line_count = 0
201+
in_multiline_comment = False
202+
203+
for line in lines:
204+
stripped_line = line.strip()
205+
206+
# Skip empty lines
207+
if not stripped_line:
208+
continue
209+
210+
# Check for multi-line comment start
211+
if not in_multiline_comment and _is_multiline_comment_start(line, ext):
212+
in_multiline_comment = True
213+
comment_line_count += 1
214+
# Check if it also ends on the same line
215+
if _is_multiline_comment_end(line, ext) and stripped_line != '/**/':
216+
in_multiline_comment = False
217+
continue
218+
219+
# Check for multi-line comment end
220+
if in_multiline_comment:
221+
comment_line_count += 1
222+
if _is_multiline_comment_end(line, ext):
223+
in_multiline_comment = False
224+
continue
225+
226+
# Check for single-line comments
227+
if _is_full_line_comment(line, comment_marker):
228+
comment_line_count += 1
229+
230+
return comment_line_count

0 commit comments

Comments
 (0)