1- import pytest
21import os
3- from spice .analyzers .count_comment_lines import count_comment_lines
4-
# Directory holding the sample source files used by these tests: the
# "sample-code" folder located two levels above this test file's directory.
SAMPLE_CODE_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "sample-code")
7-
8- # Test cases for count_comment_lines
# Each case pairs a sample file name with the number of full-line comments
# that count_comment_lines is expected to report for it.
@pytest.mark.parametrize(
    ("filename", "expected_comment_lines"),
    [
        ("sample_comments.py", 4),  # Based on the content of sample_comments.py
        ("example.py", 1),  # Based on the content of example.py (assuming it has one full comment line)
        ("example.js", 2),  # Based on the content of example.js (assuming two full comment lines)
        ("example.go", 2),  # Based on the content of example.go (assuming two full comment lines)
        ("example.rb", 1),  # Based on the content of example.rb (assuming one full comment line)
    ],
)
def test_count_comment_lines_python(filename, expected_comment_lines):
    """Check the full-line comment count reported for each sample file."""
    file_path = os.path.join(SAMPLE_CODE_DIR, filename)

    # Fail with an explicit message when the sample fixture is missing.
    sample_exists = os.path.exists(file_path)
    assert sample_exists, f"Sample file not found: { file_path } "

    actual_comment_lines = count_comment_lines(file_path)
    assert actual_comment_lines == expected_comment_lines
25-
def test_count_comment_lines_empty_file():
    """Test count_comment_lines with an empty file.

    The temporary file is written into the sample-code directory and is
    removed again even when the assertion fails, so a failing run does not
    leave a stray file behind.
    """
    empty_file_path = os.path.join(SAMPLE_CODE_DIR, "empty_test_file.py")
    with open(empty_file_path, "w") as f:
        f.write("")
    try:
        assert count_comment_lines(empty_file_path) == 0
    finally:
        os.remove(empty_file_path)  # Clean up the empty file
33-
def test_count_comment_lines_no_comments():
    """Test count_comment_lines with a file containing no comments.

    The temporary file is written into the sample-code directory and is
    removed again even when the assertion fails, so a failing run does not
    leave a stray file behind.
    """
    no_comments_path = os.path.join(SAMPLE_CODE_DIR, "no_comments_test_file.py")
    with open(no_comments_path, "w") as f:
        f.write("print(\" Hello\" )\n x = 1")
    try:
        assert count_comment_lines(no_comments_path) == 0
    finally:
        os.remove(no_comments_path)  # Clean up
41-
def test_count_comment_lines_only_inline():
    """Test count_comment_lines with only inline comments.

    Lines that mix code and a trailing comment must not be counted. The
    temporary file is written into the sample-code directory and is removed
    again even when the assertion fails.
    """
    inline_comments_path = os.path.join(SAMPLE_CODE_DIR, "inline_comments_test_file.py")
    with open(inline_comments_path, "w") as f:
        f.write("x = 1 # inline\n y = 2 # another inline")
    try:
        assert count_comment_lines(inline_comments_path) == 0
    finally:
        os.remove(inline_comments_path)  # Clean up
2+ import re
493
504
def count_comment_lines(file_path):
    """
    Count full-line comments in a source code file.

    A full-line comment is a line whose first non-whitespace characters are the
    language's single-line comment marker. Lines that contain code followed by
    a trailing comment are not counted, and neither are block comments
    (``/* ... */``) or docstrings.

    The language is chosen from the file extension, which is matched
    case-insensitively (``MAIN.PY`` is handled like ``main.py``).

    Args:
        file_path (str): Path to the source code file

    Returns:
        int: Number of full-line comments found

    Raises:
        ValueError: If the file extension is not supported
        FileNotFoundError: If the file doesn't exist
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: { file_path } ")

    # Single-line comment marker for each supported file extension.
    comment_patterns = {
        '.py': '#',
        '.js': '//',
        '.go': '//',
        '.rb': '#',
        '.java': '//',
        '.cpp': '//',
        '.c': '//',
        '.cs': '//',
        '.php': '//',
        '.swift': '//',
        '.kt': '//',
        '.scala': '//',
        '.rs': '//',
        '.ts': '//',
        '.jsx': '//',
        '.tsx': '//',
    }

    _, ext = os.path.splitext(file_path)
    comment_marker = comment_patterns.get(ext.lower())
    if comment_marker is None:
        raise ValueError(f"Unsupported file extension: { ext } ")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # latin-1 assigns a character to every byte value, so this second
        # attempt cannot fail on decoding.
        with open(file_path, 'r', encoding='latin-1') as f:
            content = f.read()

    # Blank lines never start with the (non-empty) marker, so empty or
    # whitespace-only files naturally yield 0 without a special case.
    return sum(
        1
        for line in content.splitlines()
        if line.lstrip().startswith(comment_marker)
    )
71+
72+
73+ def _is_full_line_comment (line , comment_marker ):
74+ """
75+ Check if a line is a full-line comment (contains only comment and whitespace).
76+
77+ Args:
78+ line (str): The line to check
79+ comment_marker (str): The comment marker for the language (e.g., '//', '#')
80+
81+ Returns:
82+ bool: True if the line is a full-line comment, False otherwise
83+ """
84+ # Strip whitespace from the line
85+ stripped_line = line .strip ()
86+
87+ # Empty line
88+ if not stripped_line :
89+ return False
90+
91+ # Line starts with comment marker (this is a full-line comment)
92+ if stripped_line .startswith (comment_marker ):
93+ return True
94+
95+ return False
96+
97+
98+ def _is_multiline_comment_start (line , language_ext ):
99+ """
100+ Check if a line starts a multi-line comment block.
101+ Currently handles basic cases for languages that support multi-line comments.
102+
103+ Args:
104+ line (str): The line to check
105+ language_ext (str): File extension to determine language
106+
107+ Returns:
108+ bool: True if line starts a multi-line comment
109+ """
110+ stripped = line .strip ()
111+
112+ # Languages with /* */ style multi-line comments
113+ if language_ext in ['.js' , '.go' , '.java' , '.cpp' , '.c' , '.cs' , '.php' , '.swift' , '.kt' , '.scala' , '.rs' , '.ts' , '.jsx' , '.tsx' ]:
114+ return stripped .startswith ('/*' )
115+
116+ # Python has """ or ''' for docstrings/multi-line strings
117+ elif language_ext == '.py' :
118+ return stripped .startswith ('"""' ) or stripped .startswith ("'''" )
119+
120+ return False
121+
122+
123+ def _is_multiline_comment_end (line , language_ext ):
124+ """
125+ Check if a line ends a multi-line comment block.
126+
127+ Args:
128+ line (str): The line to check
129+ language_ext (str): File extension to determine language
130+
131+ Returns:
132+ bool: True if line ends a multi-line comment
133+ """
134+ stripped = line .strip ()
135+
136+ # Languages with /* */ style multi-line comments
137+ if language_ext in ['.js' , '.go' , '.java' , '.cpp' , '.c' , '.cs' , '.php' , '.swift' , '.kt' , '.scala' , '.rs' , '.ts' , '.jsx' , '.tsx' ]:
138+ return stripped .endswith ('*/' )
139+
140+ # Python docstrings
141+ elif language_ext == '.py' :
142+ return stripped .endswith ('"""' ) or stripped .endswith ("'''" )
143+
144+ return False
145+
146+
def count_comment_lines_with_multiline(file_path):
    """
    Count comment lines, including lines inside multi-line comment blocks.

    Counted lines are:

    * full-line single-line comments (first non-whitespace text is the
      language's comment marker), and
    * every non-blank line of a block that opens at the start of a line:
      ``/* ... */`` for the C-style languages, triple-quoted strings for
      ``.py``. Ruby (``.rb``) has no block form recognised here.

    Blank lines are never counted, even inside a block. The file extension is
    matched case-insensitively.

    Args:
        file_path (str): Path to the source code file

    Returns:
        int: Number of comment lines (including multi-line comments)

    Raises:
        ValueError: If the file extension is not supported
        FileNotFoundError: If the file doesn't exist
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: { file_path } ")

    _, ext = os.path.splitext(file_path)
    ext_key = ext.lower()

    # Single-line comment marker for each supported file extension.
    comment_patterns = {
        '.py': '#',
        '.js': '//',
        '.go': '//',
        '.rb': '#',
        '.java': '//',
        '.cpp': '//',
        '.c': '//',
        '.cs': '//',
        '.php': '//',
        '.swift': '//',
        '.kt': '//',
        '.scala': '//',
        '.rs': '//',
        '.ts': '//',
        '.jsx': '//',
        '.tsx': '//',
    }

    if ext_key not in comment_patterns:
        raise ValueError(f"Unsupported file extension: { ext } ")

    comment_marker = comment_patterns[ext_key]

    # Block delimiters as {opener: closer}. Every supported extension other
    # than .py and .rb belongs to a language with /* */ comments.
    if ext_key == '.py':
        block_delimiters = {'"""': '"""', "'''": "'''"}
    elif ext_key == '.rb':
        block_delimiters = {}
    else:
        block_delimiters = {'/*': '*/'}

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        # latin-1 assigns a character to every byte value, so this second
        # attempt cannot fail on decoding.
        with open(file_path, 'r', encoding='latin-1') as f:
            content = f.read()

    comment_line_count = 0
    # Closer of the block currently open, or None when outside any block.
    # Remembering the exact closer stops a ''' line from ending a """ block.
    pending_closer = None

    for line in content.splitlines():
        stripped_line = line.strip()

        # Skip empty lines
        if not stripped_line:
            continue

        if pending_closer is not None:
            # Inside a block: every non-blank line counts, the closing one too.
            comment_line_count += 1
            if stripped_line.endswith(pending_closer):
                pending_closer = None
            continue

        opener = next(
            (d for d in block_delimiters if stripped_line.startswith(d)),
            None,
        )
        if opener is not None:
            comment_line_count += 1
            closer = block_delimiters[opener]
            # Look for the closer only after the opener. Otherwise a lone
            # '"""' (or '/*/') would be read as opening and closing at once,
            # because the two delimiters overlap on the same characters.
            if not stripped_line[len(opener):].endswith(closer):
                pending_closer = closer
            continue

        # Check for single-line comments
        if stripped_line.startswith(comment_marker):
            comment_line_count += 1

    return comment_line_count
0 commit comments