@@ -11,6 +11,17 @@ use crate::language::{Language, LanguageRegistry};
1111use super :: complexity:: ComplexityAnalyzer ;
1212use super :: stats:: { FileStats , LineStats } ;
1313
/// Describes how a string literal that is still open at the end of a line
/// will eventually be terminated.
///
/// The line counter keeps one of these while it is inside a multi-line
/// string so that every following line can be classified and tested for the
/// closing delimiter.
#[derive(Debug, Clone)]
struct StringDelimiter {
    /// Text that closes the string.
    end_pattern: String,
    /// `true` for raw strings, whose contents get no escape processing.
    is_raw: bool,
    /// `true` for Python docstrings, which are counted as comments instead of code.
    is_docstring: bool,
}
24+
1425/// Analyzes individual source files.
1526pub struct FileAnalyzer {
1627 registry : Arc < LanguageRegistry > ,
@@ -92,6 +103,8 @@ impl FileAnalyzer {
92103 let mut stats = LineStats :: default ( ) ;
93104 let mut in_block_comment = false ;
94105 let mut block_comment_end = "" ;
106+ let mut in_multiline_string = false ;
107+ let mut string_delimiter: Option < StringDelimiter > = None ;
95108
96109 for line in content. lines ( ) {
97110 stats. total += 1 ;
@@ -103,6 +116,23 @@ impl FileAnalyzer {
103116 continue ;
104117 }
105118
119+ // Inside multiline string
120+ if in_multiline_string {
121+ if let Some ( ref delim) = string_delimiter {
122+ // Docstrings count as comments, regular strings as code
123+ if delim. is_docstring {
124+ stats. comment += 1 ;
125+ } else {
126+ stats. code += 1 ;
127+ }
128+ if self . line_ends_string ( line, delim) {
129+ in_multiline_string = false ;
130+ string_delimiter = None ;
131+ }
132+ }
133+ continue ;
134+ }
135+
106136 // Inside block comment
107137 if in_block_comment {
108138 stats. comment += 1 ;
@@ -120,6 +150,32 @@ impl FileAnalyzer {
120150 continue ;
121151 }
122152
153+ // Check if line starts a multiline string
154+ // starts_multiline_string only returns Some if the string is NOT closed on the same line
155+ if let Some ( delim) = self . starts_multiline_string ( line, lang) {
156+ // Docstrings count as comments, regular strings as code
157+ if delim. is_docstring {
158+ stats. comment += 1 ;
159+ } else {
160+ stats. code += 1 ;
161+ }
162+ in_multiline_string = true ;
163+ string_delimiter = Some ( delim) ;
164+ continue ;
165+ }
166+
167+ // Check for single-line Python docstring ("""...""" on one line)
168+ if lang. name == "Python" {
169+ if let Some ( is_docstring) = self . is_single_line_docstring ( trimmed) {
170+ if is_docstring {
171+ stats. comment += 1 ;
172+ } else {
173+ stats. code += 1 ;
174+ }
175+ continue ;
176+ }
177+ }
178+
123179 // Check for block comment start
124180 let mut found_block_start = false ;
125181 for ( start, end) in & lang. block_comments {
@@ -177,6 +233,188 @@ impl FileAnalyzer {
177233 stats
178234 }
179235
236+ /// Check if a line starts a multiline string literal.
237+ /// Returns the delimiter info if a multiline string starts on this line.
238+ fn starts_multiline_string ( & self , line : & str , lang : & Language ) -> Option < StringDelimiter > {
239+ // Check for Rust raw strings: r#"..."# or r##"..."##
240+ if lang. name == "Rust" {
241+ if let Some ( delim) = self . detect_rust_raw_string_start ( line) {
242+ return Some ( delim) ;
243+ }
244+ }
245+
246+ // Check for Python triple-quoted strings (including docstrings)
247+ if lang. name == "Python" {
248+ for pattern in & [ "\" \" \" " , "'''" ] {
249+ if let Some ( pos) = line. find ( pattern) {
250+ let before = & line[ ..pos] ;
251+ if !self . is_in_string ( before, lang) {
252+ let after = & line[ pos + 3 ..] ;
253+ // Check if it closes on the same line
254+ if after. find ( pattern) . is_none ( ) {
255+ // Docstring: no assignment before the triple quotes
256+ let is_docstring = !before. contains ( '=' ) ;
257+ return Some ( StringDelimiter {
258+ end_pattern : pattern. to_string ( ) ,
259+ is_raw : false ,
260+ is_docstring,
261+ } ) ;
262+ }
263+ }
264+ }
265+ }
266+ }
267+
268+ // Check for regular multiline strings (string not closed on same line)
269+ let mut in_string = false ;
270+ let mut string_char = '"' ;
271+ let mut escape_next = false ;
272+
273+ let chars: Vec < char > = line. chars ( ) . collect ( ) ;
274+ let mut i = 0 ;
275+ while i < chars. len ( ) {
276+ let c = chars[ i] ;
277+
278+ if escape_next {
279+ escape_next = false ;
280+ i += 1 ;
281+ continue ;
282+ }
283+
284+ if c == '\\' && in_string {
285+ escape_next = true ;
286+ i += 1 ;
287+ continue ;
288+ }
289+
290+ if ( c == '"' || c == '\'' ) && !in_string {
291+ // Calculate byte position for string slice
292+ let byte_pos: usize = chars[ ..i] . iter ( ) . map ( |ch| ch. len_utf8 ( ) ) . sum ( ) ;
293+ let before = & line[ ..byte_pos] ;
294+ if !self . is_in_string ( before, lang) {
295+ in_string = true ;
296+ string_char = c;
297+ }
298+ } else if c == string_char && in_string {
299+ in_string = false ;
300+ }
301+
302+ i += 1 ;
303+ }
304+
305+ if in_string {
306+ return Some ( StringDelimiter {
307+ end_pattern : string_char. to_string ( ) ,
308+ is_raw : false ,
309+ is_docstring : false ,
310+ } ) ;
311+ }
312+
313+ None
314+ }
315+
316+ /// Detect Rust raw string start (r#"..."# or r##"..."##, etc.)
317+ fn detect_rust_raw_string_start ( & self , line : & str ) -> Option < StringDelimiter > {
318+ let bytes = line. as_bytes ( ) ;
319+ let len = bytes. len ( ) ;
320+ let mut i = 0 ;
321+
322+ while i < len {
323+ // Look for 'r' followed by optional '#' and '"'
324+ if bytes[ i] == b'r' && i + 1 < len {
325+ let start = i;
326+ i += 1 ;
327+
328+ // Count the number of '#' after 'r'
329+ let mut hash_count = 0 ;
330+ while i < len && bytes[ i] == b'#' {
331+ hash_count += 1 ;
332+ i += 1 ;
333+ }
334+
335+ // Check for opening '"'
336+ if i < len && bytes[ i] == b'"' {
337+ // Check that 'r' is not part of an identifier
338+ if start == 0 || !bytes[ start - 1 ] . is_ascii_alphanumeric ( ) {
339+ // Build the closing pattern: "# repeated hash_count times
340+ let end_pattern = format ! ( "\" {}" , "#" . repeat( hash_count) ) ;
341+
342+ // Check if it closes on the same line
343+ let after_quote = & line[ i + 1 ..] ;
344+ if after_quote. find ( & end_pattern) . is_none ( ) {
345+ return Some ( StringDelimiter {
346+ end_pattern,
347+ is_raw : true ,
348+ is_docstring : false ,
349+ } ) ;
350+ }
351+ }
352+ }
353+ }
354+ i += 1 ;
355+ }
356+
357+ None
358+ }
359+
360+ /// Check if a line ends the current multiline string.
361+ fn line_ends_string ( & self , line : & str , delim : & StringDelimiter ) -> bool {
362+ if delim. is_raw {
363+ // For raw strings, just look for the closing pattern
364+ line. contains ( & delim. end_pattern )
365+ } else {
366+ // For regular strings, need to handle escapes
367+ let mut chars = line. chars ( ) . peekable ( ) ;
368+ let target: Vec < char > = delim. end_pattern . chars ( ) . collect ( ) ;
369+
370+ while let Some ( c) = chars. next ( ) {
371+ if c == '\\' {
372+ // Skip escaped character
373+ chars. next ( ) ;
374+ continue ;
375+ }
376+
377+ if !target. is_empty ( ) && c == target[ 0 ] {
378+ // Check if this matches the closing pattern
379+ let mut matched = true ;
380+ for expected in target. iter ( ) . skip ( 1 ) {
381+ if chars. next ( ) != Some ( * expected) {
382+ matched = false ;
383+ break ;
384+ }
385+ }
386+ if matched {
387+ return true ;
388+ }
389+ }
390+ }
391+ false
392+ }
393+ }
394+
395+ /// Check if a line is a single-line Python docstring.
396+ /// Returns Some(true) if it's a docstring, Some(false) if it's a regular string assignment,
397+ /// None if it doesn't contain a complete triple-quoted string.
398+ fn is_single_line_docstring ( & self , trimmed : & str ) -> Option < bool > {
399+ for pattern in & [ "\" \" \" " , "'''" ] {
400+ if let Some ( start_pos) = trimmed. find ( pattern) {
401+ let after_start = & trimmed[ start_pos + 3 ..] ;
402+ // Check if it closes on the same line
403+ if let Some ( end_pos) = after_start. find ( pattern) {
404+ // Make sure there's nothing significant after the closing quotes
405+ let after_end = after_start[ end_pos + 3 ..] . trim ( ) ;
406+ if after_end. is_empty ( ) || after_end. starts_with ( '#' ) {
407+ // It's a complete triple-quoted string on one line
408+ let before = & trimmed[ ..start_pos] ;
409+ // Docstring: no assignment before the triple quotes
410+ return Some ( !before. contains ( '=' ) ) ;
411+ }
412+ }
413+ }
414+ }
415+ None
416+ }
417+
180418 /// Check if a string position is likely inside a string literal.
181419 fn is_in_string ( & self , text : & str , _lang : & Language ) -> bool {
182420 // Simplified check: count unescaped quotes
@@ -270,4 +508,34 @@ mod tests {
270508 assert_eq ! ( stats. comment, 4 ) ;
271509 assert_eq ! ( stats. blank, 0 ) ;
272510 }
511+
/// A double-quoted string spanning three lines: the middle line looks like a
/// `//` comment but must be counted as code because it lies inside the string.
#[test]
fn test_count_lines_multiline_string() {
    let analyzer = FileAnalyzer::new(Arc::new(LanguageRegistry::empty()), &Config::default());
    let rust = make_rust_lang();

    // Exactly three lines; the trailing newline must not add a fourth.
    let source = "let s = \"hello\n// not a comment\nworld\";\n";
    let stats = analyzer.count_lines(source, &rust);

    assert_eq!(stats.total, 3);
    assert_eq!(stats.code, 3, "All lines should be code (inside string)");
    assert_eq!(stats.comment, 0, "No comments - // is inside string");
    assert_eq!(stats.blank, 0);
}
526+
/// A raw string spanning four lines: the inner lines look like a line comment
/// and a block comment but must be counted as code because they lie inside
/// the raw string.
#[test]
fn test_count_lines_raw_string() {
    let analyzer = FileAnalyzer::new(Arc::new(LanguageRegistry::empty()), &Config::default());
    let rust = make_rust_lang();

    // Opens with `r#"` and closes with `"#` on the fourth line.
    let source = "let s = r#\"hello\n// not a comment\n/* also not */\nworld\"#;\n";
    let stats = analyzer.count_lines(source, &rust);

    assert_eq!(stats.total, 4);
    assert_eq!(stats.code, 4, "All lines should be code (inside raw string)");
    assert_eq!(stats.comment, 0, "No comments - everything is inside raw string");
    assert_eq!(stats.blank, 0);
}
273541}
0 commit comments