2121# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222
2323import logging
24+ import re
2425from collections import defaultdict
2526
2627from licensedcode .cache import get_licensing
@@ -182,8 +183,9 @@ def find_license_inconsistencies(declared_license, detected_licenses):
182183 "detected" : detected ,
183184 "suggestion" : "Review source files to determine correct license. Consider if this is dual-licensing or incorrect declaration."
184185 })
185- except Exception :
186- # Skip invalid detected licenses
186+ except Exception as e :
187+ # Log invalid detected licenses for debugging
188+ logger .debug (f"Invalid detected license expression '{ detected } ': { e } " )
187189 continue
188190
189191 return issues
@@ -201,14 +203,15 @@ def are_licenses_compatible(declared, detected, licensing):
201203 if declared_str == detected_str :
202204 return True
203205
204- # Split on common license expression delimiters for more precise matching
205- import re
206- declared_parts = set (re .split (r'[\s\-_ ()]+|\bor\b|\band\b|\bwith\b ' , declared_str ))
207- detected_parts = set (re .split (r'[\s\-_ ()]+|\bor\b|\band\b|\bwith\b ' , detected_str ))
206+ # Check if either expression contains the other
207+ # Split only on spaces, hyphens, and parentheses (not on operators)
208+ declared_parts = set (re .split (r'[\s()]+' , declared_str ))
209+ detected_parts = set (re .split (r'[\s()]+' , detected_str ))
208210
209- # Remove empty strings from split
210- declared_parts .discard ('' )
211- detected_parts .discard ('' )
211+ # Remove empty strings and common operators from comparison
212+ operators = {'or' , 'and' , 'with' , '' }
213+ declared_parts -= operators
214+ detected_parts -= operators
212215
213216 # Check if detected parts are subset of declared or vice versa
214217 if detected_parts and declared_parts :
@@ -264,7 +267,6 @@ def check_license_clarity(package):
264267
265268 # Check if license is too generic or unclear
266269 if package .declared_license_expression :
267- import re
268270 unclear_indicators = [
269271 "unknown" ,
270272 "see-license" ,
@@ -493,7 +495,6 @@ def check_nixpkgs_ecosystem_license(package):
493495 declared = package .declared_license_expression
494496
495497 # Check if declared license matches expected patterns
496- import re
497498 declared_lower = declared .lower ()
498499 for expected in expected_licenses :
499500 # Use word boundary matching to avoid false positives like "mit" in "limited"
@@ -523,7 +524,6 @@ def detect_copyleft_compliance_issues(package):
523524 declared_lower = package .declared_license_expression .lower ()
524525
525526 # Check for copyleft licenses using word boundary matching
526- import re
527527 copyleft_indicators = ["gpl" , "agpl" , "lgpl" , "mpl" , "epl" , "cpl" ]
528528 is_copyleft = any (
529529 re .search (r'\b' + re .escape (ind ) + r'\b' , declared_lower )
@@ -545,7 +545,6 @@ def detect_copyleft_compliance_issues(package):
545545 })
546546
547547 # Check for proprietary indicators in notes or description
548- import re
549548 proprietary_indicators = ["proprietary" , "commercial" , "closed" ]
550549 description = (package .description or "" ).lower ()
551550 notes = (package .notes or "" ).lower ()
@@ -578,6 +577,9 @@ def detect_license_file_issues(package):
578577 license_files = []
579578 resources = package .codebase_resources .all ()
580579
580+ # Known file extensions for license files
581+ license_extensions = {'.txt' , '.md' , '.rst' , '.html' , '.pdf' , '' }
582+
581583 for resource in resources :
582584 # Prefer the is_legal flag when available
583585 if getattr (resource , "is_legal" , False ):
@@ -586,6 +588,10 @@ def detect_license_file_issues(package):
586588
587589 # Fall back to matching common canonical license filenames
588590 name_lower = resource .name .lower ()
591+ # Extract file extension for validation
592+ file_ext = '.' + name_lower .split ('.' )[- 1 ] if '.' in name_lower else ''
593+
594+ # Check if it matches expected license file patterns with valid extensions
589595 if (
590596 # Exact common license filenames
591597 name_lower in {
@@ -594,13 +600,13 @@ def detect_license_file_issues(package):
594600 "copying" , "copying.txt" , "copying.md" ,
595601 "copyright" , "copyright.txt" , "copyright.md" ,
596602 }
597- # Files starting with common license-related prefixes
598- or name_lower .startswith ((
603+ # Files starting with common license-related prefixes and valid extensions
604+ or ( file_ext in license_extensions and name_lower .startswith ((
599605 "license." , "license-" ,
600606 "licence." , "licence-" ,
601607 "copying." , "copying-" ,
602608 "copyright." , "copyright-" ,
603- ))
609+ )))
604610 ):
605611 license_files .append (resource )
606612
0 commit comments