@@ -94,4 +94,48 @@ def is_cornell_loss(result: str):
9494
9595 # Common loss indicators in result strings
9696 loss_indicators = ["L" , "Loss" , "loss" , "Defeated" , "defeated" ]
97- return any (indicator in result for indicator in loss_indicators )
97+ return any (indicator in result for indicator in loss_indicators )
98+
def extract_sport_type_from_title(title: str):
    """
    Extract the sport type from an article title by matching against known sports.

    Matching is case-insensitive and only accepts whole words/phrases, so a
    sport name that merely appears inside a longer word (for example
    "Rowing" inside "Growing") is not reported as a match.

    Args:
        title (str): The article title to analyze

    Returns:
        str: The sport name (as spelled in SPORT_URLS) if found, otherwise
            "sports" as default
    """
    # Nothing to analyze: answer before importing or scanning the sport table.
    if not title:
        return "sports"

    import re

    # Kept function-local as in the original code.
    # NOTE(review): presumably this avoids a circular import - confirm.
    from .constants import SPORT_URLS

    title_lower = title.lower()

    def _mentions(phrase: str) -> bool:
        """Return True if `phrase` occurs in the title as a whole word/phrase."""
        # Lookarounds rather than \b so phrases that begin or end with a
        # non-word character are still delimited correctly.
        pattern = r"(?<!\w)" + re.escape(phrase) + r"(?!\w)"
        return re.search(pattern, title_lower) is not None

    # Collect the unique, non-empty sport names; every SPORT_URLS value is
    # expected to provide a "sport" string.
    sport_names = {sport_data["sport"].strip() for sport_data in SPORT_URLS.values()}
    sport_names.discard("")

    # Longest first so "Swimming & Diving" wins over "Swimming"; names of equal
    # length are ordered alphabetically so the result does not depend on set
    # iteration order.
    sport_names_sorted = sorted(sport_names, key=lambda name: (-len(name), name))

    for sport_name in sport_names_sorted:
        if _mentions(sport_name.lower()):
            return sport_name

    # Special mappings for common variations in titles.
    # Only checked if no exact match was found above,
    # e.g. "Hockey" in a title should map to "Ice Hockey" in the sport names.
    special_mappings = {
        "hockey": "Ice Hockey",  # "Men's Hockey" or "Women's Hockey" -> "Ice Hockey"
    }

    for keyword, sport_name in special_mappings.items():
        # Only map to a sport that actually exists in the known sport names.
        if sport_name in sport_names and _mentions(keyword):
            return sport_name

    return "sports"
0 commit comments