@@ -246,9 +246,9 @@ def test_thai_sentence_with_overlap(self):
246246 candidate = "สวัสดี คุณ สบายดี ไหม วันนี้"
247247 reference = "สวัสดี คุณ อากาศ เป็น อย่างไร"
248248 rouge_1_score = _calculate_rouge_1_scores (candidate , reference )
249- # Should match "สวัสดี" and "คุณ" (2 out of 5 words each)
250- assert rouge_1_score . fmeasure > 0
251- assert rouge_1_score .fmeasure < 1.0
249+ # Tokenizer splits on combining marks and drops them (e.g. "สวัสดี" →
250+ # ["สว", "สด"]): 4 shared tokens, P = 4/8, R = 4/9 → F = 2PR/(P+R) = 8/17
251+ assert rouge_1_score .fmeasure == pytest . approx ( 8 / 17 )
252252
253253 def test_thai_polite_particle_variation (self ):
254254 """Thai: Same meaning with polite particle should show high match."""
@@ -257,7 +257,7 @@ def test_thai_polite_particle_variation(self):
257257 reference = "สวัสดี ค่ะ"
258258 rouge_1_score = _calculate_rouge_1_scores (candidate , reference )
259259 # Should match "สวัสดี" (1 out of 2 words)
260- assert rouge_1_score .fmeasure == pytest .approx (0.5 , rel = 0.1 )
260+ assert rouge_1_score .fmeasure == pytest .approx (0.5 )
261261
262262 # === Chinese Language Tests ===
263263
@@ -271,12 +271,11 @@ def test_chinese_greeting_identical(self):
271271 def test_chinese_sentence_with_overlap (self ):
272272 """Chinese: Sentences with common words should show partial match."""
273273 # Space-separated for tokenization
274- candidate = "今天 天气 很好 " # "Today's weather is good"
274+ candidate = "今天 天气 很 好 " # "Today's weather is very good"
275275 reference = "今天 我 很 开心" # "Today I am happy"
276276 rouge_1_score = _calculate_rouge_1_scores (candidate , reference )
277277 # Should match "今天" and "很" (2 of 4 tokens on each side → P = R = F = 0.5)
278- assert rouge_1_score .fmeasure > 0
279- assert rouge_1_score .fmeasure < 1.0
278+ assert rouge_1_score .fmeasure == pytest .approx (0.5 )
280279
281280 def test_chinese_different_sentences (self ):
282281 """Chinese: Completely different sentences should have zero score."""
@@ -318,7 +317,7 @@ def test_japanese_sentence_with_overlap(self):
318317 reference = "今日 は 仕事 が 忙しい です" # "Today work is busy"
319318 rouge_1_score = _calculate_rouge_1_scores (candidate , reference )
320319 # Should match "今日", "は", "が", "です"
321- assert rouge_1_score .fmeasure > 0.5
320+ assert rouge_1_score .fmeasure == pytest . approx ( 2 / 3 )
322321
323322 # === Korean Language Tests ===
324323
@@ -335,8 +334,7 @@ def test_korean_sentence_with_overlap(self):
335334 reference = "오늘 기분이 좋습니다" # "Today my mood is good"
336335 rouge_1_score = _calculate_rouge_1_scores (candidate , reference )
337336 # Should match "오늘" and "좋습니다"
338- assert rouge_1_score .fmeasure > 0
339- assert rouge_1_score .fmeasure < 1.0
337+ assert rouge_1_score .fmeasure == pytest .approx (2 / 3 )
340338
341339 # === European Languages (Latin script with accents) ===
342340
0 commit comments