Run:  uv run pytest tests/test_deepeval_analysis.py -v
Skip: uv run pytest tests/ -m "not deepeval"
"""
11+
import pytest

from tests.conftest import skip_no_deepeval

# Every test in this module is both marked `deepeval` (so the suite can be
# deselected with -m "not deepeval") and skipped when deepeval is unavailable.
pytestmark = [pytest.mark.deepeval, skip_no_deepeval]
1617
18+
@pytest.fixture(scope="module")
def judge_model():
    """Module-scoped LLM judge backed by a local Ollama server.

    Returns a deepeval ``OllamaModel`` pointed at the default local Ollama
    endpoint.  Imported lazily inside the fixture so collecting this module
    does not require deepeval unless the fixture is actually used.
    """
    from deepeval.models import OllamaModel

    return OllamaModel(
        model="command-r7b:latest",
        base_url="http://localhost:11434",
    )
2427
28+
2529@pytest .fixture (scope = "module" )
2630def claim_extraction_quality (judge_model ):
2731 from deepeval .metrics import GEval
2832 from deepeval .test_case import LLMTestCaseParams
33+
2934 return GEval (
3035 name = "Claim Extraction Quality" ,
3136 criteria = (
@@ -46,10 +51,12 @@ def claim_extraction_quality(judge_model):
4651 threshold = 0.6 ,
4752 )
4853
54+
4955@pytest .fixture (scope = "module" )
5056def synthesis_faithfulness (judge_model ):
5157 from deepeval .metrics import GEval
5258 from deepeval .test_case import LLMTestCaseParams
59+
5360 return GEval (
5461 name = "Synthesis Faithfulness" ,
5562 criteria = (
@@ -69,6 +76,7 @@ def synthesis_faithfulness(judge_model):
6976 threshold = 0.6 ,
7077 )
7178
79+
7280def test_sp_claim_extraction (judge_model , claim_extraction_quality ):
7381 """SP worker should extract relevant claims with proper epistemic tagging."""
7482 from deepeval import assert_test
@@ -89,11 +97,12 @@ def test_sp_claim_extraction(judge_model, claim_extraction_quality):
8997 ' "epistemic_tag": "inference", "source_tier": 3},'
9098 ' {"text": "Some economists skeptical about effectiveness", '
9199 ' "epistemic_tag": "uncertain", "source_tier": 3}'
92- ']}'
100+ "]}"
93101 ),
94102 )
95103 assert_test (test_case , [claim_extraction_quality ])
96104
105+
97106def test_as_synthesis_faithfulness (judge_model , synthesis_faithfulness ):
98107 """AS synthesis should faithfully represent audit node outputs."""
99108 from deepeval import assert_test