Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
### Bugs Fixed

- Fixed `evaluate()` raising `EvaluationException: (InternalError) unhashable type: 'list'` when an evaluator emitted a list value under a `_result`-suffixed column. Binary aggregation now skips such columns with a warning instead of aborting the entire run.
- Fixed `task_adherence` red team scoring by adding `scenario=redteam` to the RAI scorer evaluation payload, ensuring the server-side score mapping correctly routes to Direct mapping for attack success determination.
- Fixed row classification double-counting in `_calculate_aoai_evaluation_summary` where errored rows were counted separately and could also be counted as passed/failed. Rows are now classified into mutually exclusive buckets with priority: passed > failed > errored > skipped.
- Fixed row classification where rows with empty or missing results lists were incorrectly counted as "passed" (the condition `passed_count == len(results) - error_count` evaluated `0 == 0` as True).
- Fixed `_get_metric_result` prefix matching where shorter metric names (e.g., `xpia`) could match before longer, more-specific ones (e.g., `xpia_manipulated_content`). Now sorts by length descending for correct longest-prefix matching.
Expand Down
4 changes: 2 additions & 2 deletions sdk/evaluation/azure-ai-evaluation/assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/evaluation/azure-ai-evaluation",
"Tag": "python/evaluation/azure-ai-evaluation_67d91b0617"
}
"Tag": "python/evaluation/azure-ai-evaluation_0748353c8d"
}
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ async def _score_piece_async(
"query": "query",
"response": response_text,
"context": context,
"scenario": "redteam",
}

# Get metric and annotation task for risk category
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,55 @@ async def test_score_async_error_handling(self, mock_credential, mock_azure_ai_p
with pytest.raises(Exception, match="RAI service error"):
await scorer.score_async(mock_message, objective="Test attack")

def test_get_context_for_piece_from_labels(self, mock_credential, mock_azure_ai_project, mock_logger):
@pytest.mark.asyncio
async def test_score_async_sends_redteam_scenario(self, mock_credential, mock_azure_ai_project, mock_logger):
    """Regression: RAIServiceScorer must include scenario=redteam in eval_input
    so the server-side ContextDependent score mapping routes to Direct for red team."""
    scorer = RAIServiceScorer(
        credential=mock_credential,
        azure_ai_project=mock_azure_ai_project,
        risk_category=RiskCategory.Violence,
        logger=mock_logger,
    )

    # Minimal message: a single assistant piece with no labels/context.
    mock_piece = MagicMock()
    mock_piece.id = "test-id"
    mock_piece.converted_value = "Test response"
    mock_piece.original_value = "Original content"
    mock_piece.labels = {}
    mock_piece.api_role = "assistant"

    mock_message = MagicMock()
    mock_message.message_pieces = [mock_piece]

    # NOTE: MagicMock(name=...) does NOT set a `.name` attribute -- `name` is a
    # reserved Mock-constructor kwarg that names the mock object itself. Assign
    # `.name` after construction so code reading `result.name` sees the metric
    # string rather than an auto-generated child mock.
    mock_result = MagicMock(
        metric="violence",
        score=5,
        reason="Harmful content",
        threshold=3,
        passed=False,
        label="high",
    )
    mock_result.name = "violence"

    mock_eval_result = MagicMock()
    mock_eval_result.results = [mock_result]

    with patch(
        "azure.ai.evaluation.red_team._foundry._rai_scorer.evaluate_with_rai_service_sync",
        new_callable=AsyncMock,
    ) as mock_eval:
        mock_eval.return_value = mock_eval_result

        await scorer.score_async(mock_message, objective="Test attack")

        # Verify scenario=redteam was included in the eval_input
        call_args = mock_eval.call_args
        eval_input = call_args[0][0]  # First positional arg
        assert eval_input.get("scenario") == "redteam", (
            "RAIServiceScorer must send scenario=redteam so server-side "
            "ContextDependent mapping routes to Direct for red team evaluations"
        )
"""Test context retrieval from message labels."""
scorer = RAIServiceScorer(
credential=mock_credential,
Expand Down