Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added assets/noise/airport_gate.wav
Binary file not shown.
Binary file added assets/noise/baby_crying.wav
Binary file not shown.
Binary file added assets/noise/background_music.wav
Binary file not shown.
Binary file added assets/noise/coffee_shop.wav
Binary file not shown.
Binary file added assets/noise/loud_construction.wav
Binary file not shown.
Binary file added assets/noise/nyc_street.wav
Binary file not shown.
Binary file added assets/noise/road_noise.wav
Binary file not shown.
24 changes: 24 additions & 0 deletions configs/user_behaviors.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
default:
You're direct and to the point—you don't have time for lengthy explanations or unnecessary back-and-forth.
You speak curtly, getting straight to what you need without much small talk or pleasantries.
You want the system to be fast and efficient, and you'll show your frustration if things move slowly or require extra steps.

elderly_slow:
You are elderly and have difficulty understanding fast speech. You speak extremely slowly, with frequent deliberate pauses.
You occasionally ask the agent to repeat themselves slowly. You do not rush.
You frequently use ellipses (...) in your output to indicate pauses.
Ex. ("Ok yes... my confirmation code is... W... K... 2... E... X... B...")

aggressive_impatient:
You are impatient and easily frustrated when the agent does not resolve your requests immediately.
You speak very quickly and often interrupt the agent mid-sentence when they are talking for too long, to make your frustration clear and ask them to hurry it up.
Express your frustration whenever progress is not being made and remember to interrupt often.
You frequently output words in all caps to indicate your frustration and add emphasis.

forgetful_disorganized:
You are forgetful and prone to disfluencies (um..., uh..., huh..., let me think..., hold on a second..., let me find that piece of information..., etc).
You frequently use ellipses (...) in your output to indicate pauses.
You often forget the information you need and have to search for it mid-conversation.
Simple things like your name and date of birth you remember easily, but for any specific codes and IDs you need a couple of seconds to find it.
You often lose your train of thought and need a moment to remember what you were saying.
You also make mistakes when you speak and have to repeat yourself (ex. "hmm yeah one second... let me find that... ok its A E 2 B oh wait sorry actually its A F 2 B")
79 changes: 79 additions & 0 deletions src/eva/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import copy
import logging
from datetime import UTC, datetime
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Any, ClassVar, Literal

Expand Down Expand Up @@ -239,6 +240,75 @@ def _strip_other_mode_fields(data: dict) -> dict:
return {k: v for k, v in data.items() if k in _PIPELINE_FIELDS}


class BackgroundNoiseType(StrEnum):
    """Ambient noise type mixed into user audio (speech and silence).

    Each value (except ``bad_connection_static``) matches a WAV file shipped
    under ``assets/noise/``; ``PerturbationConfig.snr_db`` controls how loudly
    file-based noise is mixed in. ``bad_connection_static`` has no bundled
    WAV — presumably it is synthesized; verify against AudioPerturbator.
    """

    coffee_shop = "coffee_shop"
    airport_gate = "airport_gate"
    bad_connection_static = "bad_connection_static"
    road_noise = "road_noise"
    nyc_street = "nyc_street"
    background_music = "background_music"
    loud_construction = "loud_construction"
    baby_crying = "baby_crying"


class AccentType(StrEnum):
    """Accent variant — selects a different ElevenLabs agent ID for the user simulator.

    The value is upper-cased into the agent-ID env var name
    ``EVA_{ACCENT}_ACCENT_USER_{F|M}`` (e.g. ``EVA_FRENCH_ACCENT_USER_F``).
    """

    french = "french"
    indian = "indian"
    spanish = "spanish"
    chinese = "chinese"


class BehaviorType(StrEnum):
    """User behavior variant — modifies persona prompt and selects a different agent ID.

    Each value keys into ``configs/user_behaviors.yaml`` for the replacement
    persona prompt, and is upper-cased into the agent-ID env var name
    ``EVA_{BEHAVIOR}_USER_{F|M}`` (e.g. ``EVA_ELDERLY_SLOW_USER_M``).
    """

    elderly_slow = "elderly_slow"
    aggressive_impatient = "aggressive_impatient"
    forgetful_disorganized = "forgetful_disorganized"


class PerturbationConfig(BaseModel):
    """Perturbations applied to the simulated user during a benchmark run.

    Four axes (accent and behavior are mutually exclusive — enforced by the
    validator below; the other axes combine freely):
    - background_noise: ambient audio mixed into user speech and silence
    - accent: uses accent-specific ElevenLabs agent IDs (mutually exclusive with behavior)
    - behavior: modifies persona prompt + uses behavior-specific agent IDs (mutually exclusive with accent)
    - connection_degradation: stacks codec artifacts, packet loss, and volume fluctuation on top

    Agent ID env vars: EVA_{BEHAVIOR}_USER_{F|M} for behaviors,
    EVA_{ACCENT}_ACCENT_USER_{F|M} for accents.
    Default (no accent/behavior): EVA_DEFAULT_USER_F and EVA_DEFAULT_USER_M.
    """

    # Reject unknown keys so typos in env vars / CLI options fail loudly.
    model_config = ConfigDict(extra="forbid")

    background_noise: BackgroundNoiseType | None = Field(
        None,
        description="Ambient noise type to mix into user audio",
    )
    # Only meaningful when background_noise selects a file-based type.
    snr_db: float = Field(
        15.0,
        description="Signal-to-noise ratio in dB for file-based background noise (higher = cleaner)",
    )
    accent: AccentType | None = Field(None, description="Accent variant for the user simulator voice")
    behavior: BehaviorType | None = Field(None, description="User behavior variant (modifies persona + agent ID)")
    connection_degradation: bool = Field(
        False,
        description="Apply VoIP degradation (codec artifacts, packet loss, volume fluctuation) on top of other perturbations",
    )

    @model_validator(mode="after")
    def _validate_exclusivity(self) -> "PerturbationConfig":
        # Accent and behavior each claim the single ElevenLabs agent-ID slot,
        # so they cannot be combined.
        if self.accent is not None and self.behavior is not None:
            raise ValueError(
                "accent and behavior cannot both be set — they each require exclusive use of the ElevenLabs agent ID"
            )
        return self


# Discriminated union so Pydantic picks the right config type from env vars / CLI
ModelConfigUnion = Annotated[
Annotated[PipelineConfig, Tag("pipeline")]
Expand Down Expand Up @@ -399,6 +469,15 @@ class ModelDeployment(DeploymentTypedDict):
description="Recompute EVA aggregate scores from existing metrics.json files without re-running judges",
)

perturbation: PerturbationConfig | None = Field(
None,
description=(
"Perturbations applied to the simulated user. "
"Example: EVA_PERTURBATION__BACKGROUND_NOISE=coffee_shop EVA_PERTURBATION__ACCENT=french. "
"See PerturbationConfig for all options."
),
)

# Debug and filtering
debug: bool = Field(
False,
Expand Down
1 change: 1 addition & 0 deletions src/eva/orchestrator/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ async def _start_user_simulator(self) -> None:
server_url=f"ws://localhost:{self.port}/ws",
output_dir=self.output_dir,
user_simulator_context=self.agent.user_simulator_context,
perturbation_config=self.config.perturbation,
)

async def _run_conversation(self) -> str:
Expand Down
12 changes: 10 additions & 2 deletions src/eva/user_simulator/audio_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from elevenlabs.conversational_ai.conversation import AudioInterface

from eva.user_simulator.perturbation import AudioPerturbator
from eva.utils.logging import get_logger

logger = get_logger(__name__)
Expand Down Expand Up @@ -74,6 +75,7 @@ def __init__(
record_callback: Callable[[str, bytes], None] | None = None,
event_logger=None,
conversation_done_callback: Callable[[str], None] | None = None,
perturbator: AudioPerturbator | None = None,
):
"""Initialize the audio interface.

Expand All @@ -83,12 +85,14 @@ def __init__(
record_callback: Optional callback for recording audio (source, data)
event_logger: Optional ElevenLabsEventLogger for logging audio timing
conversation_done_callback: Optional callback for signaling conversation end
perturbator: Optional perturbator to apply to user audio before sending
"""
self.websocket_uri = websocket_uri
self.conversation_id = conversation_id
self.record_callback = record_callback
self.event_logger = event_logger
self.conversation_done_callback = conversation_done_callback
self._perturbator = perturbator

self.websocket = None
self.running = False
Expand Down Expand Up @@ -231,6 +235,8 @@ def output(self, audio: bytes) -> None:
"""
if self.running:
try:
if self._perturbator is not None:
audio = self._perturbator.apply(audio)
self.send_queue.put_nowait(audio)
# Record user audio
if self.record_callback:
Expand Down Expand Up @@ -337,8 +343,10 @@ async def _send_silence_frame(self, chunk_size: int = SEND_CHUNK_SIZE_PCM) -> bo
Returns:
True if silence was sent, False otherwise
"""
# Create PCM silence and convert to μ-law
silence_pcm = b"\x00" * chunk_size
if self._perturbator is not None and self._perturbator.has_ambient_noise:
silence_pcm = self._perturbator.get_ambient_chunk(chunk_size)
else:
silence_pcm = b"\x00" * chunk_size
silence_mulaw = self._convert_pcm_to_mulaw(silence_pcm)

if not silence_mulaw:
Expand Down
48 changes: 44 additions & 4 deletions src/eva/user_simulator/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,35 @@
import asyncio
import json
import os
from functools import lru_cache
from pathlib import Path

import httpx
import yaml
from elevenlabs.client import ElevenLabs
from elevenlabs.conversational_ai.conversation import (
Conversation,
ConversationInitiationData,
)

from eva.models.config import PerturbationConfig
from eva.user_simulator.audio_interface import BotToBotAudioInterface
from eva.user_simulator.event_logger import ElevenLabsEventLogger
from eva.user_simulator.perturbation import AudioPerturbator
from eva.utils.logging import get_logger
from eva.utils.prompt_manager import PromptManager

logger = get_logger(__name__)

_BEHAVIORS_PATH = Path(__file__).parent.parent.parent.parent / "configs" / "user_behaviors.yaml"
_PERSONA_GENDER = {1: "F", 2: "M"}


@lru_cache(maxsize=1)
def _load_behavior_prompts() -> dict[str, str]:
    """Load the behavior-name -> persona-prompt mapping from user_behaviors.yaml.

    Cached for the process lifetime, so the config file is read at most once.

    Returns:
        Mapping from behavior key (e.g. "default", "elderly_slow") to the
        persona prompt text for that behavior.
    """
    # Explicit UTF-8: the prompts contain non-ASCII characters (ellipses,
    # dashes) that would break under a platform-dependent default encoding.
    with open(_BEHAVIORS_PATH, encoding="utf-8") as f:
        return yaml.safe_load(f)


class UserSimulator:
"""ElevenLabs-based user simulator that connects to the assistant.
Expand All @@ -42,6 +55,7 @@ def __init__(
output_dir: Path,
timeout: int = 600,
user_simulator_context: str = "",
perturbation_config: PerturbationConfig | None = None,
):
"""Initialize the user simulator.

Expand All @@ -53,6 +67,7 @@ def __init__(
output_dir: Directory for output files
timeout: Conversation timeout in seconds
user_simulator_context: Domain-specific context line from agent config
perturbation_config: Optional perturbation to apply to user audio
"""
self.persona_config = persona_config
self.goal = goal
Expand All @@ -61,6 +76,13 @@ def __init__(
self.timeout = timeout
self.current_date_time = current_date_time
self.user_simulator_context = user_simulator_context
self._perturbation_config = perturbation_config
self._perturbator = (
AudioPerturbator(perturbation_config)
if perturbation_config is not None
and (perturbation_config.background_noise is not None or perturbation_config.connection_degradation)
else None
)

# State
self._conversation = None
Expand Down Expand Up @@ -132,6 +154,7 @@ async def _run_elevenlabs_conversation(self, api_key: str) -> str:
record_callback=self._record_audio,
event_logger=self.event_logger,
conversation_done_callback=self._on_conversation_end,
perturbator=self._perturbator,
)

# Start the audio interface WebSocket connection
Expand All @@ -147,7 +170,14 @@ async def _run_elevenlabs_conversation(self, api_key: str) -> str:
httpx_client=http_client,
)

# Build the user simulation prompt
# TODO: test and improve behavior prompts to more closely match desired user behavior
behavior_prompts = _load_behavior_prompts()
if self._perturbation_config and self._perturbation_config.behavior:
behavior_key = self._perturbation_config.behavior.value
user_persona = behavior_prompts[behavior_key]
else:
user_persona = behavior_prompts["default"]

prompt = PromptManager().get_prompt(
"user_simulator.system_prompt",
user_simulator_context=self.user_simulator_context,
Expand All @@ -160,7 +190,7 @@ async def _run_elevenlabs_conversation(self, api_key: str) -> str:
failure_condition=self.goal["decision_tree"]["failure_condition"],
edge_cases=self.goal["decision_tree"]["edge_cases"],
information_required=self.goal["information_required"],
user_persona=self.persona_config["user_persona"],
user_persona=user_persona,
starting_utterance=self.goal["starting_utterance"],
current_date_time=self.current_date_time,
)
Expand All @@ -170,11 +200,21 @@ async def _run_elevenlabs_conversation(self, api_key: str) -> str:

# ElevenLabs user simulator agent ID
persona_id = self.persona_config["user_persona_id"]
ELEVENLABS_USER_AGENT_ID = os.getenv(f"ELEVENLABS_USER_AGENT_ID_USER_PERSONA_{persona_id}")
gender = _PERSONA_GENDER[persona_id]
if self._perturbation_config and self._perturbation_config.accent:
key = self._perturbation_config.accent.value.upper()
env_var = f"EVA_{key}_ACCENT_USER_{gender}"
elif self._perturbation_config and self._perturbation_config.behavior:
key = self._perturbation_config.behavior.value.upper()
env_var = f"EVA_{key}_USER_{gender}"
else:
env_var = f"EVA_DEFAULT_USER_{gender}"
ELEVENLABS_USER_AGENT_ID = os.getenv(env_var)
logger.info(f"Using agent ID from env var: {env_var}")

# Create the conversation
if not ELEVENLABS_USER_AGENT_ID:
raise ValueError(f"Missing elevenlabs agent ID environment variable for user persona {persona_id}")
raise ValueError(f"Missing ElevenLabs agent ID environment variable: {env_var}")

self._client = client

Expand Down
Loading