diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index 76b7f36f74..ec9fb5865b 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -79,11 +79,13 @@
         )
     ]
 )
-AGENT_INFO = types.evals.AgentInfo(
-    agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456",
-    name="agent-1",
+AGENT_CONFIG = types.evals.AgentConfig(
+    agent_id="agent-1",
+    agent_resource_name=(
+        "projects/123/locations/us-central1/reasoningEngines/456"
+    ),
     instruction="agent-1 instruction",
-    tool_declarations=[TOOL],
+    tools=[TOOL],
 )
 DEFAULT_PROMPT_TEMPLATE = "{prompt}"
 INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame(
@@ -103,55 +105,55 @@
 
 
 def test_create_eval_run_data_source_evaluation_set(client):
-    """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
-    client._api_client._http_options.api_version = "v1beta1"
-    evaluation_run = client.evals.create_evaluation_run(
-        name="test4",
-        display_name="test4",
-        dataset=types.EvaluationRunDataSource(evaluation_set=EVAL_SET_NAME),
-        dest=GCS_DEST,
-        metrics=[
-            GENERAL_QUALITY_METRIC,
-            types.RubricMetric.FINAL_RESPONSE_QUALITY,
-            LLM_METRIC,
-            EXACT_MATCH_COMPUTATION_BASED_METRIC,
-            BLEU_COMPUTATION_BASED_METRIC,
-        ],
-        agent_info=AGENT_INFO,
-        labels={"label1": "value1"},
-    )
-    assert isinstance(evaluation_run, types.EvaluationRun)
-    assert evaluation_run.display_name == "test4"
-    assert evaluation_run.state == types.EvaluationRunState.PENDING
-    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-    assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
-    assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
-        output_config=genai_types.OutputConfig(
-            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
-        ),
-        metrics=[
-            GENERAL_QUALITY_METRIC,
-            FINAL_RESPONSE_QUALITY_METRIC,
-            LLM_METRIC,
-            EXACT_MATCH_COMPUTATION_BASED_METRIC,
-            BLEU_COMPUTATION_BASED_METRIC,
-        ],
-    )
-    assert evaluation_run.inference_configs[
-        AGENT_INFO.name
-    ] == types.EvaluationRunInferenceConfig(
-        agent_config=types.EvaluationRunAgentConfig(
-            developer_instruction=genai_types.Content(
-                parts=[genai_types.Part(text="agent-1 instruction")]
-            ),
-            tools=[TOOL],
-        )
-    )
-    assert evaluation_run.labels == {
-        "vertex-ai-evaluation-agent-engine-id": "456",
-        "label1": "value1",
-    }
-    assert evaluation_run.error is None
+    """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
+    client._api_client._http_options.api_version = "v1beta1"
+    evaluation_run = client.evals.create_evaluation_run(
+        name="test4",
+        display_name="test4",
+        dataset=types.EvaluationRunDataSource(evaluation_set=EVAL_SET_NAME),
+        dest=GCS_DEST,
+        metrics=[
+            GENERAL_QUALITY_METRIC,
+            types.RubricMetric.FINAL_RESPONSE_QUALITY,
+            LLM_METRIC,
+            EXACT_MATCH_COMPUTATION_BASED_METRIC,
+            BLEU_COMPUTATION_BASED_METRIC,
+        ],
+        agent_definitions={"agent-1": AGENT_CONFIG},  # dict key is the candidate name
+        labels={"label1": "value1"},
+    )
+    assert isinstance(evaluation_run, types.EvaluationRun)
+    assert evaluation_run.display_name == "test4"
+    assert evaluation_run.state == types.EvaluationRunState.PENDING
+    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+    assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
+    assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
+        output_config=genai_types.OutputConfig(
+            gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
+        ),
+        metrics=[
+            GENERAL_QUALITY_METRIC,
+            FINAL_RESPONSE_QUALITY_METRIC,
+            LLM_METRIC,
+            EXACT_MATCH_COMPUTATION_BASED_METRIC,
+            BLEU_COMPUTATION_BASED_METRIC,
+        ],
+    )
+    assert evaluation_run.inference_configs[
+        "agent-1"
+    ] == types.EvaluationRunInferenceConfig(
+        agent_config=types.EvaluationRunAgentConfig(  # NOTE(review): confirm the replay still returns agent_config, not agent_run_config
+            developer_instruction=genai_types.Content(
+                parts=[genai_types.Part(text="agent-1 instruction")]
+            ),
+            tools=[TOOL],
+        )
+    )
+    assert evaluation_run.labels == {
+        "vertex-ai-evaluation-agent-engine-id": "456",  # trailing segment of AGENT_CONFIG.agent_resource_name
+        "label1": "value1",
+    }
+    assert evaluation_run.error is None
 
 
 def test_create_eval_run_data_source_bigquery_request_set(client):
@@ -203,15 +205,15 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
 
 
 def test_create_eval_run_with_inference_configs(client):
-    """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
-    client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
+  """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
+  client._api_client._http_options.api_version = "v1beta1"
+  inference_config = types.EvaluationRunInferenceConfig(
         model=MODEL_NAME,
         prompt_template=types.EvaluationRunPromptTemplate(
             prompt_template="test prompt template"
         ),
     )
-    evaluation_run = client.evals.create_evaluation_run(
+  evaluation_run = client.evals.create_evaluation_run(
         name="test_inference_config",
         display_name="test_inference_config",
         dataset=types.EvaluationRunDataSource(evaluation_set=EVAL_SET_NAME),
@@ -220,22 +222,22 @@ def test_create_eval_run_with_inference_configs(client):
         inference_configs={"model_1": inference_config},
         labels={"label1": "value1"},
     )
-    assert isinstance(evaluation_run, types.EvaluationRun)
-    assert evaluation_run.display_name == "test_inference_config"
-    assert evaluation_run.state == types.EvaluationRunState.PENDING
-    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
-    assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
-    assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
+  assert isinstance(evaluation_run, types.EvaluationRun)
+  assert evaluation_run.display_name == "test_inference_config"
+  assert evaluation_run.state == types.EvaluationRunState.PENDING
+  assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+  assert evaluation_run.data_source.evaluation_set == EVAL_SET_NAME
+  assert evaluation_run.evaluation_config == types.EvaluationRunConfig(
         output_config=genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST)
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
-    assert evaluation_run.labels == {
+  assert evaluation_run.inference_configs["model_1"] == inference_config
+  assert evaluation_run.labels == {
         "label1": "value1",
     }
-    assert evaluation_run.error is None
+  assert evaluation_run.error is None
 
 
 # Dataframe tests fail in replay mode because of UUID generation mismatch.
@@ -533,7 +535,7 @@ def test_create_eval_run_with_inference_configs(client):
 #     ] == types.EvaluationRunInferenceConfig(
 #         agent_config=types.EvaluationRunAgentConfig(
 #             developer_instruction=genai_types.Content(
-#                 parts=[genai_types.Part(text=AGENT_INFO.instruction)]
+#                 parts=[genai_types.Part(text=AGENT_CONFIG.instruction)]
 #             ),
 #             tools=[TOOL],
 #         ),
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 97e56c9e19..2fed4e8230 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -282,11 +282,11 @@ def _resolve_dataset(
     api_client: BaseApiClient,
     dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
     dest: str,
-    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+    agent_configs: Optional[dict[str, types.evals.AgentConfig]] = None,
 ) -> types.EvaluationRunDataSource:
     """Resolves dataset for the evaluation run."""
     if isinstance(dataset, types.EvaluationDataset):
-        candidate_name = _get_candidate_name(dataset, agent_info_pydantic)
+        candidate_name = _get_candidate_name(dataset, agent_configs)
         eval_set = _create_evaluation_set_from_dataframe(
             api_client,
             dest,
@@ -338,22 +338,9 @@ def _resolve_inference_configs(
     inference_configs: Optional[
         dict[str, types.EvaluationRunInferenceConfigOrDict]
     ] = None,
-    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+    agent_configs: Optional[dict[str, types.evals.AgentConfig]] = None,
 ) -> Optional[dict[str, types.EvaluationRunInferenceConfigOrDict]]:
     """Resolves inference configs for the evaluation run."""
-    # Resolve agent config
-    if agent_info_pydantic and agent_info_pydantic.name:
-        inference_configs = {}
-        inference_configs[agent_info_pydantic.name] = (
-            types.EvaluationRunInferenceConfig(
-                agent_config=types.EvaluationRunAgentConfig(
-                    developer_instruction=genai_types.Content(
-                        parts=[genai_types.Part(text=agent_info_pydantic.instruction)]
-                    ),
-                    tools=agent_info_pydantic.tool_declarations,
-                )
-            )
-        )
     # Resolve prompt template data
     if inference_configs:
         for inference_config in inference_configs.values():
@@ -387,33 +374,32 @@ def _resolve_inference_configs(
 
 def _add_evaluation_run_labels(
     labels: Optional[dict[str, str]] = None,
-    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+    agent_configs: Optional[dict[str, types.evals.AgentConfig]] = None,
 ) -> Optional[dict[str, str]]:
     """Adds labels to the evaluation run."""
-    if agent_info_pydantic and agent_info_pydantic.agent_resource_name:
-        labels = labels or {}
-        labels["vertex-ai-evaluation-agent-engine-id"] = (
-            agent_info_pydantic.agent_resource_name.split("reasoningEngines/")[-1]
-        )
+    if agent_configs:
+        for config in agent_configs.values():
+            if config.agent_resource_name:
+                labels = dict(labels or {})  # copy so the caller's dict is never mutated
+                labels["vertex-ai-evaluation-agent-engine-id"] = (
+                    config.agent_resource_name.split("reasoningEngines/")[-1]
+                )
+                break  # only the first agent with a resource name sets the label
     return labels
 
 
 def _get_candidate_name(
     dataset: types.EvaluationDataset,
-    agent_info_pydantic: Optional[types.evals.AgentInfo] = None,
+    agent_configs: Optional[dict[str, types.evals.AgentConfig]] = None,
 ) -> Optional[str]:
     """Internal helper to get candidate name."""
-    if agent_info_pydantic is not None and (
-        dataset.candidate_name
-        and agent_info_pydantic
-        and agent_info_pydantic.name
-        and dataset.candidate_name != agent_info_pydantic.name
-    ):
-        logger.warning(
-            "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
-        )
-    elif dataset.candidate_name is None and agent_info_pydantic:
-        return agent_info_pydantic.name
+    if agent_configs and dataset.candidate_name:  # dataset already names a candidate
+        if dataset.candidate_name not in agent_configs:  # mismatch only warns; the dataset name is still returned
+            logger.warning(
+                "Evaluation dataset candidate_name is not in the provided agent definitions. Please make sure this is intended."
+            )
+    elif not dataset.candidate_name and agent_configs:  # no dataset name: fall back to the agent map
+        return next(iter(agent_configs))  # first key in insertion order, without building a list
     return dataset.candidate_name or None
 
 
@@ -1249,7 +1235,6 @@ def _resolve_dataset_inputs(
     dataset: list[types.EvaluationDataset],
     dataset_schema: Optional[Literal["GEMINI", "FLATTEN", "OPENAI"]],
     loader: "_evals_utils.EvalDatasetLoader",
-    agent_info: Optional[types.evals.AgentInfo] = None,
 ) -> tuple[types.EvaluationDataset, int]:
     """Loads and processes single or multiple datasets for evaluation.
 
@@ -1259,7 +1244,6 @@ def _resolve_dataset_inputs(
       dataset_schema: The schema to use for the dataset(s). If None, it will be
         auto-detected.
       loader: An instance of EvalDatasetLoader to load data.
-      agent_info: The agent info of the agent under evaluation.
 
     Returns:
       A tuple containing:
@@ -1319,7 +1303,6 @@ def _resolve_dataset_inputs(
 
     processed_eval_dataset = _evals_data_converters.merge_evaluation_datasets(
         datasets=parsed_evaluation_datasets,
-        agent_info=agent_info,
     )
 
     if not processed_eval_dataset.eval_cases:
@@ -1504,24 +1487,10 @@ def _execute_evaluation(  # type: ignore[no-untyped-def]
 
     loader = _evals_utils.EvalDatasetLoader(api_client=api_client)
 
-    agent_info = kwargs.get("agent_info", None)
-    validated_agent_info = None
-    if agent_info:
-        if isinstance(agent_info, dict):
-            validated_agent_info = types.evals.AgentInfo.model_validate(agent_info)
-        elif isinstance(agent_info, types.evals.AgentInfo):
-            validated_agent_info = agent_info
-        else:
-            raise TypeError(
-                "agent_info values must be of type types.evals.AgentInfo or dict,"
-                f" but got {type(agent_info)}'"
-            )
-
     processed_eval_dataset, num_response_candidates = _resolve_dataset_inputs(
         dataset=dataset_list,
         dataset_schema=dataset_schema,
         loader=loader,
-        agent_info=validated_agent_info,
     )
 
     resolved_metrics = _resolve_metrics(metrics, api_client)
@@ -1542,7 +1511,6 @@ def _execute_evaluation(  # type: ignore[no-untyped-def]
     logger.info("Evaluation took: %f seconds", t2 - t1)
 
     evaluation_result.evaluation_dataset = dataset_list
-    evaluation_result.agent_info = validated_agent_info
 
     if not evaluation_result.metadata:
         evaluation_result.metadata = types.EvaluationRunMetadata()
@@ -1636,7 +1604,7 @@ def _run_agent_internal(
                 # TODO: Migrate single turn agent run result to AgentData.
                 agent_data_row = types.evals.AgentData(
                     turns=resp_item,
-                    agents=agent_data_agents,
+                    agent_definitions=agent_data_agents,
                 ).model_dump()
 
         else:
@@ -2094,39 +2062,6 @@ def _get_eval_cases_eval_dfs_from_eval_items(
     return eval_case_results, eval_dfs
 
 
-def _get_agent_info_from_inference_configs(
-    candidate_names: list[str],
-    inference_configs: Optional[dict[str, types.EvaluationRunInferenceConfig]] = None,
-) -> Optional[types.evals.AgentInfo]:
-    """Retrieves an AgentInfo from the inference configs."""
-    # TODO(lakeyk): Support multiple agents.
-    if not (
-        inference_configs
-        and candidate_names
-        and candidate_names[0] in inference_configs
-        and inference_configs[candidate_names[0]].agent_config
-    ):
-        return None
-    if len(inference_configs.keys()) > 1:
-        logger.warning(
-            "Multiple agents are not supported yet. Displaying the first agent."
-        )
-    agent_config = inference_configs[candidate_names[0]].agent_config
-    di = (
-        agent_config.developer_instruction
-        if agent_config and agent_config.developer_instruction
-        else None
-    )
-    instruction = di.parts[0].text if di and di.parts and di.parts[0].text else None
-    return types.evals.AgentInfo(
-        name=candidate_names[0],
-        instruction=instruction,
-        tool_declarations=(
-            agent_config.tools if agent_config and agent_config.tools else None
-        ),
-    )
-
-
 def _get_eval_result_from_eval_items(
     results: types.EvaluationRunResults,
     eval_items: list[types.EvaluationItem],
@@ -2148,6 +2083,14 @@ def _get_eval_result_from_eval_items(
     aggregated_metrics = _get_aggregated_metrics(results)
     eval_case_results, eval_dfs = _get_eval_cases_eval_dfs_from_eval_items(eval_items)
     candidate_names = [eval_df.candidate_name for eval_df in eval_dfs]
+
+    agent_configs = None
+    if inference_configs:
+        for config in inference_configs.values():
+            if config.agent_definitions:
+                agent_configs = config.agent_definitions
+                break
+
     eval_result = types.EvaluationResult(
         summary_metrics=aggregated_metrics,
         eval_case_results=eval_case_results,
@@ -2155,9 +2098,7 @@ def _get_eval_result_from_eval_items(
         metadata=types.EvaluationRunMetadata(
             candidate_names=candidate_names,
         ),
-        agent_info=_get_agent_info_from_inference_configs(
-            candidate_names, inference_configs
-        ),
+        agent_definitions=agent_configs,
     )
     return eval_result
 
diff --git a/vertexai/_genai/_evals_data_converters.py b/vertexai/_genai/_evals_data_converters.py
index 876f9a7341..54513c2edf 100644
--- a/vertexai/_genai/_evals_data_converters.py
+++ b/vertexai/_genai/_evals_data_converters.py
@@ -743,7 +743,7 @@ def _validate_case_consistency(
 
 def merge_evaluation_datasets(
     datasets: list[types.EvaluationDataset],
-    agent_info: Optional[types.evals.AgentInfo] = None,
+    agent_info: Optional[types.evals.AgentConfig] = None,
 ) -> types.EvaluationDataset:
     """Merges multiple EvaluationDatasets into a single EvaluationDataset.
 
@@ -858,7 +858,6 @@ def merge_evaluation_datasets(
             reference=base_eval_case.reference,
             system_instruction=base_eval_case.system_instruction,
             conversation_history=base_eval_case.conversation_history,
-            agent_info=agent_info,
             intermediate_events=base_eval_case.intermediate_events,
             **eval_case_custom_columns,
         )
@@ -870,7 +869,6 @@ def merge_evaluation_datasets(
 def merge_response_datasets_into_canonical_format(
     raw_datasets: list[list[dict[str, Any]]],
     schemas: list[str],
-    agent_info: Optional[types.evals.AgentInfo] = None,
 ) -> types.EvaluationDataset:
     """Merges multiple raw response datasets into a single EvaluationDataset.
 
@@ -897,4 +895,4 @@ def merge_response_datasets_into_canonical_format(
         converter = get_dataset_converter(schema)
         parsed_evaluation_datasets.append(converter.convert(raw_ds_entry))
 
-    return merge_evaluation_datasets(parsed_evaluation_datasets, agent_info)
+    return merge_evaluation_datasets(parsed_evaluation_datasets)
diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py
index 1088df664a..ee7f7d99af 100644
--- a/vertexai/_genai/evals.py
+++ b/vertexai/_genai/evals.py
@@ -78,6 +78,9 @@ def _CreateEvaluationRunParameters_to_vertex(
     if getv(from_object, ["display_name"]) is not None:
         setv(to_object, ["displayName"], getv(from_object, ["display_name"]))
 
+    if getv(from_object, ["agent"]) is not None:
+        setv(to_object, ["agent"], getv(from_object, ["agent"]))
+
     if getv(from_object, ["data_source"]) is not None:
         setv(to_object, ["dataSource"], getv(from_object, ["data_source"]))
 
@@ -356,7 +359,7 @@ def _EvaluationRunInferenceConfig_from_vertex(
         setv(to_object, ["agent_run_config"], getv(from_object, ["agentRunConfig"]))
 
     if getv(from_object, ["agents"]) is not None:
-        setv(to_object, ["agent_configs"], getv(from_object, ["agents"]))
+        setv(to_object, ["agent_definitions"], getv(from_object, ["agents"]))
 
     return to_object
 
@@ -378,8 +381,8 @@ def _EvaluationRunInferenceConfig_to_vertex(
     if getv(from_object, ["agent_run_config"]) is not None:
         setv(to_object, ["agentRunConfig"], getv(from_object, ["agent_run_config"]))
 
-    if getv(from_object, ["agent_configs"]) is not None:
-        setv(to_object, ["agents"], getv(from_object, ["agent_configs"]))
+    if getv(from_object, ["agent_definitions"]) is not None:
+        setv(to_object, ["agents"], getv(from_object, ["agent_definitions"]))
 
     return to_object
 
@@ -526,8 +529,8 @@ def _GenerateUserScenariosParameters_to_vertex(
     if getv(from_object, ["location"]) is not None:
         setv(to_object, ["location"], getv(from_object, ["location"]))
 
-    if getv(from_object, ["agents"]) is not None:
-        setv(to_object, ["agents"], getv(from_object, ["agents"]))
+    if getv(from_object, ["agent_definitions"]) is not None:
+        setv(to_object, ["agents"], getv(from_object, ["agent_definitions"]))
 
     if getv(from_object, ["root_agent_id"]) is not None:
         setv(to_object, ["rootAgentId"], getv(from_object, ["root_agent_id"]))
@@ -840,6 +843,7 @@ def _create_evaluation_run(
         *,
         name: Optional[str] = None,
         display_name: Optional[str] = None,
+        agent: Optional[str] = None,
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: types.EvaluationRunConfigOrDict,
         labels: Optional[dict[str, str]] = None,
@@ -855,6 +859,7 @@ def _create_evaluation_run(
         parameter_model = types._CreateEvaluationRunParameters(
             name=name,
             display_name=display_name,
+            agent=agent,
             data_source=data_source,
             evaluation_config=evaluation_config,
             labels=labels,
@@ -1104,7 +1109,7 @@ def _generate_user_scenarios(
         self,
         *,
         location: Optional[str] = None,
-        agents: Optional[dict[str, evals_types.AgentConfigOrDict]] = None,
+        agent_definitions: Optional[dict[str, evals_types.AgentConfigOrDict]] = None,
         root_agent_id: Optional[str] = None,
         user_scenario_generation_config: Optional[
             evals_types.UserScenarioGenerationConfigOrDict
@@ -1117,7 +1122,7 @@ def _generate_user_scenarios(
 
         parameter_model = types._GenerateUserScenariosParameters(
             location=location,
-            agents=agents,
+            agent_definitions=agent_definitions,
             root_agent_id=root_agent_id,
             user_scenario_generation_config=user_scenario_generation_config,
             config=config,
@@ -1794,7 +1799,11 @@ def create_evaluation_run(
         metrics: list[types.EvaluationRunMetricOrDict],
         name: Optional[str] = None,
         display_name: Optional[str] = None,
-        agent_info: Optional[evals_types.AgentInfoOrDict] = None,
+        agent: Optional[str] = None,
+        agent_definitions: Optional[dict[str, evals_types.AgentConfigOrDict]] = None,
+        user_simulator_config: Optional[
+            Union[evals_types.UserSimulatorConfigOrDict, dict[str, Any]]
+        ] = None,
         inference_configs: Optional[
             dict[str, types.EvaluationRunInferenceConfigOrDict]
         ] = None,
@@ -1809,10 +1818,12 @@ def create_evaluation_run(
           metrics: The list of metrics to evaluate.
           name: The name of the evaluation run.
           display_name: The display name of the evaluation run.
-          agent_info: The agent info to evaluate.
+          agent: The agent engine used to run the agent.
+          agent_definitions: The agent configurations to evaluate, as a dictionary mapping agent IDs to agent configurations.
+          user_simulator_config: The configuration for the user simulator in multi-turn agent evaluation.
           inference_configs: The candidate to inference config map for the evaluation run.
               The key is the candidate name, and the value is the inference config.
-              If provided, agent_info must be None.
+              If provided, agent and agent_definitions must be None.
               Example:
               {"candidate-1": types.EvaluationRunInferenceConfig(model="gemini-2.5-flash")}
           labels: The labels to apply to the evaluation run.
@@ -1821,19 +1832,53 @@ def create_evaluation_run(
         Returns:
             The created evaluation run.
         """
-        if agent_info and inference_configs:
+        if (agent or agent_definitions) and inference_configs:
             raise ValueError(
-                "At most one of agent_info or inference_configs can be provided."
+                "At most one of (agent, agent_definitions) or inference_configs can be provided."
             )
-        agent_info_pydantic = (
-            evals_types.AgentInfo.model_validate(agent_info)
-            if isinstance(agent_info, dict)
-            else (agent_info or evals_types.AgentInfo())
-        )
+
+        agent_configs_map = None
+
+        if agent_definitions:
+            agent_configs_map = {
+                k: (
+                    evals_types.AgentConfig.model_validate(v)
+                    if isinstance(v, dict)
+                    else v
+                )
+                for k, v in agent_definitions.items()
+            }
+
+        candidate_name = None
+        if agent_configs_map:
+            candidate_name = list(agent_configs_map.keys())[0]
+
+        candidate_name = candidate_name or "candidate-1"
+        if isinstance(dataset, types.EvaluationDataset) and dataset.candidate_name:
+            candidate_name = dataset.candidate_name
+
+        if agent or agent_configs_map:
+            inference_configs = inference_configs or {}
+            if candidate_name not in inference_configs:
+                if user_simulator_config is None:
+                    user_simulator_config = evals_types.UserSimulatorConfig(max_turn=5)
+                elif isinstance(user_simulator_config, dict):
+                    user_simulator_config = (
+                        evals_types.UserSimulatorConfig.model_validate(
+                            user_simulator_config
+                        )
+                    )
+
+                inference_configs[candidate_name] = types.EvaluationRunInferenceConfig(
+                    agent_run_config=types.AgentRunConfig(
+                        agent_engine=agent, user_simulator_config=user_simulator_config
+                    ),
+                    agent_definitions=agent_configs_map,
+                )
         if isinstance(dataset, types.EvaluationDataset):
             _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
         resolved_dataset = _evals_common._resolve_dataset(
-            self._api_client, dataset, dest, agent_info_pydantic
+            self._api_client, dataset, dest, agent_configs_map
         )
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
@@ -1845,10 +1890,10 @@ def create_evaluation_run(
             output_config=output_config, metrics=resolved_metrics
         )
         resolved_inference_configs = _evals_common._resolve_inference_configs(
-            self._api_client, resolved_dataset, inference_configs, agent_info_pydantic
+            self._api_client, resolved_dataset, inference_configs, agent_configs_map
         )
         resolved_labels = _evals_common._add_evaluation_run_labels(
-            labels, agent_info_pydantic
+            labels, agent_configs_map
         )
         resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
         return self._create_evaluation_run(
@@ -2002,7 +2047,7 @@ def create_evaluation_set(
     def generate_user_scenarios(
         self,
         *,
-        agents: dict[str, evals_types.AgentConfigOrDict],
+        agent_definitions: dict[str, evals_types.AgentConfigOrDict],
         user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict,
         root_agent_id: str,
     ) -> types.EvaluationDataset:
@@ -2011,7 +2056,7 @@ def generate_user_scenarios(
            and the agent under test.
 
         Args:
-            agents: A map of agent ID to AgentConfig.
+            agent_definitions: A map of agent ID to AgentConfig.
             user_scenario_generation_config: Configuration for generating user scenarios.
             root_agent_id: The ID of the root agent.
 
@@ -2019,7 +2064,7 @@ def generate_user_scenarios(
             An EvaluationDataset containing the generated user scenarios.
         """
         response = self._generate_user_scenarios(
-            agents=agents,
+            agent_definitions=agent_definitions,
             user_scenario_generation_config=user_scenario_generation_config,
             root_agent_id=root_agent_id,
         )
@@ -2092,6 +2137,7 @@ async def _create_evaluation_run(
         *,
         name: Optional[str] = None,
         display_name: Optional[str] = None,
+        agent: Optional[str] = None,
         data_source: types.EvaluationRunDataSourceOrDict,
         evaluation_config: types.EvaluationRunConfigOrDict,
         labels: Optional[dict[str, str]] = None,
@@ -2107,6 +2153,7 @@ async def _create_evaluation_run(
         parameter_model = types._CreateEvaluationRunParameters(
             name=name,
             display_name=display_name,
+            agent=agent,
             data_source=data_source,
             evaluation_config=evaluation_config,
             labels=labels,
@@ -2364,7 +2411,7 @@ async def _generate_user_scenarios(
         self,
         *,
         location: Optional[str] = None,
-        agents: Optional[dict[str, evals_types.AgentConfigOrDict]] = None,
+        agent_definitions: Optional[dict[str, evals_types.AgentConfigOrDict]] = None,
         root_agent_id: Optional[str] = None,
         user_scenario_generation_config: Optional[
             evals_types.UserScenarioGenerationConfigOrDict
@@ -2377,7 +2424,7 @@ async def _generate_user_scenarios(
 
         parameter_model = types._GenerateUserScenariosParameters(
             location=location,
-            agents=agents,
+            agent_definitions=agent_definitions,
             root_agent_id=root_agent_id,
             user_scenario_generation_config=user_scenario_generation_config,
             config=config,
@@ -2724,7 +2771,11 @@ async def create_evaluation_run(
         metrics: list[types.EvaluationRunMetricOrDict],
         name: Optional[str] = None,
         display_name: Optional[str] = None,
-        agent_info: Optional[evals_types.AgentInfo] = None,
+        agent: Optional[str] = None,
+        agent_definitions: Optional[dict[str, evals_types.AgentConfigOrDict]] = None,
+        user_simulator_config: Optional[
+            Union[evals_types.UserSimulatorConfigOrDict, dict[str, Any]]
+        ] = None,
         inference_configs: Optional[
             dict[str, types.EvaluationRunInferenceConfigOrDict]
         ] = None,
@@ -2739,10 +2790,12 @@ async def create_evaluation_run(
           metrics: The list of metrics to evaluate.
           name: The name of the evaluation run.
           display_name: The display name of the evaluation run.
-          agent_info: The agent info to evaluate.
+          agent: The agent engine used to run the agent.
+          agent_definitions: The agent configurations to evaluate, as a dictionary mapping agent IDs to agent configurations.
+          user_simulator_config: The configuration for the user simulator in multi-turn agent evaluation.
           inference_configs: The candidate to inference config map for the evaluation run.
               The key is the candidate name, and the value is the inference config.
-              If provided, agent_info must be None.
+              If provided, agent and agent_definitions must be None.
               Example:
               {"candidate-1": types.EvaluationRunInferenceConfig(model="gemini-2.5-flash")}
           labels: The labels to apply to the evaluation run.
@@ -2751,19 +2804,54 @@ async def create_evaluation_run(
         Returns:
             The created evaluation run.
         """
-        if agent_info and inference_configs:
+        if (agent or agent_definitions) and inference_configs:
             raise ValueError(
-                "At most one of agent_info or inference_configs can be provided."
+                "At most one of (agent, agent_definitions) or inference_configs can be provided."
             )
-        agent_info_pydantic = (
-            evals_types.AgentInfo.model_validate(agent_info)
-            if isinstance(agent_info, dict)
-            else (agent_info or evals_types.AgentInfo())
-        )
+
+        agent_configs_map = None
+
+        if agent_definitions:
+            agent_configs_map = {
+                k: (
+                    evals_types.AgentConfig.model_validate(v)
+                    if isinstance(v, dict)
+                    else v
+                )
+                for k, v in agent_definitions.items()
+            }
+
+        candidate_name = None
+        if agent_configs_map:
+            candidate_name = list(agent_configs_map.keys())[0]
+
+        candidate_name = candidate_name or "candidate-1"
+
+        if isinstance(dataset, types.EvaluationDataset) and dataset.candidate_name:
+            candidate_name = dataset.candidate_name
+
+        if agent or agent_configs_map:
+            inference_configs = inference_configs or {}
+            if candidate_name not in inference_configs:
+                if user_simulator_config is None:
+                    user_simulator_config = evals_types.UserSimulatorConfig(max_turn=5)
+                elif isinstance(user_simulator_config, dict):
+                    user_simulator_config = (
+                        evals_types.UserSimulatorConfig.model_validate(
+                            user_simulator_config
+                        )
+                    )
+
+                inference_configs[candidate_name] = types.EvaluationRunInferenceConfig(
+                    agent_run_config=types.AgentRunConfig(
+                        agent_engine=agent, user_simulator_config=user_simulator_config
+                    ),
+                    agent_definitions=agent_configs_map,
+                )
         if isinstance(dataset, types.EvaluationDataset):
             _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
         resolved_dataset = _evals_common._resolve_dataset(
-            self._api_client, dataset, dest, agent_info_pydantic
+            self._api_client, dataset, dest, agent_configs_map
         )
         output_config = genai_types.OutputConfig(
             gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
@@ -2775,10 +2863,10 @@ async def create_evaluation_run(
             output_config=output_config, metrics=resolved_metrics
         )
         resolved_inference_configs = _evals_common._resolve_inference_configs(
-            self._api_client, resolved_dataset, inference_configs, agent_info_pydantic
+            self._api_client, resolved_dataset, inference_configs, agent_configs_map
         )
         resolved_labels = _evals_common._add_evaluation_run_labels(
-            labels, agent_info_pydantic
+            labels, agent_configs_map
         )
         resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
 
@@ -2939,7 +3027,7 @@ async def create_evaluation_set(
     async def generate_user_scenarios(
         self,
         *,
-        agents: dict[str, evals_types.AgentConfigOrDict],
+        agent_definitions: dict[str, evals_types.AgentConfigOrDict],
         user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict,
         root_agent_id: str,
     ) -> types.EvaluationDataset:
@@ -2948,7 +3036,7 @@ async def generate_user_scenarios(
            and the agent under test.
 
         Args:
-            agents: A map of agent ID to AgentConfig.
+            agent_definitions: A map of agent ID to AgentConfig.
             user_scenario_generation_config: Configuration for generating user scenarios.
             root_agent_id: The ID of the root agent.
 
@@ -2956,7 +3044,7 @@ async def generate_user_scenarios(
             An EvaluationDataset containing the generated user scenarios.
         """
         response = await self._generate_user_scenarios(
-            agents=agents,
+            agent_definitions=agent_definitions,
             user_scenario_generation_config=user_scenario_generation_config,
             root_agent_id=root_agent_id,
         )
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index 4a7512d0fe..fab561c303 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -2663,7 +2663,7 @@ class EvaluationRunInferenceConfig(_common.BaseModel):
         default=None,
         description="""Configuration for Agent Run in evaluation management service.""",
     )
-    agent_configs: Optional[dict[str, evals_types.AgentConfig]] = Field(
+    agent_definitions: Optional[dict[str, evals_types.AgentConfig]] = Field(
         default=None,
         description="""A map of agent IDs to their respective agent config.""",
     )
@@ -2687,7 +2687,7 @@ class EvaluationRunInferenceConfigDict(TypedDict, total=False):
     agent_run_config: Optional[AgentRunConfigDict]
     """Configuration for Agent Run in evaluation management service."""
 
-    agent_configs: Optional[dict[str, evals_types.AgentConfig]]
+    agent_definitions: Optional[dict[str, evals_types.AgentConfig]]
     """A map of agent IDs to their respective agent config."""
 
 
@@ -2721,6 +2721,7 @@ class _CreateEvaluationRunParameters(_common.BaseModel):
 
     name: Optional[str] = Field(default=None, description="""""")
     display_name: Optional[str] = Field(default=None, description="""""")
+    agent: Optional[str] = Field(default=None, description="""""")
     data_source: Optional[EvaluationRunDataSource] = Field(
         default=None, description=""""""
     )
@@ -2745,6 +2746,9 @@ class _CreateEvaluationRunParametersDict(TypedDict, total=False):
     display_name: Optional[str]
     """"""
 
+    agent: Optional[str]
+    """"""
+
     data_source: Optional[EvaluationRunDataSourceDict]
     """"""
 
@@ -3055,10 +3059,6 @@ class EvalCase(_common.BaseModel):
         default=None,
         description="""This field is experimental and may change in future versions. Intermediate events of a single turn in an agent run or intermediate events of the last turn for multi-turn an agent run.""",
     )
-    agent_info: Optional[evals_types.AgentInfo] = Field(
-        default=None,
-        description="""This field is experimental and may change in future versions. The agent info of the agent under evaluation. This can be extended for multi-agent evaluation.""",
-    )
     agent_data: Optional[evals_types.AgentData] = Field(
         default=None,
         description="""This field is experimental and may change in future versions. The agent data of the agent under evaluation.""",
@@ -3098,9 +3098,6 @@ class EvalCaseDict(TypedDict, total=False):
     intermediate_events: Optional[list[evals_types.Event]]
     """This field is experimental and may change in future versions. Intermediate events of a single turn in an agent run or intermediate events of the last turn for multi-turn an agent run."""
 
-    agent_info: Optional[evals_types.AgentInfo]
-    """This field is experimental and may change in future versions. The agent info of the agent under evaluation. This can be extended for multi-agent evaluation."""
-
     agent_data: Optional[evals_types.AgentData]
     """This field is experimental and may change in future versions. The agent data of the agent under evaluation."""
 
@@ -3330,9 +3327,9 @@ class EvaluationResult(_common.BaseModel):
     metadata: Optional[EvaluationRunMetadata] = Field(
         default=None, description="""Metadata for the evaluation run."""
     )
-    agent_info: Optional[evals_types.AgentInfo] = Field(
+    agent_definitions: Optional[dict[str, evals_types.AgentConfig]] = Field(
         default=None,
-        description="""This field is experimental and may change in future versions. The agent info of the agent under evaluation. This can be extended for multi-agent evaluation.""",
+        description="""This field is experimental and may change in future versions. The agent configs of the agents under evaluation.""",
     )
 
     def show(self, candidate_names: Optional[List[str]] = None) -> None:
@@ -3365,8 +3362,8 @@ class EvaluationResultDict(TypedDict, total=False):
     metadata: Optional[EvaluationRunMetadataDict]
     """Metadata for the evaluation run."""
 
-    agent_info: Optional[evals_types.AgentInfo]
-    """This field is experimental and may change in future versions. The agent info of the agent under evaluation. This can be extended for multi-agent evaluation."""
+    agent_definitions: Optional[dict[str, evals_types.AgentConfig]]
+    """This field is experimental and may change in future versions. The agent configs of the agents under evaluation."""
 
 
 EvaluationResultOrDict = Union[EvaluationResult, EvaluationResultDict]
@@ -5425,7 +5422,7 @@ class _GenerateUserScenariosParameters(_common.BaseModel):
     """Parameters for GenerateUserScenarios."""
 
     location: Optional[str] = Field(default=None, description="""""")
-    agents: Optional[dict[str, evals_types.AgentConfig]] = Field(
+    agent_definitions: Optional[dict[str, evals_types.AgentConfig]] = Field(
         default=None, description=""""""
     )
     root_agent_id: Optional[str] = Field(default=None, description="""""")
@@ -5443,7 +5440,7 @@ class _GenerateUserScenariosParametersDict(TypedDict, total=False):
     location: Optional[str]
     """"""
 
-    agents: Optional[dict[str, evals_types.AgentConfig]]
+    agent_definitions: Optional[dict[str, evals_types.AgentConfig]]
     """"""
 
     root_agent_id: Optional[str]
diff --git a/vertexai/_genai/types/evals.py b/vertexai/_genai/types/evals.py
index a262d9d9f2..dc220def0d 100644
--- a/vertexai/_genai/types/evals.py
+++ b/vertexai/_genai/types/evals.py
@@ -207,6 +207,10 @@ def from_agent(
             description=getattr(agent, "description", None),
             instruction=getattr(agent, "instruction", None),
             tools=AgentConfig._get_tool_declarations_from_agent(agent),
+            sub_agents=[
+                getattr(sa, "name", "agent_0")
+                for sa in getattr(agent, "sub_agents", [])
+            ],
         )
 
 
@@ -366,7 +370,7 @@ class EventsDict(TypedDict, total=False):
 class AgentData(_common.BaseModel):
     """Represents data specific to multi-turn agent evaluations."""
 
-    agents: Optional[dict[str, AgentConfig]] = Field(
+    agent_definitions: Optional[dict[str, AgentConfig]] = Field(
         default=None,
         description="""A map containing the static configurations for each agent in the system.
       Key: agent_id (matches the `author` field in events).
@@ -387,8 +391,8 @@ class AgentData(_common.BaseModel):
     events: Optional[Events] = Field(default=None, description="""A list of events.""")
 
     @classmethod
-    def _get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]:
-        """Recursively gets all agent configs from an agent and its sub-agents.
+    def get_agent_definitions(cls, agent: Any) -> dict[str, AgentConfig]:
+        """Recursively gets all agent definitions from an agent and its sub-agents.
 
         Args:
           agent: The agent to get the agent info from.
@@ -401,7 +405,7 @@ def _get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]:
         agents_map = {agent_id: agent_config}
 
         for sub_agent in getattr(agent, "sub_agents", []):
-            agents_map.update(cls._get_agents_map(sub_agent))
+            agents_map.update(cls.get_agent_definitions(sub_agent))
 
         return agents_map
 
@@ -419,7 +423,7 @@ def from_session(cls, agent: Any, session_history: list[Any]) -> "AgentData":
         Returns:
             An AgentData object containing the segmented history and agent config.
         """
-        agents_map = cls._get_agents_map(agent)
+        agents_map = cls.get_agent_definitions(agent)
         agent_id = getattr(agent, "name", "agent_0") or "agent_0"
 
         turns: list[ConversationTurn] = []
@@ -494,13 +498,15 @@ def from_session(cls, agent: Any, session_history: list[Any]) -> "AgentData":
                 )
             )
 
-        return cls(agents=agents_map, turns=turns)  # pytype: disable=missing-parameter
+        return cls(
+            agent_definitions=agents_map, turns=turns
+        )  # pytype: disable=missing-parameter
 
 
 class AgentDataDict(TypedDict, total=False):
     """Represents data specific to multi-turn agent evaluations."""
 
-    agents: Optional[dict[str, AgentConfigDict]]
+    agent_definitions: Optional[dict[str, AgentConfigDict]]
     """A map containing the static configurations for each agent in the system.
       Key: agent_id (matches the `author` field in events).
       Value: The static configuration of the agent."""
@@ -523,107 +529,6 @@ class AgentDataDict(TypedDict, total=False):
 AgentDataOrDict = Union[AgentData, AgentDataDict]
 
 
-class AgentInfo(_common.BaseModel):
-    """The agent info of an agent, used for agent eval."""
-
-    agent_resource_name: Optional[str] = Field(
-        default=None,
-        description="""The agent engine used to run agent. Agent engine resource name in str type, with format
-            `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""",
-    )
-    name: Optional[str] = Field(
-        default=None, description="""Agent name, used as an identifier."""
-    )
-    instruction: Optional[str] = Field(
-        default=None, description="""Agent developer instruction."""
-    )
-    description: Optional[str] = Field(
-        default=None, description="""Agent description."""
-    )
-    tool_declarations: Optional[genai_types.ToolListUnion] = Field(
-        default=None, description="""List of tools used by the Agent."""
-    )
-
-    @staticmethod
-    def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion:
-        """Gets tool declarations from an agent.
-
-        Args:
-          agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK.
-
-        Returns:
-          The tool declarations of the agent.
-        """
-        tool_declarations: genai_types.ToolListUnion = []
-        for tool in agent.tools:
-            tool_declarations.append(
-                {
-                    "function_declarations": [
-                        genai_types.FunctionDeclaration.from_callable_with_api_option(
-                            callable=tool
-                        )
-                    ]
-                }
-            )
-        return tool_declarations
-
-    @classmethod
-    def load_from_agent(
-        cls, agent: Any, agent_resource_name: Optional[str] = None
-    ) -> "AgentInfo":
-        """Loads agent info from an agent.
-
-        Args:
-          agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK.
-          agent_resource_name: Optional. The agent engine resource name.
-
-        Returns:
-          The agent info of the agent.
-
-        Example:
-        ```
-        from vertexai._genai import types
-
-        # Assuming 'my_agent' is an instance of google.adk.agents.LLMAgent
-
-        agent_info = types.evals.AgentInfo.load_from_agent(
-            agent=my_agent,
-            agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456"
-        )
-        ```
-        """
-        return cls(  # pytype: disable=missing-parameter
-            name=agent.name,
-            agent_resource_name=agent_resource_name,
-            instruction=agent.instruction,
-            description=agent.description,
-            tool_declarations=AgentInfo._get_tool_declarations_from_agent(agent),
-        )
-
-
-class AgentInfoDict(TypedDict, total=False):
-    """The agent info of an agent, used for agent eval."""
-
-    agent_resource_name: Optional[str]
-    """The agent engine used to run agent. Agent engine resource name in str type, with format
-            `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`."""
-
-    name: Optional[str]
-    """Agent name, used as an identifier."""
-
-    instruction: Optional[str]
-    """Agent developer instruction."""
-
-    description: Optional[str]
-    """Agent description."""
-
-    tool_declarations: Optional[genai_types.ToolListUnionDict]
-    """List of tools used by the Agent."""
-
-
-AgentInfoOrDict = Union[AgentInfo, AgentInfoDict]
-
-
 class RubricContentProperty(_common.BaseModel):
     """Defines criteria based on a specific property."""