feat: Add consent_audio and voice_consent_signature to ReplicatedVoiceConfig

MarkDaoust · copybara-github · commit b6d699bb6b81 · 2026-03-18T11:24:45.000-07:00
PiperOrigin-RevId: 884712720
diff --git a/google/genai/types.py b/google/genai/types.py
@@ -4901,32 +4901,85 @@ class ToolConfigDict(TypedDict, total=False):
 ToolConfigOrDict = Union[ToolConfig, ToolConfigDict]
 
 
+class VoiceConsentSignature(_common.BaseModel):
+  """The signature of the voice consent check."""
+
+  signature: Optional[str] = Field(
+      default=None,
+      description="""The signature string.
+      """,
+  )
+
+
+class VoiceConsentSignatureDict(TypedDict, total=False):
+  """The signature of the voice consent check."""
+
+  signature: Optional[str]
+  """The signature string.
+      """
+
+
+VoiceConsentSignatureOrDict = Union[
+    VoiceConsentSignature, VoiceConsentSignatureDict
+]
+
+
 class ReplicatedVoiceConfig(_common.BaseModel):
-  """ReplicatedVoiceConfig is used to configure replicated voice."""
+  """The configuration for the replicated voice to use."""
 
   mime_type: Optional[str] = Field(
       default=None,
-      description="""The mime type of the replicated voice.
+      description="""The mimetype of the voice sample. The only currently supported
+      value is `audio/wav`. This represents 16-bit signed little-endian wav
+      data, with a 24kHz sampling rate.
       """,
   )
   voice_sample_audio: Optional[bytes] = Field(
       default=None,
-      description="""The sample audio of the replicated voice.
+      description="""The sample of the custom voice.
       """,
   )
+  consent_audio: Optional[bytes] = Field(
+      default=None,
+      description="""Recorded consent verifying ownership of the voice. This
+      represents 16-bit signed little-endian wav data, with a 24kHz sampling
+      rate.""",
+  )
+  voice_consent_signature: Optional[VoiceConsentSignature] = Field(
+      default=None,
+      description="""Signature of a previously verified consent audio. This should be
+      populated with a signature generated by the server for a previous
+      request containing the consent_audio field. When provided, the
+      signature is verified instead of the consent_audio field to reduce
+      latency. Requests will fail if the signature is invalid or expired.""",
+  )
 
 
 class ReplicatedVoiceConfigDict(TypedDict, total=False):
-  """ReplicatedVoiceConfig is used to configure replicated voice."""
+  """The configuration for the replicated voice to use."""
 
   mime_type: Optional[str]
-  """The mime type of the replicated voice.
+  """The mimetype of the voice sample. The only currently supported
+      value is `audio/wav`. This represents 16-bit signed little-endian wav
+      data, with a 24kHz sampling rate.
       """
 
   voice_sample_audio: Optional[bytes]
-  """The sample audio of the replicated voice.
+  """The sample of the custom voice.
       """
 
+  consent_audio: Optional[bytes]
+  """Recorded consent verifying ownership of the voice. This
+      represents 16-bit signed little-endian wav data, with a 24kHz sampling
+      rate."""
+
+  voice_consent_signature: Optional[VoiceConsentSignatureDict]
+  """Signature of a previously verified consent audio. This should be
+      populated with a signature generated by the server for a previous
+      request containing the consent_audio field. When provided, the
+      signature is verified instead of the consent_audio field to reduce
+      latency. Requests will fail if the signature is invalid or expired."""
+
 
 ReplicatedVoiceConfigOrDict = Union[
     ReplicatedVoiceConfig, ReplicatedVoiceConfigDict
@@ -4952,20 +5005,26 @@ class PrebuiltVoiceConfigDict(TypedDict, total=False):
 
 
 class VoiceConfig(_common.BaseModel):
+  """The configuration for the voice to use."""
 
   replicated_voice_config: Optional[ReplicatedVoiceConfig] = Field(
       default=None,
-      description="""If true, the model will use a replicated voice for the response.""",
+      description="""The configuration for a replicated voice, which is a clone of a
+      user's voice that can be used for speech synthesis. If this is unset, a
+      default voice is used.""",
   )
   prebuilt_voice_config: Optional[PrebuiltVoiceConfig] = Field(
       default=None, description="""The configuration for a prebuilt voice."""
   )
 
 
 class VoiceConfigDict(TypedDict, total=False):
+  """The configuration for the voice to use."""
 
   replicated_voice_config: Optional[ReplicatedVoiceConfigDict]
-  """If true, the model will use a replicated voice for the response."""
+  """The configuration for a replicated voice, which is a clone of a
+      user's voice that can be used for speech synthesis. If this is unset, a
+      default voice is used."""
 
   prebuilt_voice_config: Optional[PrebuiltVoiceConfigDict]
   """The configuration for a prebuilt voice."""
@@ -5022,10 +5081,11 @@ class MultiSpeakerVoiceConfigDict(TypedDict, total=False):
 
 
 class SpeechConfig(_common.BaseModel):
+  """Config for speech generation and transcription."""
 
   voice_config: Optional[VoiceConfig] = Field(
       default=None,
-      description="""Configuration for the voice of the response.""",
+      description="""The configuration in case of single-voice output.""",
   )
   language_code: Optional[str] = Field(
       default=None,
@@ -5038,9 +5098,10 @@ class SpeechConfig(_common.BaseModel):
 
 
 class SpeechConfigDict(TypedDict, total=False):
+  """Config for speech generation and transcription."""
 
   voice_config: Optional[VoiceConfigDict]
-  """Configuration for the voice of the response."""
+  """The configuration in case of single-voice output."""
 
   language_code: Optional[str]
   """Optional. The language code (ISO 639-1) for the speech synthesis."""