Skip to content

Commit b6d699b

Browse files
MarkDaoust authored and copybara-github committed
feat: Add consent_audio and voice_consent_signature
PiperOrigin-RevId: 884712720
1 parent 07ae1b1 commit b6d699b

1 file changed

Lines changed: 71 additions & 10 deletions

File tree

google/genai/types.py

Lines changed: 71 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4901,32 +4901,85 @@ class ToolConfigDict(TypedDict, total=False):
49014901
ToolConfigOrDict = Union[ToolConfig, ToolConfigDict]
49024902

49034903

4904+
class VoiceConsentSignature(_common.BaseModel):
4905+
"""The signature of the voice consent check."""
4906+
4907+
signature: Optional[str] = Field(
4908+
default=None,
4909+
description="""The signature string.
4910+
""",
4911+
)
4912+
4913+
4914+
class VoiceConsentSignatureDict(TypedDict, total=False):
4915+
"""The signature of the voice consent check."""
4916+
4917+
signature: Optional[str]
4918+
"""The signature string.
4919+
"""
4920+
4921+
4922+
VoiceConsentSignatureOrDict = Union[
4923+
VoiceConsentSignature, VoiceConsentSignatureDict
4924+
]
4925+
4926+
49044927
class ReplicatedVoiceConfig(_common.BaseModel):
4905-
"""ReplicatedVoiceConfig is used to configure replicated voice."""
4928+
"""The configuration for the replicated voice to use."""
49064929

49074930
mime_type: Optional[str] = Field(
49084931
default=None,
4909-
description="""The mime type of the replicated voice.
4932+
description="""The mimetype of the voice sample. The only currently supported
4933+
value is `audio/wav`. This represents 16-bit signed little-endian wav
4934+
data, with a 24kHz sampling rate.
49104935
""",
49114936
)
49124937
voice_sample_audio: Optional[bytes] = Field(
49134938
default=None,
4914-
description="""The sample audio of the replicated voice.
4939+
description="""The sample of the custom voice.
49154940
""",
49164941
)
4942+
consent_audio: Optional[bytes] = Field(
4943+
default=None,
4944+
description="""Recorded consent verifying ownership of the voice. This
4945+
represents 16-bit signed little-endian wav data, with a 24kHz sampling
4946+
rate.""",
4947+
)
4948+
voice_consent_signature: Optional[VoiceConsentSignature] = Field(
4949+
default=None,
4950+
description="""Signature of a previously verified consent audio. This should be
4951+
populated with a signature generated by the server for a previous
4952+
request containing the consent_audio field. When provided, the
4953+
signature is verified instead of the consent_audio field to reduce
4954+
latency. Requests will fail if the signature is invalid or expired.""",
4955+
)
49174956

49184957

49194958
class ReplicatedVoiceConfigDict(TypedDict, total=False):
4920-
"""ReplicatedVoiceConfig is used to configure replicated voice."""
4959+
"""The configuration for the replicated voice to use."""
49214960

49224961
mime_type: Optional[str]
4923-
"""The mime type of the replicated voice.
4962+
"""The mimetype of the voice sample. The only currently supported
4963+
value is `audio/wav`. This represents 16-bit signed little-endian wav
4964+
data, with a 24kHz sampling rate.
49244965
"""
49254966

49264967
voice_sample_audio: Optional[bytes]
4927-
"""The sample audio of the replicated voice.
4968+
"""The sample of the custom voice.
49284969
"""
49294970

4971+
consent_audio: Optional[bytes]
4972+
"""Recorded consent verifying ownership of the voice. This
4973+
represents 16-bit signed little-endian wav data, with a 24kHz sampling
4974+
rate."""
4975+
4976+
voice_consent_signature: Optional[VoiceConsentSignatureDict]
4977+
"""Signature of a previously verified consent audio. This should be
4978+
populated with a signature generated by the server for a previous
4979+
request containing the consent_audio field. When provided, the
4980+
signature is verified instead of the consent_audio field to reduce
4981+
latency. Requests will fail if the signature is invalid or expired."""
4982+
49304983

49314984
ReplicatedVoiceConfigOrDict = Union[
49324985
ReplicatedVoiceConfig, ReplicatedVoiceConfigDict
@@ -4952,20 +5005,26 @@ class PrebuiltVoiceConfigDict(TypedDict, total=False):
49525005

49535006

49545007
class VoiceConfig(_common.BaseModel):
5008+
"""The configuration for the voice to use."""
49555009

49565010
replicated_voice_config: Optional[ReplicatedVoiceConfig] = Field(
49575011
default=None,
4958-
description="""If true, the model will use a replicated voice for the response.""",
5012+
description="""The configuration for a replicated voice, which is a clone of a
5013+
user's voice that can be used for speech synthesis. If this is unset, a
5014+
default voice is used.""",
49595015
)
49605016
prebuilt_voice_config: Optional[PrebuiltVoiceConfig] = Field(
49615017
default=None, description="""The configuration for a prebuilt voice."""
49625018
)
49635019

49645020

49655021
class VoiceConfigDict(TypedDict, total=False):
5022+
"""The configuration for the voice to use."""
49665023

49675024
replicated_voice_config: Optional[ReplicatedVoiceConfigDict]
4968-
"""If true, the model will use a replicated voice for the response."""
5025+
"""The configuration for a replicated voice, which is a clone of a
5026+
user's voice that can be used for speech synthesis. If this is unset, a
5027+
default voice is used."""
49695028

49705029
prebuilt_voice_config: Optional[PrebuiltVoiceConfigDict]
49715030
"""The configuration for a prebuilt voice."""
@@ -5022,10 +5081,11 @@ class MultiSpeakerVoiceConfigDict(TypedDict, total=False):
50225081

50235082

50245083
class SpeechConfig(_common.BaseModel):
5084+
"""Config for speech generation and transcription."""
50255085

50265086
voice_config: Optional[VoiceConfig] = Field(
50275087
default=None,
5028-
description="""Configuration for the voice of the response.""",
5088+
description="""The configuration in case of single-voice output.""",
50295089
)
50305090
language_code: Optional[str] = Field(
50315091
default=None,
@@ -5038,9 +5098,10 @@ class SpeechConfig(_common.BaseModel):
50385098

50395099

50405100
class SpeechConfigDict(TypedDict, total=False):
5101+
"""Config for speech generation and transcription."""
50415102

50425103
voice_config: Optional[VoiceConfigDict]
5043-
"""Configuration for the voice of the response."""
5104+
"""The configuration in case of single-voice output."""
50445105

50455106
language_code: Optional[str]
50465107
"""Optional. The language code (ISO 639-1) for the speech synthesis."""

0 commit comments

Comments
 (0)