@@ -4901,32 +4901,85 @@ class ToolConfigDict(TypedDict, total=False):
49014901ToolConfigOrDict = Union[ToolConfig, ToolConfigDict]
49024902
49034903
class VoiceConsentSignature(_common.BaseModel):
    """The signature of the voice consent check.

    Attributes:
        signature: The signature string. Optional; defaults to ``None``.
    """

    signature: Optional[str] = Field(
        default=None,
        description="""The signature string.
        """,
    )
4912+
4913+
class VoiceConsentSignatureDict(TypedDict, total=False):
    """The signature of the voice consent check.

    Dictionary counterpart of ``VoiceConsentSignature``. Declared with
    ``total=False``, so every key may be omitted.
    """

    signature: Optional[str]
    """The signature string.
    """
4920+
4921+
# Accepts either the pydantic model or its TypedDict counterpart.
VoiceConsentSignatureOrDict = Union[
    VoiceConsentSignature, VoiceConsentSignatureDict
]
4925+
4926+
class ReplicatedVoiceConfig(_common.BaseModel):
    """The configuration for the replicated voice to use.

    Every field is optional and defaults to ``None``.

    Attributes:
        mime_type: Mimetype of the voice sample; ``audio/wav`` is the only
            value currently documented as supported.
        voice_sample_audio: Raw bytes of the custom voice sample.
        consent_audio: Raw bytes of the recorded consent verifying ownership
            of the voice.
        voice_consent_signature: Server-generated signature of a previously
            verified consent audio, checked instead of ``consent_audio``
            when provided.
    """

    mime_type: Optional[str] = Field(
        default=None,
        description="""The mimetype of the voice sample. The only currently supported
        value is `audio/wav`. This represents 16-bit signed little-endian wav
        data, with a 24kHz sampling rate.
        """,
    )
    voice_sample_audio: Optional[bytes] = Field(
        default=None,
        description="""The sample of the custom voice.
        """,
    )
    consent_audio: Optional[bytes] = Field(
        default=None,
        description="""Recorded consent verifying ownership of the voice. This
        represents 16-bit signed little-endian wav data, with a 24kHz sampling
        rate.""",
    )
    voice_consent_signature: Optional[VoiceConsentSignature] = Field(
        default=None,
        description="""Signature of a previously verified consent audio. This should be
        populated with a signature generated by the server for a previous
        request containing the consent_audio field. When provided, the
        signature is verified instead of the consent_audio field to reduce
        latency. Requests will fail if the signature is invalid or expired.""",
    )
49174956
49184957
class ReplicatedVoiceConfigDict(TypedDict, total=False):
    """The configuration for the replicated voice to use.

    Dictionary counterpart of ``ReplicatedVoiceConfig``. Declared with
    ``total=False``, so every key may be omitted.
    """

    mime_type: Optional[str]
    """The mimetype of the voice sample. The only currently supported
    value is `audio/wav`. This represents 16-bit signed little-endian wav
    data, with a 24kHz sampling rate.
    """

    voice_sample_audio: Optional[bytes]
    """The sample of the custom voice.
    """

    consent_audio: Optional[bytes]
    """Recorded consent verifying ownership of the voice. This
    represents 16-bit signed little-endian wav data, with a 24kHz sampling
    rate."""

    voice_consent_signature: Optional[VoiceConsentSignatureDict]
    """Signature of a previously verified consent audio. This should be
    populated with a signature generated by the server for a previous
    request containing the consent_audio field. When provided, the
    signature is verified instead of the consent_audio field to reduce
    latency. Requests will fail if the signature is invalid or expired."""
4982+
49304983
49314984ReplicatedVoiceConfigOrDict = Union[
49324985 ReplicatedVoiceConfig, ReplicatedVoiceConfigDict
@@ -4952,20 +5005,26 @@ class PrebuiltVoiceConfigDict(TypedDict, total=False):
49525005
49535006
class VoiceConfig(_common.BaseModel):
    """The configuration for the voice to use.

    Both fields are optional and default to ``None``.

    Attributes:
        replicated_voice_config: Configuration for a replicated voice, i.e. a
            clone of a user's voice usable for speech synthesis.
        prebuilt_voice_config: Configuration for a prebuilt voice.
    """

    replicated_voice_config: Optional[ReplicatedVoiceConfig] = Field(
        default=None,
        description="""The configuration for a replicated voice, which is a clone of a
        user's voice that can be used for speech synthesis. If this is unset, a
        default voice is used.""",
    )
    prebuilt_voice_config: Optional[PrebuiltVoiceConfig] = Field(
        default=None, description="""The configuration for a prebuilt voice."""
    )
49635019
49645020
49655021class VoiceConfigDict(TypedDict, total=False):
5022+ """The configuration for the voice to use."""
49665023
49675024 replicated_voice_config: Optional[ReplicatedVoiceConfigDict]
4968- """If true, the model will use a replicated voice for the response."""
5025+ """The configuration for a replicated voice, which is a clone of a
5026+ user's voice that can be used for speech synthesis. If this is unset, a
5027+ default voice is used."""
49695028
49705029 prebuilt_voice_config: Optional[PrebuiltVoiceConfigDict]
49715030 """The configuration for a prebuilt voice."""
@@ -5022,10 +5081,11 @@ class MultiSpeakerVoiceConfigDict(TypedDict, total=False):
50225081
50235082
50245083class SpeechConfig(_common.BaseModel):
5084+ """Config for speech generation and transcription."""
50255085
50265086 voice_config: Optional[VoiceConfig] = Field(
50275087 default=None,
5028- description="""Configuration for the voice of the response .""",
5088+ description="""The configuration in case of single-voice output .""",
50295089 )
50305090 language_code: Optional[str] = Field(
50315091 default=None,
@@ -5038,9 +5098,10 @@ class SpeechConfig(_common.BaseModel):
50385098
50395099
50405100class SpeechConfigDict(TypedDict, total=False):
5101+ """Config for speech generation and transcription."""
50415102
50425103 voice_config: Optional[VoiceConfigDict]
5043- """Configuration for the voice of the response ."""
5104+ """The configuration in case of single-voice output ."""
50445105
50455106 language_code: Optional[str]
50465107 """Optional. The language code (ISO 639-1) for the speech synthesis."""
0 commit comments