-
Notifications
You must be signed in to change notification settings - Fork 124
Expand file tree
/
Copy pathgemini_live_vision.py
More file actions
53 lines (46 loc) · 1.42 KB
/
gemini_live_vision.py
File metadata and controls
53 lines (46 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""
---
title: Gemini Realtime Agent with Live Vision
category: realtime
tags: [gemini_realtime, live_vision]
difficulty: beginner
description: Minimal Gemini Realtime model agent setup with live vision capabilities
demonstrates:
- Gemini Realtime model basic usage
- Live vision capabilities
- Session-based generation
- VAD with Silero
---
"""
from dotenv import load_dotenv
from pathlib import Path
from livekit import agents
from livekit.agents import RoomInputOptions
from livekit.agents.voice import AgentSession, Agent
from livekit.plugins import (
silero,
google
)
# Load environment variables from the `.env` file located in the parent of the
# directory that contains this script (two `.parent` hops up from `__file__`).
load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')
class Assistant(Agent):
    """Agent subclass that hands a fixed instruction prompt to the base `Agent`."""

    def __init__(self) -> None:
        """Initialise the base `Agent` with this example's instruction string."""
        prompt = "You are a helpful voice AI assistant that can see the world around you."
        super().__init__(instructions=prompt)
async def entrypoint(ctx: agents.JobContext):
    """Build an agent session, start it in the job's room, and request a first reply.

    The session is configured with a Google realtime model as its LLM and a
    Silero VAD instance. It is started in ``ctx.room`` with an ``Assistant``
    agent and room input options that set ``video_enabled=True``. Once the
    session has started, a reply is generated from a fixed instruction.

    Args:
        ctx: The job context; only its ``room`` attribute is read here.
    """
    # Model first, then VAD: same construction order as passing them inline.
    realtime_llm = google.realtime.RealtimeModel(
        model="gemini-2.5-flash-native-audio-preview-09-2025",
        voice="Puck",
        temperature=0.8,
    )
    voice_detector = silero.VAD.load()
    session = AgentSession(llm=realtime_llm, vad=voice_detector)

    # Gather the start arguments in the order they were originally evaluated.
    room = ctx.room
    assistant = Assistant()
    input_options = RoomInputOptions(video_enabled=True)
    await session.start(
        room=room,
        agent=assistant,
        room_input_options=input_options,
    )

    # Have the agent speak first rather than waiting for user input.
    await session.generate_reply(instructions="Start by offering assistance")
if __name__ == "__main__":
    # When run as a script, hand `entrypoint` to the agents CLI runner.
    worker_options = agents.WorkerOptions(entrypoint_fnc=entrypoint)
    agents.cli.run_app(worker_options)