Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 28 additions & 16 deletions src/integrations/prefect-kubernetes/prefect_kubernetes/observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

events_client: EventsClient | None = None
orchestration_client: PrefectClient | None = None
_startup_event_semaphore: asyncio.Semaphore | None = None


@kopf.on.startup()
Expand All @@ -56,8 +57,12 @@ async def initialize_clients(logger: kopf.Logger, **kwargs: Any):
logger.info("Initializing clients")
global events_client
global orchestration_client
global _startup_event_semaphore
orchestration_client = await get_client().__aenter__()
events_client = await get_events_client().__aenter__()
_startup_event_semaphore = asyncio.Semaphore(
settings.observer.startup_event_concurrency
)
logger.info("Clients successfully initialized")


Expand Down Expand Up @@ -124,26 +129,33 @@ async def _replicate_pod_event( # pyright: ignore[reportUnusedFunction]
if event_type is None:
if orchestration_client is None:
raise RuntimeError("Orchestration client not initialized")

# Use the Kubernetes event timestamp for the filter to avoid "Query time range is too large" error
event_filter = EventFilter(
event=EventNameFilter(name=[f"prefect.kubernetes.pod.{phase.lower()}"]),
resource=EventResourceFilter(
id=[f"prefect.kubernetes.pod.{uid}"],
),
occurred=EventOccurredFilter(
since=(
k8s_created_time
if k8s_created_time
else (datetime.now(timezone.utc) - timedelta(hours=1))
)
),
)
if _startup_event_semaphore is None:
raise RuntimeError("Startup event semaphore not initialized")

# Use semaphore to limit concurrent API calls during startup to prevent
# overwhelming the API server when there are many existing pods/jobs
async with _startup_event_semaphore:
# Use the Kubernetes event timestamp for the filter to avoid "Query time range is too large" error
event_filter = EventFilter(
event=EventNameFilter(name=[f"prefect.kubernetes.pod.{phase.lower()}"]),
resource=EventResourceFilter(
id=[f"prefect.kubernetes.pod.{uid}"],
),
occurred=EventOccurredFilter(
since=(
k8s_created_time
if k8s_created_time
else (datetime.now(timezone.utc) - timedelta(hours=1))
)
),
)

response = await orchestration_client.request(
"POST",
"/events/filter",
json=dict(filter=event_filter.model_dump(exclude_unset=True, mode="json")),
json=dict(
filter=event_filter.model_dump(exclude_unset=True, mode="json")
),
)
# If the event already exists, we don't need to emit a new one.
if response.json()["events"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ class KubernetesObserverSettings(PrefectBaseSettings):
"should be provided in the format `key=value`.",
)

startup_event_concurrency: int = Field(
default=5,
description="Maximum number of concurrent API calls when checking for "
"duplicate events during observer startup. This helps prevent overloading "
"the API server when there are many existing pods/jobs in the cluster.",
)


class KubernetesWorkerSettings(PrefectBaseSettings):
model_config = build_settings_config(("integrations", "kubernetes", "worker"))
Expand Down
Loading
Loading