Skip to content

Commit a174938

Browse files
authored
Fix telemetry to work on reinstantiating new lib cli (ogx-ai#761)
# What does this PR do?

Since we maintain global state in our telemetry pipeline, reinstantiating the lib cli causes us to add duplicate span processors, which makes sqlite lock up because of constraint violations, since we now have two span processors writing to sqlite. This PR changes the telemetry adapter for otel to instantiate the provider only once and to add the span processors only once. It also fixes an issue in `llama stack build`.

## Test Plan

Tested with the notebook at https://colab.research.google.com/drive/1ck7hXQxRl6UvT-ijNRZ-gMZxH1G3cN2d#scrollTo=9496f75c
1 parent 194d12b commit a174938

3 files changed

Lines changed: 30 additions & 31 deletions

File tree

llama_stack/cli/stack/build.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,20 @@
44
# This source code is licensed under the terms described in the LICENSE file in
55
# the root directory of this source tree.
66
import argparse
7-
87
import importlib.resources
9-
108
import os
119
import shutil
1210
from functools import lru_cache
1311
from pathlib import Path
1412
from typing import List, Optional
1513

1614
from llama_stack.cli.subcommand import Subcommand
17-
1815
from llama_stack.distribution.datatypes import (
1916
BuildConfig,
2017
DistributionSpec,
2118
Provider,
2219
StackRunConfig,
2320
)
24-
2521
from llama_stack.distribution.distribution import get_provider_registry
2622
from llama_stack.distribution.resolver import InvalidProviderError
2723
from llama_stack.distribution.utils.dynamic import instantiate_class_type
@@ -296,6 +292,7 @@ def _run_stack_build_command_from_build_config(
296292
/ f"templates/{template_name}/run.yaml"
297293
)
298294
with importlib.resources.as_file(template_path) as path:
295+
run_config_file = build_dir / f"{build_config.name}-run.yaml"
299296
shutil.copy(path, run_config_file)
300297
# Find all ${env.VARIABLE} patterns
301298
cprint("Build Successful!", color="green")

llama_stack/providers/inline/telemetry/meta_reference/telemetry.py

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,10 @@
3030
Trace,
3131
UnstructuredLogEvent,
3232
)
33-
3433
from llama_stack.distribution.datatypes import Api
35-
3634
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
3735
ConsoleSpanProcessor,
3836
)
39-
4037
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
4138
SQLiteSpanProcessor,
4239
)
@@ -52,6 +49,7 @@
5249
"up_down_counters": {},
5350
}
5451
_global_lock = threading.Lock()
52+
_TRACER_PROVIDER = None
5553

5654

5755
def string_to_trace_id(s: str) -> int:
@@ -80,31 +78,34 @@ def __init__(self, config: TelemetryConfig, deps: Dict[str, Any]) -> None:
8078
}
8179
)
8280

83-
provider = TracerProvider(resource=resource)
84-
trace.set_tracer_provider(provider)
85-
if TelemetrySink.OTEL in self.config.sinks:
86-
otlp_exporter = OTLPSpanExporter(
87-
endpoint=self.config.otel_endpoint,
88-
)
89-
span_processor = BatchSpanProcessor(otlp_exporter)
90-
trace.get_tracer_provider().add_span_processor(span_processor)
91-
metric_reader = PeriodicExportingMetricReader(
92-
OTLPMetricExporter(
81+
global _TRACER_PROVIDER
82+
if _TRACER_PROVIDER is None:
83+
provider = TracerProvider(resource=resource)
84+
trace.set_tracer_provider(provider)
85+
_TRACER_PROVIDER = provider
86+
if TelemetrySink.OTEL in self.config.sinks:
87+
otlp_exporter = OTLPSpanExporter(
9388
endpoint=self.config.otel_endpoint,
9489
)
95-
)
96-
metric_provider = MeterProvider(
97-
resource=resource, metric_readers=[metric_reader]
98-
)
99-
metrics.set_meter_provider(metric_provider)
100-
self.meter = metrics.get_meter(__name__)
101-
if TelemetrySink.SQLITE in self.config.sinks:
102-
trace.get_tracer_provider().add_span_processor(
103-
SQLiteSpanProcessor(self.config.sqlite_db_path)
104-
)
105-
self.trace_store = SQLiteTraceStore(self.config.sqlite_db_path)
106-
if TelemetrySink.CONSOLE in self.config.sinks:
107-
trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor())
90+
span_processor = BatchSpanProcessor(otlp_exporter)
91+
trace.get_tracer_provider().add_span_processor(span_processor)
92+
metric_reader = PeriodicExportingMetricReader(
93+
OTLPMetricExporter(
94+
endpoint=self.config.otel_endpoint,
95+
)
96+
)
97+
metric_provider = MeterProvider(
98+
resource=resource, metric_readers=[metric_reader]
99+
)
100+
metrics.set_meter_provider(metric_provider)
101+
self.meter = metrics.get_meter(__name__)
102+
if TelemetrySink.SQLITE in self.config.sinks:
103+
trace.get_tracer_provider().add_span_processor(
104+
SQLiteSpanProcessor(self.config.sqlite_db_path)
105+
)
106+
self.trace_store = SQLiteTraceStore(self.config.sqlite_db_path)
107+
if TelemetrySink.CONSOLE in self.config.sinks:
108+
trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor())
108109
self._lock = _global_lock
109110

110111
async def initialize(self) -> None:

llama_stack/providers/utils/telemetry/tracing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,8 @@ def get_current_span(self):
127127
def setup_logger(api: Telemetry, level: int = logging.INFO):
128128
global BACKGROUND_LOGGER
129129

130-
BACKGROUND_LOGGER = BackgroundLogger(api)
130+
if BACKGROUND_LOGGER is None:
131+
BACKGROUND_LOGGER = BackgroundLogger(api)
131132
logger = logging.getLogger()
132133
logger.setLevel(level)
133134
logger.addHandler(TelemetryHandler())

0 commit comments

Comments
 (0)