humanloop
diff --git a/‎src/humanloop/otel/exporter.py‎
Lines changed: 63 additions & 18 deletions b/‎src/humanloop/otel/exporter.py‎
Lines changed: 63 additions & 18 deletions
@@ -36,8 +36,25 @@
 class HumanloopSpanExporter(SpanExporter):
     """Upload Spans created by SDK decorators to Humanloop.
 
-    Spans not created by Humanloop SDK decorators will be ignored.
+    Spans not created by Humanloop SDK decorators will be dropped.
+
+    Each Humanloop Span contains information about the File to log against and
+    the Log to create. We are using the .log actions that pass the kernel in the
+    request. This allows us to create new Versions if the decorated function
+    is changed.
+
+    The exporter uploads Spans top-to-bottom, where a Span is uploaded only after
+    its parent Span has been uploaded. This is necessary for Flow Traces, where
+    the parent Span is a Flow Log and the children are the Logs in the Trace.
+
+    The exporter keeps an upload queue and only uploads a Span if its direct parent has
+    been uploaded.
     """
+    # NOTE: LLM Instrumentors will only intercept calls to the provider made via the
+    # official libraries e.g. import openai from openai. This is 100% the reason why
+    # prompt call is not intercepted by the Instrumentor. The way to fix this is likely
+    # overriding the hl_client.prompt.call utility. @James I'll do this since it will 
+    # involve looking at the EvaluationContext deep magic.
 
     DEFAULT_NUMBER_THREADS = 4
 
@@ -65,19 +82,20 @@ def __init__(
             for _ in range(worker_threads or self.DEFAULT_NUMBER_THREADS)
         ]
         # Signals threads no more work will arrive and
-        # they should wind down if the queue is empty
+        # they should wind down after they empty the queue
         self._shutdown: bool = False
         for thread in self._threads:
             thread.start()
             logger.debug("Exporter Thread %s started", thread.ident)
         # Flow Log Span ID mapping to children Spans that must be uploaded first
-        self._flow_log_prerequisites: dict[int, set[int]] = {}
+        self._spans_to_complete_flow_trace: dict[int, set[int]] = {}
 
     def export(self, spans: trace.Sequence[ReadableSpan]) -> SpanExportResult:
         def is_evaluated_file(
             span: ReadableSpan,
             evaluation_context: Optional[EvaluationContext],
         ) -> bool:
+            """Check if the Span corresponds to a File evaluated by the run utility."""
             if evaluation_context is None:
                 return False
 
@@ -87,6 +105,8 @@ def is_evaluated_file(
             try:
                 evaluation_context = self._client.evaluation_context_variable.get()
                 if len(spans) > 1:
+                    # Note: the HL logging and run utilities all send a single span;
+                    # export accepts multiple spans only to adhere to the OTEL API
                     raise RuntimeError("HumanloopSpanExporter expected a single span when running an evaluation")
                 if not is_evaluated_file(spans[0], evaluation_context):
                     evaluation_context = None
@@ -98,6 +118,8 @@ def is_evaluated_file(
                     # We pass the EvaluationContext from the eval_run utility thread to
                     # the export thread so the .log action works as expected
                     evaluation_context_copy = None
+                    # Deep magic: the evaluation context is a thread-specific global store of data.
+                    # We need to copy it to the Exporter threads so the .log action works as expected.
                     for context_var, context_var_value in contextvars.copy_context().items():
                         if context_var.name == EVALUATION_CONTEXT_VARIABLE_NAME:
                             evaluation_context_copy = context_var_value
@@ -125,6 +147,18 @@ def is_evaluated_file(
                     spans[0].attributes,
                 )
                 # Mark the EvaluationContext as used
+                # The run utility will set an EvaluationContext per thread.
+                # We set it to None so the run utility is notified that
+                # the Exporter will handle logging (not guaranteed to succeed).
+                # NOTE: This is how we avoid double logging: the run utility
+                # calls the File.callable, then checks if the EvaluationContext
+                # is None. If it is, the callable is not a decorated prompt, nor does
+                # it contain log statements, so run manually creates a Log.
+                # NOTE: This is proof that the OTEL logic runs on the same thread: the run
+                # utility knows the EvaluationContext is None after calling File.callable, so
+                # execution must have run all the way here. Wondering if the async waiting in
+                # HLProcessor breaks this assumption, thus creating the bug. Maybe the run
+                # utility should wait for a bit too before checking
+                # if the EvaluationContext is None.
                 self._client.evaluation_context_variable.set(None)
             return SpanExportResult.SUCCESS
         else:
@@ -175,6 +209,7 @@ def _do_work(self):
                 #   not resetting the EvaluationContext in the scope of the export thread
                 self._client.evaluation_context_variable.set(evaluation_context)
             except EmptyQueue:
+                # Wait for another span to arrive
                 continue
             if span_to_export.parent is None:
                 # Span is not part of a Flow Log
@@ -199,8 +234,13 @@ def _do_work(self):
                 self._upload_queue.put((span_to_export, evaluation_context))
             self._upload_queue.task_done()
 
-    def _mark_span_completed(self, span_id: int) -> None:
-        for flow_log_span_id, flow_children_span_ids in self._flow_log_prerequisites.items():
+    def _mark_span_as_uploaded(self, span_id: int) -> None:
+        """Mark a Span as uploaded for Flow trace completion.
+        
+        If this Span corresponds to the last child in the Flow trace,
+        mark the Flow Log as complete.
+        """        
+        for flow_log_span_id, flow_children_span_ids in self._spans_to_complete_flow_trace.items():
             if span_id in flow_children_span_ids:
                 flow_children_span_ids.remove(span_id)
                 if len(flow_children_span_ids) == 0:
@@ -216,6 +256,7 @@ def _mark_span_completed(self, span_id: int) -> None:
                 break
 
     def _export_span_dispatch(self, span: ReadableSpan) -> None:
+        """Call the appropriate BaseHumanloop.X.log based on the Span type."""
         hl_file = read_from_opentelemetry_span(span, key=HUMANLOOP_FILE_KEY)
         file_type = span._attributes.get(HUMANLOOP_FILE_TYPE_KEY)  # type: ignore
         parent_span_id = span.parent.span_id if span.parent else None
@@ -236,16 +277,16 @@ def _export_span_dispatch(self, span: ReadableSpan) -> None:
         )
 
         if file_type == "prompt":
-            export_func = self._export_prompt
+            export_func = self._export_prompt_span
         elif file_type == "tool":
-            export_func = self._export_tool
+            export_func = self._export_tool_span
         elif file_type == "flow":
-            export_func = self._export_flow
+            export_func = self._export_flow_span
         else:
             raise NotImplementedError(f"Unknown span type: {hl_file}")
         export_func(span=span)
 
-    def _export_prompt(self, span: ReadableSpan) -> None:
+    def _export_prompt_span(self, span: ReadableSpan) -> None:
         file_object: dict[str, Any] = read_from_opentelemetry_span(
             span,
             key=HUMANLOOP_FILE_KEY,
@@ -254,8 +295,8 @@ def _export_prompt(self, span: ReadableSpan) -> None:
             span,
             key=HUMANLOOP_LOG_KEY,
         )
-        # NOTE: Due to OTel conventions, attributes with value of None are removed
-        # If not present, instantiate as empty dictionary
+        # NOTE: Due to OTEL conventions, attributes with a value of None are removed
+        # on write to the Span. If not present, instantiate these as empty
         if "inputs" not in log_object:
             log_object["inputs"] = {}
         if "messages" not in log_object:
@@ -282,9 +323,9 @@ def _export_prompt(self, span: ReadableSpan) -> None:
             self._span_id_to_uploaded_log_id[span.context.span_id] = log_response.id
         except HumanloopApiError:
             self._span_id_to_uploaded_log_id[span.context.span_id] = None
-        self._mark_span_completed(span_id=span.context.span_id)
+        self._mark_span_as_uploaded(span_id=span.context.span_id)
 
-    def _export_tool(self, span: ReadableSpan) -> None:
+    def _export_tool_span(self, span: ReadableSpan) -> None:
         file_object: dict[str, Any] = read_from_opentelemetry_span(
             span,
             key=HUMANLOOP_FILE_KEY,
@@ -301,6 +342,7 @@ def _export_tool(self, span: ReadableSpan) -> None:
         trace_parent_id = self._span_id_to_uploaded_log_id[span_parent_id] if span_parent_id else None
 
         # API expects an empty dictionary if user does not supply attributes
+        # NOTE: see comment in _export_prompt_span about OTEL conventions
         if not tool.get("attributes"):
             tool["attributes"] = {}
         if not tool.get("setup_values"):
@@ -318,9 +360,9 @@ def _export_tool(self, span: ReadableSpan) -> None:
             self._span_id_to_uploaded_log_id[span.context.span_id] = log_response.id
         except HumanloopApiError:
             self._span_id_to_uploaded_log_id[span.context.span_id] = None
-        self._mark_span_completed(span_id=span.context.span_id)
+        self._mark_span_as_uploaded(span_id=span.context.span_id)
 
-    def _export_flow(self, span: ReadableSpan) -> None:
+    def _export_flow_span(self, span: ReadableSpan) -> None:
         file_object: dict[str, Any] = read_from_opentelemetry_span(
             span,
             key=HUMANLOOP_FILE_KEY,
@@ -330,14 +372,17 @@ def _export_flow(self, span: ReadableSpan) -> None:
             key=HUMANLOOP_LOG_KEY,
         )
         # Spans that must be uploaded before the Flow Span is completed
+        # We instantiate the list of prerequisites from the attribute
+        # passed by the Processor. Each uploaded child in the trace
+        # will check if it's the last one and mark the Flow Log as complete
         try:
             prerequisites: list[int] = read_from_opentelemetry_span(  # type: ignore
                 span=span,
                 key=HUMANLOOP_FLOW_PREREQUISITES_KEY,
             )
-            self._flow_log_prerequisites[span.context.span_id] = set(prerequisites)
+            self._spans_to_complete_flow_trace[span.context.span_id] = set(prerequisites)
         except KeyError:
-            self._flow_log_prerequisites[span.context.span_id] = set()
+            self._spans_to_complete_flow_trace[span.context.span_id] = set()
 
         path: str = file_object["path"]
         flow: FlowKernelRequestParams
@@ -362,4 +407,4 @@ def _export_flow(self, span: ReadableSpan) -> None:
         except HumanloopApiError as e:
             logger.error(str(e))
             self._span_id_to_uploaded_log_id[span.context.span_id] = None
-        self._mark_span_completed(span_id=span.context.span_id)
+        self._mark_span_as_uploaded(span_id=span.context.span_id)