Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions cmd/jaeger/internal/extension/jaegermcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,25 @@ This approach prevents context-window exhaustion in LLMs and enables more effici

✅ **Phase 1: Foundation (Complete)** - Extension scaffold, lifecycle management, and MCP SDK integration

✅ **Phase 2: Storage Integration (Complete)** - Connection to jaegerquery extension for trace access

🚧 **Phase 3: Advanced Tools (In Progress)** - Critical path analysis

Future phases will add:
- Phase 2: Basic MCP tools (search, span details, errors)
- Phase 3: Advanced tools (topology, critical path)
- Phase 2: Remaining basic MCP tools (search, span details, errors, get_services)
- Phase 3: Remaining advanced tools (topology)
- Phase 4: Documentation and observability

See [ADR-002](../../../../docs/adr/002-mcp-server.md) for full design details.

## Available Tools

### Phase 1
- ✅ `health` - Check server health and status

### Phase 3
- ✅ `get_critical_path` - Identify the sequence of spans forming the critical latency path

## Configuration

```yaml
Expand All @@ -49,11 +61,18 @@ This extension depends on the [jaeger_query](../jaegerquery/) extension to acces

## Development Status

Phase 1 implements:
### Phase 1 (Complete)
- ✅ Extension directory structure
- ✅ Configuration validation
- ✅ Factory implementation
- ✅ Server lifecycle management
- ✅ MCP SDK integration
- ✅ Streamable HTTP transport
- ✅ Basic health tool (placeholder for Phase 2)
- ✅ Basic health tool

### Phase 2 (Partial)
- ✅ Storage integration with jaegerquery extension

### Phase 3 (Partial)
- ✅ Critical path algorithm ported from UI
- ✅ `get_critical_path` tool implementation
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright (c) 2026 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package criticalpath

import (
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"
)

// CPSpan represents a span used for critical path computation.
// It is a simplified copy of the ptrace.Span fields the algorithm needs, so
// that the computation works on its own data and never mutates the original
// trace.
type CPSpan struct {
	SpanID       pcommon.SpanID    // ID of the span this value was created from
	StartTime    uint64            // in microseconds
	Duration     uint64            // in microseconds
	References   []CPSpanReference // CreateCPSpan sets at most one entry: a "CHILD_OF" reference to the parent span
	ChildSpanIDs []pcommon.SpanID  // IDs of the spans whose ParentSpanID is this span's ID
}

// CPSpanReference represents a reference from one span to another span,
// identified by the referenced span's ID and trace ID.
type CPSpanReference struct {
	RefType string          // "CHILD_OF" or "FOLLOWS_FROM"
	SpanID  pcommon.SpanID  // ID of the referenced span
	TraceID pcommon.TraceID // ID of the trace the referenced span belongs to
	Span    *CPSpan         // Populated during sanitization; CreateCPSpan leaves it nil
}

// CreateCPSpan creates a CPSpan from a ptrace.Span.
//
// Start time and duration are converted from nanoseconds to microseconds.
// childSpanIDs is copied into the result, so the caller's slice is not
// aliased. A span with a non-empty ParentSpanID receives a single "CHILD_OF"
// reference to that parent; a span without a parent has no references.
//
// If the span's end timestamp is earlier than its start timestamp (for
// example because of clock skew or corrupt data) the duration is reported as
// zero instead of letting the unsigned subtraction wrap around to an
// enormous value.
func CreateCPSpan(span ptrace.Span, childSpanIDs []pcommon.SpanID) CPSpan {
	startNanos := uint64(span.StartTimestamp())
	endNanos := uint64(span.EndTimestamp())

	// Timestamps are unsigned, so end-start must be guarded against underflow.
	var durationNanos uint64
	if endNanos > startNanos {
		durationNanos = endNanos - startNanos
	}

	cpSpan := CPSpan{
		SpanID:       span.SpanID(),
		StartTime:    startNanos / 1000, // Convert nanoseconds to microseconds
		Duration:     durationNanos / 1000,
		ChildSpanIDs: make([]pcommon.SpanID, len(childSpanIDs)),
	}

	// Copy child span IDs so the result does not share the caller's backing array.
	copy(cpSpan.ChildSpanIDs, childSpanIDs)

	// In OTLP the parent relationship is carried by ParentSpanID rather than
	// by span links, so it is translated here into an explicit CHILD_OF
	// reference.
	if !span.ParentSpanID().IsEmpty() {
		cpSpan.References = []CPSpanReference{
			{
				RefType: "CHILD_OF",
				SpanID:  span.ParentSpanID(),
				TraceID: span.TraceID(),
			},
		}
	}

	return cpSpan
}

// CreateCPSpanMap converts every span in traces into a CPSpan and returns
// them keyed by span ID.
//
// The trace is traversed twice: the first traversal groups span IDs under
// their parent's ID, and the second builds each CPSpan with the list of
// children collected for it.
func CreateCPSpanMap(traces ptrace.Traces) map[pcommon.SpanID]CPSpan {
	// walk calls visit once for every span, in resource/scope/span order.
	walk := func(visit func(ptrace.Span)) {
		resources := traces.ResourceSpans()
		for r := 0; r < resources.Len(); r++ {
			scopes := resources.At(r).ScopeSpans()
			for s := 0; s < scopes.Len(); s++ {
				spans := scopes.At(s).Spans()
				for n := 0; n < spans.Len(); n++ {
					visit(spans.At(n))
				}
			}
		}
	}

	// Pass one: index children by parent ID.
	childrenByParent := make(map[pcommon.SpanID][]pcommon.SpanID)
	walk(func(span ptrace.Span) {
		parentID := span.ParentSpanID()
		if parentID.IsEmpty() {
			return
		}
		childrenByParent[parentID] = append(childrenByParent[parentID], span.SpanID())
	})

	// Pass two: build the CPSpan values now that all children are known.
	result := make(map[pcommon.SpanID]CPSpan)
	walk(func(span ptrace.Span) {
		id := span.SpanID()
		result[id] = CreateCPSpan(span, childrenByParent[id])
	})

	return result
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright (c) 2026 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package criticalpath

import (
"errors"

"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"
)

// Section represents a section of the critical path: a time interval,
// attributed to a single span, that lies on the critical path of the trace.
type Section struct {
	SpanID       string `json:"span_id"`       // string form of the span's ID
	SectionStart uint64 `json:"section_start"` // in microseconds
	SectionEnd   uint64 `json:"section_end"`   // in microseconds
}

// computeCriticalPath appends the critical path sections reachable from
// spanID to criticalPath and returns the extended slice.
//
// Starting at the given span, the algorithm looks for its last finishing
// child (LFC). When one exists, the part of the span after the LFC ends is on
// the critical path and the walk descends into the LFC. When none exists, the
// span itself (from its start up to the current end bound) is on the critical
// path and the walk returns to the parent, passing the span's start time so
// that the parent can pick the child that finished just before it.
//
// Parameters:
//   - spanMap: spans keyed by span ID
//   - spanID: ID of the span to process; an unknown ID leaves criticalPath unchanged
//   - criticalPath: sections accumulated so far
//   - returningChildStartTime: nil when descending; when walking back up from a
//     child it holds that child's start time and bounds the end of the section
//
// Example:
//
//	|-------------spanA--------------|
//	   |--spanB--|    |--spanC--|
//
// The LFC of spanA is spanC. spanC has no LFC, so the walk returns to spanA
// with returningChildStartTime set to spanC's start time, which is then used
// to select spanB as the child that finished immediately before spanC began.
func computeCriticalPath(
	spanMap map[pcommon.SpanID]CPSpan,
	spanID pcommon.SpanID,
	criticalPath []Section,
	returningChildStartTime *uint64,
) []Section {
	current, known := spanMap[spanID]
	if !known {
		return criticalPath
	}

	lfc := findLastFinishingChildSpan(spanMap, current, returningChildStartTime)

	// The section ends at the span's own end, unless we are walking back up
	// from a child, in which case it ends where that child started.
	sectionEnd := current.StartTime + current.Duration
	if returningChildStartTime != nil {
		sectionEnd = *returningChildStartTime
	}

	if lfc == nil {
		// No LFC: everything from the span's start up to sectionEnd is critical.
		if current.StartTime != sectionEnd {
			criticalPath = append(criticalPath, Section{
				SpanID:       current.SpanID.String(),
				SectionStart: current.StartTime,
				SectionEnd:   sectionEnd,
			})
		}

		// Walk back up one level, handing our start time to the parent.
		if len(current.References) == 0 {
			return criticalPath
		}
		return computeCriticalPath(spanMap, current.References[0].SpanID, criticalPath, &current.StartTime)
	}

	// With an LFC, only the tail after the child finishes belongs to this span.
	lfcEnd := lfc.StartTime + lfc.Duration
	if lfcEnd != sectionEnd {
		criticalPath = append(criticalPath, Section{
			SpanID:       current.SpanID.String(),
			SectionStart: lfcEnd,
			SectionEnd:   sectionEnd,
		})
	}

	// Focus shifts to the last finishing child.
	return computeCriticalPath(spanMap, lfc.SpanID, criticalPath, nil)
}

// ComputeCriticalPath computes the critical path for a given trace.
//
// The root is the first span encountered that has no parent span ID. The
// trace is copied into CPSpan values, restricted and sanitized by
// getChildOfSpans and removeOverflowingChildren, and then walked from the
// root.
//
// It returns an error when the trace has no root span, when it contains no
// spans, when the computation yields no sections, or when the computation
// panics. The results are named so that the deferred recover handler can turn
// a panic into a returned error; with unnamed results a recovered panic would
// make the function return (nil, nil).
func ComputeCriticalPath(traces ptrace.Traces) (sections []Section, err error) {
	// Find the root span (the one with no parent).
	var rootSpanID pcommon.SpanID
	found := false

search:
	for i := 0; i < traces.ResourceSpans().Len(); i++ {
		rs := traces.ResourceSpans().At(i)
		for j := 0; j < rs.ScopeSpans().Len(); j++ {
			ss := rs.ScopeSpans().At(j)
			for k := 0; k < ss.Spans().Len(); k++ {
				span := ss.Spans().At(k)
				if span.ParentSpanID().IsEmpty() {
					rootSpanID = span.SpanID()
					found = true
					break search
				}
			}
		}
	}

	if !found {
		return nil, errors.New("no root span found in trace")
	}

	// Create a map of CPSpan objects to avoid modifying the original trace.
	spanMap := CreateCPSpanMap(traces)
	if len(spanMap) == 0 {
		return nil, errors.New("empty trace")
	}

	// A panic inside the algorithm must surface as an error, never as a
	// silent (nil, nil) result.
	defer func() {
		if r := recover(); r != nil {
			sections = nil
			err = errors.New("error while computing critical path for trace")
		}
	}()

	refinedSpanMap := getChildOfSpans(spanMap)
	sanitizedSpanMap := removeOverflowingChildren(refinedSpanMap)
	sections = computeCriticalPath(sanitizedSpanMap, rootSpanID, nil, nil)

	if sections == nil {
		return nil, errors.New("error while computing critical path for trace")
	}

	return sections, nil
}
Loading
Loading