32 changes: 28 additions & 4 deletions extensions/copilot/src/platform/endpoint/node/automodeService.ts
@@ -299,7 +299,27 @@ export class AutomodeService extends Disposable implements IAutomodeService {
 			turn_number: (entry?.turnCount ?? 0) + 1,
 		};
 		const routingMethod = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.AutoModeRoutingMethod, this._expService) || undefined;
-		const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, token.available_models, undefined, contextSignals, conversationId, chatRequest?.id, routingMethod, hasImage(chatRequest));
+
+		// Filter available_models to only those the client can actually serve.
+		// The AutoModels API and Models API are separate CAPI calls that can be
+		// out of sync (e.g. a new model appears in available_models before the
+		// Models API returns it). Sending unresolvable models to the router
+		// causes it to recommend models the client must silently discard.
+		const knownModelIds = new Set(knownEndpoints.map(e => e.model));
+		const routableModels: string[] = [];
+		const droppedModels: string[] = [];
+		for (const m of token.available_models) {
+			(knownModelIds.has(m) ? routableModels : droppedModels).push(m);
+		}
+		if (!routableModels.length) {
+			this._logService.warn(`[AutomodeService] No available_models matched knownEndpoints. available_models=[${token.available_models.join(', ')}], knownEndpoints=[${knownEndpoints.map(e => e.model).join(', ')}]`);
+			return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
+		}
+		if (droppedModels.length) {
+			this._logService.info(`[AutomodeService] Filtered ${droppedModels.length} unresolvable model(s) before routing: [${droppedModels.join(', ')}]`);
+		}
+
+		const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, routableModels, undefined, contextSignals, conversationId, chatRequest?.id, routingMethod, hasImage(chatRequest));
 
 		if (result.fallback) {
 			this._logService.info(`[AutomodeService] Router signaled fallback: ${result.fallback_reason ?? 'unknown'}, routing_method=${result.routing_method ?? 'n/a'}`);
@@ -310,11 +330,15 @@ export class AutomodeService extends Disposable implements IAutomodeService {
 			return { lastRoutedPrompt: prompt, fallbackReason: 'emptyCandidateList' };
 		}
 
-		// Prefer same-provider model, then fall back to the router's top candidate
-		const selectedModel = (entry?.endpoint && this._findSameProviderModel(entry.endpoint.modelProvider, result.candidate_models, knownEndpoints))
-			?? knownEndpoints.find(e => e.model === result.candidate_models[0]);
+		// Trust the router's ranked candidate list directly.
+		// Same-provider preference is intentionally NOT applied here — the router
+		// already accounts for available models and re-runs after /compact, so
+		// overriding its pick with same-provider negates cost-saving decisions.
+		// Same-provider is still used in _selectDefaultModel (the non-router fallback).
+		const selectedModel = this._findFirstAvailableModel(result.candidate_models, knownEndpoints);
 
 		if (!selectedModel) {
+			this._logService.warn(`[AutomodeService] None of the router's candidate_models matched knownEndpoints: [${result.candidate_models.join(', ')}]`);
 			return { lastRoutedPrompt: prompt, fallbackReason: 'noMatchingEndpoint' };
 		}
 
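The new selection path calls _findFirstAvailableModel, which this diff references but does not show. Below is a minimal sketch of such a helper, assuming it simply walks the router's ranked candidate list in order and returns the first model that resolves to a known endpoint; the method name and the e.model lookup come from the diff, while the body and the IChatEndpoint typing here are assumptions, not the PR's actual implementation:

	// Hypothetical sketch of the helper referenced above (not part of this diff).
	private _findFirstAvailableModel(candidateModels: string[], knownEndpoints: IChatEndpoint[]): IChatEndpoint | undefined {
		// Index endpoints by model id for O(1) lookups.
		const byModel = new Map(knownEndpoints.map(e => [e.model, e] as const));
		// Preserve the router's ranking: take the first candidate the client can
		// actually resolve, skipping unknown models instead of giving up early.
		for (const candidate of candidateModels) {
			const endpoint = byModel.get(candidate);
			if (endpoint) {
				return endpoint;
			}
		}
		return undefined;
	}

This behavior matches the test below ('should iterate all candidate_models when first candidate has no endpoint'): 'unknown-new-model' is skipped and 'gpt-4.1' is selected.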
@@ -1140,4 +1140,136 @@ describe('AutomodeService', () => {
 			expect(selectionEvent).toBeUndefined();
 		});
 	});
+
+	describe('available_models / knownEndpoints sync', () => {
+		function mockRouterResponse(available_models: string[], routerResult: { chosen_model: string; candidate_models: string[] }, session_token = 'test-token'): void {
+			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
+				if (opts?.type === RequestType.ModelRouter) {
+					return Promise.resolve({
+						ok: true,
+						status: 200,
+						headers: createMockHeaders(),
+						text: vi.fn().mockResolvedValue(JSON.stringify({
+							predicted_label: 'no_reasoning',
+							confidence: 0.96,
+							latency_ms: 23,
+							chosen_model: routerResult.chosen_model,
+							candidate_models: routerResult.candidate_models,
+							scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
+							sticky_override: false
+						}))
+					});
+				}
+				return Promise.resolve(
+					makeMockTokenResponse({
+						available_models,
+						expires_at: Math.floor(Date.now() / 1000) + 3600,
+						session_token,
+					})
+				);
+			});
+		}
+
+		it('should filter out available_models that have no matching knownEndpoint before sending to router', async () => {
+			enableRouter();
+			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
+			let capturedBody: string | undefined;
+			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((req: any, opts: any) => {
+				if (opts?.type === RequestType.ModelRouter) {
+					capturedBody = req.body;
+					return Promise.resolve({
+						ok: true,
+						status: 200,
+						headers: createMockHeaders(),
+						text: vi.fn().mockResolvedValue(JSON.stringify({
+							predicted_label: 'no_reasoning',
+							confidence: 0.96,
+							latency_ms: 23,
+							chosen_model: 'gpt-4o',
+							candidate_models: ['gpt-4o'],
+							scores: { needs_reasoning: 0.04, no_reasoning: 0.96 },
+							sticky_override: false
+						}))
+					});
+				}
+				return Promise.resolve(
+					makeMockTokenResponse({
+						available_models: ['claude-haiku-4.5', 'gpt-4o', 'claude-sonnet-4.6'],
+						expires_at: Math.floor(Date.now() / 1000) + 3600,
+						session_token: 'test-token',
+					})
+				);
+			});
+
+			automodeService = createService();
+			const chatRequest: Partial<ChatRequest> = {
+				location: ChatLocation.Panel,
+				prompt: 'what day is today',
+				sessionId: 'session-filter-models'
+			};
+
+			await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint]);
+
+			expect(capturedBody).toBeDefined();
+			const parsed = JSON.parse(capturedBody!);
+			expect(parsed.available_models).toEqual(['gpt-4o']);
+			expect(parsed.available_models).not.toContain('claude-haiku-4.5');
+			expect(parsed.available_models).not.toContain('claude-sonnet-4.6');
+			expect(mockLogService.info).toHaveBeenCalledWith(
+				expect.stringContaining('Filtered 2 unresolvable model(s)')
+			);
+		});
+
+		it('should iterate all candidate_models when first candidate has no endpoint', async () => {
+			enableRouter();
+			const gpt41Endpoint = createEndpoint('gpt-4.1', 'OpenAI');
+
+			mockRouterResponse(
+				['gpt-4.1'],
+				{ chosen_model: 'gpt-4.1', candidate_models: ['unknown-new-model', 'gpt-4.1'] }
+			);
+
+			automodeService = createService();
+			const chatRequest: Partial<ChatRequest> = {
+				location: ChatLocation.Panel,
+				prompt: 'what day is today',
+				sessionId: 'session-iterate-candidates'
+			};
+
+			const result = await automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt41Endpoint]);
+			expect(result.model).toBe('gpt-4.1');
+		});
+
+		it('should throw when all available_models are unknown to knownEndpoints', async () => {
+			enableRouter();
+			const gpt4oEndpoint = createEndpoint('gpt-4o', 'OpenAI');
+
+			(mockCAPIClientService.makeRequest as ReturnType<typeof vi.fn>).mockImplementation((_body: any, opts: any) => {
+				if (opts?.type === RequestType.ModelRouter) {
+					throw new Error('Router should not be called when no models are routable');
+				}
+				return Promise.resolve(
+					makeMockTokenResponse({
+						available_models: ['unknown-model-a', 'unknown-model-b'],
+						expires_at: Math.floor(Date.now() / 1000) + 3600,
+						session_token: 'test-token',
+					})
+				);
+			});
+
+			automodeService = createService();
+			const chatRequest: Partial<ChatRequest> = {
+				location: ChatLocation.Panel,
+				prompt: 'test prompt',
+				sessionId: 'session-all-unknown'
+			};
+
+			await expect(
+				automodeService.resolveAutoModeEndpoint(chatRequest as ChatRequest, [gpt4oEndpoint])
+			).rejects.toThrow('no available model found');
+			expect(mockLogService.warn).toHaveBeenCalledWith(
+				expect.stringContaining('No available_models matched knownEndpoints')
+			);
+		});
+	});
 });
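The first test's expectations follow directly from the one-pass partition added in automodeService.ts. A standalone illustration in plain TypeScript, with the inputs copied from that test, shows the expected split:

	// Standalone illustration of the partition; knownModelIds mirrors the
	// single knownEndpoint ('gpt-4o') the test registers.
	const knownModelIds = new Set(['gpt-4o']);
	const availableModels = ['claude-haiku-4.5', 'gpt-4o', 'claude-sonnet-4.6'];

	const routableModels: string[] = [];
	const droppedModels: string[] = [];
	for (const m of availableModels) {
		// Each model lands in exactly one bucket, preserving input order.
		(knownModelIds.has(m) ? routableModels : droppedModels).push(m);
	}

	// routableModels: ['gpt-4o'] (sent to the router)
	// droppedModels: ['claude-haiku-4.5', 'claude-sonnet-4.6'] (logged, then ignored)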