Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,14 @@ interface AXNodeTree {
parent: AXNodeTree | null;
}

function createNodeTree(nodes: AXNode[]): AXNodeTree | null {
/**
* Creates a forest of node trees from the given AXNodes.
* When nodes come from multiple frames (e.g., main frame + iframes),
* each frame has its own RootWebArea, resulting in multiple trees.
*/
function createNodeTrees(nodes: AXNode[]): AXNodeTree[] {
if (nodes.length === 0) {
return null;
return [];
}

// Create a map of node IDs to their corresponding nodes for quick lookup
Expand Down Expand Up @@ -141,14 +146,16 @@ function createNodeTree(nodes: AXNode[]): AXNodeTree | null {
}
}

// Find the root node (a node without a parent)
// Find all root nodes (nodes without a parent)
// When nodes come from multiple frames, each frame has its own root
const roots: AXNodeTree[] = [];
for (const node of nodeMap.values()) {
if (!node.parent) {
return node;
roots.push(node);
}
}

return null;
return roots;
}

/**
Expand All @@ -159,23 +166,38 @@ const LINE_MAX_LENGTH = 80;

/**
* Converts an accessibility tree represented by AXNode objects into a markdown string.
* Handles multiple root nodes (e.g., from main frame + iframes) by processing each tree
* and combining the results.
*
* @param uri The URI of the document
* @param axNodes The array of AXNode objects representing the accessibility tree
* @returns A markdown representation of the accessibility tree
*/
export function convertAXTreeToMarkdown(uri: URI, axNodes: AXNode[]): string {
const tree = createNodeTree(axNodes);
if (!tree) {
const trees = createNodeTrees(axNodes);
if (trees.length === 0) {
return ''; // Return empty string for empty tree
}

// Process tree to extract main content and navigation links
const mainContent = extractMainContent(uri, tree);
const navLinks = collectNavigationLinks(tree);
// Process each tree and collect main content and navigation links
const allMainContent: string[] = [];
const allNavLinks: string[] = [];

for (const tree of trees) {
const mainContent = extractMainContent(uri, tree);
const navLinks = collectNavigationLinks(tree);

if (mainContent.trim().length > 0) {
allMainContent.push(mainContent);
}
allNavLinks.push(...navLinks);
}

// Combine all main content from all trees
const combinedMainContent = allMainContent.join('\n\n');

// Combine main content and navigation links
return mainContent + (navLinks.length > 0 ? '\n\n## Additional Links\n' + navLinks.join('\n') : '');
return combinedMainContent + (allNavLinks.length > 0 ? '\n\n## Additional Links\n' + allNavLinks.join('\n') : '');
}

function extractMainContent(uri: URI, tree: AXNodeTree): string {
Expand Down
54 changes: 48 additions & 6 deletions src/vs/platform/webContentExtractor/electron-main/webPageLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import type { BeforeSendResponse, BrowserWindow, BrowserWindowConstructorOptions, Event, OnBeforeSendHeadersListenerDetails } from 'electron';
import { Queue, raceTimeout, TimeoutTimer } from '../../../base/common/async.js';
import { CancellationTokenSource } from '../../../base/common/cancellation.js';
import { CancellationToken, CancellationTokenSource } from '../../../base/common/cancellation.js';
import { createSingleCallFunction } from '../../../base/common/functional.js';
import { Disposable, toDisposable } from '../../../base/common/lifecycle.js';
import { URI } from '../../../base/common/uri.js';
Expand All @@ -21,6 +21,17 @@ type NetworkRequestEventParams = Readonly<{
type?: string;
}>;

/**
 * Read-only description of a single frame, as carried in the `frame` field
 * of a `FrameTreeNode`.
 */
type FrameInfo = Readonly<{
// Frame identifier; passed as `frameId` when requesting that frame's accessibility tree.
id: string;
// NOTE(review): presumably the URL of the document loaded in the frame — not read by the visible code; confirm against the protocol docs.
url?: string;
// NOTE(review): presumably the frame's name as declared by its embedder — not read by the visible code; confirm against the protocol docs.
name?: string;
}>;

/**
 * Read-only node of the frame tree that the `Page.getFrameTree` debugger
 * command result is cast to: one frame plus any frames nested inside it.
 */
type FrameTreeNode = Readonly<{
// The frame at this position in the tree.
frame: FrameInfo;
// Frames nested inside this one; callers treat a missing value as "no children".
childFrames?: FrameTreeNode[];
}>;

/**
* A web page loader that uses Electron to load web pages and extract their content.
*/
Expand Down Expand Up @@ -336,7 +347,7 @@ export class WebPageLoader extends Disposable {
try {
await raceTimeout((async () => {
if (!cts.token.isCancellationRequested) {
result = await this.extractAccessibilityTreeContent() ?? '';
result = await this.extractAccessibilityTreeContent(cts.token) ?? '';
}

if (!cts.token.isCancellationRequested && result.length < WebPageLoader.MIN_CONTENT_LENGTH) {
Expand Down Expand Up @@ -371,13 +382,44 @@ export class WebPageLoader extends Disposable {

/**
* Extracts content from the Accessibility tree of the loaded web page.
* @return The extracted content, or undefined if extraction fails.
* @param token Cancellation token to abort the operation.
* @return The extracted content, or undefined if extraction fails or is cancelled.
*/
private async extractAccessibilityTreeContent(): Promise<string | undefined> {
private async extractAccessibilityTreeContent(token: CancellationToken): Promise<string | undefined> {
this.trace(`Extracting content using Accessibility domain`);
try {
const { nodes } = await this._debugger.sendCommand('Accessibility.getFullAXTree') as { nodes: AXNode[] };
return convertAXTreeToMarkdown(this._uri, nodes);
// Enable the Page domain to get frame information
await this._debugger.sendCommand('Page.enable');
if (token.isCancellationRequested) {
return undefined;
}

// Get all frames including iframes
const { frameTree } = await this._debugger.sendCommand('Page.getFrameTree') as { frameTree: FrameTreeNode };
if (token.isCancellationRequested) {
return undefined;
}

const frameNodes = [frameTree];
for (let i = 0; i < frameNodes.length; i++) {
frameNodes.push(...frameNodes[i].childFrames ?? []);
}

// Collect accessibility nodes from all frames
const allNodes: AXNode[] = [];
for (const { frame } of frameNodes) {
try {
const { nodes } = await this._debugger.sendCommand('Accessibility.getFullAXTree', { frameId: frame.id }) as { nodes: AXNode[] };
allNodes.push(...nodes);
if (token.isCancellationRequested) {
return undefined;
}
} catch {
// ignore
}
}

return convertAXTreeToMarkdown(this._uri, allNodes);
} catch (error) {
this.trace(`Accessibility tree extraction failed: ${error instanceof Error ? error.message : String(error)}`);
return undefined;
Expand Down
Loading
Loading