Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 105 additions & 3 deletions src/common/utils/git/diffParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,108 @@ describe("git diff parser (real repository)", () => {
expect(nonPhantomLines.every((l) => l.startsWith("+"))).toBe(true);
});

it("should parse diff headers with non-literal path prefixes", () => {
  // Hand-written diff whose two sides are labelled c/ and w/ rather than a/ and b/.
  const fileHeader = [
    "diff --git c/foo.ts w/foo.ts",
    "index 1111111..2222222 100644",
    "--- c/foo.ts",
    "+++ w/foo.ts",
  ];
  const hunkBody = ["@@ -1 +1 @@", "-old line", "+new line"];

  const parsed = parseDiff([...fileHeader, ...hunkBody].join("\n"));
  const hunks = extractAllHunks(parsed);

  // Exactly one file, reported without either label and without an oldPath.
  expect(parsed).toHaveLength(1);
  const [onlyFile] = parsed;
  expect(onlyFile.filePath).toBe("foo.ts");
  expect(onlyFile.oldPath).toBeUndefined();

  // The single hunk still carries the added line.
  expect(hunks).toHaveLength(1);
  const [onlyHunk] = hunks;
  expect(onlyHunk.content).toContain("+new line");
});

// Real-repository case: with `git diff --no-prefix` the labels are the bare paths,
// so the leading directory ("src") must be kept in the reported filePath.
it("should preserve nested paths in --no-prefix diffs", () => {
// Start from a clean working tree, then commit the nested file with its "old" content.
execSync("git reset --hard HEAD && git clean -fd", { cwd: testRepoPath });
execSync("mkdir -p src/common", { cwd: testRepoPath });
writeFileSync(join(testRepoPath, "src", "common", "no-prefix.ts"), "old line\n");
execSync("git add src/common/no-prefix.ts && git commit -m 'Add nested no-prefix file'", {
cwd: testRepoPath,
});

// Modify the file in the working tree only, so `git diff HEAD` reports a modification.
writeFileSync(join(testRepoPath, "src", "common", "no-prefix.ts"), "new line\n");

const diff = execSync("git diff --no-prefix HEAD", { cwd: testRepoPath, encoding: "utf-8" });
const fileDiffs = parseDiff(diff);

// One modified file, full nested path intact, no oldPath, and the new line in the hunk.
expect(fileDiffs).toHaveLength(1);
expect(fileDiffs[0].filePath).toBe("src/common/no-prefix.ts");
expect(fileDiffs[0].oldPath).toBeUndefined();
expect(fileDiffs[0].hunks).toHaveLength(1);
expect(fileDiffs[0].hunks[0].content).toContain("+new line");
});

// Real-repository case: runs git with `diff.mnemonicPrefix=true` and checks that the
// parser reports the repository-relative path without whatever labels git emitted.
it("should parse real mnemonic-prefix diffs", () => {
// Start from a clean working tree, then commit the file with its "before" content.
execSync("git reset --hard HEAD && git clean -fd", { cwd: testRepoPath });
execSync("mkdir -p src/mnemonic", { cwd: testRepoPath });
writeFileSync(join(testRepoPath, "src", "mnemonic", "real.ts"), "before\n");
execSync("git add src/mnemonic/real.ts && git commit -m 'Add mnemonic test file'", {
cwd: testRepoPath,
});

// Change the file in the working tree only.
writeFileSync(join(testRepoPath, "src", "mnemonic", "real.ts"), "after\n");

// The setting is passed per-invocation with `-c`, so the repo config is not modified.
const diff = execSync("git -c diff.mnemonicPrefix=true diff HEAD", {
cwd: testRepoPath,
encoding: "utf-8",
});
const fileDiffs = parseDiff(diff);

// One file, path free of the side labels, no oldPath, and the new content in the first hunk.
expect(fileDiffs).toHaveLength(1);
expect(fileDiffs[0].filePath).toBe("src/mnemonic/real.ts");
expect(fileDiffs[0].oldPath).toBeUndefined();
expect(fileDiffs[0].hunks[0].content).toContain("+after");
});

// Real-repository case: a newly staged file diffed with `--cached --no-prefix`.
// Per the test name, the old side of such a diff is /dev/null.
it("should parse --no-prefix additions that use /dev/null", () => {
// Start from a clean working tree, then create and stage (but do not commit) the new file.
execSync("git reset --hard HEAD && git clean -fd", { cwd: testRepoPath });
execSync("mkdir -p nested/dir", { cwd: testRepoPath });
writeFileSync(join(testRepoPath, "nested", "dir", "added-no-prefix.ts"), "added\n");
execSync("git add nested/dir/added-no-prefix.ts", { cwd: testRepoPath });

// `--cached` diffs the index against HEAD, so only the staged addition appears.
const diff = execSync("git diff --cached --no-prefix", {
cwd: testRepoPath,
encoding: "utf-8",
});
const fileDiffs = parseDiff(diff);

// One added file with its full nested path and no oldPath.
expect(fileDiffs).toHaveLength(1);
expect(fileDiffs[0].changeType).toBe("added");
expect(fileDiffs[0].filePath).toBe("nested/dir/added-no-prefix.ts");
expect(fileDiffs[0].oldPath).toBeUndefined();
// The new-side range may be written as "+1" or "+1,1"; the pattern accepts both.
expect(fileDiffs[0].hunks[0].header).toMatch(/^@@ -0,0 \+1(?:,1)? @@/);
});

// Real-repository case: a committed file removed from the working tree and diffed
// with `--no-prefix`. Per the test name, the new side of such a diff is /dev/null.
it("should parse --no-prefix deletions that use /dev/null", () => {
// Start from a clean working tree, then commit the file that will be deleted.
execSync("git reset --hard HEAD && git clean -fd", { cwd: testRepoPath });
execSync("mkdir -p deleted/nested", { cwd: testRepoPath });
writeFileSync(join(testRepoPath, "deleted", "nested", "gone.ts"), "gone\n");
execSync("git add deleted/nested/gone.ts && git commit -m 'Add nested deleted file'", {
cwd: testRepoPath,
});

// Remove the file from the working tree only; the deletion is not staged.
execSync("rm deleted/nested/gone.ts", { cwd: testRepoPath });

const diff = execSync("git diff --no-prefix HEAD", { cwd: testRepoPath, encoding: "utf-8" });
const fileDiffs = parseDiff(diff);

// For a deletion both filePath and oldPath are expected to carry the old file's full path.
expect(fileDiffs).toHaveLength(1);
expect(fileDiffs[0].changeType).toBe("deleted");
expect(fileDiffs[0].filePath).toBe("deleted/nested/gone.ts");
expect(fileDiffs[0].oldPath).toBe("deleted/nested/gone.ts");
expect(fileDiffs[0].hunks[0].content).toContain("-gone");
});

it("should normalize CRLF diff output (no \\r in hunk content)", () => {
const diffOutput =
[
Expand Down Expand Up @@ -154,10 +256,10 @@ describe("git diff parser (real repository)", () => {
});

it("should parse file deletion", () => {
// Reset and commit newfile
execSync("git add . && git commit -m 'Add newfile'", { cwd: testRepoPath });
execSync("git reset --hard HEAD && git clean -fd", { cwd: testRepoPath });
writeFileSync(join(testRepoPath, "newfile.md"), "# New File\n\nContent here\n");
execSync("git add newfile.md && git commit -m 'Add newfile'", { cwd: testRepoPath });

// Delete file
execSync("rm newfile.md", { cwd: testRepoPath });

const diff = execSync("git diff HEAD", { cwd: testRepoPath, encoding: "utf-8" });
Expand Down
157 changes: 146 additions & 11 deletions src/common/utils/git/diffParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,70 @@ function parseHunkHeader(line: string): {
};
}

/** A diff path label split at its first "/". */
interface ParsedDiffPathLabel {
  /** The label exactly as it was passed in. */
  raw: string;
  /** Text before the first "/"; null when the label has no "/" or is "/dev/null". */
  prefix: string | null;
  /** Text after the first "/" (the whole label when it has no "/"); null for "/dev/null". */
  path: string | null;
}

/**
 * Split a diff path label into a candidate prefix and the remaining path.
 *
 * The split is purely positional: everything before the first "/" is reported as
 * the prefix, whether or not it is really a side label. Callers decide whether
 * that prefix should be stripped.
 *
 * @param label - A path label, or undefined when none is available.
 * @returns null for a missing label; otherwise the split label. "/dev/null" yields
 *   a null prefix and a null path.
 */
function parseDiffPathLabel(label: string | undefined): ParsedDiffPathLabel | null {
  if (label == null) {
    return null;
  }

  let prefix: string | null = null;
  let path: string | null = null;

  // "/dev/null" stands for "no file on this side", so both parts stay null.
  if (label !== "/dev/null") {
    const cut = label.indexOf("/");
    if (cut >= 0) {
      prefix = label.slice(0, cut);
      path = label.slice(cut + 1);
    } else {
      // No separator at all: the whole label is the path.
      path = label;
    }
  }

  return { raw: label, prefix, path };
}

/**
 * Pick the label to compare against when canonicalizing the opposite side.
 *
 * @param primaryLabel - Preferred label; ignored when missing or "/dev/null".
 * @param fallbackLabel - Label returned when the primary one is unusable.
 * @returns `primaryLabel` if it names a real path, otherwise `fallbackLabel`
 *   (which may itself be undefined).
 */
function choosePairedDiffLabel(
  primaryLabel: string | undefined,
  fallbackLabel: string | undefined
): string | undefined {
  if (primaryLabel == null || primaryLabel === "/dev/null") {
    return fallbackLabel;
  }
  return primaryLabel;
}

/**
 * Turn one side's diff label into the path to report for that side.
 *
 * The leading segment is stripped only when it behaves like a side label: both
 * labels have a non-empty first segment, those segments differ, and the
 * remainders are identical (for example "c/foo.ts" paired with "w/foo.ts").
 * In every other case the label is returned untouched, which keeps nested
 * paths intact when both sides are the same bare path.
 *
 * @param label - The label for the side being resolved.
 * @param pairedLabel - The label for the opposite side, used only for comparison.
 * @returns The resolved path, or undefined when `label` is missing or "/dev/null".
 */
function canonicalizeDiffPathLabel(
  label: string | undefined,
  pairedLabel: string | undefined
): string | undefined {
  const own = parseDiffPathLabel(label);
  if (own == null || own.path == null) {
    return undefined;
  }

  const other = parseDiffPathLabel(pairedLabel);
  const firstSegmentIsSideLabel =
    other != null &&
    Boolean(own.prefix) &&
    Boolean(other.prefix) &&
    own.prefix !== other.prefix &&
    own.path === other.path;

  return firstSegmentIsSideLabel ? own.path : own.raw;
}

/**
* Parse unified diff output into structured file diffs with hunks
* Supports standard git diff format with file headers and hunk markers
Expand All @@ -63,6 +127,52 @@ export function parseDiff(diffOutput: string): FileDiff[] {
let currentFile: FileDiff | null = null;
let currentHunk: Partial<DiffHunk> | null = null;
let hunkLines: string[] = [];
let currentHeaderOldLabel: string | undefined;
let currentHeaderNewLabel: string | undefined;
let currentPatchOldLabel: string | undefined;
let currentPatchNewLabel: string | undefined;
let currentRenameFrom: string | undefined;
let currentRenameTo: string | undefined;

// Recompute filePath/oldPath on the in-progress file from the labels seen so far.
// Precedence per side: explicit rename path, then the ---/+++ patch label, then the
// "diff --git" header label.
const syncCurrentFilePaths = () => {
  if (!currentFile) {
    return;
  }

  // Each side is canonicalized against the opposite side's best available label.
  const pairForOld = choosePairedDiffLabel(currentPatchNewLabel, currentHeaderNewLabel);
  const pairForNew = choosePairedDiffLabel(currentPatchOldLabel, currentHeaderOldLabel);

  const oldSide =
    currentRenameFrom ??
    canonicalizeDiffPathLabel(currentPatchOldLabel ?? currentHeaderOldLabel, pairForOld);
  const newSide =
    currentRenameTo ??
    canonicalizeDiffPathLabel(currentPatchNewLabel ?? currentHeaderNewLabel, pairForNew);

  // Prefer the new side for display; leave filePath untouched when neither side resolved.
  const displayPath = newSide ?? oldSide;
  if (displayPath) {
    currentFile.filePath = displayPath;
  }

  // oldPath is only reported for deletions, renames, or when the two sides differ.
  const kind = currentFile.changeType;
  const sidesDiffer = newSide != null && oldSide !== newSide;
  const keepOldPath =
    Boolean(oldSide) && (kind === "deleted" || kind === "renamed" || sidesDiffer);
  currentFile.oldPath = keepOldPath ? oldSide : undefined;
};

// Forget every per-file label so the next file starts with nothing carried over.
const resetCurrentFileLabels = () => {
  // Paths from "rename from" / "rename to" lines.
  currentRenameTo = undefined;
  currentRenameFrom = undefined;

  // Labels from the "---" / "+++" patch lines.
  currentPatchNewLabel = undefined;
  currentPatchOldLabel = undefined;

  // Labels from the "diff --git" header line.
  currentHeaderNewLabel = undefined;
  currentHeaderOldLabel = undefined;
};

const finishHunk = () => {
if (currentHunk && currentFile && hunkLines.length > 0) {
Expand All @@ -89,28 +199,31 @@ export function parseDiff(diffOutput: string): FileDiff[] {
// Close out the file being parsed: flush its open hunk, settle its paths, store it,
// and clear all per-file state. Safe to call when no file is in progress.
const finishFile = () => {
  finishHunk();

  const completed = currentFile;
  if (completed) {
    // Final path resolution happens before the file is stored.
    syncCurrentFilePaths();
    files.push(completed);
    currentFile = null;
  }

  // Labels are cleared even when there was no file, so stale labels never leak forward.
  resetCurrentFileLabels();
};

for (const line of lines) {
// File header: diff --git a/... b/...
// File header: git emits path labels here, but they are not guaranteed to be literal a/ and b/.
if (line.startsWith("diff --git ")) {
finishFile();
// Extract file paths from "diff --git a/path b/path"
const regex = /^diff --git a\/(.+) b\/(.+)$/;
const match = regex.exec(line);
if (match) {
const oldPath = match[1];
const newPath = match[2];
// Extract the trailing paths from "diff --git <label>/path <label>/path" without
// assuming specific labels. Review diffs can use other prefixes (for example c/ and w/).
const parts = line.split(" ");
if (parts.length >= 4) {
currentHeaderOldLabel = parts[2];
currentHeaderNewLabel = parts[3];
Comment thread
RainbowDashy marked this conversation as resolved.
Outdated
currentFile = {
filePath: newPath,
oldPath: oldPath !== newPath ? oldPath : undefined,
filePath: "",
oldPath: undefined,
changeType: "modified",
isBinary: false,
hunks: [],
};
syncCurrentFilePaths();
}
continue;
}
Expand All @@ -126,18 +239,40 @@ export function parseDiff(diffOutput: string): FileDiff[] {
// New file mode
if (line.startsWith("new file mode ")) {
currentFile.changeType = "added";
syncCurrentFilePaths();
continue;
}

// Deleted file mode
if (line.startsWith("deleted file mode ")) {
currentFile.changeType = "deleted";
syncCurrentFilePaths();
continue;
}

if (!currentHunk && line.startsWith("--- ")) {
currentPatchOldLabel = line.slice(4);
syncCurrentFilePaths();
Comment thread
RainbowDashy marked this conversation as resolved.
continue;
}

if (!currentHunk && line.startsWith("+++ ")) {
currentPatchNewLabel = line.slice(4);
syncCurrentFilePaths();
continue;
}

if (line.startsWith("rename from ")) {
currentFile.changeType = "renamed";
currentRenameFrom = line.slice("rename from ".length);
syncCurrentFilePaths();
Comment thread
RainbowDashy marked this conversation as resolved.
continue;
}

// Rename marker
if (line.startsWith("rename from ") || line.startsWith("rename to ")) {
if (line.startsWith("rename to ")) {
currentFile.changeType = "renamed";
currentRenameTo = line.slice("rename to ".length);
syncCurrentFilePaths();
continue;
}

Expand Down