Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 80 additions & 4 deletions src/lib/core/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,85 @@ export interface Config {
numOfTokenColumnName?: string;
}

/** Dataset file formats the loader can read; each maps to a file extension (`.csv` / `.json`). */
export type DatasetFormat = 'csv' | 'json';

/**
 * Infers the dataset format from the file extension of `datasetPath`.
 *
 * The extension is lower-cased before comparison, so `data.CSV` and
 * `data.csv` are treated alike.
 *
 * @param datasetPath - Path to the dataset file; only its extension is inspected.
 * @returns `'csv'` for a `.csv` extension, `'json'` for a `.json` extension.
 * @throws Error when the extension is anything else (including none at all).
 */
export function detectDatasetFormat(datasetPath: string): DatasetFormat {
  switch (path.extname(datasetPath).toLowerCase()) {
    case '.csv':
      return 'csv';
    case '.json':
      return 'json';
    default:
      throw new Error(`Unsupported dataset format: ${datasetPath}`);
  }
}

/**
 * Splits raw CSV text into logical records.
 *
 * A line feed only ends a record when it occurs outside a quoted section, so
 * quoted fields may span several physical lines. A carriage return directly
 * before a record-ending line feed is dropped (CRLF files). Quote state is
 * tracked by parity, which matches how `parseCsvLine` treats `"` and `""`.
 *
 * @param content - CSV text without a byte order mark.
 * @returns Every record in order, including empty ones; an unterminated quote
 *   makes the final record run to the end of the input.
 */
function splitCsvRecords(content: string): string[] {
  const records: string[] = [];
  let recordStart = 0;
  let inQuotes = false;

  for (let i = 0; i < content.length; i += 1) {
    const char = content.charAt(i);
    if (char === '"') {
      // An escaped quote ("") toggles twice and therefore leaves the state unchanged.
      inQuotes = !inQuotes;
    } else if (char === '\n' && !inQuotes) {
      const hasCarriageReturn = i > recordStart && content.charAt(i - 1) === '\r';
      records.push(content.slice(recordStart, hasCarriageReturn ? i - 1 : i));
      recordStart = i + 1;
    }
  }

  records.push(content.slice(recordStart));
  return records;
}

/**
 * Splits one CSV record into its field values.
 *
 * Commas separate fields unless they appear inside double quotes. Inside a
 * quoted section a doubled quote (`""`) yields one literal quote character.
 * A quote outside a quoted section opens one, even in the middle of a field.
 * No trimming is applied to the values.
 *
 * @param line - A single record (may contain line breaks inside quotes).
 * @returns The field values; always at least one element.
 */
function parseCsvLine(line: string): string[] {
  const result: string[] = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i += 1) {
    const char = line.charAt(i);

    if (inQuotes) {
      if (char !== '"') {
        current += char;
      } else if (line.charAt(i + 1) === '"') {
        current += '"';
        i += 1; // skip the second half of the escaped quote
      } else {
        inQuotes = false;
      }
    } else if (char === ',') {
      result.push(current);
      current = '';
    } else if (char === '"') {
      inQuotes = true;
    } else {
      current += char;
    }
  }

  result.push(current);
  return result;
}

/**
 * Parses CSV text into one object per data record, keyed by the header row.
 *
 * - A leading UTF-8 byte order mark is ignored so it cannot leak into the
 *   first header name.
 * - Quoted fields may contain commas, escaped quotes (`""`) and line breaks.
 * - Records that are empty or whitespace-only are skipped.
 * - Missing trailing values become `''`; values beyond the header count are
 *   dropped.
 *
 * @param content - Raw CSV text; the first non-blank record is the header.
 * @returns The parsed rows, or `[]` when there is no header record.
 */
export function parseCsv(content: string): Record<string, string>[] {
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
  const [headerRecord, ...dataRecords] = splitCsvRecords(text).filter(
    (record) => record.trim().length > 0,
  );
  if (headerRecord === undefined) {
    return [];
  }

  const headers = parseCsvLine(headerRecord);
  return dataRecords.map((record) => {
    const values = parseCsvLine(record);
    const row: Record<string, string> = {};
    headers.forEach((header, idx) => {
      row[header] = values[idx] ?? '';
    });
    return row;
  });
}

/**
 * Reads a dataset file from disk and parses it according to its extension.
 *
 * The format is determined before the file is read, so an unsupported
 * extension is rejected without any disk I/O. A leading UTF-8 byte order mark
 * is removed after reading: `readFileSync` keeps it, `JSON.parse` rejects it,
 * and in CSV it would otherwise become part of the first header name.
 *
 * @param datasetPath - Path to a `.json` or `.csv` file; resolved against the
 *   current working directory when relative.
 * @returns The parsed records together with the detected format. JSON
 *   datasets are returned as parsed; CSV rows are string-valued objects.
 * @throws Error when the extension is unsupported, when the file cannot be
 *   read, when the JSON is malformed, or when the JSON root is not an array.
 */
export function loadDataset(datasetPath: string): { data: any[]; format: DatasetFormat } {
  const datasetFullPath = path.resolve(datasetPath);
  const format = detectDatasetFormat(datasetFullPath);

  const fileText = fs.readFileSync(datasetFullPath, 'utf8');
  const rawData = fileText.charCodeAt(0) === 0xfeff ? fileText.slice(1) : fileText;

  if (format === 'json') {
    const parsed: unknown = JSON.parse(rawData);
    if (!Array.isArray(parsed)) {
      throw new Error('Dataset JSON must be an array of records');
    }
    return { data: parsed, format };
  }

  return { data: parseCsv(rawData), format };
}

class CLIHandler {
private run: (...args: any[]) => Promise<any>;

Expand All @@ -59,10 +138,7 @@ class CLIHandler {
const options = program.opts();
const { datasetPath, outputDir } = options;

// Load dataset
const datasetFullPath = path.resolve(datasetPath);
const rawData = fs.readFileSync(datasetFullPath, 'utf8');
const dataset = JSON.parse(rawData);
const { data: dataset } = loadDataset(datasetPath);

// Process each item in the dataset dynamically
Promise.all<Output>(
Expand Down
51 changes: 51 additions & 0 deletions tests/cli-dataset.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';

import { loadDataset, parseCsv } from '../src/lib/core/cli';

// Tests for the dataset-loading helpers exported from the CLI module.
describe('CLI dataset helpers', () => {
  // Every temp directory created by a test is recorded here so that the
  // afterEach hook can delete it instead of leaving it behind in os.tmpdir().
  const tempDirs: string[] = [];

  const makeTempDir = (): string => {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'ol-cli-'));
    tempDirs.push(dir);
    return dir;
  };

  afterEach(() => {
    // splice(0) empties the list while handing back what needs removing.
    for (const dir of tempDirs.splice(0)) {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  });

  it('loads JSON datasets', () => {
    const dir = makeTempDir();
    const filePath = path.join(dir, 'dataset.json');
    const payload = [{ a: 1 }, { a: 2 }];
    fs.writeFileSync(filePath, JSON.stringify(payload), 'utf8');

    const { data, format } = loadDataset(filePath);

    expect(format).toBe('json');
    expect(data).toEqual(payload);
  });

  it('rejects JSON datasets whose root is not an array', () => {
    const dir = makeTempDir();
    const filePath = path.join(dir, 'dataset.json');
    fs.writeFileSync(filePath, JSON.stringify({ a: 1 }), 'utf8');

    expect(() => loadDataset(filePath)).toThrow('Dataset JSON must be an array of records');
  });

  it('rejects unsupported file extensions', () => {
    const dir = makeTempDir();
    const filePath = path.join(dir, 'dataset.txt');
    fs.writeFileSync(filePath, 'id,name\n1,Alice\n', 'utf8');

    expect(() => loadDataset(filePath)).toThrow('Unsupported dataset format');
  });

  it('loads CSV datasets', () => {
    const dir = makeTempDir();
    const filePath = path.join(dir, 'dataset.csv');
    const payload = 'id,name\n1,Alice\n2,Bob\n';
    fs.writeFileSync(filePath, payload, 'utf8');

    const { data, format } = loadDataset(filePath);

    expect(format).toBe('csv');
    expect(data).toEqual([
      { id: '1', name: 'Alice' },
      { id: '2', name: 'Bob' },
    ]);
  });

  it('parses CSV with quoted values containing commas', () => {
    const csv = 'prompt,output\nhello,hi\n"quote, comma","ok"\n';
    const parsed = parseCsv(csv);

    expect(parsed).toEqual([
      { prompt: 'hello', output: 'hi' },
      { prompt: 'quote, comma', output: 'ok' },
    ]);
  });

  it('returns empty array for empty CSV content', () => {
    expect(parseCsv('')).toEqual([]);
    expect(parseCsv(' \n ')).toEqual([]);
  });
});
Loading