@@ -19,6 +19,7 @@ import { getCurrentTrace, postProcessTrace } from '../tracing/tracer';
/**
 * Value produced for a single dataset item.
 *
 * Besides the two named members, the index signature allows any number of
 * additional string-keyed properties; those are typed `unknown` and must be
 * narrowed before use.
 */
export interface RunReturn {
  /** Free-form, string-keyed bag of extra values. */
  otherFields: Record<string, any>;
  /** The produced output; its type is not constrained here. */
  output: any;
  /** Any further top-level properties. */
  [key: string]: unknown;
}
2324
2425interface Output {
@@ -42,6 +43,148 @@ export interface Config {
4243 numOfTokenColumnName ?: string ;
4344}
4445
/** Dataset file formats this module can read and write. */
export type DatasetFormat = 'csv' | 'json';

/** File name used for the written dataset, keyed by its format. */
const DATASET_FILENAMES: Record<DatasetFormat, string> = {
  csv: 'dataset.csv',
  json: 'dataset.json',
};
52+
/**
 * Determines the dataset format from a path's file extension.
 *
 * The extension is compared case-insensitively, so `.CSV` and `.csv` are
 * treated alike.
 *
 * @param datasetPath - Path whose extension identifies the format.
 * @returns `'csv'` for a `.csv` extension, `'json'` for a `.json` extension.
 * @throws Error when the extension is neither `.csv` nor `.json`.
 */
export function detectDatasetFormat(datasetPath: string): DatasetFormat {
  switch (path.extname(datasetPath).toLowerCase()) {
    case '.csv':
      return 'csv';
    case '.json':
      return 'json';
    default:
      throw new Error(`Unsupported dataset format: ${datasetPath}`);
  }
}
63+
/**
 * Splits one CSV record into its field values.
 *
 * A double quote switches quoted mode on; inside quoted mode a doubled quote
 * (`""`) yields one literal quote and a single quote switches quoted mode off.
 * Commas separate fields only outside quoted mode. Quote characters that act
 * as delimiters are not included in the returned values.
 *
 * @param line - Text of a single record.
 * @returns The field values in order; always at least one element.
 */
function parseCsvLine(line: string): string[] {
  const fields: string[] = [];
  let field = '';
  let quoted = false;
  let pos = 0;

  while (pos < line.length) {
    const ch = line.charAt(pos);
    pos += 1; // `pos` now points at the character after `ch`

    if (quoted) {
      if (ch !== '"') {
        field += ch;
      } else if (line.charAt(pos) === '"') {
        // Doubled quote inside a quoted section: emit one quote, consume both.
        field += '"';
        pos += 1;
      } else {
        quoted = false;
      }
      continue;
    }

    switch (ch) {
      case ',':
        fields.push(field);
        field = '';
        break;
      case '"':
        quoted = true;
        break;
      default:
        field += ch;
    }
  }

  // The final field has no trailing comma, so flush it explicitly.
  fields.push(field);
  return fields;
}
95+
/**
 * Splits CSV text into records on line breaks (`\n` or `\r\n`), keeping line
 * breaks that occur inside a quoted field as part of that record.
 *
 * A field counts as quoted only when its first character is a double quote;
 * a quote appearing later in an unquoted field does not suppress splitting.
 */
function splitCsvRecords(text: string): string[] {
  const records: string[] = [];
  let recordStart = 0;
  let inQuotes = false;
  let atFieldStart = true;

  for (let i = 0; i < text.length; i += 1) {
    const ch = text.charAt(i);

    if (inQuotes) {
      if (ch === '"') {
        if (text.charAt(i + 1) === '"') {
          i += 1; // escaped quote, stay inside the quoted field
        } else {
          inQuotes = false;
        }
      }
      continue;
    }

    if (ch === '"' && atFieldStart) {
      inQuotes = true;
      atFieldStart = false;
    } else if (ch === ',') {
      atFieldStart = true;
    } else if (ch === '\n') {
      // Drop the `\r` of a `\r\n` pair so it does not end up in the last field.
      const recordEnd = i > recordStart && text.charAt(i - 1) === '\r' ? i - 1 : i;
      records.push(text.slice(recordStart, recordEnd));
      recordStart = i + 1;
      atFieldStart = true;
    } else {
      atFieldStart = false;
    }
  }

  records.push(text.slice(recordStart));
  return records;
}

/**
 * Parses CSV text into one object per data row, keyed by the header row.
 *
 * The first non-blank record supplies the column names. Blank records are
 * ignored. A row with fewer values than headers gets `''` for the missing
 * columns; values beyond the last header are dropped. A leading UTF-8
 * byte-order mark is removed so it cannot become part of the first header.
 * Quoted fields may span several lines.
 *
 * @param content - Raw CSV text.
 * @returns The data rows, or an empty array when there is no header record.
 */
export function parseCsv(content: string): Record<string, string>[] {
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;

  const [headerLine, ...dataLines] = splitCsvRecords(text).filter(
    (record) => record.trim().length > 0,
  );
  if (headerLine === undefined) {
    return [];
  }

  const headers = parseCsvLine(headerLine);
  return dataLines.map((line) => {
    const values = parseCsvLine(line);
    const row: Record<string, string> = {};
    headers.forEach((header, idx) => {
      row[header] = values[idx] ?? '';
    });
    return row;
  });
}
112+
/**
 * Converts a value into the text of a single CSV field.
 *
 * - `null` and `undefined` become an empty field.
 * - Objects and arrays are JSON-encoded, so nested data is kept instead of
 *   collapsing to `[object Object]`. If JSON encoding fails or yields nothing,
 *   the plain string conversion is used instead.
 * - Everything else, including `Date`, uses the plain string conversion.
 *
 * The text is wrapped in double quotes when it contains a quote, comma, or
 * line break, and embedded quotes are doubled.
 *
 * @param value - Cell value to encode.
 * @returns The field text, ready to be joined with commas.
 */
function escapeCsvValue(value: unknown): string {
  if (value === undefined || value === null) {
    return '';
  }

  let str: string;
  if (typeof value === 'object' && !(value instanceof Date)) {
    try {
      // JSON.stringify returns undefined at runtime when toJSON() yields undefined.
      const json: string | undefined = JSON.stringify(value);
      str = json ?? String(value);
    } catch {
      // e.g. circular references or BigInt members; fall back to best effort.
      str = String(value);
    }
  } else {
    str = String(value);
  }

  const needsQuotes = /[",\n\r]/.test(str);
  const escaped = str.replace(/"/g, '""');
  return needsQuotes ? `"${escaped}"` : escaped;
}
122+
/**
 * Gathers the distinct property names found across all rows.
 *
 * Names appear in the order they are first encountered while walking the rows
 * front to back.
 *
 * @param rows - Records to inspect.
 * @returns Each distinct key once, in first-seen order.
 */
function collectHeaders(rows: Record<string, unknown>[]): string[] {
  // A Set keeps insertion order, which gives the first-seen ordering.
  const seen = new Set<string>();
  for (const row of rows) {
    for (const key of Object.keys(row)) {
      seen.add(key);
    }
  }
  return Array.from(seen);
}
134+
/**
 * Renders rows as CSV text.
 *
 * The header line lists every key found in any row; each following line holds
 * one row's values in that column order. Lines are joined with `\n` and no
 * trailing newline is added.
 *
 * @param rows - Records to serialize.
 * @returns The CSV text, or `''` when `rows` is empty.
 */
export function serializeCsv(rows: Record<string, unknown>[]): string {
  if (!rows.length) {
    return '';
  }

  const columns = collectHeaders(rows);
  const toLine = (cells: unknown[]): string =>
    cells.map((cell) => escapeCsvValue(cell)).join(',');

  const lines: string[] = [toLine(columns)];
  for (const row of rows) {
    lines.push(toLine(columns.map((column) => row[column])));
  }
  return lines.join('\n');
}
144+
/**
 * Reads a dataset file and parses it according to its extension.
 *
 * The file is read as UTF-8 first, then the format is derived from the
 * resolved path. JSON content must be an array; CSV content is parsed into
 * one record per data row.
 *
 * @param datasetPath - Path to the dataset file; resolved to an absolute path.
 * @returns The parsed records together with the detected format.
 * @throws Error when the extension is unsupported or the JSON content is not
 *   an array. Errors from reading the file or parsing JSON propagate as-is.
 */
export function loadDataset(datasetPath: string): { data: any[]; format: DatasetFormat } {
  const absolutePath = path.resolve(datasetPath);
  const text = fs.readFileSync(absolutePath, 'utf8');
  const format = detectDatasetFormat(absolutePath);

  if (format !== 'json') {
    return { data: parseCsv(text), format };
  }

  const records: unknown = JSON.parse(text);
  if (Array.isArray(records)) {
    return { data: records, format };
  }
  throw new Error('Dataset JSON must be an array of records');
}
161+
/**
 * Writes the result rows and the config into an output directory.
 *
 * The directory is created (including missing parents) if needed. The rows go
 * to the format's dataset file, as 4-space-indented JSON or as CSV, and the
 * config goes to `config.json` as 4-space-indented JSON. Both destinations
 * are then logged with `console.info`.
 *
 * @param outputDir - Directory to write into; resolved to an absolute path.
 * @param rows - Result rows to persist.
 * @param format - Format to write the dataset in.
 * @param config - Configuration object saved next to the dataset.
 */
export function writeDataset(
  outputDir: string,
  rows: RunReturn[],
  format: DatasetFormat,
  config: Config,
): void {
  const targetDir = path.resolve(outputDir);
  fs.mkdirSync(targetDir, { recursive: true });

  const datasetPath = path.join(targetDir, DATASET_FILENAMES[format]);
  const configPath = path.join(targetDir, 'config.json');

  const payload = format === 'json' ? JSON.stringify(rows, null, 4) : serializeCsv(rows);
  fs.writeFileSync(datasetPath, payload, 'utf8');
  fs.writeFileSync(configPath, JSON.stringify(config, null, 4), 'utf8');

  console.info(`Output written to ${datasetPath}`);
  console.info(`Config written to ${configPath}`);
}
187+
45188class CLIHandler {
46189 private run : ( ...args : any [ ] ) => Promise < any > ;
47190
@@ -59,10 +202,7 @@ class CLIHandler {
59202 const options = program . opts ( ) ;
60203 const { datasetPath, outputDir } = options ;
61204
62- // Load dataset
63- const datasetFullPath = path . resolve ( datasetPath ) ;
64- const rawData = fs . readFileSync ( datasetFullPath , 'utf8' ) ;
65- const dataset = JSON . parse ( rawData ) ;
205+ const { data : dataset , format } = loadDataset ( datasetPath ) ;
66206
67207 // Process each item in the dataset dynamically
68208 Promise . all < Output > (
0 commit comments