diff --git a/packages/csv-stringify/lib/api/index.js b/packages/csv-stringify/lib/api/index.js index 0f214501..39de6053 100644 --- a/packages/csv-stringify/lib/api/index.js +++ b/packages/csv-stringify/lib/api/index.js @@ -3,6 +3,21 @@ import { is_object } from "../utils/is_object.js"; import { normalize_columns } from "./normalize_columns.js"; import { normalize_options } from "./normalize_options.js"; const bom_utf8 = Buffer.from([239, 187, 191]); +// True when appending `separator` after `value` would let `parse` find +// `separator` starting inside `value`. Besides the field containing the whole +// separator, this also covers boundary fusion: a field whose tail is a +// non-empty prefix of a multi-character separator can merge with the appended +// separator (e.g. value "a:" + delimiter "::" => "a:::", matched at offset 1). +// Such fields must be quoted to round-trip, like RFC 4180 fields containing the +// delimiter, generalized to multi-character delimiters and record delimiters. +const emits_separator = function (value, separator) { + return ( + separator.length !== 0 && + (value.indexOf(separator) !== -1 || + (separator.length > 1 && + (value + separator).indexOf(separator) < value.length)) + ); +}; const stringifier = function (options, state, info) { return { @@ -190,11 +205,13 @@ const stringifier = function (options, state, info) { ), ]; } - const containsdelimiter = - delimiter.length && value.indexOf(delimiter) >= 0; + const containsdelimiter = emits_separator(value, delimiter); const containsQuote = quote !== "" && value.indexOf(quote) >= 0; const containsEscape = value.indexOf(escape) >= 0 && escape !== quote; - const containsRecordDelimiter = value.indexOf(record_delimiter) >= 0; + const containsRecordDelimiter = emits_separator( + value, + record_delimiter, + ); const quotedString = quoted_string && typeof field === "string"; let quotedMatch = quoted_match && diff --git a/packages/csv-stringify/test/option.delimiter.js 
b/packages/csv-stringify/test/option.delimiter.js index 4c5217cb..f3743e2f 100644 --- a/packages/csv-stringify/test/option.delimiter.js +++ b/packages/csv-stringify/test/option.delimiter.js @@ -22,4 +22,22 @@ describe("Option `delimiter`", function () { message: "option `delimiter` must be a buffer or a string, got 123", }); }); + it("quote a field that would fuse with a multi-character delimiter", function (next) { + // "a:" + "::" emits "a:::", which parse re-tokenizes as two fields, so the + // field must be quoted to round-trip (RFC 4180 §2.6, generalized to + // multi-character delimiters). + stringify([["a:", "b"]], { delimiter: "::", eof: false }, (err, data) => { + if (err) return next(err); + data.toString().should.eql('"a:"::b'); + next(); + }); + }); + it("does not quote when the tail cannot fuse with the delimiter", function (next) { + // "a:" + ":x" emits "a::x"; ":x" never starts inside "a:", so no quoting. + stringify([["a:", "b"]], { delimiter: ":x", eof: false }, (err, data) => { + if (err) return next(err); + data.toString().should.eql("a::xb"); + next(); + }); + }); }); diff --git a/packages/csv-stringify/test/option.record_delimiter.js b/packages/csv-stringify/test/option.record_delimiter.js index 86b74582..44181a6f 100644 --- a/packages/csv-stringify/test/option.record_delimiter.js +++ b/packages/csv-stringify/test/option.record_delimiter.js @@ -18,4 +18,17 @@ describe("Option `record_delimiter`", function () { "Invalid Option: record_delimiter must be a buffer or a string, got 123", ); }); + it("quote a field that would fuse with a multi-character record delimiter", function (next) { + // The last field "a#" + record delimiter "##" emits "a###", which parse + // re-tokenizes as a record boundary, so the field must be quoted. + stringify( + [["x", "a#"]], + { record_delimiter: "##", eof: false }, + (err, data) => { + if (err) return next(err); + data.toString().should.eql('x,"a#"'); + next(); + }, + ); + }); });