MDEV-39933: Incorrect result of JSON_NORMALIZE on invalid json data

Patch overview (commentary only; everything up to the first "diff --git"
line is leading text, not part of any hunk):

  * mysql-test/main/mdev_39933.test and mdev_39933.result are new files.
    The test feeds JSON_NORMALIZE() inputs that went through CONVERT(),
    several of them containing an embedded CHAR(0) byte, and records NULL
    for every invalid input; the two valid inputs still normalize to
    {"a":1.0E0} and {"a":{"b":1.0E0}}.

  * strings/json_normalize.c, json_normalize(): in_size is now assigned
    from the return value of my_convert(), and the later
    "in_size= strlen(s_utf8)" is removed.  strlen() stops at the first NUL
    byte, so the old length did not cover anything after an embedded NUL.
    Presumably my_convert() returns the number of bytes written to the
    destination buffer; confirm against its declaration.

NOTE(review): the last hunk header declares 15 old / 14 new lines, but
only 13 old / 12 new lines are visible below.  The trailing context looks
truncated; confirm against the complete patch before applying.

NOTE(review): indentation inside the json_normalize.c hunk was
reconstructed, because the original whitespace was not preserved.

diff --git a/mysql-test/main/mdev_39933.result b/mysql-test/main/mdev_39933.result
new file mode 100644
index 0000000000000..8f49ee48e4668
--- /dev/null
+++ b/mysql-test/main/mdev_39933.result
@@ -0,0 +1,43 @@
+#
+# MDEV-39933: Incorrect result of JSON_NORMALIZE on invalid json data
+#
+# JSON with trailing literal junk (no NUL) - should return NULL
+SELECT JSON_NORMALIZE('{"a":1}0junk');
+JSON_NORMALIZE('{"a":1}0junk')
+NULL
+# JSON_VALID correctly rejects JSON with embedded NUL byte + trailing junk
+SELECT JSON_VALID(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1));
+JSON_VALID(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1))
+0
+# JSON_NORMALIZE should also return NULL for invalid JSON with embedded NUL
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1));
+JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1))
+NULL
+# Additional case: just a NUL after valid JSON
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0)) USING latin1));
+JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0)) USING latin1))
+NULL
+# Valid JSON through conversion should still work
+SELECT JSON_NORMALIZE(CONVERT('{"a":1}' USING latin1));
+JSON_NORMALIZE(CONVERT('{"a":1}' USING latin1))
+{"a":1.0E0}
+# Multi-byte source charset (utf16)
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'x') USING utf16));
+JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'x') USING utf16))
+NULL
+# NUL embedded inside a JSON string value
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":"b', CHAR(0), 'c"}') USING latin1));
+JSON_NORMALIZE(CONVERT(CONCAT('{"a":"b', CHAR(0), 'c"}') USING latin1))
+NULL
+# JSON array with trailing NUL
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('[1,2,3]', CHAR(0), 'x') USING latin1));
+JSON_NORMALIZE(CONVERT(CONCAT('[1,2,3]', CHAR(0), 'x') USING latin1))
+NULL
+# Nested object - valid through conversion
+SELECT JSON_NORMALIZE(CONVERT('{"a":{"b":1}}' USING latin1));
+JSON_NORMALIZE(CONVERT('{"a":{"b":1}}' USING latin1))
+{"a":{"b":1.0E0}}
+# Empty string
+SELECT JSON_NORMALIZE(CONVERT('' USING latin1));
+JSON_NORMALIZE(CONVERT('' USING latin1))
+NULL
diff --git a/mysql-test/main/mdev_39933.test b/mysql-test/main/mdev_39933.test
new file mode 100644
index 0000000000000..ce9b4d7da9abc
--- /dev/null
+++ b/mysql-test/main/mdev_39933.test
@@ -0,0 +1,33 @@
+--echo #
+--echo # MDEV-39933: Incorrect result of JSON_NORMALIZE on invalid json data
+--echo #
+
+--echo # JSON with trailing literal junk (no NUL) - should return NULL
+SELECT JSON_NORMALIZE('{"a":1}0junk');
+
+--echo # JSON_VALID correctly rejects JSON with embedded NUL byte + trailing junk
+SELECT JSON_VALID(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1));
+
+--echo # JSON_NORMALIZE should also return NULL for invalid JSON with embedded NUL
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'junk') USING latin1));
+
+--echo # Additional case: just a NUL after valid JSON
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0)) USING latin1));
+
+--echo # Valid JSON through conversion should still work
+SELECT JSON_NORMALIZE(CONVERT('{"a":1}' USING latin1));
+
+--echo # Multi-byte source charset (utf16)
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":1}', CHAR(0), 'x') USING utf16));
+
+--echo # NUL embedded inside a JSON string value
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('{"a":"b', CHAR(0), 'c"}') USING latin1));
+
+--echo # JSON array with trailing NUL
+SELECT JSON_NORMALIZE(CONVERT(CONCAT('[1,2,3]', CHAR(0), 'x') USING latin1));
+
+--echo # Nested object - valid through conversion
+SELECT JSON_NORMALIZE(CONVERT('{"a":{"b":1}}' USING latin1));
+
+--echo # Empty string
+SELECT JSON_NORMALIZE(CONVERT('' USING latin1));
diff --git a/strings/json_normalize.c b/strings/json_normalize.c
index 81575f07069bc..257f0e41fe499 100644
--- a/strings/json_normalize.c
+++ b/strings/json_normalize.c
@@ -1030,15 +1030,14 @@ json_normalize(DYNAMIC_STRING *result,
     if (!s_utf8)
       return 1;
     memset(s_utf8, 0x00, in_size);
-    my_convert(s_utf8, (uint32)in_size, &my_charset_utf8mb4_bin,
-               s, (uint32)size, cs, &convert_err);
+    in_size= my_convert(s_utf8, (uint32)in_size, &my_charset_utf8mb4_bin,
+                        s, (uint32)size, cs, &convert_err);
     if (convert_err)
     {
       my_free(s_utf8);
       return 1;
     }
     in= s_utf8;
-    in_size= strlen(s_utf8);
   }