Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,263 changes: 1,263 additions & 0 deletions benchmark_comprehensive_test.go

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions chartable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package astjson

// Bit flags stored per byte in charFlags. Each flag occupies its own bit,
// so a single byte can carry more than one classification.
const (
	// charWS marks whitespace: space, tab, newline, carriage return.
	charWS uint8 = 1 << iota
	// charNumChar marks bytes valid in a number: digits, '.', '-', '+', 'e', 'E'.
	charNumChar
	// charEscape marks bytes that need escaping in a JSON string:
	// '"', '\', and every byte below 0x20.
	charEscape
)

// Byte-indexed lookup tables, one entry per possible byte value.
var (
	// charFlags classifies each byte with a bitmask of the char* flags,
	// so hot loops can do one table lookup instead of a chain of comparisons.
	charFlags [256]uint8

	// hexDigit holds the numeric value (0-15) of each ASCII hex digit byte.
	// Bytes that are not hex digits are mapped to 0xFF.
	hexDigit [256]uint8
)

// init populates the charFlags and hexDigit lookup tables.
func init() {
	// Whitespace: space, tab, newline, carriage return.
	for _, ws := range [...]byte{' ', '\t', '\n', '\r'} {
		charFlags[ws] |= charWS
	}

	// Bytes that may appear in a number: digits, '.', '-', '+', 'e', 'E'.
	for _, nc := range []byte("0123456789.-+eE") {
		charFlags[nc] |= charNumChar
	}

	// Bytes that need escaping in JSON strings: the double quote, the
	// backslash, and every byte below 0x20.
	charFlags['"'] |= charEscape
	charFlags['\\'] |= charEscape
	for ctl := 0; ctl < 0x20; ctl++ {
		charFlags[ctl] |= charEscape
	}

	// Hex digit lookup: mark every entry invalid (0xFF) first, then
	// overwrite the entries for '0'-'9', 'a'-'f' and 'A'-'F'.
	for i := range hexDigit {
		hexDigit[i] = 0xFF
	}
	for v := byte(0); v < 10; v++ {
		hexDigit['0'+v] = v
	}
	for v := byte(0); v < 6; v++ {
		hexDigit['a'+v] = 10 + v
		hexDigit['A'+v] = 10 + v
	}
}

// parseHex4 parses the first 4 bytes of s as hex digits into a uint16.
// Any bytes after the fourth are ignored.
// Returns the value and true on success, or 0 and false on invalid input:
// s shorter than 4 bytes, or a non-hex byte among the first four.
func parseHex4(s string) (uint16, bool) {
	// Reject short input up front instead of panicking on the index
	// expressions below; the check should also let the compiler prove
	// that s[0]..s[3] are in range.
	if len(s) < 4 {
		return 0, false
	}
	a, b, c, d := hexDigit[s[0]], hexDigit[s[1]], hexDigit[s[2]], hexDigit[s[3]]
	// Valid hex digits are 0..15 (low nibble); invalid sentinel 0xFF has high bits set,
	// so OR-ing the four lookups detects any invalid byte with one comparison.
	if (a|b|c|d)&0xF0 != 0 {
		return 0, false
	}
	return uint16(a)<<12 | uint16(b)<<8 | uint16(c)<<4 | uint16(d), true
}
42 changes: 24 additions & 18 deletions fastfloat/parse.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
package fastfloat

import (
"fmt"
"errors"
"math"
"strconv"
"strings"
)

// Sentinel errors for empty input. They are created once at package
// initialization and returned as-is by the parse functions.

// errParseUint64Empty is returned by ParseUint64 when s is empty.
var errParseUint64Empty = errors.New("cannot parse uint64 from empty string")

// errParseInt64Empty is returned by ParseInt64 when s is empty.
var errParseInt64Empty = errors.New("cannot parse int64 from empty string")

// errParseFloat64Empty is returned by Parse when s is empty.
var errParseFloat64Empty = errors.New("cannot parse float64 from empty string")

// ParseUint64BestEffort parses uint64 number s.
//
// It is equivalent to strconv.ParseUint(s, 10, 64), but is faster.
Expand Down Expand Up @@ -54,7 +60,7 @@ func ParseUint64BestEffort(s string) uint64 {
// See also ParseUint64BestEffort.
func ParseUint64(s string) (uint64, error) {
if len(s) == 0 {
return 0, fmt.Errorf("cannot parse uint64 from empty string")
return 0, errParseUint64Empty
}
i := uint(0)
d := uint64(0)
Expand All @@ -77,11 +83,11 @@ func ParseUint64(s string) (uint64, error) {
break
}
if i <= j {
return 0, fmt.Errorf("cannot parse uint64 from %q", s)
return 0, errors.New("cannot parse uint64 from " + strconv.Quote(s))
}
if i < uint(len(s)) {
// Unparsed tail left.
return 0, fmt.Errorf("unparsed tail left after parsing uint64 from %q: %q", s, s[i:])
return 0, errors.New("unparsed tail left after parsing uint64 from " + strconv.Quote(s) + ": " + strconv.Quote(s[i:]))
}
return d, nil
}
Expand Down Expand Up @@ -144,14 +150,14 @@ func ParseInt64BestEffort(s string) int64 {
// See also ParseInt64BestEffort.
func ParseInt64(s string) (int64, error) {
if len(s) == 0 {
return 0, fmt.Errorf("cannot parse int64 from empty string")
return 0, errParseInt64Empty
}
i := uint(0)
minus := s[0] == '-'
if minus {
i++
if i >= uint(len(s)) {
return 0, fmt.Errorf("cannot parse int64 from %q", s)
return 0, errors.New("cannot parse int64 from " + strconv.Quote(s))
}
}

Expand All @@ -175,11 +181,11 @@ func ParseInt64(s string) (int64, error) {
break
}
if i <= j {
return 0, fmt.Errorf("cannot parse int64 from %q", s)
return 0, errors.New("cannot parse int64 from " + strconv.Quote(s))
}
if i < uint(len(s)) {
// Unparsed tail left.
return 0, fmt.Errorf("unparsed tail left after parsing int64 form %q: %q", s, s[i:])
return 0, errors.New("unparsed tail left after parsing int64 from " + strconv.Quote(s) + ": " + strconv.Quote(s[i:]))
}
if minus {
d = -d
Expand Down Expand Up @@ -355,21 +361,21 @@ func ParseBestEffort(s string) float64 {
// See also ParseBestEffort.
func Parse(s string) (float64, error) {
if len(s) == 0 {
return 0, fmt.Errorf("cannot parse float64 from empty string")
return 0, errParseFloat64Empty
}
i := uint(0)
minus := s[0] == '-'
if minus {
i++
if i >= uint(len(s)) {
return 0, fmt.Errorf("cannot parse float64 from %q", s)
return 0, errors.New("cannot parse float64 from " + strconv.Quote(s))
}
}

// the integer part might be elided to remain compliant
// with https://go.dev/ref/spec#Floating-point_literals
if s[i] == '.' && (i+1 >= uint(len(s)) || s[i+1] < '0' || s[i+1] > '9') {
return 0, fmt.Errorf("missing integer and fractional part in %q", s)
return 0, errors.New("missing integer and fractional part in " + strconv.Quote(s))
}

d := uint64(0)
Expand Down Expand Up @@ -405,7 +411,7 @@ func Parse(s string) (float64, error) {
if strings.EqualFold(ss, "nan") {
return nan, nil
}
return 0, fmt.Errorf("unparsed tail left after parsing float64 from %q: %q", s, ss)
return 0, errors.New("unparsed tail left after parsing float64 from " + strconv.Quote(s) + ": " + strconv.Quote(ss))
}
f := float64(d)
if i >= uint(len(s)) {
Expand Down Expand Up @@ -433,7 +439,7 @@ func Parse(s string) (float64, error) {
// The mantissa is out of range. Fall back to standard parsing.
f, err := strconv.ParseFloat(s, 64)
if err != nil && !math.IsInf(f, 0) {
return 0, fmt.Errorf("cannot parse mantissa in %q: %s", s, err)
return 0, errors.New("cannot parse mantissa in " + strconv.Quote(s) + ": " + err.Error())
}
return f, nil
}
Expand All @@ -455,14 +461,14 @@ func Parse(s string) (float64, error) {
// Parse exponent part.
i++
if i >= uint(len(s)) {
return 0, fmt.Errorf("cannot parse exponent in %q", s)
return 0, errors.New("cannot parse exponent in " + strconv.Quote(s))
}
expMinus := false
if s[i] == '+' || s[i] == '-' {
expMinus = s[i] == '-'
i++
if i >= uint(len(s)) {
return 0, fmt.Errorf("cannot parse exponent in %q", s)
return 0, errors.New("cannot parse exponent in " + strconv.Quote(s))
}
}
exp := int16(0)
Expand All @@ -476,7 +482,7 @@ func Parse(s string) (float64, error) {
// Fall back to standard parsing.
f, err := strconv.ParseFloat(s, 64)
if err != nil && !math.IsInf(f, 0) {
return 0, fmt.Errorf("cannot parse exponent in %q: %s", s, err)
return 0, errors.New("cannot parse exponent in " + strconv.Quote(s) + ": " + err.Error())
}
return f, nil
}
Expand All @@ -485,7 +491,7 @@ func Parse(s string) (float64, error) {
break
}
if i <= j {
return 0, fmt.Errorf("cannot parse exponent in %q", s)
return 0, errors.New("cannot parse exponent in " + strconv.Quote(s))
}
if expMinus {
exp = -exp
Expand All @@ -498,7 +504,7 @@ func Parse(s string) (float64, error) {
return f, nil
}
}
return 0, fmt.Errorf("cannot parse float64 from %q", s)
return 0, errors.New("cannot parse float64 from " + strconv.Quote(s))
}

var inf = math.Inf(1)
Expand Down
49 changes: 25 additions & 24 deletions mergevalues.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package astjson

import (
"bytes"
"errors"

"github.com/wundergraph/go-arena"
Expand Down Expand Up @@ -34,18 +33,24 @@ func MergeValues(ar arena.Arena, a, b *Value) (v *Value, changed bool, err error
if b == nil {
return a, false, nil
}
if b.Type() == TypeNull && a.Type() == TypeObject {
at, bt := a.t, b.t
if bt == TypeNull && at == TypeObject {
// we assume that null was returned in an error case for resolving a nested object field
// as we've got an object on the left side, we don't override the whole object with null
// instead, we keep the left object and discard the null on the right side
return a, false, nil
}
aBool, bBool := a.Type() == TypeTrue || a.Type() == TypeFalse, b.Type() == TypeTrue || b.Type() == TypeFalse
booleans := aBool && bBool
if a.Type() != b.Type() && !booleans {
return nil, false, ErrMergeDifferentTypes
if at != bt {
// Only compute boolean compatibility when types actually differ
aBool := at == TypeTrue || at == TypeFalse
bBool := bt == TypeTrue || bt == TypeFalse
if !aBool || !bBool {
return nil, false, ErrMergeDifferentTypes
}
// Types differ but both are booleans — b replaces a
return b, true, nil
}
switch a.Type() {
switch at {
case TypeObject:
ao, _ := a.Object()
bo, _ := b.Object()
Expand Down Expand Up @@ -94,29 +99,25 @@ func MergeValues(ar arena.Arena, a, b *Value) (v *Value, changed bool, err error
}
}
return a, false, nil
case TypeFalse:
if b.Type() == TypeTrue {
return b, true, nil
}
return a, false, nil
case TypeTrue:
if b.Type() == TypeFalse {
return b, true, nil
}
return a, false, nil
case TypeNull:
case TypeTrue, TypeFalse, TypeNull:
// at == bt guaranteed by the check above, no change needed
return a, false, nil
case TypeNumber:
af, _ := a.Float64()
bf, _ := b.Float64()
if af != bf {
// Fast path: if raw number strings are identical, values are equal.
// This avoids expensive float64 parsing in the common case.
if a.s == b.s {
return a, false, nil
}
// Slow path: parse as float64. If either parse fails or values differ,
// treat as changed (b replaces a).
af, aErr := a.Float64()
bf, bErr := b.Float64()
if aErr != nil || bErr != nil || af != bf {
return b, true, nil
}
return a, false, nil
case TypeString:
as, _ := a.StringBytes()
bs, _ := b.StringBytes()
if !bytes.Equal(as, bs) {
if a.s != b.s {
return b, true, nil
}
return a, false, nil
Expand Down
9 changes: 9 additions & 0 deletions mergevalues_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,15 @@ func TestMergeValues(t *testing.T) {
out := merged.MarshalTo(nil)
require.Equal(t, `1.1`, string(out))
})
t.Run("floats equal different representation", func(t *testing.T) {
t.Parallel()
a, b := MustParse(`1.0`), MustParse(`1.00`)
merged, changed, err := MergeValues(nil, a, b)
require.NoError(t, err)
require.Equal(t, false, changed)
out := merged.MarshalTo(nil)
require.Equal(t, `1.0`, string(out))
})
t.Run("arrays", func(t *testing.T) {
t.Parallel()
a, b := MustParse(`[1,2]`), MustParse(`[3,4]`)
Expand Down
Loading