-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathencoder.go
More file actions
491 lines (407 loc) · 12.3 KB
/
encoder.go
File metadata and controls
491 lines (407 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
package bogo
import (
	"bytes"
	"fmt"
	"io"
	"reflect"
	"sort"
	"strings"
	"time"
	"unicode/utf8"
)
// Encoder provides structured encoding with configurable options.
// Configure it via NewConfigurableEncoder and the With* option functions.
type Encoder struct {
	// Configuration options
	MaxDepth        int    // Maximum nesting depth for objects/lists (0 = unlimited)
	StrictMode      bool   // Strict type checking and validation (e.g. object key length/UTF-8 checks)
	CompactLists    bool   // Use typed lists when beneficial
	ValidateStrings bool   // Validate UTF-8 encoding in strings
	TagName         string // Struct tag name to use (default: "json" for compatibility)

	// Internal state
	depth int // current nesting depth; reset to 0 at the start of each Encode call
}
// EncoderOption is a function type for configuring an Encoder.
// Values are produced by the With* constructors (WithMaxDepth, WithStrictMode, ...)
// and applied in order by NewConfigurableEncoder.
type EncoderOption func(*Encoder)
// NewConfigurableEncoder builds an Encoder with sensible defaults
// (max depth 100, compact lists, UTF-8 validation, "json" tags) and
// then applies any supplied options on top of them.
func NewConfigurableEncoder(options ...EncoderOption) *Encoder {
	enc := &Encoder{
		MaxDepth:        100,    // default nesting limit
		StrictMode:      false,  // lenient by default
		CompactLists:    true,   // prefer typed lists
		ValidateStrings: true,   // reject malformed UTF-8
		TagName:         "json", // reuse existing json tags
	}
	for _, apply := range options {
		apply(enc)
	}
	return enc
}
// Encoder option functions

// WithMaxDepth sets the maximum nesting depth for objects/lists
// (0 disables the limit).
func WithMaxDepth(depth int) EncoderOption {
	return func(enc *Encoder) { enc.MaxDepth = depth }
}
// WithStrictMode toggles strict type checking and validation.
func WithStrictMode(strict bool) EncoderOption {
	return func(enc *Encoder) { enc.StrictMode = strict }
}
// WithCompactLists toggles the use of typed lists when beneficial.
func WithCompactLists(compact bool) EncoderOption {
	return func(enc *Encoder) { enc.CompactLists = compact }
}
// WithStringValidation toggles UTF-8 validation of strings.
func WithStringValidation(validate bool) EncoderOption {
	return func(enc *Encoder) { enc.ValidateStrings = validate }
}
// WithStructTag sets the struct tag name used to resolve field names.
func WithStructTag(tagName string) EncoderOption {
	return func(enc *Encoder) { enc.TagName = tagName }
}
// Encode serializes v with the configured options and returns the
// result prefixed with the format Version byte. The internal depth
// counter is reset for each top-level call.
func (e *Encoder) Encode(v any) ([]byte, error) {
	e.depth = 0 // fresh depth tracking per top-level value
	payload, err := e.encode(v)
	if err != nil {
		return nil, err
	}
	// TODO(perf): could the final size be estimated up front from the
	// in-memory footprint of v? That would let us allocate one right-sized
	// buffer instead of concatenating here, possibly trading a little
	// padding for speed. Inner values are encoded first, so their final
	// offsets are not known in advance — worth a spike.
	out := make([]byte, 0, len(payload)+1)
	out = append(out, Version)
	return append(out, payload...), nil
}
// EncodeTo encodes v and writes the complete result to w in one Write call.
func (e *Encoder) EncodeTo(w io.Writer, v any) error {
	data, err := e.Encode(v)
	if err != nil {
		return err
	}
	if _, err := w.Write(data); err != nil {
		return err
	}
	return nil
}
// encode dispatches a single value to its type-specific encoder while
// enforcing the MaxDepth nesting limit. Null-like values short-circuit
// to the null encoding; anything not covered by the fast-path type
// switch falls through to reflection.
func (e *Encoder) encode(v any) ([]byte, error) {
	if e.MaxDepth > 0 && e.depth > e.MaxDepth {
		return nil, fmt.Errorf("bogo encode error: maximum nesting depth exceeded (%d)", e.MaxDepth)
	}
	if isNullValue(v) {
		return encodeNull(), nil
	}

	switch val := v.(type) {
	case string:
		if e.ValidateStrings && !isValidUTF8(val) {
			return nil, fmt.Errorf("bogo encode error: invalid UTF-8 string")
		}
		return encodeString(val)
	case bool:
		return encodeBool(val), nil
	case byte:
		return encodeByte(val)
	case []byte:
		return encodeBlob(val)
	case time.Time:
		return encodeTimestamp(val.UnixMilli())
	case []string, []int, []int64, []float64, []bool:
		// All homogeneous slice types share one path; CompactLists
		// selects the typed-list form when enabled.
		if e.CompactLists {
			return e.encodeTypedListWithDepth(val)
		}
		return e.encodeListWithDepth(val)
	case map[string]any:
		return e.encodeObjectWithDepth(val)
	default:
		// Reflection covers structs, pointers, and other container shapes.
		return e.encodeReflected(v)
	}
}
// encodeListWithDepth encodes a generic (heterogeneous) list via
// encodeList, counting one nesting level for the duration of the call.
// The defer guarantees the depth counter unwinds on every return path.
// NOTE(review): unlike encodeObjectWithDepth there is no pre-increment
// MaxDepth check here; the limit is only enforced when nested elements
// re-enter encode — confirm this asymmetry is intended.
func (e *Encoder) encodeListWithDepth(v any) ([]byte, error) {
	e.depth++
	defer func() { e.depth-- }()
	return encodeList(v)
}
// encodeTypedListWithDepth encodes a homogeneous (typed) list via
// encodeTypedList, counting one nesting level for the duration of the
// call. The defer guarantees the depth counter unwinds on every return
// path. NOTE(review): no pre-increment MaxDepth check here, mirroring
// encodeListWithDepth — confirm intended.
func (e *Encoder) encodeTypedListWithDepth(v any) ([]byte, error) {
	e.depth++
	defer func() { e.depth-- }()
	return encodeTypedList(v)
}
// encodeObjectWithDepth encodes a map[string]any as an object,
// enforcing the depth limit and (in strict mode) validating keys
// before any encoding work is done.
func (e *Encoder) encodeObjectWithDepth(v map[string]any) ([]byte, error) {
	// Reject before descending so the limit counts this object itself.
	if e.MaxDepth > 0 && e.depth >= e.MaxDepth {
		return nil, fmt.Errorf("bogo encode error: maximum nesting depth exceeded (%d)", e.MaxDepth)
	}
	e.depth++
	defer func() { e.depth-- }()

	if e.StrictMode {
		// Strict mode: every key must fit in one length byte and,
		// when string validation is on, be well-formed UTF-8.
		for key := range v {
			switch {
			case len(key) > 255:
				return nil, fmt.Errorf("bogo encode error: object key too long (%d bytes, max 255)", len(key))
			case e.ValidateStrings && !isValidUTF8(key):
				return nil, fmt.Errorf("bogo encode error: invalid UTF-8 in object key")
			}
		}
	}
	return e.encodeMapWithDepth(v)
}
// encodeMapWithDepth encodes a map as an object:
// TypeObject + LenSize + DataSize + concatenated field entries.
//
// Keys are encoded in sorted order so that identical maps always
// produce identical bytes — Go's map iteration order is randomized,
// which previously made the encoder's output nondeterministic
// (the same fix encoding/json applies when marshaling maps).
func (e *Encoder) encodeMapWithDepth(obj map[string]any) ([]byte, error) {
	keys := make([]string, 0, len(obj))
	for key := range obj {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	// Encode each key-value pair as a field entry.
	fieldsBuf := &bytes.Buffer{}
	for _, key := range keys {
		fieldEntry, err := e.encodeFieldEntryWithDepth(key, obj[key])
		if err != nil {
			return nil, fmt.Errorf("bogo encode error: failed to encode field %s: %w", key, err)
		}
		fieldsBuf.Write(fieldEntry)
	}
	fieldsData := fieldsBuf.Bytes()

	// Encode the total size of all field entries.
	encodedSizeData, err := encodeUint(uint64(len(fieldsData)))
	if err != nil {
		return nil, fmt.Errorf("bogo encode error: failed to encode fields size: %w", err)
	}

	// Build final object: TypeObject + LenSize + DataSize + FieldData.
	result := &bytes.Buffer{}
	result.WriteByte(TypeObject)
	result.Write(encodedSizeData[1:]) // drop the type byte from the size encoding
	result.Write(fieldsData)
	return result.Bytes(), nil
}
// encodeFieldEntryWithDepth serializes one key/value pair as a field
// entry: LenSize + EntrySize + KeyLength(1 byte) + Key + EncodedValue.
// The value is encoded through e.encode so depth tracking applies.
func (e *Encoder) encodeFieldEntryWithDepth(key string, value any) ([]byte, error) {
	// Encode the value first; its length is part of the entry size.
	encodedValue, err := e.encode(value)
	if err != nil {
		return nil, err
	}

	keyBytes := []byte(key)
	if len(keyBytes) > 255 {
		return nil, fmt.Errorf("key too long, maximum 255 bytes")
	}

	// Entry size = one key-length byte + key + encoded value.
	totalSize := 1 + len(keyBytes) + len(encodedValue)
	sizePrefix, err := encodeUint(uint64(totalSize))
	if err != nil {
		return nil, err
	}

	var entry bytes.Buffer
	entry.Write(sizePrefix[1:]) // drop the type byte from the size encoding
	entry.WriteByte(byte(len(keyBytes)))
	entry.Write(keyBytes)
	entry.Write(encodedValue)
	return entry.Bytes(), nil
}
// encodeReflected falls back to reflection for values the fast-path
// type switch does not cover: structs, pointers, arbitrary slices,
// arrays, maps, and interfaces. Nil pointers/interfaces encode as null.
func (e *Encoder) encodeReflected(v any) ([]byte, error) {
	rv := reflect.ValueOf(v)
	rt := reflect.TypeOf(v)

	// Unwrap pointer chains; a nil pointer at any level encodes as null.
	for rv.Kind() == reflect.Ptr {
		if rv.IsNil() {
			return encodeNull(), nil
		}
		rv, rt = rv.Elem(), rt.Elem()
	}

	switch rv.Kind() {
	case reflect.Struct:
		return e.encodeStruct(rv, rt)
	case reflect.Slice, reflect.Array:
		return e.encodeReflectedList(rv)
	case reflect.Map:
		return e.encodeReflectedMap(rv)
	case reflect.Interface:
		// Re-dispatch on the dynamic value inside the interface.
		if rv.IsNil() {
			return encodeNull(), nil
		}
		return e.encode(rv.Interface())
	default:
		// Remaining scalar kinds go through basic type encoding.
		return encode(v)
	}
}
// encodeStruct flattens a struct's exported fields into a
// map[string]any — honoring tag-derived names, "-" skips, and
// omitempty — and encodes that map as an object.
func (e *Encoder) encodeStruct(rv reflect.Value, rt reflect.Type) ([]byte, error) {
	obj := make(map[string]any, rt.NumField())
	for i := 0; i < rt.NumField(); i++ {
		field := rt.Field(i)
		if !field.IsExported() {
			continue // unexported fields are never encoded
		}
		name := e.getFieldName(field)
		if name == "-" {
			continue // tag explicitly omits this field
		}
		value := rv.Field(i)
		if e.shouldOmitEmpty(field) && e.isZeroValue(value) {
			continue // omitempty: drop zero-valued fields
		}
		obj[name] = value.Interface()
	}
	return e.encodeObjectWithDepth(obj)
}
// getFieldName resolves the encoded name for a struct field from its
// e.TagName tag. Tags have the form "name" or "name,opt1,opt2"; only
// the part before the first comma is the name. The Go field name is
// used when the tag is absent or its name portion is empty (e.g.
// `json:",omitempty"`, which previously produced an empty field name).
// A "-" name is returned as-is; the caller treats it as "skip".
func (e *Encoder) getFieldName(field reflect.StructField) string {
	tag := field.Tag.Get(e.TagName)
	if tag == "" {
		return field.Name
	}
	name, _, _ := strings.Cut(tag, ",")
	if name == "" {
		// Tag supplies only options, no name — keep the Go field name.
		return field.Name
	}
	return name
}
// shouldOmitEmpty reports whether the field's e.TagName tag carries the
// "omitempty" option, i.e. any comma-separated token after the name
// (`json:"name,omitempty"`, `json:",omitempty"`,
// `json:"name,omitempty,string"`).
//
// The previous suffix check required len(tag) > 10, which missed the
// exactly-10-byte tag ",omitempty", and `tag == "omitempty"` wrongly
// treated a field *named* "omitempty" as carrying the option.
func (e *Encoder) shouldOmitEmpty(field reflect.StructField) bool {
	tag := field.Tag.Get(e.TagName)
	// Options are everything after the first comma; the name itself
	// (before the comma) never counts as an option.
	_, opts, found := strings.Cut(tag, ",")
	if !found {
		return false
	}
	for _, opt := range strings.Split(opts, ",") {
		if opt == "omitempty" {
			return true
		}
	}
	return false
}
// isZeroValue reports whether v holds its type's zero value for
// omitempty purposes. Strings and containers count as zero when empty
// (even if non-nil, unlike reflect.Value.IsZero); kinds not listed
// (e.g. structs) are never considered zero.
func (e *Encoder) isZeroValue(v reflect.Value) bool {
	switch v.Kind() {
	case reflect.Bool:
		return !v.Bool()
	case reflect.String, reflect.Array, reflect.Slice, reflect.Map:
		return v.Len() == 0
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return v.Int() == 0
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
		return v.Uint() == 0
	case reflect.Float32, reflect.Float64:
		return v.Float() == 0
	case reflect.Interface, reflect.Ptr:
		return v.IsNil()
	default:
		return false
	}
}
// encodeReflectedList copies an arbitrary slice or array into a []any
// and encodes it as a generic list.
func (e *Encoder) encodeReflectedList(rv reflect.Value) ([]byte, error) {
	items := make([]any, rv.Len())
	for i := 0; i < rv.Len(); i++ {
		items[i] = rv.Index(i).Interface()
	}
	return e.encodeListWithDepth(items)
}
// encodeReflectedMap converts an arbitrary map into map[string]any by
// formatting each key with %v, then encodes it as an object.
// NOTE(review): distinct non-string keys that format identically would
// collide here — confirm callers only pass string-keyed maps.
func (e *Encoder) encodeReflectedMap(rv reflect.Value) ([]byte, error) {
	obj := make(map[string]any, rv.Len())
	iter := rv.MapRange()
	for iter.Next() {
		obj[fmt.Sprintf("%v", iter.Key().Interface())] = iter.Value().Interface()
	}
	return e.encodeObjectWithDepth(obj)
}
// isValidUTF8 reports whether s is well-formed UTF-8.
//
// The previous implementation scanned for U+FFFD (which range-over-string
// substitutes for invalid bytes), but that also rejected strings that
// legitimately contain the replacement character. utf8.ValidString
// checks the actual byte structure instead.
func isValidUTF8(s string) bool {
	return utf8.ValidString(s)
}
// EncodingStats provides statistics about encoding operations.
type EncodingStats struct {
	BytesEncoded int64        // total bytes produced by successful Encode calls
	MaxDepthUsed int          // deepest nesting level observed by the collector
	TypesEncoded map[Type]int // per-type count of top-level type tags emitted
	ErrorsCount  int64        // number of Encode calls that returned an error
}
// StatsCollector is an encoder that collects statistics about its
// encoding operations. It embeds *Encoder, so all Encoder configuration
// and methods apply; its Encode override records the stats.
type StatsCollector struct {
	*Encoder
	Stats EncodingStats // running totals, updated on every Encode call
}
// NewStatsCollector creates an encoder that collects encoding
// statistics, configured with the same options as NewConfigurableEncoder.
func NewStatsCollector(options ...EncoderOption) *StatsCollector {
	sc := &StatsCollector{Encoder: NewConfigurableEncoder(options...)}
	sc.Stats.TypesEncoded = make(map[Type]int)
	return sc
}
// Encode wraps the parent Encoder.Encode, updating Stats on both
// success (bytes, depth, type counts) and failure (error count).
func (sc *StatsCollector) Encode(v any) ([]byte, error) {
	data, err := sc.Encoder.Encode(v)
	if err != nil {
		sc.Stats.ErrorsCount++
		return nil, err
	}
	sc.Stats.BytesEncoded += int64(len(data))
	// NOTE(review): Encoder.encode decrements depth on the way back out,
	// so depth has typically returned to 0 by the time Encode finishes;
	// this comparison likely never captures the true peak depth — confirm
	// whether MaxDepthUsed is intended to track a high-water mark.
	if sc.Encoder.depth > sc.Stats.MaxDepthUsed {
		sc.Stats.MaxDepthUsed = sc.Encoder.depth
	}
	// Count type usage (simplified - just count the main type).
	// data[0] is the format version byte; data[1] is the top-level type tag.
	if len(data) >= 2 {
		typeVal := Type(data[1])
		sc.Stats.TypesEncoded[typeVal]++
	}
	return data, nil
}
// GetStats returns a snapshot of the current statistics. The
// TypesEncoded map is deep-copied so callers cannot mutate the
// collector's internal state through the returned value.
func (sc *StatsCollector) GetStats() EncodingStats {
	snapshot := sc.Stats
	copied := make(map[Type]int, len(sc.Stats.TypesEncoded))
	for typ, count := range sc.Stats.TypesEncoded {
		copied[typ] = count
	}
	snapshot.TypesEncoded = copied
	return snapshot
}
// ResetStats zeroes every counter and starts a fresh type-count map.
func (sc *StatsCollector) ResetStats() {
	sc.Stats = EncodingStats{TypesEncoded: make(map[Type]int)}
}