You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
527 lines
15 KiB
527 lines
15 KiB
// Copyright 2020 The Go Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
//go:build goexperiment.jsonv2 |
|
|
|
package jsontext |
|
|
|
import ( |
|
"bytes" |
|
"errors" |
|
"math" |
|
"strconv" |
|
|
|
"next.orly.dev/pkg/json/internal/jsonflags" |
|
"next.orly.dev/pkg/json/internal/jsonwire" |
|
) |
|
|
|
// NOTE: Token is analogous to v1 json.Token. |
|
|
|
const ( |
|
maxInt64 = math.MaxInt64 |
|
minInt64 = math.MinInt64 |
|
maxUint64 = math.MaxUint64 |
|
minUint64 = 0 // for consistency and readability purposes |
|
|
|
invalidTokenPanic = "invalid jsontext.Token; it has been voided by a subsequent json.Decoder call" |
|
) |
|
|
|
var errInvalidToken = errors.New("invalid jsontext.Token") |
|
|
|
// Token represents a lexical JSON token, which may be one of the following: |
|
// - a JSON literal (i.e., null, true, or false) |
|
// - a JSON string (e.g., "hello, world!") |
|
// - a JSON number (e.g., 123.456) |
|
// - a begin or end delimiter for a JSON object (i.e., { or } ) |
|
// - a begin or end delimiter for a JSON array (i.e., [ or ] ) |
|
// |
|
// A Token cannot represent entire array or object values, while a [Value] can. |
|
// There is no Token to represent commas and colons since |
|
// these structural tokens can be inferred from the surrounding context. |
|
type Token struct { |
|
nonComparable |
|
|
|
// Tokens can exist in either a "raw" or an "exact" form. |
|
// Tokens produced by the Decoder are in the "raw" form. |
|
// Tokens returned by constructors are usually in the "exact" form. |
|
// The Encoder accepts Tokens in either the "raw" or "exact" form. |
|
// |
|
// The following chart shows the possible values for each Token type: |
|
// ╔═════════════════╦════════════╤════════════╤════════════╗ |
|
// ║ Token type ║ raw field │ str field │ num field ║ |
|
// ╠═════════════════╬════════════╪════════════╪════════════╣ |
|
// ║ null (raw) ║ "null" │ "" │ 0 ║ |
|
// ║ false (raw) ║ "false" │ "" │ 0 ║ |
|
// ║ true (raw) ║ "true" │ "" │ 0 ║ |
|
// ║ string (raw) ║ non-empty │ "" │ offset ║ |
|
// ║ string (string) ║ nil │ non-empty │ 0 ║ |
|
// ║ number (raw) ║ non-empty │ "" │ offset ║ |
|
// ║ number (float) ║ nil │ "f" │ non-zero ║ |
|
// ║ number (int64) ║ nil │ "i" │ non-zero ║ |
|
// ║ number (uint64) ║ nil │ "u" │ non-zero ║ |
|
// ║ object (delim) ║ "{" or "}" │ "" │ 0 ║ |
|
// ║ array (delim) ║ "[" or "]" │ "" │ 0 ║ |
|
// ╚═════════════════╩════════════╧════════════╧════════════╝ |
|
// |
|
// Notes: |
|
// - For tokens stored in "raw" form, the num field contains the |
|
// absolute offset determined by raw.previousOffsetStart(). |
|
// The buffer itself is stored in raw.previousBuffer(). |
|
// - JSON literals and structural characters are always in the "raw" form. |
|
// - JSON strings and numbers can be in either "raw" or "exact" forms. |
|
// - The exact zero value of JSON strings and numbers in the "exact" forms |
|
// have ambiguous representation. Thus, they are always represented |
|
// in the "raw" form. |
|
|
|
// raw contains a reference to the raw decode buffer. |
|
// If non-nil, then its value takes precedence over str and num. |
|
// It is only valid if num == raw.previousOffsetStart(). |
|
raw *decodeBuffer |
|
|
|
// str is the unescaped JSON string if num is zero. |
|
// Otherwise, it is "f", "i", or "u" if num should be interpreted |
|
// as a float64, int64, or uint64, respectively. |
|
str string |
|
|
|
// num is a float64, int64, or uint64 stored as a uint64 value. |
|
// It is non-zero for any JSON number in the "exact" form. |
|
num uint64 |
|
} |
|
|
|
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues? |
|
|
|
var ( |
|
Null Token = rawToken("null") |
|
False Token = rawToken("false") |
|
True Token = rawToken("true") |
|
|
|
BeginObject Token = rawToken("{") |
|
EndObject Token = rawToken("}") |
|
BeginArray Token = rawToken("[") |
|
EndArray Token = rawToken("]") |
|
|
|
zeroString Token = rawToken(`""`) |
|
zeroNumber Token = rawToken(`0`) |
|
|
|
nanString Token = String("NaN") |
|
pinfString Token = String("Infinity") |
|
ninfString Token = String("-Infinity") |
|
) |
|
|
|
func rawToken(s string) Token { |
|
return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}} |
|
} |
|
|
|
// Bool constructs a Token representing a JSON boolean. |
|
func Bool(b bool) Token { |
|
if b { |
|
return True |
|
} |
|
return False |
|
} |
|
|
|
// String constructs a Token representing a JSON string. |
|
// The provided string should contain valid UTF-8, otherwise invalid characters |
|
// may be mangled as the Unicode replacement character. |
|
func String(s string) Token { |
|
if len(s) == 0 { |
|
return zeroString |
|
} |
|
return Token{str: s} |
|
} |
|
|
|
// Float constructs a Token representing a JSON number. |
|
// The values NaN, +Inf, and -Inf will be represented |
|
// as a JSON string with the values "NaN", "Infinity", and "-Infinity". |
|
func Float(n float64) Token { |
|
switch { |
|
case math.Float64bits(n) == 0: |
|
return zeroNumber |
|
case math.IsNaN(n): |
|
return nanString |
|
case math.IsInf(n, +1): |
|
return pinfString |
|
case math.IsInf(n, -1): |
|
return ninfString |
|
} |
|
return Token{str: "f", num: math.Float64bits(n)} |
|
} |
|
|
|
// Int constructs a Token representing a JSON number from an int64. |
|
func Int(n int64) Token { |
|
if n == 0 { |
|
return zeroNumber |
|
} |
|
return Token{str: "i", num: uint64(n)} |
|
} |
|
|
|
// Uint constructs a Token representing a JSON number from a uint64. |
|
func Uint(n uint64) Token { |
|
if n == 0 { |
|
return zeroNumber |
|
} |
|
return Token{str: "u", num: uint64(n)} |
|
} |
|
|
|
// Clone makes a copy of the Token such that its value remains valid |
|
// even after a subsequent [Decoder.Read] call. |
|
func (t Token) Clone() Token { |
|
// TODO: Allow caller to avoid any allocations? |
|
if raw := t.raw; raw != nil { |
|
// Avoid copying globals. |
|
if t.raw.prevStart == 0 { |
|
switch t.raw { |
|
case Null.raw: |
|
return Null |
|
case False.raw: |
|
return False |
|
case True.raw: |
|
return True |
|
case BeginObject.raw: |
|
return BeginObject |
|
case EndObject.raw: |
|
return EndObject |
|
case BeginArray.raw: |
|
return BeginArray |
|
case EndArray.raw: |
|
return EndArray |
|
} |
|
} |
|
|
|
if uint64(raw.previousOffsetStart()) != t.num { |
|
panic(invalidTokenPanic) |
|
} |
|
buf := bytes.Clone(raw.previousBuffer()) |
|
return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}} |
|
} |
|
return t |
|
} |
|
|
|
// Bool returns the value for a JSON boolean. |
|
// It panics if the token kind is not a JSON boolean. |
|
func (t Token) Bool() bool { |
|
switch t.raw { |
|
case True.raw: |
|
return true |
|
case False.raw: |
|
return false |
|
default: |
|
panic("invalid JSON token kind: " + t.Kind().String()) |
|
} |
|
} |
|
|
|
// appendString appends a JSON string to dst and returns it. |
|
// It panics if t is not a JSON string. |
|
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) { |
|
if raw := t.raw; raw != nil { |
|
// Handle raw string value. |
|
buf := raw.previousBuffer() |
|
if Kind(buf[0]) == '"' { |
|
if jsonwire.ConsumeSimpleString(buf) == len(buf) { |
|
return append(dst, buf...), nil |
|
} |
|
dst, _, err := jsonwire.ReformatString(dst, buf, flags) |
|
return dst, err |
|
} |
|
} else if len(t.str) != 0 && t.num == 0 { |
|
// Handle exact string value. |
|
return jsonwire.AppendQuote(dst, t.str, flags) |
|
} |
|
|
|
panic("invalid JSON token kind: " + t.Kind().String()) |
|
} |
|
|
|
// String returns the unescaped string value for a JSON string. |
|
// For other JSON kinds, this returns the raw JSON representation. |
|
func (t Token) String() string { |
|
// This is inlinable to take advantage of "function outlining". |
|
// This avoids an allocation for the string(b) conversion |
|
// if the caller does not use the string in an escaping manner. |
|
// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/ |
|
s, b := t.string() |
|
if len(b) > 0 { |
|
return string(b) |
|
} |
|
return s |
|
} |
|
func (t Token) string() (string, []byte) { |
|
if raw := t.raw; raw != nil { |
|
if uint64(raw.previousOffsetStart()) != t.num { |
|
panic(invalidTokenPanic) |
|
} |
|
buf := raw.previousBuffer() |
|
if buf[0] == '"' { |
|
// TODO: Preserve ValueFlags in Token? |
|
isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf) |
|
return "", jsonwire.UnquoteMayCopy(buf, isVerbatim) |
|
} |
|
// Handle tokens that are not JSON strings for fmt.Stringer. |
|
return "", buf |
|
} |
|
if len(t.str) != 0 && t.num == 0 { |
|
return t.str, nil |
|
} |
|
// Handle tokens that are not JSON strings for fmt.Stringer. |
|
if t.num > 0 { |
|
switch t.str[0] { |
|
case 'f': |
|
return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil |
|
case 'i': |
|
return strconv.FormatInt(int64(t.num), 10), nil |
|
case 'u': |
|
return strconv.FormatUint(uint64(t.num), 10), nil |
|
} |
|
} |
|
return "<invalid jsontext.Token>", nil |
|
} |
|
|
|
// appendNumber appends a JSON number to dst and returns it. |
|
// It panics if t is not a JSON number. |
|
func (t Token) appendNumber(dst []byte, flags *jsonflags.Flags) ([]byte, error) { |
|
if raw := t.raw; raw != nil { |
|
// Handle raw number value. |
|
buf := raw.previousBuffer() |
|
if Kind(buf[0]).normalize() == '0' { |
|
dst, _, err := jsonwire.ReformatNumber(dst, buf, flags) |
|
return dst, err |
|
} |
|
} else if t.num != 0 { |
|
// Handle exact number value. |
|
switch t.str[0] { |
|
case 'f': |
|
return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil |
|
case 'i': |
|
return strconv.AppendInt(dst, int64(t.num), 10), nil |
|
case 'u': |
|
return strconv.AppendUint(dst, uint64(t.num), 10), nil |
|
} |
|
} |
|
|
|
panic("invalid JSON token kind: " + t.Kind().String()) |
|
} |
|
|
|
// Float returns the floating-point value for a JSON number. |
|
// It returns a NaN, +Inf, or -Inf value for any JSON string |
|
// with the values "NaN", "Infinity", or "-Infinity". |
|
// It panics for all other cases. |
|
func (t Token) Float() float64 { |
|
if raw := t.raw; raw != nil { |
|
// Handle raw number value. |
|
if uint64(raw.previousOffsetStart()) != t.num { |
|
panic(invalidTokenPanic) |
|
} |
|
buf := raw.previousBuffer() |
|
if Kind(buf[0]).normalize() == '0' { |
|
fv, _ := jsonwire.ParseFloat(buf, 64) |
|
return fv |
|
} |
|
} else if t.num != 0 { |
|
// Handle exact number value. |
|
switch t.str[0] { |
|
case 'f': |
|
return math.Float64frombits(t.num) |
|
case 'i': |
|
return float64(int64(t.num)) |
|
case 'u': |
|
return float64(uint64(t.num)) |
|
} |
|
} |
|
|
|
// Handle string values with "NaN", "Infinity", or "-Infinity". |
|
if t.Kind() == '"' { |
|
switch t.String() { |
|
case "NaN": |
|
return math.NaN() |
|
case "Infinity": |
|
return math.Inf(+1) |
|
case "-Infinity": |
|
return math.Inf(-1) |
|
} |
|
} |
|
|
|
panic("invalid JSON token kind: " + t.Kind().String()) |
|
} |
|
|
|
// Int returns the signed integer value for a JSON number. |
|
// The fractional component of any number is ignored (truncation toward zero). |
|
// Any number beyond the representation of an int64 will be saturated |
|
// to the closest representable value. |
|
// It panics if the token kind is not a JSON number. |
|
func (t Token) Int() int64 { |
|
if raw := t.raw; raw != nil { |
|
// Handle raw integer value. |
|
if uint64(raw.previousOffsetStart()) != t.num { |
|
panic(invalidTokenPanic) |
|
} |
|
neg := false |
|
buf := raw.previousBuffer() |
|
if len(buf) > 0 && buf[0] == '-' { |
|
neg, buf = true, buf[1:] |
|
} |
|
if numAbs, ok := jsonwire.ParseUint(buf); ok { |
|
if neg { |
|
if numAbs > -minInt64 { |
|
return minInt64 |
|
} |
|
return -1 * int64(numAbs) |
|
} else { |
|
if numAbs > +maxInt64 { |
|
return maxInt64 |
|
} |
|
return +1 * int64(numAbs) |
|
} |
|
} |
|
} else if t.num != 0 { |
|
// Handle exact integer value. |
|
switch t.str[0] { |
|
case 'i': |
|
return int64(t.num) |
|
case 'u': |
|
if t.num > maxInt64 { |
|
return maxInt64 |
|
} |
|
return int64(t.num) |
|
} |
|
} |
|
|
|
// Handle JSON number that is a floating-point value. |
|
if t.Kind() == '0' { |
|
switch fv := t.Float(); { |
|
case fv >= maxInt64: |
|
return maxInt64 |
|
case fv <= minInt64: |
|
return minInt64 |
|
default: |
|
return int64(fv) // truncation toward zero |
|
} |
|
} |
|
|
|
panic("invalid JSON token kind: " + t.Kind().String()) |
|
} |
|
|
|
// Uint returns the unsigned integer value for a JSON number. |
|
// The fractional component of any number is ignored (truncation toward zero). |
|
// Any number beyond the representation of an uint64 will be saturated |
|
// to the closest representable value. |
|
// It panics if the token kind is not a JSON number. |
|
func (t Token) Uint() uint64 { |
|
// NOTE: This accessor returns 0 for any negative JSON number, |
|
// which might be surprising, but is at least consistent with the behavior |
|
// of saturating out-of-bounds numbers to the closest representable number. |
|
|
|
if raw := t.raw; raw != nil { |
|
// Handle raw integer value. |
|
if uint64(raw.previousOffsetStart()) != t.num { |
|
panic(invalidTokenPanic) |
|
} |
|
neg := false |
|
buf := raw.previousBuffer() |
|
if len(buf) > 0 && buf[0] == '-' { |
|
neg, buf = true, buf[1:] |
|
} |
|
if num, ok := jsonwire.ParseUint(buf); ok { |
|
if neg { |
|
return minUint64 |
|
} |
|
return num |
|
} |
|
} else if t.num != 0 { |
|
// Handle exact integer value. |
|
switch t.str[0] { |
|
case 'u': |
|
return t.num |
|
case 'i': |
|
if int64(t.num) < minUint64 { |
|
return minUint64 |
|
} |
|
return uint64(int64(t.num)) |
|
} |
|
} |
|
|
|
// Handle JSON number that is a floating-point value. |
|
if t.Kind() == '0' { |
|
switch fv := t.Float(); { |
|
case fv >= maxUint64: |
|
return maxUint64 |
|
case fv <= minUint64: |
|
return minUint64 |
|
default: |
|
return uint64(fv) // truncation toward zero |
|
} |
|
} |
|
|
|
panic("invalid JSON token kind: " + t.Kind().String()) |
|
} |
|
|
|
// Kind returns the token kind. |
|
func (t Token) Kind() Kind { |
|
switch { |
|
case t.raw != nil: |
|
raw := t.raw |
|
if uint64(raw.previousOffsetStart()) != t.num { |
|
panic(invalidTokenPanic) |
|
} |
|
return Kind(t.raw.buf[raw.prevStart]).normalize() |
|
case t.num != 0: |
|
return '0' |
|
case len(t.str) != 0: |
|
return '"' |
|
default: |
|
return invalidKind |
|
} |
|
} |
|
|
|
// Kind represents each possible JSON token kind with a single byte, |
|
// which is conveniently the first byte of that kind's grammar |
|
// with the restriction that numbers always be represented with '0': |
|
// |
|
// - 'n': null |
|
// - 'f': false |
|
// - 't': true |
|
// - '"': string |
|
// - '0': number |
|
// - '{': object begin |
|
// - '}': object end |
|
// - '[': array begin |
|
// - ']': array end |
|
// |
|
// An invalid kind is usually represented using 0, |
|
// but may be non-zero due to invalid JSON data. |
|
type Kind byte |
|
|
|
const invalidKind Kind = 0 |
|
|
|
// String prints the kind in a humanly readable fashion. |
|
func (k Kind) String() string { |
|
switch k { |
|
case 'n': |
|
return "null" |
|
case 'f': |
|
return "false" |
|
case 't': |
|
return "true" |
|
case '"': |
|
return "string" |
|
case '0': |
|
return "number" |
|
case '{': |
|
return "{" |
|
case '}': |
|
return "}" |
|
case '[': |
|
return "[" |
|
case ']': |
|
return "]" |
|
default: |
|
return "<invalid jsontext.Kind: " + jsonwire.QuoteRune(string(k)) + ">" |
|
} |
|
} |
|
|
|
// normalize coalesces all possible starting characters of a number as just '0'. |
|
func (k Kind) normalize() Kind { |
|
if k == '-' || ('0' <= k && k <= '9') { |
|
return '0' |
|
} |
|
return k |
|
}
|
|
|