You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
514 lines
13 KiB
514 lines
13 KiB
// Copyright 2010 The Go Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
//go:build !goexperiment.jsonv2 |
|
|
|
package json |
|
|
|
import ( |
|
"bytes" |
|
"errors" |
|
"io" |
|
) |
|
|
|
// A Decoder reads and decodes JSON values from an input stream. |
|
type Decoder struct { |
|
r io.Reader |
|
buf []byte |
|
d decodeState |
|
scanp int // start of unread data in buf |
|
scanned int64 // amount of data already scanned |
|
scan scanner |
|
err error |
|
|
|
tokenState int |
|
tokenStack []int |
|
} |
|
|
|
// NewDecoder returns a new decoder that reads from r. |
|
// |
|
// The decoder introduces its own buffering and may |
|
// read data from r beyond the JSON values requested. |
|
func NewDecoder(r io.Reader) *Decoder { |
|
return &Decoder{r: r} |
|
} |
|
|
|
// UseNumber causes the Decoder to unmarshal a number into an |
|
// interface value as a [Number] instead of as a float64. |
|
func (dec *Decoder) UseNumber() { dec.d.useNumber = true } |
|
|
|
// DisallowUnknownFields causes the Decoder to return an error when the destination |
|
// is a struct and the input contains object keys which do not match any |
|
// non-ignored, exported fields in the destination. |
|
func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } |
|
|
|
// Decode reads the next JSON-encoded value from its |
|
// input and stores it in the value pointed to by v. |
|
// |
|
// See the documentation for [Unmarshal] for details about |
|
// the conversion of JSON into a Go value. |
|
func (dec *Decoder) Decode(v any) error { |
|
if dec.err != nil { |
|
return dec.err |
|
} |
|
|
|
if err := dec.tokenPrepareForDecode(); err != nil { |
|
return err |
|
} |
|
|
|
if !dec.tokenValueAllowed() { |
|
return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} |
|
} |
|
|
|
// Read whole value into buffer. |
|
n, err := dec.readValue() |
|
if err != nil { |
|
return err |
|
} |
|
dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) |
|
dec.scanp += n |
|
|
|
// Don't save err from unmarshal into dec.err: |
|
// the connection is still usable since we read a complete JSON |
|
// object from it before the error happened. |
|
err = dec.d.unmarshal(v) |
|
|
|
// fixup token streaming state |
|
dec.tokenValueEnd() |
|
|
|
return err |
|
} |
|
|
|
// Buffered returns a reader of the data remaining in the Decoder's |
|
// buffer. The reader is valid until the next call to [Decoder.Decode]. |
|
func (dec *Decoder) Buffered() io.Reader { |
|
return bytes.NewReader(dec.buf[dec.scanp:]) |
|
} |
|
|
|
// readValue reads a JSON value into dec.buf. |
|
// It returns the length of the encoding. |
|
func (dec *Decoder) readValue() (int, error) { |
|
dec.scan.reset() |
|
|
|
scanp := dec.scanp |
|
var err error |
|
Input: |
|
// help the compiler see that scanp is never negative, so it can remove |
|
// some bounds checks below. |
|
for scanp >= 0 { |
|
|
|
// Look in the buffer for a new value. |
|
for ; scanp < len(dec.buf); scanp++ { |
|
c := dec.buf[scanp] |
|
dec.scan.bytes++ |
|
switch dec.scan.step(&dec.scan, c) { |
|
case scanEnd: |
|
// scanEnd is delayed one byte so we decrement |
|
// the scanner bytes count by 1 to ensure that |
|
// this value is correct in the next call of Decode. |
|
dec.scan.bytes-- |
|
break Input |
|
case scanEndObject, scanEndArray: |
|
// scanEnd is delayed one byte. |
|
// We might block trying to get that byte from src, |
|
// so instead invent a space byte. |
|
if stateEndValue(&dec.scan, ' ') == scanEnd { |
|
scanp++ |
|
break Input |
|
} |
|
case scanError: |
|
dec.err = dec.scan.err |
|
return 0, dec.scan.err |
|
} |
|
} |
|
|
|
// Did the last read have an error? |
|
// Delayed until now to allow buffer scan. |
|
if err != nil { |
|
if err == io.EOF { |
|
if dec.scan.step(&dec.scan, ' ') == scanEnd { |
|
break Input |
|
} |
|
if nonSpace(dec.buf) { |
|
err = io.ErrUnexpectedEOF |
|
} |
|
} |
|
dec.err = err |
|
return 0, err |
|
} |
|
|
|
n := scanp - dec.scanp |
|
err = dec.refill() |
|
scanp = dec.scanp + n |
|
} |
|
return scanp - dec.scanp, nil |
|
} |
|
|
|
func (dec *Decoder) refill() error { |
|
// Make room to read more into the buffer. |
|
// First slide down data already consumed. |
|
if dec.scanp > 0 { |
|
dec.scanned += int64(dec.scanp) |
|
n := copy(dec.buf, dec.buf[dec.scanp:]) |
|
dec.buf = dec.buf[:n] |
|
dec.scanp = 0 |
|
} |
|
|
|
// Grow buffer if not large enough. |
|
const minRead = 512 |
|
if cap(dec.buf)-len(dec.buf) < minRead { |
|
newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) |
|
copy(newBuf, dec.buf) |
|
dec.buf = newBuf |
|
} |
|
|
|
// Read. Delay error for next iteration (after scan). |
|
n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) |
|
dec.buf = dec.buf[0 : len(dec.buf)+n] |
|
|
|
return err |
|
} |
|
|
|
func nonSpace(b []byte) bool { |
|
for _, c := range b { |
|
if !isSpace(c) { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
// An Encoder writes JSON values to an output stream. |
|
type Encoder struct { |
|
w io.Writer |
|
err error |
|
escapeHTML bool |
|
|
|
indentBuf []byte |
|
indentPrefix string |
|
indentValue string |
|
} |
|
|
|
// NewEncoder returns a new encoder that writes to w. |
|
func NewEncoder(w io.Writer) *Encoder { |
|
return &Encoder{w: w, escapeHTML: true} |
|
} |
|
|
|
// Encode writes the JSON encoding of v to the stream, |
|
// with insignificant space characters elided, |
|
// followed by a newline character. |
|
// |
|
// See the documentation for [Marshal] for details about the |
|
// conversion of Go values to JSON. |
|
func (enc *Encoder) Encode(v any) error { |
|
if enc.err != nil { |
|
return enc.err |
|
} |
|
|
|
e := newEncodeState() |
|
defer encodeStatePool.Put(e) |
|
|
|
err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
// Terminate each value with a newline. |
|
// This makes the output look a little nicer |
|
// when debugging, and some kind of space |
|
// is required if the encoded value was a number, |
|
// so that the reader knows there aren't more |
|
// digits coming. |
|
e.WriteByte('\n') |
|
|
|
b := e.Bytes() |
|
if enc.indentPrefix != "" || enc.indentValue != "" { |
|
enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) |
|
if err != nil { |
|
return err |
|
} |
|
b = enc.indentBuf |
|
} |
|
if _, err = enc.w.Write(b); err != nil { |
|
enc.err = err |
|
} |
|
return err |
|
} |
|
|
|
// SetIndent instructs the encoder to format each subsequent encoded |
|
// value as if indented by the package-level function Indent(dst, src, prefix, indent). |
|
// Calling SetIndent("", "") disables indentation. |
|
func (enc *Encoder) SetIndent(prefix, indent string) { |
|
enc.indentPrefix = prefix |
|
enc.indentValue = indent |
|
} |
|
|
|
// SetEscapeHTML specifies whether problematic HTML characters |
|
// should be escaped inside JSON quoted strings. |
|
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e |
|
// to avoid certain safety problems that can arise when embedding JSON in HTML. |
|
// |
|
// In non-HTML settings where the escaping interferes with the readability |
|
// of the output, SetEscapeHTML(false) disables this behavior. |
|
func (enc *Encoder) SetEscapeHTML(on bool) { |
|
enc.escapeHTML = on |
|
} |
|
|
|
// RawMessage is a raw encoded JSON value. |
|
// It implements [Marshaler] and [Unmarshaler] and can |
|
// be used to delay JSON decoding or precompute a JSON encoding. |
|
type RawMessage []byte |
|
|
|
// MarshalJSON returns m as the JSON encoding of m. |
|
func (m RawMessage) MarshalJSON() ([]byte, error) { |
|
if m == nil { |
|
return []byte("null"), nil |
|
} |
|
return m, nil |
|
} |
|
|
|
// UnmarshalJSON sets *m to a copy of data. |
|
func (m *RawMessage) UnmarshalJSON(data []byte) error { |
|
if m == nil { |
|
return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") |
|
} |
|
*m = append((*m)[0:0], data...) |
|
return nil |
|
} |
|
|
|
var _ Marshaler = (*RawMessage)(nil) |
|
var _ Unmarshaler = (*RawMessage)(nil) |
|
|
|
// A Token holds a value of one of these types: |
|
// |
|
// - [Delim], for the four JSON delimiters [ ] { } |
|
// - bool, for JSON booleans |
|
// - float64, for JSON numbers |
|
// - [Number], for JSON numbers |
|
// - string, for JSON string literals |
|
// - nil, for JSON null |
|
type Token any |
|
|
|
const ( |
|
tokenTopValue = iota |
|
tokenArrayStart |
|
tokenArrayValue |
|
tokenArrayComma |
|
tokenObjectStart |
|
tokenObjectKey |
|
tokenObjectColon |
|
tokenObjectValue |
|
tokenObjectComma |
|
) |
|
|
|
// advance tokenstate from a separator state to a value state |
|
func (dec *Decoder) tokenPrepareForDecode() error { |
|
// Note: Not calling peek before switch, to avoid |
|
// putting peek into the standard Decode path. |
|
// peek is only called when using the Token API. |
|
switch dec.tokenState { |
|
case tokenArrayComma: |
|
c, err := dec.peek() |
|
if err != nil { |
|
return err |
|
} |
|
if c != ',' { |
|
return &SyntaxError{"expected comma after array element", dec.InputOffset()} |
|
} |
|
dec.scanp++ |
|
dec.tokenState = tokenArrayValue |
|
case tokenObjectColon: |
|
c, err := dec.peek() |
|
if err != nil { |
|
return err |
|
} |
|
if c != ':' { |
|
return &SyntaxError{"expected colon after object key", dec.InputOffset()} |
|
} |
|
dec.scanp++ |
|
dec.tokenState = tokenObjectValue |
|
} |
|
return nil |
|
} |
|
|
|
func (dec *Decoder) tokenValueAllowed() bool { |
|
switch dec.tokenState { |
|
case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
func (dec *Decoder) tokenValueEnd() { |
|
switch dec.tokenState { |
|
case tokenArrayStart, tokenArrayValue: |
|
dec.tokenState = tokenArrayComma |
|
case tokenObjectValue: |
|
dec.tokenState = tokenObjectComma |
|
} |
|
} |
|
|
|
// A Delim is a JSON array or object delimiter, one of [ ] { or }. |
|
type Delim rune |
|
|
|
func (d Delim) String() string { |
|
return string(d) |
|
} |
|
|
|
// Token returns the next JSON token in the input stream. |
|
// At the end of the input stream, Token returns nil, [io.EOF]. |
|
// |
|
// Token guarantees that the delimiters [ ] { } it returns are |
|
// properly nested and matched: if Token encounters an unexpected |
|
// delimiter in the input, it will return an error. |
|
// |
|
// The input stream consists of basic JSON values—bool, string, |
|
// number, and null—along with delimiters [ ] { } of type [Delim] |
|
// to mark the start and end of arrays and objects. |
|
// Commas and colons are elided. |
|
func (dec *Decoder) Token() (Token, error) { |
|
for { |
|
c, err := dec.peek() |
|
if err != nil { |
|
return nil, err |
|
} |
|
switch c { |
|
case '[': |
|
if !dec.tokenValueAllowed() { |
|
return dec.tokenError(c) |
|
} |
|
dec.scanp++ |
|
dec.tokenStack = append(dec.tokenStack, dec.tokenState) |
|
dec.tokenState = tokenArrayStart |
|
return Delim('['), nil |
|
|
|
case ']': |
|
if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { |
|
return dec.tokenError(c) |
|
} |
|
dec.scanp++ |
|
dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] |
|
dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] |
|
dec.tokenValueEnd() |
|
return Delim(']'), nil |
|
|
|
case '{': |
|
if !dec.tokenValueAllowed() { |
|
return dec.tokenError(c) |
|
} |
|
dec.scanp++ |
|
dec.tokenStack = append(dec.tokenStack, dec.tokenState) |
|
dec.tokenState = tokenObjectStart |
|
return Delim('{'), nil |
|
|
|
case '}': |
|
if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { |
|
return dec.tokenError(c) |
|
} |
|
dec.scanp++ |
|
dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] |
|
dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] |
|
dec.tokenValueEnd() |
|
return Delim('}'), nil |
|
|
|
case ':': |
|
if dec.tokenState != tokenObjectColon { |
|
return dec.tokenError(c) |
|
} |
|
dec.scanp++ |
|
dec.tokenState = tokenObjectValue |
|
continue |
|
|
|
case ',': |
|
if dec.tokenState == tokenArrayComma { |
|
dec.scanp++ |
|
dec.tokenState = tokenArrayValue |
|
continue |
|
} |
|
if dec.tokenState == tokenObjectComma { |
|
dec.scanp++ |
|
dec.tokenState = tokenObjectKey |
|
continue |
|
} |
|
return dec.tokenError(c) |
|
|
|
case '"': |
|
if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { |
|
var x string |
|
old := dec.tokenState |
|
dec.tokenState = tokenTopValue |
|
err := dec.Decode(&x) |
|
dec.tokenState = old |
|
if err != nil { |
|
return nil, err |
|
} |
|
dec.tokenState = tokenObjectColon |
|
return x, nil |
|
} |
|
fallthrough |
|
|
|
default: |
|
if !dec.tokenValueAllowed() { |
|
return dec.tokenError(c) |
|
} |
|
var x any |
|
if err := dec.Decode(&x); err != nil { |
|
return nil, err |
|
} |
|
return x, nil |
|
} |
|
} |
|
} |
|
|
|
func (dec *Decoder) tokenError(c byte) (Token, error) { |
|
var context string |
|
switch dec.tokenState { |
|
case tokenTopValue: |
|
context = " looking for beginning of value" |
|
case tokenArrayStart, tokenArrayValue, tokenObjectValue: |
|
context = " looking for beginning of value" |
|
case tokenArrayComma: |
|
context = " after array element" |
|
case tokenObjectKey: |
|
context = " looking for beginning of object key string" |
|
case tokenObjectColon: |
|
context = " after object key" |
|
case tokenObjectComma: |
|
context = " after object key:value pair" |
|
} |
|
return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} |
|
} |
|
|
|
// More reports whether there is another element in the |
|
// current array or object being parsed. |
|
func (dec *Decoder) More() bool { |
|
c, err := dec.peek() |
|
return err == nil && c != ']' && c != '}' |
|
} |
|
|
|
func (dec *Decoder) peek() (byte, error) { |
|
var err error |
|
for { |
|
for i := dec.scanp; i < len(dec.buf); i++ { |
|
c := dec.buf[i] |
|
if isSpace(c) { |
|
continue |
|
} |
|
dec.scanp = i |
|
return c, nil |
|
} |
|
// buffer has been scanned, now report any error |
|
if err != nil { |
|
return 0, err |
|
} |
|
err = dec.refill() |
|
} |
|
} |
|
|
|
// InputOffset returns the input stream byte offset of the current decoder position. |
|
// The offset gives the location of the end of the most recently returned token |
|
// and the beginning of the next token. |
|
func (dec *Decoder) InputOffset() int64 { |
|
return dec.scanned + int64(dec.scanp) |
|
}
|
|
|