ubjson/parse.go (748 lines of code) (raw):
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package ubjson
import (
"encoding/binary"
"errors"
"io"
"math"
structform "github.com/elastic/go-structform"
)
// Parser is a push-style UBJSON parser. Input is fed in arbitrary chunks
// (via Parse, ParseString, ParseReader or Write) and every decoded value is
// reported to the configured structform.Visitor.
type Parser struct {
// visitor receives all events produced while parsing.
visitor structform.Visitor
// strVisitor is created from visitor via structform.MakeStringRefVisitor
// and is used to report strings and object keys as byte slices.
strVisitor structform.StringRefVisitor
// last fail state
err error
// buffer accumulates bytes of a value that is split across input chunks
// (see collect). It initially aliases buffer0.
buffer []byte
// parser state machine
state stateStack
// valueState holds the element start state of typed containers.
valueState stateStack
// length holds pending lengths/counts of strings, keys and containers.
length lengthStack
// buffer0 is the initial backing storage of buffer.
buffer0 [64]byte
// internal parser state
// marker is the length type marker currently being decoded by stepLen.
marker byte
// valueType is the base type derived from the most recently parsed
// container type marker (set in stepType).
valueType structform.BaseType
}
//go:generate stringer -type=stateType

// stateType identifies the kind of value or container the parser is
// currently processing; execStep dispatches on it.
type stateType uint8
//go:generate stringer -type=stateStep

// stateStep is the sub-state within a stateType, recording how far the
// parser has progressed with the current value or container.
type stateStep uint8
// state is one entry of the parser state machine: the kind of element being
// parsed plus the step reached within it.
type state struct {
stateType
stateStep
}
// Values of stateType. execStep selects the step function based on these.
const (
stFail stateType = iota // parser failed; execStep returns the stored error
stNext // expect the marker of the next value
stFixed // values of fixed size
stHighPrec // high precision number
stString // string
stArray // array
stArrayDyn // dynamic array
stArrayCount // array with element count
stArrayTyped // typed array with element count
stObject // object
stObjectDyn // dynamic object
stObjectCount // object with known # of fields
stObjectTyped // object with all values of same type
)
// Values of stateStep. The meaning of a step depends on the stateType it is
// combined with.
const (
// stStart is the initial step of every state.
stStart stateStep = iota
// stValue sub-states
// (steps of stFixed, one per fixed-size value kind)
stNil
stNoop
stTrue
stFalse
stInt8
stUInt8
stInt16
stInt32
stInt64
stFloat32
stFloat64
stChar
// variable size primitive value types
// (the length/count has been parsed and pushed to the length stack)
stWithLen
// array/object states
stWithType0 // typed container: type marker has been read
stWithType1 // typed container: count marker has been read
stCont // container content; in objects: a field value is expected next
stFieldName // counted/typed object: a field name is expected next
stFieldNameLen // object: field name length parsed, name bytes pending
)
// Errors reported by the parser.
var (
// errUnknownMarker is returned by markerToStartState for unsupported markers.
errUnknownMarker = errors.New("unknown ubjson marker")
// errIncomplete is returned by finalize if the parser did not return to
// its initial state.
errIncomplete = errors.New("Incomplete UBJSON input")
// errNegativeLen is returned by stepLen for negative lengths/counts.
errNegativeLen = errors.New("negative length encountered")
// errInvalidState is returned by execStep for unknown state types.
errInvalidState = errors.New("invalid state")
// errMissingArrEnd is returned by finalize for unfinished counted arrays.
errMissingArrEnd = errors.New("missing ']'")
// errMissingObjEnd is returned by finalize for unfinished counted objects.
errMissingObjEnd = errors.New("missing '}'")
// errMissingCount is returned if a typed container header has no count marker.
errMissingCount = errors.New("missing count marker")
)
// ParseReader parses all UBJSON content read from in using a fresh Parser and
// reports the decoded values to vs. It returns the number of bytes copied
// from in and the first error encountered.
func ParseReader(in io.Reader, vs structform.Visitor) (int64, error) {
	parser := NewParser(vs)
	return parser.ParseReader(in)
}
// Parse parses the UBJSON document in b using a fresh Parser and reports the
// decoded values to vs.
func Parse(b []byte, vs structform.Visitor) error {
	parser := NewParser(vs)
	return parser.Parse(b)
}
// ParseString parses the UBJSON document held in str using a fresh Parser and
// reports the decoded values to vs.
func ParseString(str string, vs structform.Visitor) error {
	parser := NewParser(vs)
	return parser.ParseString(str)
}
// NewParser creates a Parser that reports all parsed values to vs.
func NewParser(vs structform.Visitor) *Parser {
	parser := new(Parser)
	parser.init(vs)
	return parser
}
// init resets the parser to its initial state and binds it to vs. All stacks
// and the collect buffer are pointed at their embedded backing storage, and
// the state machine starts out expecting the next value.
func (p *Parser) init(vs structform.Visitor) {
	*p = Parser{
		visitor:    vs,
		strVisitor: structform.MakeStringRefVisitor(vs),
	}

	// state machine
	p.state.stack = p.state.stack0[:0]
	p.state.current = state{stNext, stStart}
	p.valueState.stack = p.valueState.stack0[:0]
	p.length.stack = p.length.stack0[:0]

	// scratch buffer for values split across input chunks
	p.buffer = p.buffer0[:0]
}
// Parse feeds b to the parser and, if that succeeds, finalizes the parser.
// The resulting error is stored in the parser and returned.
func (p *Parser) Parse(b []byte) error {
	if err := p.feed(b); err != nil {
		p.err = err
		return err
	}

	p.err = p.finalize()
	return p.err
}
// ParseReader copies all content of in into the parser (using the parser's
// Write method) and finalizes the parser if copying succeeded. It returns the
// number of bytes copied and the first error encountered.
func (p *Parser) ParseReader(in io.Reader) (int64, error) {
	copied, err := io.Copy(p, in)
	if err != nil {
		return copied, err
	}
	return copied, p.finalize()
}
// ParseString parses the UBJSON document held in s. The string is converted
// with str2Bytes and handed to Parse.
func (p *Parser) ParseString(s string) error {
	raw := str2Bytes(s)
	return p.Parse(raw)
}
// finalize is called after the last input byte has been fed. Counted and
// typed containers whose elements have all been consumed are still on the
// state stack at this point (they have no end marker); finalize reports their
// end to the visitor and unwinds the stack.
//
// It returns errMissingArrEnd/errMissingObjEnd if a container is unfinished
// and errIncomplete if the input stops in the middle of any other value.
func (p *Parser) finalize() error {
	for len(p.state.stack) > 0 {
		cur := p.state.current

		var err error
		switch cur.stateType {
		case stArrayCount, stArrayTyped:
			if p.length.current != 0 || cur.stateStep != stCont {
				return errMissingArrEnd
			}
			err = p.visitor.OnArrayFinished()

		case stObjectCount, stObjectTyped:
			if p.length.current != 0 || cur.stateStep != stFieldName {
				return errMissingObjEnd
			}
			err = p.visitor.OnObjectFinished()

		case stArray, stArrayDyn:
			// dynamic arrays are only complete once ']' has been seen,
			// in which case they are no longer on the stack.
			return errMissingArrEnd

		case stObject, stObjectDyn:
			return errMissingObjEnd

		default:
			// a primitive value, string or failed state is still pending
			return errIncomplete
		}
		if err != nil {
			return err
		}

		// Typed containers pushed their element state in stepType.
		if cur.stateType == stArrayTyped || cur.stateType == stObjectTyped {
			p.valueState.pop()
		}

		// Drop the container's own count together with its state, so the
		// enclosing container is checked against its own remaining count.
		if _, err = p.popLenState(); err != nil {
			return err
		}
	}

	if st := p.state.current; st.stateType != stNext || st.stateStep != stStart {
		return errIncomplete
	}
	return nil
}
// Write feeds b to the parser. On success all of b is reported as consumed.
// On failure the parser is switched into the fail state and 0 is returned
// together with the error.
func (p *Parser) Write(b []byte) (int, error) {
	err := p.feed(b)
	p.err = err
	if err == nil {
		return len(b), nil
	}

	p.state.current = state{stFail, stStart}
	return 0, err
}
// feed runs the state machine until all of b has been consumed or an error
// occurs.
func (p *Parser) feed(b []byte) error {
	for len(b) > 0 {
		consumed, _, err := p.feedUntil(b)
		if err != nil {
			return err
		}
		b = b[consumed:]
	}
	return nil
}
// feedUntil executes parser steps until a step reports completion, a step
// fails, or the input is exhausted. It returns the number of bytes consumed
// from b, the completion flag of the last step and its error.
func (p *Parser) feedUntil(b []byte) (int, bool, error) {
	total := len(b)
	for {
		rest, finished, err := p.execStep(b)
		b = rest
		if finished || err != nil || len(b) == 0 {
			return total - len(b), finished, err
		}
	}
}
// execStep executes a single step of the state machine, dispatching on the
// type of the current state. It returns the unconsumed input, the completion
// flag reported by the step and the step's error. Any error is also recorded
// in p.err. In the fail state the recorded error is returned without
// touching the input.
func (p *Parser) execStep(b []byte) ([]byte, bool, error) {
	kind := p.state.current.stateType
	if kind == stFail {
		return b, false, p.err
	}

	var (
		finished bool
		err      error
	)
	switch kind {
	case stNext:
		b, finished, err = p.stepValue(b)
	case stFixed:
		b, finished, err = p.stepFixedValue(b)
	case stHighPrec, stString:
		// high precision numbers are encoded like strings
		b, finished, err = p.stepString(b)

	case stArray:
		b, err = p.stepArrayInit(b)
	case stArrayDyn:
		b, finished, err = p.stepArrayDyn(b)
	case stArrayCount:
		b, finished, err = p.stepArrayCount(b)
	case stArrayTyped:
		b, finished, err = p.stepArrayTyped(b)

	case stObject:
		b, err = p.stepObjectInit(b)
	case stObjectDyn:
		b, finished, err = p.stepObjectDyn(b)
	case stObjectCount:
		b, finished, err = p.stepObjectCount(b)
	case stObjectTyped:
		b, finished, err = p.stepObjectTyped(b)

	default:
		err = errInvalidState
	}

	if err != nil {
		p.err = err
	}
	return b, finished, err
}
// stepFixedValue decodes the payload of a fixed-size value whose marker has
// already been consumed, reports it to the visitor and pops the value state.
// Multi-byte payloads are gathered with collect, so they may be split across
// input chunks. It returns the unconsumed input, whether the state stack
// became empty, and any error.
func (p *Parser) stepFixedValue(b []byte) ([]byte, bool, error) {
	var (
		tmp  []byte
		err  error
		done bool
	)

	switch p.state.current.stateStep {
	case stNil:
		done, err = true, p.visitor.OnNil()
	case stNoop:
		// A no-op carries no payload and produces no event. The state must
		// still be completed: this step is reached for containers typed as
		// no-op, and leaving the state in place would make the parser spin
		// without consuming input.
		done = true
	case stTrue:
		done, err = true, p.visitor.OnBool(true)
	case stFalse:
		done, err = true, p.visitor.OnBool(false)
	case stInt8:
		b, done, err = b[1:], true, p.visitor.OnInt8(int8(b[0]))
	case stUInt8:
		b, done, err = b[1:], true, p.visitor.OnUint8(b[0])
	case stChar:
		b, tmp = p.collect(b, 1)
		if done = tmp != nil; done {
			err = p.visitor.OnByte(tmp[0])
		}
	case stInt16:
		b, tmp = p.collect(b, 2)
		if done = tmp != nil; done {
			err = p.visitor.OnInt16(readInt16(tmp))
		}
	case stInt32:
		b, tmp = p.collect(b, 4)
		if done = tmp != nil; done {
			err = p.visitor.OnInt32(readInt32(tmp))
		}
	case stInt64:
		b, tmp = p.collect(b, 8)
		if done = tmp != nil; done {
			err = p.visitor.OnInt64(readInt64(tmp))
		}
	case stFloat32:
		b, tmp = p.collect(b, 4)
		if done = tmp != nil; done {
			err = p.visitor.OnFloat32(readFloat32(tmp))
		}
	case stFloat64:
		b, tmp = p.collect(b, 8)
		if done = tmp != nil; done {
			err = p.visitor.OnFloat64(readFloat64(tmp))
		}
	default:
		// No other step is valid for fixed-size values. Fail instead of
		// returning without progress.
		return b, false, errInvalidState
	}

	if done && err == nil {
		done, err = p.popState()
	}
	return b, done, err
}
// stepString parses a length-prefixed string (also used for high precision
// numbers). It first decodes the length via stepLen, then collects the
// payload and reports it to the visitor. Once the string has been reported
// successfully, the length and the string state are popped.
//
// It returns the unconsumed input, whether the state stack became empty, and
// any error. Errors returned by the visitor are passed on to the caller.
func (p *Parser) stepString(b []byte) ([]byte, bool, error) {
	var (
		err  error
		done bool
		st   = &p.state.current
	)

	switch st.stateStep {
	case stStart:
		b, err = p.stepLen(b, st.withStep(stWithLen))
		if err != nil || st.stateStep != stWithLen {
			// failed, or length not yet complete
			break
		}
		fallthrough

	case stWithLen:
		L := p.length.current
		if L == 0 {
			done = true
			err = p.visitor.OnString("")
		} else {
			var tmp []byte
			if b, tmp = p.collect(b, int(L)); tmp != nil {
				done = true
				err = p.strVisitor.OnStringRef(tmp)
			}
		}
	}

	// Only unwind on success; popLenState must not overwrite a visitor error.
	if done && err == nil {
		done, err = p.popLenState()
	}
	return b, done, err
}
// stepArrayInit inspects the first byte following the array start marker and
// selects the array kind: counted (count marker), typed (type marker) or
// dynamic (anything else). The count/type marker is consumed. For dynamic
// arrays the byte is left in place and the array start is reported to the
// visitor with unknown size.
func (p *Parser) stepArrayInit(b []byte) ([]byte, error) {
	cur := &p.state.current

	switch b[0] {
	case countMarker:
		cur.stateType = stArrayCount
		return b[1:], nil
	case typeMarker:
		cur.stateType = stArrayTyped
		return b[1:], nil
	}

	cur.stateType = stArrayDyn
	return b, p.visitor.OnArrayStart(-1, structform.AnyType)
}
// stepArrayDyn processes the next byte of a dynamic array: the array end
// marker finishes the array and pops its state, any other byte starts the
// next element.
func (p *Parser) stepArrayDyn(b []byte) ([]byte, bool, error) {
	if b[0] == arrEndMarker {
		rest := b[1:]
		if err := p.visitor.OnArrayFinished(); err != nil {
			return rest, true, err
		}
		done, err := p.popState()
		return rest, done, err
	}

	// ensure continuation state is pushed to stack
	if cur := &p.state.current; cur.stateStep == stStart {
		cur.stateStep = stCont
	}

	rest, _, err := p.stepValue(b)
	return rest, false, err
}
// stepArrayCount drives a counted array: it parses the element count, reports
// the array start, then consumes one element per call while decrementing the
// remaining count. When no elements remain, the array end is reported and the
// count and state are popped.
func (p *Parser) stepArrayCount(b []byte) ([]byte, bool, error) {
	cur := &p.state.current
	step := cur.stateStep

	// parse array header
	if step == stStart {
		rest, err := p.stepLen(b, cur.withStep(stWithLen))
		return rest, false, err
	}

	remaining := int(p.length.current)
	if step == stWithLen {
		cur.stateStep = stCont
		if err := p.visitor.OnArrayStart(remaining, structform.AnyType); err != nil {
			return b, false, err
		}
	}

	if remaining == 0 {
		if err := p.visitor.OnArrayFinished(); err != nil {
			return b, true, err
		}
		done, err := p.popLenState()
		return b, done, err
	}

	p.length.current--
	rest, _, err := p.stepValue(b)
	return rest, false, err
}
// stepArrayTyped drives a typed array. It parses the type/count header,
// reports the array start with the element type, and then decodes one
// element per call by pushing the element start state recorded in
// valueState. Elements carry no marker of their own.
//
// When no elements remain, the array end is reported and the element state,
// the count and the array state are popped.
func (p *Parser) stepArrayTyped(b []byte) ([]byte, bool, error) {
	step := p.state.current.stateStep

	// parse typed array header
	switch step {
	case stStart, stWithType0, stWithType1:
		b, err := p.stepTypeLenHeader(b, stWithLen)
		return b, false, err
	}

	l := int(p.length.current)
	if step == stWithLen {
		p.state.current.stateStep = stCont
		err := p.visitor.OnArrayStart(l, p.valueType)
		if err != nil {
			return b, false, err
		}
	}

	if l == 0 {
		if err := p.visitor.OnArrayFinished(); err != nil {
			return b, true, err
		}

		// Drop the element state pushed by stepType (as stepObjectTyped
		// does), so an enclosing typed container sees its own element
		// state again.
		p.valueState.pop()
		done, err := p.popLenState()
		return b, done, err
	}

	p.length.current--
	vs := p.valueState.current
	p.pushState(vs)
	b, _, err := p.execStep(b)
	return b, false, err
}
// stepTypeLenHeader parses the header of a typed container in three steps:
// the element type marker, the mandatory count marker and the count itself.
// After the count has been parsed the current state continues at step cont.
// It returns errMissingCount if the type is not followed by a count marker.
func (p *Parser) stepTypeLenHeader(b []byte, cont stateStep) ([]byte, error) {
	cur := p.state.current

	if cur.stateStep == stStart {
		return p.stepType(b, cur.withStep(stWithType0))
	}
	if cur.stateStep == stWithType1 {
		return p.stepLen(b, cur.withStep(cont))
	}
	if cur.stateStep != stWithType0 {
		return b, nil
	}

	// stWithType0: the count marker must follow the type
	if b[0] != countMarker {
		return b, errMissingCount
	}
	p.state.current = cur.withStep(stWithType1)
	return b[1:], nil
}
// stepObjectInit inspects the first byte following the object start marker
// and selects the object kind: counted (count marker), typed (type marker) or
// dynamic (anything else). The count/type marker is consumed. For dynamic
// objects the byte is left in place and the object start is reported to the
// visitor with unknown size.
func (p *Parser) stepObjectInit(b []byte) ([]byte, error) {
	cur := &p.state.current

	switch b[0] {
	case countMarker:
		cur.stateType = stObjectCount
		return b[1:], nil
	case typeMarker:
		cur.stateType = stObjectTyped
		return b[1:], nil
	}

	err := p.visitor.OnObjectStart(-1, structform.AnyType)
	cur.stateType = stObjectDyn
	return b, err
}
// stepObjectDyn drives a dynamic object. Each field is processed in three
// steps: the key length (stStart), the key bytes (stFieldNameLen) and the
// field value (stCont). The object end marker is accepted wherever a new
// field may start; it finishes the object and pops its state.
//
// Key length and key bytes may be split across input chunks. The state only
// advances once the respective part is complete.
func (p *Parser) stepObjectDyn(b []byte) ([]byte, bool, error) {
	var (
		err error
		st  = &p.state.current
	)

	switch st.stateStep {
	case stStart:
		// Only look for the end marker when no key length is half-parsed:
		// with p.marker set, b[0] is length payload, not a marker.
		if p.marker == noMarker && b[0] == objEndMarker {
			err = p.visitor.OnObjectFinished()
			done := true
			if err == nil {
				done, err = p.popState()
			}
			return b[1:], done, err
		}
		b, err = p.stepLen(b, st.withStep(stFieldNameLen))

	case stFieldNameLen:
		var key []byte
		if b, key = p.collect(b, int(p.length.current)); key != nil {
			p.popLen()
			// Expect the value only once the key is complete. An
			// incomplete key stays in this step and resumes with the next
			// input chunk.
			st.stateStep = stCont
			err = p.strVisitor.OnKeyRef(key)
		}

	case stCont:
		// Reset first: stepValue may push a new state, saving this one.
		st.stateStep = stStart
		b, _, err = p.stepValue(b)
	}

	return b, false, err
}
// stepObjectCount drives a counted object: it parses the field count and
// delegates header reporting and field handling to stepObjectCountedContent.
// Once the object has been finished successfully, the count and the object
// state are popped. Errors from the visitor are passed on to the caller.
func (p *Parser) stepObjectCount(b []byte) ([]byte, bool, error) {
	st := &p.state.current
	if st.stateStep == stStart {
		b, err := p.stepLen(b, st.withStep(stWithLen))
		return b, false, err
	}

	done, b, err := p.stepObjectCountedContent(b, false)
	// Only unwind on success; popLenState must not overwrite the error.
	if done && err == nil {
		done, err = p.popLenState()
	}
	return b, done, err
}
// stepObjectTyped drives a typed object: it parses the type/count header and
// delegates header reporting and field handling to stepObjectCountedContent.
// Once the object has been finished successfully, the element state, the
// count and the object state are popped. Errors from the visitor are passed
// on to the caller.
func (p *Parser) stepObjectTyped(b []byte) ([]byte, bool, error) {
	switch p.state.current.stateStep {
	case stStart, stWithType0, stWithType1:
		b, err := p.stepTypeLenHeader(b, stWithLen)
		return b, false, err
	}

	done, b, err := p.stepObjectCountedContent(b, true)
	// Only unwind on success; popLenState must not overwrite the error.
	if done && err == nil {
		p.valueState.pop()
		done, err = p.popLenState()
	}
	return b, done, err
}
// stepObjectCountedContent implements the shared body of counted and typed
// objects. It reports the object start, then processes each field in three
// steps: key length (stFieldName), key bytes (stFieldNameLen) and value
// (stCont). For typed objects the value is decoded by pushing the element
// start state from valueState, otherwise the value is parsed from its marker.
//
// It returns true once all fields are consumed and the object end has been
// reported to the visitor, plus the unconsumed input and any error. Popping
// the count and state is left to the caller.
//
// NOTE(review): the object start is reported with structform.AnyType even for
// typed objects, while typed arrays report p.valueType. Confirm whether this
// is intended.
func (p *Parser) stepObjectCountedContent(b []byte, typed bool) (bool, []byte, error) {
	var (
		err error
		st  = &p.state.current
		end = false
	)

	switch st.stateStep {
	case stWithLen:
		L := p.length.current
		if err := p.visitor.OnObjectStart(int(L), structform.AnyType); err != nil {
			return end, b, err
		}
		if L == 0 {
			end = true
			break
		}

		st.stateStep = stFieldName
		fallthrough

	case stFieldName:
		end = p.length.current == 0
		if end {
			break
		}
		b, err = p.stepLen(b, st.withStep(stFieldNameLen))

	case stFieldNameLen:
		var key []byte
		if b, key = p.collect(b, int(p.length.current)); key != nil {
			p.popLen()
			// Expect the value only once the key is complete. An
			// incomplete key stays in this step and resumes with the next
			// input chunk.
			st.stateStep = stCont
			err = p.strVisitor.OnKeyRef(key)
		}

	case stCont:
		p.length.current--
		st.stateStep = stFieldName

		// handle object field value
		if typed {
			p.pushState(p.valueState.current)
		} else {
			b, _, err = p.stepValue(b)
		}
	}

	if end {
		err = p.visitor.OnObjectFinished()
	}
	return end, b, err
}
// stepType consumes the element type marker of a typed container. The current
// state is replaced by cont, the start state for elements of that type is
// pushed onto valueState and the matching base type is stored in valueType.
// Unknown markers yield the error of markerToStartState.
func (p *Parser) stepType(b []byte, cont state) ([]byte, error) {
	m, rest := b[0], b[1:]
	p.state.current = cont

	// TODO: analyze marker
	elemState, err := markerToStartState(m)
	if err != nil {
		return nil, err
	}

	p.valueType = markerToBaseType(m)
	p.valueState.push(elemState)
	return rest, nil
}
// stepLen parses a length or count: an integer type marker followed by its
// big-endian payload. Marker and payload may be split across input chunks;
// the marker is kept in p.marker until the payload is complete.
//
// Once the length is complete it is pushed onto the length stack, p.marker is
// reset and the current state is replaced by cont. While the length is
// incomplete, the state is left untouched and the remaining input is
// returned.
//
// It returns errUnknownMarker if the marker is not one of the supported
// integer markers and errNegativeLen if the decoded length is negative.
func (p *Parser) stepLen(b []byte, cont state) ([]byte, error) {
	if p.marker == noMarker {
		p.marker = b[0]
		b = b[1:]
		if len(b) == 0 {
			// payload arrives with the next chunk
			return nil, nil
		}
	}

	var tmp []byte
	complete := false
	L := int64(-1)

	switch p.marker {
	case int8Marker:
		complete, L, b = true, int64(int8(b[0])), b[1:]
	case uint8Marker:
		complete, L, b = true, int64(b[0]), b[1:]
	case int16Marker:
		if b, tmp = p.collect(b, 2); tmp != nil {
			complete, L = true, int64(readInt16(tmp))
		}
	case int32Marker:
		if b, tmp = p.collect(b, 4); tmp != nil {
			complete, L = true, int64(readInt32(tmp))
		}
	case int64Marker:
		if b, tmp = p.collect(b, 8); tmp != nil {
			complete, L = true, readInt64(tmp)
		}
	default:
		// Not an integer marker. Without this error the step would neither
		// consume input nor change state, and the caller would loop forever.
		return nil, errUnknownMarker
	}

	if !complete {
		return b, nil
	}
	if L < 0 {
		return nil, errNegativeLen
	}

	p.marker = noMarker
	p.state.current = cont
	p.pushLen(L)
	return b, nil
}
// collect gathers count bytes of a value that may be split across multiple
// input chunks.
//
// If count bytes are available (from previously buffered bytes plus b), it
// returns the remaining input and a slice holding exactly count bytes. The
// returned value slice aliases either b or the internal buffer.
//
// If fewer than count bytes are available, all of b is appended to the
// internal buffer and (nil, nil) is returned; the caller is expected to call
// collect again with the same count once more input arrives.
func (p *Parser) collect(b []byte, count int) ([]byte, []byte) {
// Bytes from earlier chunks are pending: complete the value in p.buffer.
if len(p.buffer) > 0 {
// delta is the number of bytes still missing
delta := count - len(p.buffer)
if delta > 0 {
N := delta
complete := true
if N > len(b) {
complete = false
N = len(b)
}
p.buffer = append(p.buffer, b[:N]...)
if !complete {
return nil, nil
}
// advance read buffer
b = b[N:]
}
if len(p.buffer) >= count {
tmp := p.buffer[:count]
if len(p.buffer) == count {
// buffer fully consumed: go back to the embedded storage
p.buffer = p.buffer0[:0]
} else {
p.buffer = p.buffer[count:]
}
return b, tmp
}
}
// Nothing buffered: serve the value directly from the input if possible.
if len(b) >= count {
return b[count:], b[:count]
}
// Not enough input yet: keep what we have for the next call.
p.buffer = append(p.buffer, b...)
return nil, nil
}
// stepValue consumes the marker of the next value. Nil and boolean values are
// reported to the visitor right away and no-ops are skipped. For all other
// markers the matching start state is pushed so the value gets parsed by
// subsequent steps. The completion flag is true only for values reported
// right away.
func (p *Parser) stepValue(b []byte) ([]byte, bool, error) {
	next, err := markerToStartState(b[0])
	if err != nil {
		return nil, false, err
	}

	switch next.stateStep {
	case stNil:
		return b[1:], true, p.visitor.OnNil()
	case stNoop:
		return b[1:], false, nil
	case stTrue:
		return b[1:], true, p.visitor.OnBool(true)
	case stFalse:
		return b[1:], true, p.visitor.OnBool(false)
	}

	rest, err := p.advanceMarker(next, b)
	return rest, false, err
}
// advanceMarker pushes s as the new current state and skips the marker byte
// at the start of b.
func (p *Parser) advanceMarker(s state, b []byte) ([]byte, error) {
	p.state.push(s)
	rest := b[1:]
	return rest, nil
}
// pushLen pushes l onto the length stack.
func (p *Parser) pushLen(l int64) {
	p.length.push(l)
}
// popLen pops the length stack.
func (p *Parser) popLen() {
	p.length.pop()
}
// pushState pushes next onto the parser state stack.
func (p *Parser) pushState(next state) {
	p.state.push(next)
}
// popState pops the parser state stack. It reports whether the stack is empty
// afterwards; the returned error is always nil.
func (p *Parser) popState() (bool, error) {
	p.state.pop()
	empty := len(p.state.stack) == 0
	return empty, nil
}
// popLenState pops the length stack and then the parser state stack. The
// results are those of popState.
func (p *Parser) popLenState() (bool, error) {
	p.length.pop()
	return p.popState()
}
// readInt16 decodes the first 2 bytes of b as a big-endian signed integer.
func readInt16(b []byte) int16 {
	raw := binary.BigEndian.Uint16(b)
	return int16(raw)
}
// readInt32 decodes the first 4 bytes of b as a big-endian signed integer.
func readInt32(b []byte) int32 {
	raw := binary.BigEndian.Uint32(b)
	return int32(raw)
}
// readInt64 decodes the first 8 bytes of b as a big-endian signed integer.
func readInt64(b []byte) int64 {
	raw := binary.BigEndian.Uint64(b)
	return int64(raw)
}
// readFloat32 decodes the first 4 bytes of b as a big-endian IEEE 754
// single precision number.
func readFloat32(b []byte) float32 {
	return math.Float32frombits(binary.BigEndian.Uint32(b))
}
// readFloat64 decodes the first 8 bytes of b as a big-endian IEEE 754
// double precision number.
func readFloat64(b []byte) float64 {
	return math.Float64frombits(binary.BigEndian.Uint64(b))
}
// markerToStartState maps a value marker to the parser state used to decode
// the value. Fixed-size values map to stFixed with a value specific step;
// strings, high precision numbers, objects and arrays map to their own state
// type at step stStart. Unsupported markers yield the fail state and
// errUnknownMarker.
func markerToStartState(marker byte) (state, error) {
	var next state

	switch marker {
	// values of fixed size
	case nullMarker:
		next = state{stFixed, stNil}
	case noopMarker:
		next = state{stFixed, stNoop}
	case trueMarker:
		next = state{stFixed, stTrue}
	case falseMarker:
		next = state{stFixed, stFalse}
	case int8Marker:
		next = state{stFixed, stInt8}
	case uint8Marker:
		next = state{stFixed, stUInt8}
	case int16Marker:
		next = state{stFixed, stInt16}
	case int32Marker:
		next = state{stFixed, stInt32}
	case int64Marker:
		next = state{stFixed, stInt64}
	case float32Marker:
		next = state{stFixed, stFloat32}
	case float64Marker:
		next = state{stFixed, stFloat64}
	case charMarker:
		next = state{stFixed, stChar}

	// values of variable size
	case highPrecMarker:
		next = state{stHighPrec, stStart}
	case stringMarker:
		next = state{stString, stStart}

	// containers
	case objStartMarker:
		next = state{stObject, stStart}
	case arrStartMarker:
		next = state{stArray, stStart}

	default:
		return state{stFail, stStart}, errUnknownMarker
	}

	return next, nil
}
// markerToBaseType maps a value marker to the structform base type reported
// for typed containers. Markers without a dedicated base type map to
// structform.AnyType.
func markerToBaseType(marker byte) structform.BaseType {
	baseType := structform.AnyType

	switch marker {
	case falseMarker, trueMarker:
		baseType = structform.BoolType
	case charMarker:
		baseType = structform.ByteType
	case int8Marker:
		baseType = structform.Int8Type
	case uint8Marker:
		baseType = structform.Uint8Type
	case int16Marker:
		baseType = structform.Int16Type
	case int32Marker:
		baseType = structform.Int32Type
	case int64Marker:
		baseType = structform.Int64Type
	case float32Marker:
		baseType = structform.Float32Type
	case float64Marker:
		baseType = structform.Float64Type
	case highPrecMarker, stringMarker:
		baseType = structform.StringType
	}

	return baseType
}
// withStep returns a copy of st with its step replaced by s. The receiver is
// not modified.
func (st state) withStep(s stateStep) state {
	return state{stateType: st.stateType, stateStep: s}
}