cborl/parse.go (617 lines of code) (raw):

// Licensed to Elasticsearch B.V. under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Elasticsearch B.V. licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package cborl import ( "encoding/binary" "io" "math" structform "github.com/elastic/go-structform" ) type Parser struct { visitor structform.Visitor strVisitor structform.StringRefVisitor // last fail state err error buffer []byte // parser state machine state stateStack length lengthStack buffer0 [64]byte } type state struct { major uint8 minor uint8 } // additional parser state 'major' types const ( stFail uint8 = 1 stValue uint8 = 2 stLen uint8 = 3 stStartX uint8 = 4 stIndef uint8 = 1 stStartArr uint8 = majorArr | stStartX stStartMap uint8 = majorMap | stStartX stStartIndefArr uint8 = majorArr | stStartX | stIndef stStartIndefMap uint8 = majorMap | stStartX | stIndef stKey uint8 = majorMap | 8 stElem uint8 = majorMap | 9 ) const ( stStart uint8 = iota + 1 stCont ) func NewParser(vs structform.Visitor) *Parser { p := &Parser{} p.init(vs) return p } func ParseReader(in io.Reader, vs structform.Visitor) (int64, error) { p := NewParser(vs) i, err := io.Copy(p, in) return i, err } func Parse(b []byte, vs structform.Visitor) error { return NewParser(vs).Parse(b) } func ParseString(str string, vs structform.Visitor) error { return NewParser(vs).ParseString(str) } func (p *Parser) init(vs structform.Visitor) { *p = Parser{ visitor: vs, strVisitor: structform.MakeStringRefVisitor(vs), } p.buffer = p.buffer0[:0] p.length.init() p.state.init(state{stValue, stStart}) } func (p *Parser) Write(b []byte) (int, error) { p.err = p.feed(b) if p.err != nil { return 0, p.err } return len(b), nil } func (p *Parser) ParseString(str string) error { return p.Parse(str2Bytes(str)) } func (p *Parser) Parse(b []byte) error { return p.feed(b) } func (p *Parser) feed(b []byte) error { for len(b) > 0 { n, _, err := p.feedUntil(b) if err != nil { return err } b = b[n:] } return nil } func (p *Parser) feedUntil(b []byte) (int, bool, error) { var ( orig = b done bool err error ) for { b, done, err = p.execStep(b) if done || err != nil { break } // continue parsing if input buffer is not empty, or structure with length // fields must be initialized // -> structures with length 0 will be reported immediately contParse := len(b) != 0 || (p.state.current.major&(stStartX|stIndef)) == stStartX if !contParse { break } } return len(orig) - len(b), done, err } func (p *Parser) execStep(b []byte) ([]byte, bool, error) { var ( err error done bool ) switch p.state.current.major { case stFail: return b, false, p.err case stValue: b, done, err = p.stepValue(b) case stLen: b = p.stepLen(b) case majorUint: b, done, err = p.stepUint(b) case majorNeg: b, done, err = p.stepNeg(b) case codeSingleFloat: b, done, err = p.stepSingleFloat(b) case codeDoubleFloat: b, done, err = p.stepDoubleFloat(b) case majorBytes | stStartX: if p.length.current == 0 { err = p.visitor.OnArrayStart(0, structform.ByteType) if err == nil { err = p.visitor.OnArrayFinished() p.length.pop() if err == nil { done, err = p.popState() } } break } p.state.current.major &= ^stStartX if len(b) == 0 { break } fallthrough case majorBytes: b, done, err = p.stepBytes(b) case majorText | stStartX: if p.length.current == 0 { p.length.pop() err = p.visitor.OnString("") if err == nil { done, err = p.popState() } break } p.state.current.major &= ^stStartX if len(b) == 0 { break } fallthrough case majorText: b, done, err = p.stepText(b) case stStartArr: err = p.visitor.OnArrayStart(int(p.length.current), structform.AnyType) if err != nil { break } p.state.pop() fallthrough case majorArr: b, done, err = p.stepArray(b) case stStartIndefArr: err = p.visitor.OnArrayStart(-1, structform.AnyType) if err != nil { break } p.state.pop() fallthrough case majorArr | stIndef: if b[0] == codeBreak { b = b[1:] err = p.visitor.OnArrayFinished() if err == nil { done, err = p.popState() } } else { b, done, err = p.stepValue(b) } case stStartMap: err = p.visitor.OnObjectStart(int(p.length.current), structform.AnyType) if err != nil { break } p.state.pop() fallthrough case majorMap: b, done, err = p.stepMap(b) case stStartIndefMap: err = p.visitor.OnObjectStart(-1, structform.AnyType) if err != nil { break } p.state.pop() fallthrough case majorMap | stIndef: if b[0] == codeBreak { err = p.visitor.OnObjectFinished() b = b[1:] if err == nil { done, err = p.popState() } } else { b, done, err = p.initMapKey(b) } case stKey | stStartX: if p.length.current == 0 { err = errEmptyKey break } p.state.current.major &= (^stStartX) fallthrough case stKey: b, done, err = p.stepKey(b) case stElem: p.state.pop() b, done, err = p.stepValue(b) default: err = errTODO() } return b, done, err } func (p *Parser) popState() (bool, error) { p.state.pop() return p.onValue() } func (p *Parser) onValue() (bool, error) { switch p.state.current.major { case majorArr: p.length.current-- _, done, err := p.arrayHandleLen() return done, err case majorMap: p.length.current-- _, done, err := p.mapHandleLen() return done, err case majorArr | stIndef, majorMap | stIndef: return false, nil } return true, nil } func (p *Parser) stepValue(b []byte) ([]byte, bool, error) { if len(b) == 0 { return b, false, nil } major := b[0] & majorMask switch major { case majorUint: if b[0] < len8b { err := p.visitor.OnUint8(b[0]) done := false if err == nil { done, err = p.onValue() } return b[1:], done, err } p.state.push(state{major, b[0] & minorMask}) return b[1:], false, nil case majorNeg: minor := b[0] & minorMask if v := minor; v < len8b { err := p.visitor.OnInt8(int8(^v)) done := false if err == nil { done, err = p.onValue() } return b[1:], done, err } p.state.push(state{major, minor}) return b[1:], false, nil case majorBytes, majorText: minor := b[0] & minorMask if minor == lenIndef { return nil, false, errIndefByteSeq } else { return p.initByteSeq(major, minor, b[1:]) } case majorArr, majorMap: minor := b[0] & minorMask return p.initSub(major, minor, b[1:]) case majorTag: return nil, false, errTODO() default: var ( err error done bool ) switch b[0] { case codeFalse: err = p.visitor.OnBool(false) if err == nil { done, err = p.onValue() } return b[1:], done, err case codeTrue: err = p.visitor.OnBool(true) if err == nil { done, err = p.onValue() } return b[1:], done, err case codeNull, codeUndef: err = p.visitor.OnNil() if err == nil { done, err = p.onValue() } return b[1:], done, err case codeHalfFloat: return b[1:], false, errTODO() case codeSingleFloat, codeDoubleFloat: p.state.push(state{b[0], stStart}) return b[1:], false, nil } } return nil, false, errInvalidCode } func (p *Parser) stepUint(in []byte) (b []byte, done bool, err error) { b = in switch p.state.current.minor { case len8b: b, done, err = b[1:], true, p.visitor.OnUint8(b[0]) case len16b: var v uint16 if b, done, v = p.getUint16(b); done { err = p.visitor.OnUint16(v) } case len32b: var v uint32 if b, done, v = p.getUint32(b); done { err = p.visitor.OnUint32(v) } case len64b: var v uint64 if b, done, v = p.getUint64(b); done { err = p.visitor.OnUint64(v) } } if done && err == nil { done, err = p.popState() } return } func (p *Parser) stepBytes(b []byte) ([]byte, bool, error) { // stream raw bytes via array visitor var ( st = &p.state.current err error ) if st.minor == stStart { err = p.visitor.OnArrayStart(int(p.length.current), structform.ByteType) if err != nil { return nil, false, err } st.minor = stCont } L := int(p.length.current) done := len(b) >= L if !done { L = len(b) p.length.current -= int64(L) } for _, c := range b[:L] { if err := p.visitor.OnByte(c); err != nil { return nil, false, err } } b = b[L:] if done { err = p.visitor.OnArrayFinished() p.length.pop() if err == nil { done, err = p.popState() } } return b, done, err } func (p *Parser) stepText(b []byte) ([]byte, bool, error) { b, tmp := p.collect(b, int(p.length.current)) if tmp == nil { return nil, false, nil } p.length.pop() done := true err := p.strVisitor.OnStringRef(tmp) if err == nil { done, err = p.popState() } return b, done, err } func (p *Parser) stepArray(b []byte) ([]byte, bool, error) { val, done, err := p.arrayHandleLen() if val { b, done, err = p.stepValue(b) } return b, done, err } func (p *Parser) arrayHandleLen() (value, done bool, err error) { if p.length.current > 0 { return true, false, nil } err = p.visitor.OnArrayFinished() if err == nil { p.length.pop() done, err = p.popState() } return false, done, err } func (p *Parser) stepMap(b []byte) ([]byte, bool, error) { kv, done, err := p.mapHandleLen() if kv && len(b) > 0 { b, done, err = p.initMapKey(b) } return b, done, err } func (p *Parser) mapHandleLen() (kv, done bool, err error) { if p.length.current > 0 { return true, false, nil } err = p.visitor.OnObjectFinished() if err == nil { p.length.pop() done, err = p.popState() } return false, done, err } func (p *Parser) initMapKey(b []byte) ([]byte, bool, error) { // parse key: major := b[0] & majorMask if major != majorText { return nil, false, errTextKeyRequired } minor := b[0] & minorMask if minor == lenIndef { return nil, false, errIndefByteSeq } return p.initByteSeq(stKey, minor, b[1:]) } func (p *Parser) stepKey(b []byte) ([]byte, bool, error) { b, tmp := p.collect(b, int(p.length.current)) if tmp == nil { return nil, false, nil } err := p.strVisitor.OnKeyRef(tmp) if err == nil { p.length.pop() p.state.current.major = stElem } return b, false, err } func (p *Parser) initByteSeq(major, minor uint8, b []byte) ([]byte, bool, error) { if v := minor; v < len8b { p.state.push(state{major | stStartX, stStart}) p.length.push(int64(v)) return b, false, nil } p.state.push(state{major | stStartX, stStart}) p.state.push(state{stLen, minor}) return b, false, nil } func (p *Parser) initSub(major, minor uint8, b []byte) ([]byte, bool, error) { if minor == lenIndef { // TODO: replace 2 state pushes with 1 state push + mask removing startX from current state p.state.push(state{major | stIndef, stStart}) p.state.push(state{major | stStartX | stIndef, stStart}) return b, false, nil } if v := minor; v < len8b { p.state.push(state{major, stStart}) p.state.push(state{major | stStartX, stStart}) p.length.push(int64(v)) return b, false, nil } p.state.push(state{major, stStart}) p.state.push(state{major | stStartX, stStart}) p.state.push(state{stLen, minor}) return b, false, nil } func (p *Parser) stepLen(b []byte) []byte { var done bool switch p.state.current.minor { case len8b: p.length.push(int64(b[0])) b, done = b[1:], true case len16b: var v uint16 if b, done, v = p.getUint16(b); done { p.length.push(int64(v)) } case len32b: var v uint32 if b, done, v = p.getUint32(b); done { p.length.push(int64(v)) } case len64b: var v uint64 if b, done, v = p.getUint64(b); done { p.length.push(int64(v)) } } if done { p.state.pop() } return b } func (p *Parser) stepNeg(in []byte) (b []byte, done bool, err error) { b = in switch p.state.current.minor { case len8b: b, done, err = b[1:], true, p.visitor.OnInt8(int8(^b[0])) case len16b: var v uint16 if b, done, v = p.getUint16(b); done { err = p.visitor.OnInt16(int16(^v)) } case len32b: var v uint32 if b, done, v = p.getUint32(b); done { err = p.visitor.OnInt32(int32(^v)) } case len64b: var v uint64 if b, done, v = p.getUint64(b); done { err = p.visitor.OnInt64(int64(^v)) } } if done && err == nil { done, err = p.popState() } return } func (p *Parser) stepSingleFloat(in []byte) (b []byte, done bool, err error) { var tmp uint32 if b, done, tmp = p.getUint32(in); done { err = p.visitor.OnFloat32(math.Float32frombits(tmp)) if err == nil { done, err = p.popState() } } return } func (p *Parser) stepDoubleFloat(in []byte) (b []byte, done bool, err error) { var tmp uint64 if b, done, tmp = p.getUint64(in); done { err = p.visitor.OnFloat64(math.Float64frombits(tmp)) if err == nil { done, err = p.popState() } } return } func (p *Parser) getUint8(b []byte) ([]byte, bool, uint8) { return b[1:], true, b[0] } func (p *Parser) getUint16(b []byte) ([]byte, bool, uint16) { b, tmp := p.collect(b, 2) if tmp == nil { return nil, false, 0 } return b, true, binary.BigEndian.Uint16(tmp) } func (p *Parser) getUint32(b []byte) ([]byte, bool, uint32) { b, tmp := p.collect(b, 4) if tmp == nil { return b, false, 0 } return b, true, binary.BigEndian.Uint32(tmp) } func (p *Parser) getUint64(b []byte) ([]byte, bool, uint64) { b, tmp := p.collect(b, 8) if tmp == nil { return nil, false, 0 } return b, true, binary.BigEndian.Uint64(tmp) } func (p *Parser) collect(b []byte, count int) ([]byte, []byte) { if len(p.buffer) > 0 { delta := count - len(p.buffer) if delta > 0 { N := delta complete := true if N > len(b) { complete = false N = len(b) } p.buffer = append(p.buffer, b[:N]...) if !complete { return nil, nil } // advance read buffer b = b[N:] } if len(p.buffer) >= count { tmp := p.buffer[:count] if len(p.buffer) == count { p.buffer = p.buffer0[:0] } else { p.buffer = p.buffer[count:] } return b, tmp } } if len(b) >= count { return b[count:], b[:count] } p.buffer = append(p.buffer, b...) return nil, nil } func numBytes(code uint8) uint8 { return 1 << ((code & minorMask) - len8b) } func readInt16(b []byte) int16 { return int16(^readUint16(b)) } func readInt32(b []byte) int32 { return int32(^readUint32(b)) } func readInt64(b []byte) int64 { return int64(^readUint64(b)) } func readUint16(b []byte) uint16 { return binary.BigEndian.Uint16(b) } func readUint32(b []byte) uint32 { return binary.BigEndian.Uint32(b) } func readUint64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }