lib/xml/xml.go (206 lines of code) (raw):
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Package xml provides an XSD-based dynamically typed xml decoder.
package xml
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"strconv"
"strings"
"aqwari.net/xml/xsd"
)
// Detail is a type and plurality Detail node in a XSD tree.
type Detail struct {
// Type is the type of the tree node.
Type Type
// Plural is whether the node is a list.
Plural bool
// Children are the node's children.
Children map[string]Detail
}
func (d Detail) isZero() bool {
return d.Type == 0 && !d.Plural && d.Children == nil
}
// Type is an enriched JSON type system reflecting Go's treatment of it for numbers.
type Type int
const (
StringType = iota
IntType
FloatType
BoolType
)
// Details returns type and plurality details obtained from the provided XSD doc. Only
// interesting nodes are retained in the type hint tree. Interesting nodes are either
// plural, integer, float or bool, or have children at some depth that are plural, integer
// float or bool.
func Details(doc []byte) (map[string]Detail, error) {
schema, err := xsd.Parse(doc)
if err != nil {
return nil, err
}
tree := make(map[string]string)
leaves := make(map[string]Detail)
for _, s := range schema {
for n, t := range s.Types {
switch t := t.(type) {
case xsd.Builtin:
case *xsd.SimpleType:
case *xsd.ComplexType:
// Ignore external name-spaced names.
if t.Name.Space != "" {
continue
}
// Ignore anonymous node and the root.
if strings.HasPrefix(n.Local, "_") {
continue
}
for _, e := range t.Elements {
var d Detail
tree[e.Name.Local] = n.Local
switch builtinTypeFor(e.Type) {
case xsd.Boolean:
d.Type = BoolType
case xsd.Int, xsd.Integer, xsd.Long, xsd.NonNegativeInteger,
xsd.NonPositiveInteger, xsd.PositiveInteger, xsd.Short,
xsd.UnsignedByte, xsd.UnsignedInt, xsd.UnsignedLong, xsd.UnsignedShort:
d.Type = IntType
case xsd.Decimal, xsd.Double, xsd.Float:
d.Type = FloatType
}
d.Plural = e.Plural
if d.isZero() {
continue
}
leaves[e.Name.Local] = d
}
default:
panic(fmt.Sprintf("unknown type: %T", t))
}
}
}
details := Detail{Children: make(map[string]Detail)}
var path []string
for p, d := range leaves {
path = append(path[:0], p)
for i := 0; i <= len(tree); i++ {
parent, ok := tree[p]
if !ok {
break
}
path = append(path, parent)
p = parent
}
reverse(path)
n := details
for i, e := range path {
c := n.Children[e]
if c.Children == nil && i < len(path)-1 {
c.Children = make(map[string]Detail)
}
if i == len(path)-1 {
c.Type = d.Type
c.Plural = d.Plural
}
n.Children[e] = c
n = c
}
}
return details.Children, nil
}
func reverse(s []string) {
for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 {
s[i], s[j] = s[j], s[i]
}
}
// builtinTypeFor returns the built-in type for the type if available. Otherwise it returns xsd.Anytype.
func builtinTypeFor(typ xsd.Type) xsd.Builtin {
for {
switch t := typ.(type) {
case xsd.Builtin:
return t
case *xsd.SimpleType:
typ = xsd.Base(t.Base)
default:
return xsd.AnyType
}
}
}
// Unmarshal decodes the data in r using type and plurality hints in details. If details is
// nil, best effort plurality assessment will be made and all data will be represented as
// strings.
func Unmarshal(r io.Reader, details map[string]Detail) (cdata string, elems map[string]any, err error) {
dec := xml.NewDecoder(r)
dec.CharsetReader = func(_ string, input io.Reader) (io.Reader, error) { return input, nil }
cdata, elems, err = walkXML(dec, nil, details)
if err == nil && len(elems) == 0 {
// If we have no elems, there cannot have been any root element,
// so the XML is invalid. We do not check for the required XML
// declaration, according to Postel's Law.
err = io.ErrUnexpectedEOF
}
return cdata, elems, err
}
func walkXML(dec *xml.Decoder, attrs []xml.Attr, details map[string]Detail) (cdata string, elems map[string]any, err error) {
elems = map[string]any{}
for {
t, err := dec.Token()
if err != nil {
if err == io.EOF {
return "", elems, nil
}
return "", nil, err
}
switch elem := t.(type) {
case xml.StartElement:
key := elem.Name.Local
det := details[key]
var part map[string]any
cdata, part, err = walkXML(dec, elem.Attr, det.Children)
if err != nil {
return "", nil, err
}
// Combine sub-elements and cdata.
var add any = part
if len(part) == 0 {
add = cdata
} else if len(cdata) != 0 {
part["#text"] = cdata
}
// Add the data to the current object while taking into account
// if the current key already exists (in the case of lists).
value := elems[key]
switch v := value.(type) {
case nil:
add = entype(add, det.Type)
if det.Plural {
elems[key] = []any{add}
} else {
elems[key] = add
}
case []any:
add = entype(add, det.Type)
elems[key] = append(v, add)
default:
add = entype(add, det.Type)
elems[key] = []any{v, add}
}
case xml.CharData:
cdata = string(bytes.TrimSpace(elem.Copy()))
case xml.EndElement:
for _, attr := range attrs {
elems[attr.Name.Local] = attr.Value
}
return cdata, elems, nil
}
}
}
// entype attempts to render the element value as the expected type, falling
// back to a string if it is not possible.
func entype(v any, t Type) any {
switch v := v.(type) {
case string:
switch t {
case BoolType:
switch v {
case "TRUE":
return true
case "FALSE":
return false
default:
return v
}
case IntType:
i, err := strconv.ParseInt(v, 10, 64)
if err != nil {
return v
}
return i
case FloatType:
f, err := strconv.ParseFloat(v, 64)
if err != nil {
return v
}
return f
default:
return v
}
default:
return v
}
}