testing/xml/xmlToStruct.go (241 lines of code) (raw):
package xml
import (
"encoding/xml"
"fmt"
"io"
"sort"
"strings"
)
// A XMLNode contains the values to be encoded or decoded.
type XMLNode struct {
Name xml.Name `json:",omitempty"`
Children map[string][]*XMLNode `json:",omitempty"`
Text string `json:",omitempty"`
Attr []xml.Attr `json:",omitempty"`
namespaces map[string]string
parent *XMLNode
}
// NewXMLElement returns a pointer to a new XMLNode initialized to default values.
func NewXMLElement(name xml.Name) *XMLNode {
return &XMLNode{
Name: name,
Children: map[string][]*XMLNode{},
Attr: []xml.Attr{},
}
}
// AddChild adds child to the XMLNode.
func (n *XMLNode) AddChild(child *XMLNode) {
child.parent = n
if _, ok := n.Children[child.Name.Local]; !ok {
// flattened will have multiple children with same tag name
n.Children[child.Name.Local] = []*XMLNode{}
}
n.Children[child.Name.Local] = append(n.Children[child.Name.Local], child)
}
// XMLToStruct converts a xml.Decoder stream to XMLNode with nested values.
func XMLToStruct(d *xml.Decoder, s *xml.StartElement, ignoreIndentation bool) (*XMLNode, error) {
out := &XMLNode{}
for {
tok, err := d.Token()
if err != nil {
if err == io.EOF {
break
} else {
return out, err
}
}
if tok == nil {
break
}
switch typed := tok.(type) {
case xml.CharData:
text := string(typed.Copy())
if ignoreIndentation {
text = strings.TrimSpace(text)
}
if len(text) != 0 {
out.Text = text
}
case xml.StartElement:
el := typed.Copy()
out.Attr = el.Attr
if out.Children == nil {
out.Children = map[string][]*XMLNode{}
}
name := typed.Name.Local
slice := out.Children[name]
if slice == nil {
slice = []*XMLNode{}
}
node, e := XMLToStruct(d, &el, ignoreIndentation)
out.findNamespaces()
if e != nil {
return out, e
}
node.Name = typed.Name
node.findNamespaces()
// Add attributes onto the node
node.Attr = el.Attr
tempOut := *out
// Save into a temp variable, simply because out gets squashed during
// loop iterations
node.parent = &tempOut
slice = append(slice, node)
out.Children[name] = slice
case xml.EndElement:
if s != nil && s.Name.Local == typed.Name.Local { // matching end token
return out, nil
}
out = &XMLNode{}
}
}
return out, nil
}
func (n *XMLNode) findNamespaces() {
ns := map[string]string{}
for _, a := range n.Attr {
if a.Name.Space == "xmlns" {
ns[a.Value] = a.Name.Local
}
}
n.namespaces = ns
}
func (n *XMLNode) findElem(name string) (string, bool) {
for node := n; node != nil; node = node.parent {
for _, a := range node.Attr {
namespace := a.Name.Space
if v, ok := node.namespaces[namespace]; ok {
namespace = v
}
if name == fmt.Sprintf("%s:%s", namespace, a.Name.Local) {
return a.Value, true
}
}
}
return "", false
}
// StructToXML writes an XMLNode to a xml.Encoder as tokens.
func StructToXML(e *xml.Encoder, node *XMLNode, sorted bool) error {
var err error
// Sort Attributes
attrs := node.Attr
if sorted {
sortedAttrs := make([]xml.Attr, len(attrs))
for _, k := range node.Attr {
sortedAttrs = append(sortedAttrs, k)
}
sort.Sort(xmlAttrSlice(sortedAttrs))
attrs = sortedAttrs
}
st := xml.StartElement{Name: node.Name, Attr: attrs}
e.EncodeToken(st)
// return fmt.Errorf("encoder string : %s, %s, %s", node.Name.Local, node.Name.Space, st.Attr)
if node.Text != "" {
e.EncodeToken(xml.CharData([]byte(node.Text)))
} else if sorted {
sortedNames := []string{}
for k := range node.Children {
sortedNames = append(sortedNames, k)
}
sort.Strings(sortedNames)
for _, k := range sortedNames {
// we should sort the []*xml.Node for each key if len >1
flattenedNodes := node.Children[k]
// Meaning this has multiple nodes
if len(flattenedNodes) > 1 {
// sort flattened nodes
flattenedNodes, err = sortFlattenedNodes(flattenedNodes)
if err != nil {
return err
}
}
for _, v := range flattenedNodes {
err = StructToXML(e, v, sorted)
if err != nil {
return err
}
}
}
} else {
for _, c := range node.Children {
for _, v := range c {
err = StructToXML(e, v, sorted)
if err != nil {
return err
}
}
}
}
e.EncodeToken(xml.EndElement{Name: node.Name})
return e.Flush()
}
// sortFlattenedNodes sorts nodes with nodes having same element tag
// but overall different values. The function will return list of pointer to
// XMLNode and an error.
//
// Overall sort order is followed is:
// Nodes with concrete value (no nested node as value) are given precedence
// and are added to list after sorting them
//
// Next nested nodes within a flattened list are given precedence.
//
// Next nodes within a flattened map are sorted based on either key or value
// which ever has lower value and then added to the global sorted list.
// If value was initially chosen, but has nested nodes; key will be chosen as comparable
// as it is unique and will always have concrete data ie. string.
func sortFlattenedNodes(nodes []*XMLNode) ([]*XMLNode, error) {
var sortedNodes []*XMLNode
// concreteNodeMap stores concrete value associated with a list of nodes
// This is possible in case multiple members of a flatList has same values.
concreteNodeMap := make(map[string][]*XMLNode, 0)
// flatListNodeMap stores flat list or wrapped list members associated with a list of nodes
// This will have only flattened list with members that are Nodes and not concrete values.
flatListNodeMap := make(map[string][]*XMLNode, 0)
// flatMapNodeMap stores flat map or map entry members associated with a list of nodes
// This will have only flattened map concrete value members. It is possible to limit this
// to concrete value as map key is expected to be concrete.
flatMapNodeMap := make(map[string][]*XMLNode, 0)
// nodes with concrete value are prioritized and appended based on sorting order
sortedNodesWithConcreteValue := []string{}
// list with nested nodes are second in priority and appended based on sorting order
sortedNodesWithListValue := []string{}
// map are last in priority and appended based on sorting order
sortedNodesWithMapValue := []string{}
for _, node := range nodes {
// node has no children element, then we consider it as having concrete value
if len(node.Children) == 0 {
sortedNodesWithConcreteValue = append(sortedNodesWithConcreteValue, node.Text)
if v, ok := concreteNodeMap[node.Text]; ok {
concreteNodeMap[node.Text] = append(v, node)
} else {
concreteNodeMap[node.Text] = []*XMLNode{node}
}
}
// if node has a single child, then it is a flattened list node
if len(node.Children) == 1 {
for _, nestedNodes := range node.Children {
nestedNodeName := nestedNodes[0].Name.Local
// append to sorted node name for list value
sortedNodesWithListValue = append(sortedNodesWithListValue, nestedNodeName)
if v, ok := flatListNodeMap[nestedNodeName]; ok {
flatListNodeMap[nestedNodeName] = append(v, nestedNodes[0])
} else {
flatListNodeMap[nestedNodeName] = []*XMLNode{nestedNodes[0]}
}
}
}
// if node has two children, then it is a flattened map node
if len(node.Children) == 2 {
nestedPair := []*XMLNode{}
for _, k := range node.Children {
nestedPair = append(nestedPair, k[0])
}
comparableValues := []string{nestedPair[0].Name.Local, nestedPair[1].Name.Local}
sort.Strings(comparableValues)
comparableValue := comparableValues[0]
for _, nestedNode := range nestedPair {
if comparableValue == nestedNode.Name.Local && len(nestedNode.Children) != 0 {
// if value was selected and is nested node, skip it and use key instead
comparableValue = comparableValues[1]
continue
}
// now we are certain there is no nested node
if comparableValue == nestedNode.Name.Local {
// get chardata for comparison
comparableValue = nestedNode.Text
sortedNodesWithMapValue = append(sortedNodesWithMapValue, comparableValue)
if v, ok := flatMapNodeMap[comparableValue]; ok {
flatMapNodeMap[comparableValue] = append(v, node)
} else {
flatMapNodeMap[comparableValue] = []*XMLNode{node}
}
break
}
}
}
// we don't support multiple same name nodes in an xml doc except for in flattened maps, list.
if len(node.Children) > 2 {
return nodes, fmt.Errorf("malformed xml: multiple nodes with same key name exist, " +
"but are not associated with flattened maps (2 children) or list (0 or 1 child)")
}
}
// sort concrete value node name list and append corresponding nodes
// to sortedNodes
sort.Strings(sortedNodesWithConcreteValue)
for _, name := range sortedNodesWithConcreteValue {
for _, node := range concreteNodeMap[name] {
sortedNodes = append(sortedNodes, node)
}
}
// sort nested nodes with a list and append corresponding nodes
// to sortedNodes
sort.Strings(sortedNodesWithListValue)
for _, name := range sortedNodesWithListValue {
// if two nested nodes have same name, then sort them separately.
if len(flatListNodeMap[name]) > 1 {
// return nodes, fmt.Errorf("flat list node name are %s %v", flatListNodeMap[name][0].Name.Local, len(flatListNodeMap[name]))
nestedFlattenedList, err := sortFlattenedNodes(flatListNodeMap[name])
if err != nil {
return nodes, err
}
// append the identical but sorted nodes
for _, nestedNode := range nestedFlattenedList {
sortedNodes = append(sortedNodes, nestedNode)
}
} else {
// append the sorted nodes
sortedNodes = append(sortedNodes, flatListNodeMap[name][0])
}
}
// sorted nodes with a map and append corresponding nodes to sortedNodes
sort.Strings(sortedNodesWithMapValue)
for _, name := range sortedNodesWithMapValue {
sortedNodes = append(sortedNodes, flatMapNodeMap[name][0])
}
return sortedNodes, nil
}