internal/pkg/template/diff/diff.go (271 lines of code) (raw):
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
// Package diff provides functionalities to compare two YAML documents.
package diff
import (
"fmt"
"io"
"sort"
"gopkg.in/yaml.v3"
)
// Tree represents a difference tree between two YAML documents.
type Tree struct {
root diffNode
}
func (t Tree) Write(w io.Writer) error {
tw := &treeWriter{t, w}
return tw.write()
}
// diffNode is the interface to represents the difference between two *yaml.Node.
type diffNode interface {
key() string
newYAML() *yaml.Node
oldYAML() *yaml.Node
children() []diffNode
}
// keyNode is a concrete implementation of a diffNode.
type keyNode struct {
keyValue string
childNodes []diffNode // A list of non-empty pointers to the children nodes.
oldV *yaml.Node // Only populated for a leaf node (i.e. that has no child node).
newV *yaml.Node // Only populated for a leaf node (i.e. that has no child node).
}
func (n *keyNode) key() string {
return n.keyValue
}
func (n *keyNode) newYAML() *yaml.Node {
return n.newV
}
func (n *keyNode) oldYAML() *yaml.Node {
return n.oldV
}
func (n *keyNode) children() []diffNode {
return n.childNodes
}
type unchangedNode struct {
count int
}
func (n *unchangedNode) children() []diffNode {
return nil
}
func (n *unchangedNode) key() string {
return ""
}
func (n *unchangedNode) newYAML() *yaml.Node {
return nil
}
func (n *unchangedNode) oldYAML() *yaml.Node {
return nil
}
func (n *unchangedNode) unchangedCount() int {
return n.count
}
type seqItemNode struct {
keyNode
}
// From is the YAML document that another YAML document is compared against.
type From []byte
// ParseWithCFNOverriders constructs a diff tree that represent the differences of a YAML document against the From document with
// overriders designed for CFN documents, including:
// 1. An ignorer that ignores diffs under "Metadata.Manifest".
// 2. An overrider that is able to compare intrinsic functions with full/short form correctly.
func (from From) ParseWithCFNOverriders(to []byte) (Tree, error) {
return from.Parse(to,
&ignorer{
curr: &ignoreSegment{
key: "Metadata",
next: &ignoreSegment{
key: "Manifest",
},
},
},
&getAttConverter{},
&intrinsicFuncMapTagConverter{})
}
// Parse constructs a diff tree that represent the differences of a YAML document against the From document.
func (from From) Parse(to []byte, overriders ...overrider) (Tree, error) {
var toNode, fromNode yaml.Node
if err := yaml.Unmarshal(to, &toNode); err != nil {
return Tree{}, fmt.Errorf("unmarshal current template: %w", err)
}
if err := yaml.Unmarshal(from, &fromNode); err != nil {
return Tree{}, fmt.Errorf("unmarshal old template: %w", err)
}
var root diffNode
var err error
switch {
// NOTE: If Kind is 0, it means the document is empty and nothing is unmarshalled.
case fromNode.Kind == 0 && toNode.Kind == 0:
return Tree{}, nil
case fromNode.Kind == 0:
root, err = parse(nil, &toNode, "", overriders...)
case toNode.Kind == 0:
root, err = parse(&fromNode, nil, "", overriders...)
default:
root, err = parse(&fromNode, &toNode, "", overriders...)
}
if err != nil {
return Tree{}, err
}
if root == nil {
return Tree{}, nil
}
return Tree{
root: root,
}, nil
}
func parse(from, to *yaml.Node, key string, overriders ...overrider) (diffNode, error) {
for _, overrider := range overriders {
if overrider.match(from, to, key, overrider) {
return overrider.parse(from, to, key, overrider)
}
}
// Handle base cases.
if to == nil || from == nil || to.Kind != from.Kind {
return &keyNode{
keyValue: key,
newV: to,
oldV: from,
}, nil
}
if isYAMLLeaf(to) && isYAMLLeaf(from) {
if to.Value == from.Value {
return nil, nil
}
return &keyNode{
keyValue: key,
newV: to,
oldV: from,
}, nil
}
var children []diffNode
var err error
switch {
case to.Kind == yaml.SequenceNode && from.Kind == yaml.SequenceNode:
children, err = parseSequence(from, to, overriders...)
case to.Kind == yaml.DocumentNode && from.Kind == yaml.DocumentNode:
fallthrough
case to.Kind == yaml.MappingNode && from.Kind == yaml.MappingNode:
children, err = parseMap(from, to, overriders...)
default:
return nil, fmt.Errorf("unknown combination of node kinds: %v, %v", to.Kind, from.Kind)
}
if err != nil {
return nil, fmt.Errorf("parse YAML content with key %s: %w", key, err)
}
if len(children) == 0 {
return nil, nil
}
return &keyNode{
keyValue: key,
childNodes: children,
}, nil
}
func isYAMLLeaf(node *yaml.Node) bool {
return len(node.Content) == 0
}
func parseSequence(fromNode, toNode *yaml.Node, overriders ...overrider) ([]diffNode, error) {
fromSeq, toSeq := make([]yaml.Node, len(fromNode.Content)), make([]yaml.Node, len(toNode.Content)) // NOTE: should be the same as calling `Decode`.
for idx, v := range fromNode.Content {
fromSeq[idx] = *v
}
for idx, v := range toNode.Content {
toSeq[idx] = *v
}
type cachedEntry struct {
node diffNode
err error
}
cachedDiff := make(map[string]cachedEntry)
lcsIndices := longestCommonSubsequence(fromSeq, toSeq, func(idxFrom, idxTo int) bool {
// Note: This function passed as `eq` should be a pure function. Therefore, its output is the same
// given the same `idxFrom` and `idxTo`. Hence, it is not necessary to parse the nodes again.
// In `lcs.go`, `eq` can be called twice on the same indices: once when computing LCS length, and
// once when back-tracing to construct the LCS.
if diff, ok := cachedDiff[cacheKey(idxFrom, idxTo)]; ok {
return diff.err == nil && diff.node == nil
}
diff, err := parse(&(fromSeq[idxFrom]), &(toSeq[idxTo]), "", overriders...)
if diff != nil { // NOTE: cache the diff only if a modification could have happened at this position.
cachedDiff[cacheKey(idxFrom, idxTo)] = cachedEntry{
node: diff,
err: err,
}
}
return err == nil && diff == nil
})
// No difference if the two sequences have the same size and the LCS is the entire sequence.
if len(fromSeq) == len(toSeq) && len(lcsIndices) == len(fromSeq) {
return nil, nil
}
var children []diffNode
var matchCount int
inspector := newLCSStateMachine(fromSeq, toSeq, lcsIndices)
for action := inspector.action(); action != actionDone; action = inspector.action() {
switch action {
case actionMatch:
matchCount++
if action := inspector.peek(); action != actionMatch {
children = append(children, &unchangedNode{count: matchCount})
matchCount = 0
}
case actionMod:
diff := cachedDiff[cacheKey(inspector.fromIndex(), inspector.toIndex())]
if diff.err != nil {
return nil, diff.err
}
children = append(children, &seqItemNode{
keyNode{
keyValue: diff.node.key(),
childNodes: diff.node.children(),
oldV: diff.node.oldYAML(),
newV: diff.node.newYAML(),
},
})
case actionDel:
item := inspector.fromItem()
children = append(children, &seqItemNode{
keyNode{
oldV: &item,
},
})
case actionInsert:
item := inspector.toItem()
children = append(children, &seqItemNode{
keyNode{
newV: &item,
},
})
}
inspector.next()
}
return children, nil
}
func parseMap(from, to *yaml.Node, overriders ...overrider) ([]diffNode, error) {
currMap, oldMap := make(map[string]yaml.Node), make(map[string]yaml.Node)
if err := to.Decode(currMap); err != nil {
return nil, err
}
if err := from.Decode(oldMap); err != nil {
return nil, err
}
keys := unionOfKeys(currMap, oldMap)
sort.SliceStable(keys, func(i, j int) bool { return keys[i] < keys[j] }) // NOTE: to avoid flaky unit tests.
var children []diffNode
for _, k := range keys {
var currV, oldV *yaml.Node
if v, ok := oldMap[k]; ok {
oldV = &v
}
if v, ok := currMap[k]; ok {
currV = &v
}
kDiff, err := parse(oldV, currV, k, overriders...)
if err != nil {
return nil, err
}
if kDiff != nil {
children = append(children, kDiff)
}
}
return children, nil
}
func unionOfKeys[T any](a, b map[string]T) []string {
exists, keys := struct{}{}, make(map[string]struct{})
for k := range a {
keys[k] = exists
}
for k := range b {
keys[k] = exists
}
keySlice, idx := make([]string, len(keys)), 0
for k := range keys {
keySlice[idx] = k
idx++
}
return keySlice
}
func cacheKey(inFrom, inTo int) string {
return fmt.Sprintf("%d,%d", inFrom, inTo)
}