collector/logs/transforms/parser/keyvalue.go (123 lines of code) (raw):
package parser
import (
"strconv"
"strings"
"unicode"
"github.com/Azure/adx-mon/collector/logs/types"
)
// ParserTypeKeyValue is the ParserType value "keyvalue"; it names the
// key-value parser implemented in this file (KeyValueParser).
const ParserTypeKeyValue ParserType = "keyvalue"
// KeyValueParserConfig is the configuration type accepted by
// NewKeyValueParser. It currently declares no fields.
type KeyValueParserConfig struct{}
// KeyValueParser parses whitespace-separated "key=value" pairs and bare keys
// out of a message and stores them on a log via Parse.
//
// The fields below are scratch state reused between calls; Parse and tokenize
// mutate them without any synchronization.
type KeyValueParser struct {
// parsed is cleared at the start of every Parse call. Nothing in this file
// writes entries into it.
parsed map[string]interface{}
// tokens is the scratch slice that tokenize truncates and refills on each
// call; the slice tokenize returns shares its backing array.
tokens []string
// currentToken accumulates the characters of the token tokenize is
// currently building.
currentToken strings.Builder
}
// NewKeyValueParser builds a KeyValueParser with its scratch map and token
// slice allocated (both empty and non-nil). The config argument is accepted
// but not consulted, and the returned error is always nil.
func NewKeyValueParser(config KeyValueParserConfig) (*KeyValueParser, error) {
	parser := new(KeyValueParser)
	parser.parsed = map[string]interface{}{}
	parser.tokens = []string{}
	// currentToken is left at its zero value, which is a ready-to-use
	// strings.Builder.
	return parser, nil
}
// Parse splits msg into whitespace-separated tokens (quoted sections are kept
// together by tokenize) and records each one on log via SetBodyValue.
//
// A token containing "=" is split at the first "="; the right-hand side has
// one pair of surrounding quotes removed and is then converted by
// reflectValue before being stored under the left-hand side. A token without
// "=" is stored as a key whose value is the empty string.
//
// A blank or whitespace-only msg leaves log untouched. The returned error is
// always nil.
func (p *KeyValueParser) Parse(log *types.Log, msg string) error {
	clear(p.parsed)

	trimmed := strings.TrimSpace(msg)
	if len(trimmed) == 0 {
		return nil
	}

	for _, field := range p.tokenize(trimmed) {
		name, raw, hasValue := strings.Cut(field, "=")
		if !hasValue {
			// Standalone key: record it with an empty string value.
			log.SetBodyValue(name, "")
			continue
		}
		log.SetBodyValue(name, reflectValue(unquote(raw)))
	}
	return nil
}
// tokenize breaks s into tokens at whitespace, except that whitespace inside
// a single- or double-quoted section does not end a token. Quote characters
// are kept in the emitted tokens. A quote immediately preceded by a backslash
// byte is treated as an ordinary character.
//
// The returned slice is p.tokens, which is reused (and overwritten) by the
// next call.
func (p *KeyValueParser) tokenize(s string) []string {
	// Start from empty scratch state, keeping the slice's capacity.
	p.tokens = p.tokens[:0]
	p.currentToken.Reset()

	// open holds the quote rune that started the current quoted section, or 0
	// while outside quotes.
	var open rune

	for idx, r := range s {
		isQuote := r == '"' || r == '\''
		escaped := idx > 0 && s[idx-1] == '\\'

		if isQuote && !escaped {
			if open == 0 {
				open = r // entering a quoted section
			} else if open == r {
				open = 0 // matching close quote
			}
			// A different quote type inside an open section is literal text.
			p.currentToken.WriteRune(r)
			continue
		}

		if open == 0 && unicode.IsSpace(r) {
			// Whitespace outside quotes ends the current token, if any.
			if p.currentToken.Len() > 0 {
				p.tokens = append(p.tokens, p.currentToken.String())
				p.currentToken.Reset()
			}
			continue
		}

		p.currentToken.WriteRune(r)
	}

	// Emit whatever is left after the final character.
	if p.currentToken.Len() > 0 {
		p.tokens = append(p.tokens, p.currentToken.String())
	}
	return p.tokens
}
// unquote strips one pair of matching surrounding quotes (both '"' or both
// '\”) from s. Strings shorter than two bytes, or whose first and last bytes
// are not the same quote character, are returned unchanged.
func unquote(s string) string {
	if len(s) < 2 {
		return s
	}

	head, tail := s[0], s[len(s)-1]
	if head != tail {
		return s
	}
	if head == '"' || head == '\'' {
		return s[1 : len(s)-1]
	}
	return s
}
// reflectValue converts value to the most specific Go type it looks like:
//
//   - "" stays the empty string;
//   - "true"/"false" in any letter case become bool;
//   - an optional leading '+' or '-' followed only by ASCII digits becomes an
//     int (via strconv.Atoi);
//   - the same with exactly one '.' becomes a float64 (via strconv.ParseFloat).
//
// Anything else, including numeric-looking text that strconv rejects (for
// example "-", ".", or an integer too large for int), is returned unchanged
// as a string.
func reflectValue(value string) interface{} {
	if value == "" {
		return ""
	}

	// Only bother with a case-insensitive compare when the first byte and the
	// length already match a boolean literal.
	switch value[0] {
	case 't', 'T':
		if len(value) == 4 && strings.EqualFold(value, "true") {
			return true
		}
	case 'f', 'F':
		if len(value) == 5 && strings.EqualFold(value, "false") {
			return false
		}
	}

	// Scan everything after an optional leading sign: any byte that is not a
	// digit or a first '.' means the value is plain text.
	body := value
	if value[0] == '+' || value[0] == '-' {
		body = value[1:]
	}
	dots := 0
	for j := 0; j < len(body); j++ {
		b := body[j]
		if b == '.' {
			dots++
			if dots > 1 {
				return value
			}
			continue
		}
		if b < '0' || b > '9' {
			return value
		}
	}

	// The text is shaped like a number; strconv has the final say.
	if dots == 1 {
		if f, err := strconv.ParseFloat(value, 64); err == nil {
			return f
		}
		return value
	}
	if n, err := strconv.Atoi(value); err == nil {
		return n
	}
	return value
}