pkg/data-query/parser.go (246 lines of code) (raw):
package data_query
import (
"errors"
"fmt"
"math"
"regexp"
"strings"
"github.com/JetBrains/ij-perf-report-aggregator/pkg/http-error"
"github.com/valyala/fastjson"
)
// https://clickhouse.yandex/docs/en/query_language/syntax/#syntax-identifiers
var reFieldName = regexp.MustCompile(`^[a-zA-Z_]\w*$`)
// opposite to reFieldName, dot is supported for nested fields
var (
reNestedFieldName = regexp.MustCompile(`^[a-zA-Z_][.\da-zA-Z_]*$`)
reMetricName = regexp.MustCompile(`^[a-zA-Z\d _]+$`)
)
// add ().space,'*
var reAggregator = regexp.MustCompile(`^[a-zA-Z_'(][\da-zA-Z_(). ,'*@<>\-/+]*$`)
// for db name the same validation rules as for field name
var reDbName = reFieldName
var queryParsers fastjson.ParserPool
func isValidFieldName(v string) bool {
return reFieldName.MatchString(v)
}
func isValidFilterFieldName(v string) bool {
return reNestedFieldName.MatchString(v)
}
func readQuery(s []byte) ([]Query, error) {
parser := queryParsers.Get()
defer queryParsers.Put(parser)
value, err := parser.ParseBytes(s)
if err != nil {
return nil, fmt.Errorf("cannot parse query: %w", err)
}
var queries []Query
if value.Type() == fastjson.TypeArray {
for _, v := range value.GetArray() {
query, err := readQueryValue(v)
if err != nil {
return queries, err
}
queries = append(queries, *query)
}
} else {
query, err := readQueryValue(value)
if err != nil {
return queries, err
}
queries = append(queries, *query)
}
return queries, nil
}
func readQueryValue(value *fastjson.Value) (*Query, error) {
query := &Query{
Database: string(value.GetStringBytes("db")),
Table: string(value.GetStringBytes("table")),
Flat: value.GetBool("flat"),
}
switch {
case query.Database == "":
query.Database = "default"
case !reDbName.MatchString(query.Database):
return nil, http_error.NewHttpError(400, fmt.Sprintf("Database name %s contains illegal chars", query.Database))
case query.Table != "" && !reDbName.MatchString(query.Table):
return nil, http_error.NewHttpError(400, fmt.Sprintf("Table name %s contains illegal chars", query.Table))
}
err := readDimensions(value.GetArray("fields"), &query.Fields)
if err != nil {
return nil, err
}
err = readFilters(value.GetArray("filters"), query)
if err != nil {
return nil, err
}
orderValue := value.Get("order")
if orderValue == nil {
return nil, http_error.NewHttpError(400, "order is missing")
}
if orderValue.Type() == fastjson.TypeString {
field := string(orderValue.GetStringBytes())
if !reNestedFieldName.MatchString(field) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("Order %s is not a valid field name", field))
}
query.Order = []string{field}
} else {
for _, v := range value.GetArray("order") {
field := string(v.GetStringBytes())
if !reNestedFieldName.MatchString(field) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("Order %s is not a valid field name", field))
}
query.Order = append(query.Order, field)
}
}
query.Aggregator = string(value.GetStringBytes("aggregator"))
if query.Aggregator != "" && !reAggregator.MatchString(query.Aggregator) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("Aggregator %s contains illegal chars", query.Aggregator))
}
err = readDimensions(value.GetArray("dimensions"), &query.Dimensions)
if err != nil {
return nil, err
}
query.TimeDimensionFormat = string(value.GetStringBytes("timeDimensionFormat"))
if query.Aggregator != "" && !reAggregator.MatchString(query.Aggregator) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("timeDimensionFormat %s contains illegal chars", query.TimeDimensionFormat))
}
return query, nil
}
func readDimensions(list []*fastjson.Value, result *[]QueryDimension) error {
for _, v := range list {
t, err := readDimension(v)
if err != nil {
return err
}
if t.Sql != "" && !reAggregator.MatchString(t.Sql) {
return http_error.NewHttpError(400, fmt.Sprintf("Dimension SQL %s contains illegal chars", t.Sql))
}
if t.resultPropertyName != "" && !isValidFieldName(t.resultPropertyName) {
return http_error.NewHttpError(400, fmt.Sprintf("resultPropertyName %s is not a valid field name", t.Name))
}
*result = append(*result, *t)
}
return nil
}
func readDimension(v *fastjson.Value) (*QueryDimension, error) {
var t QueryDimension
if v.Type() == fastjson.TypeString {
t = QueryDimension{
Name: string(v.GetStringBytes()),
}
} else {
subNameValue := v.Get("subName")
if subNameValue == nil {
t = QueryDimension{
Name: string(v.GetStringBytes("n")),
Sql: string(v.GetStringBytes("sql")),
}
} else {
arrayJoin := string(v.GetStringBytes("n"))
t = QueryDimension{
Name: arrayJoin + "." + string(subNameValue.GetStringBytes()),
arrayJoin: arrayJoin,
Sql: string(v.GetStringBytes("sql")),
resultPropertyName: string(v.GetStringBytes("resultKey")),
}
if !reNestedFieldName.MatchString(t.Name) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("Name %s is not a valid field name", t.Name))
}
if !isValidFieldName(t.arrayJoin) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("subName %s is not a valid field name", t.Name))
}
return &t, nil
}
}
t.resultPropertyName = string(v.GetStringBytes("resultKey"))
qualifierDotIndex := strings.IndexRune(t.Name, '.')
if qualifierDotIndex != -1 {
t.metricPath = t.Name[0:qualifierDotIndex]
t.metricName = t.Name[qualifierDotIndex+1:]
t.metricValueName = 'd'
metricNameLength := len(t.metricName)
if metricNameLength > 2 && t.metricName[metricNameLength-2] == '.' {
t.metricValueName = rune(t.metricName[metricNameLength-1])
t.metricName = t.metricName[:metricNameLength-2]
}
if t.resultPropertyName == "" {
t.resultPropertyName = strings.ReplaceAll(t.metricName, " ", "_")
}
if !isValidFieldName(t.metricPath) || !reMetricName.MatchString(t.metricName) {
return nil, http_error.NewHttpError(400, fmt.Sprintf("Name %s is not a valid field name", t.Name))
}
}
return &t, nil
}
func readFilters(list []*fastjson.Value, query *Query) error {
for _, v := range list {
t := QueryFilter{
Field: string(v.GetStringBytes("f")),
Sql: string(v.GetStringBytes("q")),
Operator: string(v.GetStringBytes("o")),
Split: false,
}
if t.Sql == "" {
t.Sql = string(v.GetStringBytes("sql"))
}
value := v.Get("v")
t.Split = v.GetBool("s")
if value == nil {
value = v.Get("value")
}
if !isValidFilterFieldName(t.Field) && t.Sql == "" {
return http_error.NewHttpError(400, t.Field+" is not a valid filter field name")
}
if t.Sql == "" && value == nil {
return errors.New("filter value is not specified")
}
if t.Sql == "" {
switch value.Type() {
case fastjson.TypeString:
t.Value = string(value.GetStringBytes())
case fastjson.TypeNumber:
number, err := value.Float64()
if err != nil {
return fmt.Errorf("cannot parse filter value %s: %w", value, err)
}
if number == math.Trunc(number) {
// convert to int (to be able to use time unix timestamps from client side)
t.Value = int(number)
} else {
t.Value = number
}
case fastjson.TypeArray:
t.Value = readArray(value)
case fastjson.TypeFalse:
t.Value = value.GetBool()
case fastjson.TypeTrue:
t.Value = value.GetBool()
default:
return fmt.Errorf("filter value %v is not supported", value)
}
if t.Operator == "" {
t.Operator = "="
} else if t.Operator != ">" && t.Operator != "<" && t.Operator != "=" && t.Operator != "!=" && t.Operator != "like" {
return fmt.Errorf("operator %s is not supported", t.Operator)
}
} else {
// by intention sql string is not validated
if t.Operator != "" {
return http_error.NewHttpError(400, fmt.Sprintf("sql and operator are mutually exclusive (filter=%s)", t.Field))
}
if t.Value != nil {
return http_error.NewHttpError(400, fmt.Sprintf("sql and value are mutually exclusive (filter=%s)", t.Field))
}
}
query.Filters = append(query.Filters, t)
}
return nil
}
func readArray(parentValue *fastjson.Value) []any {
list := make([]any, 0)
for _, v := range parentValue.GetArray() {
switch v.Type() {
case fastjson.TypeFalse:
list = append(list, false)
case fastjson.TypeTrue:
list = append(list, true)
default:
list = append(list, string(v.GetStringBytes()))
}
}
return list
}