// cpp-ch/local-engine/proto/substrait/algebra.proto (1,290 lines of code) (raw)
// SPDX-License-Identifier: Apache-2.0
syntax = "proto3";
package substrait;
import "google/protobuf/any.proto";
import "substrait/extensions/extensions.proto";
import "substrait/type.proto";
option csharp_namespace = "Substrait.Protobuf";
option go_package = "github.com/substrait-io/substrait-go/proto";
option java_multiple_files = true;
option java_package = "io.substrait.proto";
// Properties shared by every relational operator.
message RelCommon {
  // Selects how the operator's output columns are exposed.
  oneof emit_kind {
    // Output the underlying relation as is: no reordering and no projection
    // of columns.
    Direct direct = 1;
    // Control the order and inclusion of output fields.
    Emit emit = 2;
  }
  // See the Hint message below.
  Hint hint = 3;
  substrait.extensions.AdvancedExtension advanced_extension = 4;

  // Marker message: presence and ordering of output fields are unchanged.
  message Direct {}

  // Remaps which fields are output and in which order.
  message Emit {
    repeated int32 output_mapping = 1;
  }

  // Changes to the operation that can influence efficiency/performance but
  // should not impact correctness.
  message Hint {
    Stats stats = 1;
    RuntimeConstraint constraint = 2;
    substrait.extensions.AdvancedExtension advanced_extension = 10;

    // Statistics related to a hint (physical properties of records).
    message Stats {
      double row_count = 1;
      double record_size = 2;
      substrait.extensions.AdvancedExtension advanced_extension = 10;
    }

    // Currently carries only an extension point.
    message RuntimeConstraint {
      // TODO: nodes, cpu threads/%, memory, iops, etc.
      substrait.extensions.AdvancedExtension advanced_extension = 10;
    }
  }
}
// Scan operator over base data (physical or virtual), including filtering and
// projection.
message ReadRel {
  RelCommon common = 1;
  NamedStruct base_schema = 2;
  Expression filter = 3;
  // Declared out of numeric order; the number 11 is part of the wire contract
  // and must stay as is.
  Expression best_effort_filter = 11;
  Expression.MaskExpression projection = 4;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // Which type of scan operation is to be performed.
  oneof read_type {
    VirtualTable virtual_table = 5;
    LocalFiles local_files = 6;
    NamedTable named_table = 7;
    ExtensionTable extension_table = 8;
    // NOTE(review): this is a plain bool, not the StreamKafka message declared
    // below. Being a oneof member, setting it clears the other read types.
    // How the Kafka split details reach the consumer is not visible in this
    // file; confirm against the producer/consumer code.
    bool stream_kafka = 9;
  }

  // A base table. The list of strings represents namespacing
  // (e.g., mydb.mytable). This assumes a shared catalog between the systems
  // exchanging a message.
  message NamedTable {
    repeated string names = 1;
    substrait.extensions.AdvancedExtension advanced_extension = 10;
  }

  // A table composed of literals.
  message VirtualTable {
    repeated Expression.Literal.Struct values = 1;
  }

  // Stub type that can be used to extend/introduce new table types outside
  // the specification.
  message ExtensionTable {
    google.protobuf.Any detail = 1;
  }

  // Used for a KafkaBatch or KafkaContinuous source.
  message StreamKafka {
    message TopicPartition {
      string topic = 1;
      int32 partition = 2;
    }

    TopicPartition topic_partition = 1;
    int64 start_offset = 2;
    int64 end_offset = 3;
    map<string, string> params = 4;
    int64 poll_timeout_ms = 5;
    bool fail_on_data_loss = 6;
    bool include_headers = 7;
  }

  // A list of files that form the input of a scan operation.
  message LocalFiles {
    repeated FileOrFiles items = 1;
    substrait.extensions.AdvancedExtension advanced_extension = 10;

    // Many files consist of indivisible chunks (e.g. parquet row groups or
    // CSV rows). If a slice partially selects an indivisible chunk, the
    // consumer should employ some rule to decide which slice includes the
    // chunk (e.g. the slice that contains the midpoint of the chunk).
    message FileOrFiles {
      oneof path_type {
        // A URI that can refer to either a single folder or a single file.
        string uri_path = 1;
        // A URI whose path portion is a glob expression that can identify
        // zero or more paths. Consumers should support the POSIX syntax. The
        // recursive globstar (**) may not be supported.
        string uri_path_glob = 2;
        // A URI that refers to a single file.
        string uri_file = 3;
        // A URI that refers to a single folder.
        string uri_folder = 4;
      }

      // Original file format enum, superseded by the file_format oneof.
      reserved 5;
      reserved "format";

      // Index of the partition this item belongs to.
      uint64 partition_index = 6;
      // Start position, in bytes, to read from this item.
      uint64 start = 7;
      // Length, in bytes, to read from this item.
      uint64 length = 8;

      message ParquetReadOptions {
        bool enable_row_group_maxmin_index = 1;
      }

      message ArrowReadOptions {}

      message OrcReadOptions {}

      message DwrfReadOptions {}

      message TextReadOptions {
        string field_delimiter = 1;
        uint64 max_block_size = 2;
        // Deprecated. FileOrFiles also declares a `schema` field (number 18).
        NamedStruct schema = 3 [deprecated = true];
        string quote = 4;
        uint64 header = 5;
        string escape = 6;
        string null_value = 7;
        bool empty_as_default = 8;
      }

      message JsonReadOptions {
        uint64 max_block_size = 1;
        // Deprecated. FileOrFiles also declares a `schema` field (number 18).
        NamedStruct schema = 2 [deprecated = true];
      }

      message IcebergReadOptions {
        // Zero value is DATA, so an unset field reads as DATA.
        enum FileContent {
          DATA = 0;
          POSITION_DELETES = 1;
          EQUALITY_DELETES = 2;
        }

        message DeleteFile {
          // List of int32-key / string-value pairs.
          message Map {
            message KeyValue {
              int32 key = 1;
              string value = 2;
            }

            repeated KeyValue key_values = 1;
          }

          FileContent fileContent = 1;
          string filePath = 2;
          uint64 fileSize = 3;
          uint64 recordCount = 4;
          oneof file_format {
            ParquetReadOptions parquet = 5;
            OrcReadOptions orc = 6;
          }
          repeated int32 equalityFieldIds = 7;
          Map lowerBounds = 8;
          Map upperBounds = 9;
        }

        oneof file_format {
          ParquetReadOptions parquet = 1;
          OrcReadOptions orc = 2;
        }
        repeated DeleteFile delete_files = 3;
      }

      // File reading options.
      oneof file_format {
        ParquetReadOptions parquet = 9;
        ArrowReadOptions arrow = 10;
        OrcReadOptions orc = 11;
        google.protobuf.Any extension = 12;
        DwrfReadOptions dwrf = 13;
        TextReadOptions text = 14;
        JsonReadOptions json = 15;
        IcebergReadOptions iceberg = 16;
      }

      // Key/value string pair used by partition_columns.
      message partitionColumn {
        string key = 1;
        string value = 2;
      }

      repeated partitionColumn partition_columns = 17;

      // File schema.
      NamedStruct schema = 18;

      // Key/value string pair used by metadata_columns.
      message metadataColumn {
        string key = 1;
        string value = 2;
      }

      repeated metadataColumn metadata_columns = 19;

      // File properties contained in split.
      message fileProperties {
        int64 fileSize = 1;
        int64 modificationTime = 2;
      }

      fileProperties properties = 20;

      // String key paired with an arbitrary (Any) value.
      message otherConstantMetadataColumnValues {
        string key = 1;
        google.protobuf.Any value = 2;
      }

      repeated otherConstantMetadataColumnValues other_const_metadata_columns = 21;
    }
  }
}
// Represents calculated expressions over fields (e.g., a+b). Direct/Emit in
// `common` are used to represent classical relational projections.
message ProjectRel {
  RelCommon common = 1;
  Rel input = 2;
  repeated Expression expressions = 3;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// The binary JOIN relational operator (left-join-right), including various
// join types, a join condition and a post_join_filter expression.
message JoinRel {
  RelCommon common = 1;
  Rel left = 2;
  Rel right = 3;
  Expression expression = 4;
  Expression post_join_filter = 5;
  JoinType type = 6;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // Note: the numbering of SEMI/ANTI/SINGLE values here differs from
  // HashJoinRel.JoinType and MergeJoinRel.JoinType; the enums are not
  // interchangeable by number.
  enum JoinType {
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_LEFT_ANTI = 6;
    JOIN_TYPE_LEFT_SINGLE = 7;
    JOIN_TYPE_RIGHT_SEMI = 8;
    JOIN_TYPE_RIGHT_ANTI = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
  }
}
// Cartesian product relational operator of two tables (left and right).
message CrossRel {
  RelCommon common = 1;
  Rel left = 2;
  Rel right = 3;
  Expression expression = 4;
  JoinType type = 5;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // TODO -- Remove this unnecessary type.
  // Values and numbers mirror JoinRel.JoinType.
  enum JoinType {
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_LEFT_ANTI = 6;
    JOIN_TYPE_LEFT_SINGLE = 7;
    JOIN_TYPE_RIGHT_SEMI = 8;
    JOIN_TYPE_RIGHT_ANTI = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
  }
}
// Relational operator representing LIMIT/OFFSET or TOP type semantics.
message FetchRel {
  RelCommon common = 1;
  Rel input = 2;
  // Offset, expressed in number of records.
  int64 offset = 3;
  // Number of records to return.
  int64 count = 4;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// Relational operator representing a TOP N calculation.
message TopNRel {
  RelCommon common = 1;
  Rel input = 2;
  int64 n = 3;
  repeated SortField sorts = 4;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// Relational operator representing a GROUP BY aggregate.
message AggregateRel {
  RelCommon common = 1;
  // Input of the aggregation.
  Rel input = 2;
  // The expression groupings that the aggregation measures should be
  // calculated for.
  repeated Grouping groupings = 3;
  // One or more aggregate expressions, each with an optional filter.
  repeated Measure measures = 4;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  message Grouping {
    repeated Expression grouping_expressions = 1;
  }

  message Measure {
    AggregateFunction measure = 1;
    // Optional boolean expression that filters which records are included in
    // the measure. True means the record is included in the calculation.
    // Supports SUM(<c>) FILTER(WHERE...) syntax without masking opportunities
    // for optimization.
    Expression filter = 2;
  }
}
// The ORDER BY (or sorting) relational operator. Besides describing a base
// relation, it includes a list of fields to sort on.
message SortRel {
  RelCommon common = 1;
  Rel input = 2;
  repeated SortField sorts = 3;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// Relation carrying window-function measures together with partition
// expressions and sort fields.
message WindowRel {
  RelCommon common = 1;
  Rel input = 2;
  repeated Measure measures = 3;
  repeated Expression partition_expressions = 4;
  repeated SortField sorts = 5;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // Wraps a single window function call.
  message Measure {
    Expression.WindowFunction measure = 1;
  }
}
// Relation carrying partition expressions, sort fields and a limit.
// NOTE(review): presumably the limit applies per partition (window group);
// confirm against the consumer.
message WindowGroupLimitRel {
  RelCommon common = 1;
  Rel input = 2;
  repeated Expression partition_expressions = 3;
  repeated SortField sorts = 4;
  int32 limit = 5;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// Relational operator capturing simple FILTERs (as in the WHERE clause of
// SQL).
message FilterRel {
  RelCommon common = 1;
  Rel input = 2;
  Expression condition = 3;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// The relational set operators (intersection/union/etc.).
message SetRel {
  RelCommon common = 1;
  // The first input is the primary input; the remaining ones are secondary
  // inputs. There must be at least two inputs.
  repeated Rel inputs = 2;
  SetOp op = 3;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  enum SetOp {
    SET_OP_UNSPECIFIED = 0;
    SET_OP_MINUS_PRIMARY = 1;
    SET_OP_MINUS_MULTISET = 2;
    SET_OP_INTERSECTION_PRIMARY = 3;
    SET_OP_INTERSECTION_MULTISET = 4;
    SET_OP_UNION_DISTINCT = 5;
    SET_OP_UNION_ALL = 6;
  }
}
// Stub to support an extension with a single input.
message ExtensionSingleRel {
  RelCommon common = 1;
  Rel input = 2;
  google.protobuf.Any detail = 3;
}
// Stub to support an extension with zero inputs.
message ExtensionLeafRel {
  RelCommon common = 1;
  google.protobuf.Any detail = 2;
}
// Stub to support an extension with multiple inputs.
message ExtensionMultiRel {
  RelCommon common = 1;
  repeated Rel inputs = 2;
  google.protobuf.Any detail = 3;
}
// A redistribution operation.
message ExchangeRel {
  RelCommon common = 1;
  Rel input = 2;
  int32 partition_count = 3;
  repeated ExchangeTarget targets = 4;

  // The type of exchange used.
  oneof exchange_kind {
    ScatterFields scatter_by_fields = 5;
    SingleBucketExpression single_target = 6;
    MultiBucketExpression multi_target = 7;
    RoundRobin round_robin = 8;
    Broadcast broadcast = 9;
  }

  substrait.extensions.AdvancedExtension advanced_extension = 10;

  message ScatterFields {
    repeated Expression.FieldReference fields = 1;
  }

  // Returns a single bucket number per record.
  message SingleBucketExpression {
    Expression expression = 1;
  }

  // Returns zero or more bucket numbers per record.
  message MultiBucketExpression {
    Expression expression = 1;
    bool constrained_to_count = 2;
  }

  // Send all data to every target.
  message Broadcast {}

  // Route approximately.
  message RoundRobin {
    // Whether the round robin behavior is required to be exact (per record)
    // or approximate. Defaults to approximate.
    bool exact = 1;
  }

  // Describes the partition targets of an exchange.
  message ExchangeTarget {
    // The partition id(s) to send. If this is empty, all data is sent to this
    // target.
    repeated int32 partition_id = 1;

    oneof target_type {
      string uri = 2;
      google.protobuf.Any extended = 3;
    }
  }
}
// Duplicates records, possibly switching output expressions between each
// duplicate. The default output is all of the declared fields followed by one
// int64 field holding the duplicate_id, a zero-indexed ordinal identifying
// which duplicate of the original record the row corresponds to.
message ExpandRel {
  RelCommon common = 1;
  Rel input = 2;
  // Field number 3 is not used by this message; `fields` is number 4.
  repeated ExpandField fields = 4;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  message ExpandField {
    oneof field_type {
      // Field that switches its output based on which duplicate_id is being
      // output.
      SwitchingField switching_field = 2;
      // Field that outputs the same value regardless of the duplicate_id.
      Expression consistent_field = 3;
    }
  }

  message SwitchingField {
    // One expression to output per duplicate_id.
    // - Each `switching_field` must have the same number of expressions.
    // - All expressions within a switching field must be of the same type
    //   class but can differ in nullability.
    // - The column is nullable if any of the expressions is nullable.
    repeated Expression duplicates = 1;
  }
}
// A relation with output field names.
//
// Intended for use at the root of a `Rel` tree.
message RelRoot {
  // The relation.
  Rel input = 1;
  // Field names, in depth-first order.
  repeated string names = 2;
  Type.Struct output_schema = 3;
}
// A relation (used internally in a plan). Exactly which operator it is gets
// selected through the rel_type oneof.
message Rel {
  oneof rel_type {
    ReadRel read = 1;
    FilterRel filter = 2;
    FetchRel fetch = 3;
    AggregateRel aggregate = 4;
    SortRel sort = 5;
    JoinRel join = 6;
    ProjectRel project = 7;
    SetRel set = 8;
    ExtensionSingleRel extension_single = 9;
    ExtensionMultiRel extension_multi = 10;
    ExtensionLeafRel extension_leaf = 11;
    CrossRel cross = 12;

    // Physical relations
    HashJoinRel hash_join = 13;
    MergeJoinRel merge_join = 14;
    ExpandRel expand = 15;
    WindowRel window = 16;
    GenerateRel generate = 17;
    WriteRel write = 18;
    TopNRel top_n = 19;
    WindowGroupLimitRel windowGroupLimit = 20;
  }
}
// A base object for writing (e.g., a table or a view).
message NamedObjectWrite {
  // The list of strings represents namespacing (e.g., mydb.mytable). This
  // assumes a shared catalog between the systems exchanging a message.
  repeated string names = 1;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// Stub type that can be used to extend/introduce new table types outside the
// specification.
message ExtensionObject {
  google.protobuf.Any detail = 1;
}
// A DDL operation (see DdlOp) applied to a table or view (see DdlObject).
message DdlRel {
  // Which type of object is being operated on.
  oneof write_type {
    NamedObjectWrite named_object = 1;
    ExtensionObject extension_object = 2;
  }

  // The columns that will be modified (the after-image of a schema change).
  NamedStruct table_schema = 3;

  // The default values for the columns (the after-image of a schema change).
  // E.g., for an ALTER TABLE that changes some column default values, the
  // table_defaults Struct is expected to report the full list of default
  // values reflecting the result of applying the ALTER TABLE operator
  // successfully.
  Expression.Literal.Struct table_defaults = 4;

  // Which type of object we operate on.
  DdlObject object = 5;

  // The type of operation to perform.
  DdlOp op = 6;

  // The body of the CREATE VIEW.
  Rel view_definition = 7;

  enum DdlObject {
    DDL_OBJECT_UNSPECIFIED = 0;
    // A Table object in the system.
    DDL_OBJECT_TABLE = 1;
    // A View object in the system.
    DDL_OBJECT_VIEW = 2;
  }

  enum DdlOp {
    DDL_OP_UNSPECIFIED = 0;
    // A create operation (for any object).
    DDL_OP_CREATE = 1;
    // Creates the object if it does not exist, or replaces it (equivalent to
    // a DROP + CREATE) if it already exists.
    DDL_OP_CREATE_OR_REPLACE = 2;
    // Modifies the schema (e.g., column names, types, default values) of the
    // target object.
    DDL_OP_ALTER = 3;
    // Removes an object from the system.
    DDL_OP_DROP = 4;
    // Removes an object from the system, without throwing an exception if the
    // object did not exist.
    DDL_OP_DROP_IF_EXIST = 5;
  }

  //TODO add PK/constraints/indexes/etc..?
}
// The operator that modifies the content of a database. It operates on one
// table at a time, but tuple selection/source can be based on joining
// multiple tables.
message WriteRel {
  // Which TABLE is being operated on.
  oneof write_type {
    NamedObjectWrite named_table = 1;
    ExtensionObject extension_table = 2;
  }

  // The schema of the table. Must align with the Rel input (e.g., the number
  // of leaf fields must match).
  NamedStruct table_schema = 3;

  // The type of operation to perform.
  WriteOp op = 4;

  // The relation that determines the tuples to add/remove/modify. Its schema
  // must match table_schema. Default values must be explicitly stated in a
  // ProjectRel at the top of the input. The match must also occur in case of
  // DELETE to ensure multi-engine plans are unequivocal.
  Rel input = 5;

  // Determines what the output of executing this rel is.
  OutputMode output = 6;

  // The bucket spec for the writer.
  BucketSpec bucket_spec = 7;

  enum WriteOp {
    WRITE_OP_UNSPECIFIED = 0;
    // Insert new tuples into a table.
    WRITE_OP_INSERT = 1;
    // Remove tuples from a table.
    WRITE_OP_DELETE = 2;
    // Modify existing tuples within a table.
    WRITE_OP_UPDATE = 3;
    // Create a new table and insert new tuples into it.
    WRITE_OP_CTAS = 4;
  }

  enum OutputMode {
    OUTPUT_MODE_UNSPECIFIED = 0;
    // Return no tuples at all.
    OUTPUT_MODE_NO_OUTPUT = 1;
    // Return all the tuples INSERTED/DELETED/UPDATED by the operator. The
    // operator returns the AFTER-image of any change. This can be further
    // manipulated by upstream operators (e.g., returning the typical "count
    // of modified tuples").
    // For scenarios in which the BEFORE image is required, the user must
    // implement a spool (via references to subplans in the body of the Rel
    // input) and return those with another PlanRel.relations.
    OUTPUT_MODE_MODIFIED_TUPLES = 2;
  }

  // A container for bucketing information.
  message BucketSpec {
    int32 num_buckets = 1;
    repeated string bucket_column_names = 2;
    repeated string sort_column_names = 3;
  }
}
// The hash equijoin operator builds a hash table out of the right input based
// on a set of join keys. It then probes that hash table for incoming inputs,
// finding matches.
message HashJoinRel {
  RelCommon common = 1;
  Rel left = 2;
  Rel right = 3;
  repeated Expression.FieldReference left_keys = 4;
  repeated Expression.FieldReference right_keys = 5;
  Expression post_join_filter = 6;
  JoinType type = 7;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // Note: the numbering of SEMI/ANTI/SINGLE values here differs from
  // JoinRel.JoinType; the enums are not interchangeable by number.
  enum JoinType {
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_RIGHT_SEMI = 6;
    JOIN_TYPE_LEFT_ANTI = 7;
    JOIN_TYPE_RIGHT_ANTI = 8;
    JOIN_TYPE_LEFT_SINGLE = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
  }
}
// The merge equijoin performs a join by taking advantage of two sets that are
// sorted on the join keys. This allows the join operation to be done in a
// streaming fashion.
message MergeJoinRel {
  RelCommon common = 1;
  Rel left = 2;
  Rel right = 3;
  repeated Expression.FieldReference left_keys = 4;
  repeated Expression.FieldReference right_keys = 5;
  Expression post_join_filter = 6;
  JoinType type = 7;
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // Values 0-8 use the same names and numbers as HashJoinRel.JoinType; this
  // enum declares no *_SINGLE values.
  enum JoinType {
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_RIGHT_SEMI = 6;
    JOIN_TYPE_LEFT_ANTI = 7;
    JOIN_TYPE_RIGHT_ANTI = 8;
  }
}
// The argument of a function: exactly one of an enum option (as a string), a
// type, or a value expression.
message FunctionArgument {
  oneof arg_type {
    string enum = 1;
    Type type = 2;
    Expression value = 3;
  }
}
// An optional function argument. Typically used for specifying behavior in
// invalid or corner cases.
message FunctionOption {
  // Name of the option to set. If the consumer does not recognize the option,
  // it must reject the plan. The name is matched case-insensitively with the
  // option names defined for the function.
  string name = 1;

  // List of behavior options allowed by the producer. At least one must be
  // specified; to leave an option unspecified, simply don't add an entry to
  // `options`. The consumer must use the first option from the list that it
  // supports. If the consumer supports none of the specified options, it must
  // reject the plan. The name is matched case-insensitively and must match
  // one of the option values defined for the option.
  repeated string preference = 2;
}
message Expression {
oneof rex_type {
Literal literal = 1;
FieldReference selection = 2;
ScalarFunction scalar_function = 3;
WindowFunction window_function = 5;
IfThen if_then = 6;
SwitchExpression switch_expression = 7;
SingularOrList singular_or_list = 8;
MultiOrList multi_or_list = 9;
Cast cast = 11;
Subquery subquery = 12;
Nested nested = 13;
// deprecated: enum literals are only sensible in the context of
// function arguments, for which FunctionArgument should now be
// used
Enum enum = 10 [deprecated = true];
}
message Enum {
option deprecated = true;
oneof enum_kind {
string specified = 1;
Empty unspecified = 2;
}
message Empty {
option deprecated = true;
}
}
message Literal {
oneof literal_type {
bool boolean = 1;
int32 i8 = 2;
int32 i16 = 3;
int32 i32 = 5;
int64 i64 = 7;
float fp32 = 10;
double fp64 = 11;
string string = 12;
bytes binary = 13;
// Timestamp in units of microseconds since the UNIX epoch.
int64 timestamp = 14;
// Date in units of days since the UNIX epoch.
int32 date = 16;
// Time in units of microseconds past midnight
int64 time = 17;
IntervalYearToMonth interval_year_to_month = 19;
IntervalDayToSecond interval_day_to_second = 20;
string fixed_char = 21;
VarChar var_char = 22;
bytes fixed_binary = 23;
Decimal decimal = 24;
Struct struct = 25;
Map map = 26;
// Timestamp in units of microseconds since the UNIX epoch.
int64 timestamp_tz = 27;
bytes uuid = 28;
Type null = 29; // a typed null literal
List list = 30;
Type.List empty_list = 31;
Type.Map empty_map = 32;
UserDefined user_defined = 33;
}
// whether the literal type should be treated as a nullable type. Applies to
// all members of union other than the Typed null (which should directly
// declare nullability).
bool nullable = 50;
// optionally points to a type_variation_anchor defined in this plan.
// Applies to all members of union other than the Typed null (which should
// directly declare the type variation).
uint32 type_variation_reference = 51;
message VarChar {
string value = 1;
uint32 length = 2;
}
message Decimal {
// little-endian twos-complement integer representation of complete value
// (ignoring precision) Always 16 bytes in length
bytes value = 1;
// The maximum number of digits allowed in the value.
// the maximum precision is 38.
int32 precision = 2;
// declared scale of decimal literal
int32 scale = 3;
}
message Map {
message KeyValue {
Literal key = 1;
Literal value = 2;
}
repeated KeyValue key_values = 1;
}
message IntervalYearToMonth {
int32 years = 1;
int32 months = 2;
}
message IntervalDayToSecond {
int32 days = 1;
int32 seconds = 2;
int32 microseconds = 3;
}
message Struct {
// A possibly heterogeneously typed list of literals
repeated Literal fields = 1;
}
message List {
// A homogeneously typed list of literals
repeated Literal values = 1;
}
message UserDefined {
// points to a type_anchor defined in this plan
uint32 type_reference = 1;
// The parameters to be bound to the type class, if the type class is
// parameterizable.
repeated Type.Parameter type_parameters = 3;
// the value of the literal, serialized using some type-specific
// protobuf message
google.protobuf.Any value = 2;
}
}
// Expression to dynamically construct nested types.
message Nested {
// Whether the returned nested type is nullable.
bool nullable = 1;
// Optionally points to a type_variation_anchor defined in this plan for
// the returned nested type.
uint32 type_variation_reference = 2;
oneof nested_type {
Struct struct = 3;
List list = 4;
Map map = 5;
}
message Map {
message KeyValue {
// Mandatory key/value expressions.
Expression key = 1;
Expression value = 2;
}
// One or more key-value pairs. To specify an empty map, use
// Literal.empty_map (otherwise type information would be missing).
repeated KeyValue key_values = 1;
}
message Struct {
// Zero or more possibly heterogeneously-typed list of expressions that
// form the struct fields.
repeated Expression fields = 1;
}
message List {
// A homogeneously-typed list of one or more expressions that form the
// list entries. To specify an empty list, use Literal.empty_list
// (otherwise type information would be missing).
repeated Expression values = 1;
}
}
// A scalar function call.
message ScalarFunction {
// Points to a function_anchor defined in this plan, which must refer
// to a scalar function in the associated YAML file. Required; avoid
// using anchor/reference zero.
uint32 function_reference = 1;
// The arguments to be bound to the function. This must have exactly the
// number of arguments specified in the function definition, and the
// argument types must also match exactly:
//
// - Value arguments must be bound using FunctionArgument.value, and
// the expression in that must yield a value of a type that a function
// overload is defined for.
// - Type arguments must be bound using FunctionArgument.type.
// - Enum arguments must be bound using FunctionArgument.enum
// followed by Enum.specified, with a string that case-insensitively
// matches one of the allowed options.
repeated FunctionArgument arguments = 4;
// Options to specify behavior for corner cases, or leave behavior
// unspecified if the consumer does not need specific behavior in these
// cases.
repeated FunctionOption options = 5;
// Must be set to the return type of the function, exactly as derived
// using the declaration in the extension.
Type output_type = 3;
// Deprecated; use arguments instead.
repeated Expression args = 2 [deprecated = true];
}
// A window function call.
message WindowFunction {
// Points to a function_anchor defined in this plan, which must refer
// to a window function in the associated YAML file. Required; 0 is
// considered to be a valid anchor/reference.
uint32 function_reference = 1;
// The arguments to be bound to the function. This must have exactly the
// number of arguments specified in the function definition, and the
// argument types must also match exactly:
//
// - Value arguments must be bound using FunctionArgument.value, and
// the expression in that must yield a value of a type that a function
// overload is defined for.
// - Type arguments must be bound using FunctionArgument.type, and a
// function overload must be defined for that type.
// - Enum arguments must be bound using FunctionArgument.enum
// followed by Enum.specified, with a string that case-insensitively
// matches one of the allowed options.
repeated FunctionArgument arguments = 9;
// Options to specify behavior for corner cases, or leave behavior
// unspecified if the consumer does not need specific behavior in these
// cases.
repeated FunctionOption options = 11;
// Must be set to the return type of the function, exactly as derived
// using the declaration in the extension.
Type output_type = 7;
// Describes which part of the window function to perform within the
// context of distributed algorithms. Required. Must be set to
// INITIAL_TO_RESULT for window functions that are not decomposable.
AggregationPhase phase = 6;
// If specified, the records that are part of the window defined by
// upper_bound and lower_bound are ordered according to this list
// before they are aggregated. The first sort field has the highest
// priority; only if a sort field determines two records to be equivalent
// is the next field queried. This field is optional, and is only allowed
// if the window function is defined to support sorting.
repeated SortField sorts = 3;
// Specifies whether equivalent records are merged before being aggregated.
// Optional, defaults to AGGREGATION_INVOCATION_ALL.
AggregateFunction.AggregationInvocation invocation = 10;
// When one or more partition expressions are specified, two records are
// considered to be in the same partition if and only if these expressions
// yield an equal tuple of values for both. When computing the window
// function, only the subset of records within the bounds that are also in
// the same partition as the current record are aggregated.
repeated Expression partitions = 2;
// Defines the record relative to the current record from which the window
// extends. The bound is inclusive. If the lower bound indexes a record
// greater than the upper bound, TODO (null range/no records passed?
// wrapping around as if lower/upper were swapped? error? null?).
// Optional; defaults to the start of the partition.
Bound lower_bound = 5;
string column_name = 12;
WindowType window_type = 13;
// Defines the record relative to the current record up to which the window
// extends. The bound is inclusive. If the upper bound indexes a record
// less than the lower bound, TODO (null range/no records passed?
// wrapping around as if lower/upper were swapped? error? null?).
// Optional; defaults to the end of the partition.
Bound upper_bound = 4;
// Deprecated; use arguments instead.
repeated Expression args = 8 [deprecated = true];
// Defines one of the two boundaries for the window of a window function.
message Bound {
// Defines that the bound extends this far back from the current record.
message Preceding {
oneof kind {
// A strictly positive integer specifying the number of records that
// the window extends back from the current record. Use
// CurrentRow for offset zero and Following for negative offsets.
int64 offset = 1;
// the reference to pre-project range frame boundary.
Expression ref = 2;
}
}
// Defines that the bound extends this far ahead of the current record.
message Following {
oneof kind {
// A strictly positive integer specifying the number of records that
// the window extends ahead of the current record. Use
// CurrentRow for offset zero and Preceding for negative offsets.
int64 offset = 1;
// the reference to pre-project range frame boundary.
Expression ref = 2;
}
}
// Defines that the bound extends to or from the current record.
message CurrentRow {}
message Unbounded_Preceding {}
message Unbounded_Following {}
oneof kind {
// The bound extends some number of records behind the current record.
Preceding preceding = 1;
// The bound extends some number of records ahead of the current
// record.
Following following = 2;
// The bound extends to the current record.
CurrentRow current_row = 3;
Unbounded_Preceding unbounded_preceding = 4;
Unbounded_Following unbounded_following = 5;
}
}
}
message IfThen {
repeated IfClause ifs = 1;
Expression else = 2;
message IfClause {
Expression if = 1;
Expression then = 2;
}
}
message Cast {
Type type = 1;
Expression input = 2;
FailureBehavior failure_behavior = 3;
enum FailureBehavior {
FAILURE_BEHAVIOR_UNSPECIFIED = 0;
FAILURE_BEHAVIOR_RETURN_NULL = 1;
FAILURE_BEHAVIOR_THROW_EXCEPTION = 2;
}
}
message SwitchExpression {
Expression match = 3;
repeated IfValue ifs = 1;
Expression else = 2;
message IfValue {
Literal if = 1;
Expression then = 2;
}
}
message SingularOrList {
Expression value = 1;
repeated Expression options = 2;
}
message MultiOrList {
repeated Expression value = 1;
repeated Record options = 2;
message Record {
repeated Expression fields = 1;
}
}
message EmbeddedFunction {
repeated Expression arguments = 1;
Type output_type = 2;
oneof kind {
PythonPickleFunction python_pickle_function = 3;
WebAssemblyFunction web_assembly_function = 4;
}
message PythonPickleFunction {
bytes function = 1;
repeated string prerequisite = 2;
}
message WebAssemblyFunction {
bytes script = 1;
repeated string prerequisite = 2;
}
}
// Addresses one inner property of a complex record. A segment is one of: a
// map lookup by literal key, a struct field selected by its ordinal
// position, or a particular element of a list. Segments chain through their
// `child` fields, so a path roughly equivalent to
//   a.b[2].c['my_map_key'].x
// can be expressed, where a, b, c and x are struct field references
// (ordinalized in this representation), [2] is a list offset and
// ['my_map_key'] is a reference into a map field.
message ReferenceSegment {
  // Reference into a map.
  message MapKey {
    // Literal-based reference to a specific possible value in the map.
    Literal map_key = 1;
    // Optional child segment continuing the path.
    ReferenceSegment child = 2;
  }

  // Reference to a field of a struct.
  message StructField {
    // Zero-indexed ordinal position of the field in the struct.
    int32 field = 1;
    // Optional child segment continuing the path.
    ReferenceSegment child = 2;
  }

  // Reference to an element of a list.
  message ListElement {
    // Zero-indexed ordinal position of the element in the list.
    int32 offset = 1;
    // Optional child segment continuing the path.
    ReferenceSegment child = 2;
  }

  // The kind of step this segment takes; at most one member is set.
  oneof reference_type {
    MapKey map_key = 1;
    StructField struct_field = 2;
    ListElement list_element = 3;
  }
}
// A reference that starts from an existing subtype and selectively removes
// fields from it. For example, an inner struct may have 100 fields while a
// particular operation only needs to interact with 2 of them. A mask
// expression eliminates the 98 fields that are not relevant to the rest of
// the operation pipeline.
//
// Apart from eliminating unnecessary elements, this does not fundamentally
// alter the structure of the data.
message MaskExpression {
  // One selection step; which member is set determines whether the step
  // selects from a struct, a list or a map.
  message Select {
    oneof type {
      StructSelect struct = 1;
      ListSelect list = 2;
      MapSelect map = 3;
    }
  }

  // Selection of struct fields.
  message StructSelect {
    repeated StructItem struct_items = 1;
  }

  // One selected struct field plus an optional nested selection.
  message StructItem {
    // Field position in the struct.
    // NOTE(review): presumably a zero-indexed ordinal like
    // ReferenceSegment.StructField.field -- confirm.
    int32 field = 1;
    // Nested selection attached to the selected field.
    Select child = 2;
  }

  // Selection of list elements.
  message ListSelect {
    // One entry of a list selection: a single element or a slice.
    message ListSelectItem {
      // A single list element.
      message ListElement {
        // Position of the element (named `field`, but used for a list
        // element here).
        int32 field = 1;
      }

      // A range of list elements.
      // NOTE(review): bound inclusivity and handling of negative values are
      // not stated here -- confirm against the Substrait specification.
      message ListSlice {
        int32 start = 1;
        int32 end = 2;
      }

      oneof type {
        ListElement item = 1;
        ListSlice slice = 2;
      }
    }

    // The elements and/or slices to select.
    repeated ListSelectItem selection = 1;
    // Nested selection attached to this list selection.
    Select child = 2;
  }

  // Selection of map entries.
  message MapSelect {
    // Selection by a key given as a string.
    message MapKey {
      string map_key = 1;
    }

    // Selection by a key expression given as a string.
    // NOTE(review): the expression syntax is not specified here -- confirm.
    message MapKeyExpression {
      string map_key_expression = 1;
    }

    // How the map entries are chosen; at most one member is set.
    oneof select {
      MapKey key = 1;
      MapKeyExpression expression = 2;
    }
    // Nested selection attached to this map selection.
    Select child = 3;
  }

  // Top-level selection; a mask always starts with a struct selection.
  StructSelect select = 1;
  // NOTE(review): semantics are not documented here. Presumably controls
  // whether a struct left with a single field is kept as a struct --
  // confirm against the Substrait specification.
  bool maintain_singular_struct = 2;
}
// A reference to an inner part of a complex object. It can reference a
// single element or a masked version of elements.
message FieldReference {
  // Singleton expressing that this FieldReference is rooted off the root
  // incoming record type.
  message RootReference {}

  // A root reference for the outer relation's subquery.
  message OuterReference {
    // Number of subquery boundaries to traverse up for this field's
    // reference.
    //
    // This value must be >= 1.
    uint32 steps_out = 1;
  }

  // Whether this reference is composed of a single element reference or of
  // a masked element subtree.
  oneof reference_type {
    ReferenceSegment direct_reference = 1;
    MaskExpression masked_reference = 2;
  }

  // Whether this reference originates from a root struct or is based on the
  // output of an expression. When this is a RootReference and
  // direct_reference above is used, the direct_reference must be of type
  // StructField.
  oneof root_type {
    Expression expression = 3;
    RootReference root_reference = 4;
    OuterReference outer_reference = 5;
  }
}
// Subquery relation expression.
message Subquery {
  // A subquery with one row and one column. This is often an aggregate,
  // though it is not required to be.
  message Scalar {
    Rel input = 1;
  }

  // Predicate checking that the left expression is contained in the right
  // subquery.
  //
  // Examples:
  //
  //   x IN (SELECT * FROM t)
  //   (x, y) IN (SELECT a, b FROM t)
  message InPredicate {
    // Left-hand side expression(s); more than one for the tuple form shown
    // in the second example.
    repeated Expression needles = 1;
    // Right-hand side subquery that is searched.
    Rel haystack = 2;
  }

  // A predicate over a set of rows in the form of a subquery.
  // EXISTS and UNIQUE are common SQL forms of this operation.
  message SetPredicate {
    // TODO: should allow expressions
    PredicateOp predicate_op = 1;
    // The subquery supplying the rows the predicate is applied to.
    Rel tuples = 2;

    // Operation applied to the row set.
    enum PredicateOp {
      PREDICATE_OP_UNSPECIFIED = 0;
      PREDICATE_OP_EXISTS = 1;
      PREDICATE_OP_UNIQUE = 2;
    }
  }

  // A subquery comparison using ANY or ALL.
  // Examples:
  //
  //   SELECT *
  //   FROM t1
  //   WHERE x < ANY(SELECT y from t2)
  message SetComparison {
    // ANY or ALL.
    ReductionOp reduction_op = 1;
    // A comparison operator.
    ComparisonOp comparison_op = 2;
    // Left side of the expression.
    Expression left = 3;
    // Right side of the expression.
    Rel right = 4;

    // Comparison applied between `left` and the rows of `right`.
    enum ComparisonOp {
      COMPARISON_OP_UNSPECIFIED = 0;
      COMPARISON_OP_EQ = 1;
      COMPARISON_OP_NE = 2;
      COMPARISON_OP_LT = 3;
      COMPARISON_OP_GT = 4;
      COMPARISON_OP_LE = 5;
      COMPARISON_OP_GE = 6;
    }

    // Whether the comparison is reduced with ANY or with ALL.
    enum ReductionOp {
      REDUCTION_OP_UNSPECIFIED = 0;
      REDUCTION_OP_ANY = 1;
      REDUCTION_OP_ALL = 2;
    }
  }

  // The concrete subquery form; at most one member is set.
  oneof subquery_type {
    // Scalar subquery
    Scalar scalar = 1;
    // x IN y predicate
    InPredicate in_predicate = 2;
    // EXISTS/UNIQUE predicate
    SetPredicate set_predicate = 3;
    // ANY/ALL predicate
    SetComparison set_comparison = 4;
  }
}
}
// Relation holding an input relation and a generator expression.
// NOTE(review): this message carries no documentation in the schema; the
// field notes below are inferred from names only. Presumably it models a
// row-generating operator (e.g. explode / lateral view) -- confirm against
// the consumer.
message GenerateRel {
  // Fields common to all relational operators.
  RelCommon common = 1;
  // Input relation.
  Rel input = 2;
  // Generator expression applied to the input.
  Expression generator = 3;
  // Presumably the input expressions carried through to the output
  // alongside the generated values -- confirm.
  repeated Expression child_output = 4;
  // Presumably "outer" generate semantics (keep input rows for which the
  // generator yields nothing) -- confirm.
  bool outer = 5;
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}
// Describes one field to sort on, including the direction of sorting and
// the null semantics.
message SortField {
  // Built-in orderings. Each non-zero value except CLUSTERED names both the
  // direction (ASC/DESC) and the placement of nulls (FIRST/LAST).
  enum SortDirection {
    SORT_DIRECTION_UNSPECIFIED = 0;
    SORT_DIRECTION_ASC_NULLS_FIRST = 1;
    SORT_DIRECTION_ASC_NULLS_LAST = 2;
    SORT_DIRECTION_DESC_NULLS_FIRST = 3;
    SORT_DIRECTION_DESC_NULLS_LAST = 4;
    // NOTE(review): presumably "equal values grouped together, no
    // particular order between groups" -- confirm against the Substrait
    // specification.
    SORT_DIRECTION_CLUSTERED = 5;
  }

  // Expression producing the value to sort on.
  Expression expr = 1;
  // How values are ordered: a built-in direction or a reference to a
  // comparison function. At most one member is set.
  oneof sort_kind {
    SortDirection direction = 2;
    uint32 comparison_function_reference = 3;
  }
}
// Describes which part of an aggregation or window function to perform
// within the context of distributed algorithms.
enum AggregationPhase {
  // Implies `INTERMEDIATE_TO_RESULT`.
  AGGREGATION_PHASE_UNSPECIFIED = 0;

  // Run the function only up to the point of generating an intermediate
  // value. That value is to be aggregated further, later on, using
  // INTERMEDIATE_TO_INTERMEDIATE or INTERMEDIATE_TO_RESULT.
  AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE = 1;

  // The inputs of the aggregate or window function are intermediate values
  // of the function, and the output is an intermediate value as well. It is
  // to be aggregated further, later on, using INTERMEDIATE_TO_INTERMEDIATE
  // or INTERMEDIATE_TO_RESULT.
  AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE = 2;

  // A complete invocation: the function aggregates the given set of inputs
  // to yield a single return value. This style must be used for aggregate
  // or window functions that are not decomposable.
  AGGREGATION_PHASE_INITIAL_TO_RESULT = 3;

  // The inputs of the aggregate or window function are intermediate values
  // of the function, generated previously using INITIAL_TO_INTERMEDIATE and
  // possibly INTERMEDIATE_TO_INTERMEDIATE calls. This call combines the
  // intermediate values to yield the final return value.
  AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT = 4;
}
// Type of a window; presumably selects between row-based and range-based
// window frames (inferred from the value names -- confirm).
//
// NOTE(review): unlike the other enums in this file, the values carry no
// WINDOW_TYPE_ prefix and there is no *_UNSPECIFIED zero value, so an unset
// field cannot be told apart from ROWS. Renaming or renumbering would break
// generated code and existing plans, so the definition is left as is.
enum WindowType {
  // Zero value, hence what a proto3 reader sees when the field is not set.
  ROWS = 0;
  RANGE = 1;
}
// An aggregate function.
message AggregateFunction {
  // Method in which equivalent records are merged before being aggregated.
  enum AggregationInvocation {
    // This default value implies AGGREGATION_INVOCATION_ALL.
    AGGREGATION_INVOCATION_UNSPECIFIED = 0;

    // Use all values in the aggregation calculation.
    AGGREGATION_INVOCATION_ALL = 1;

    // Use only distinct values in the aggregation calculation.
    AGGREGATION_INVOCATION_DISTINCT = 2;
  }

  // This rel is used to create references; in case we refer to a RelRoot,
  // field names will be ignored.
  // NOTE(review): this message is declared inside AggregateFunction although
  // its comment describes a relation. Moving it would change its fully
  // qualified name, so it is kept where it is.
  message ReferenceRel {
    int32 subtree_ordinal = 1;
  }

  // Points to a function_anchor defined in this plan, which must refer to
  // an aggregate function in the associated YAML file. Required; 0 is
  // considered to be a valid anchor/reference.
  uint32 function_reference = 1;

  // The arguments to be bound to the function. There must be exactly as
  // many arguments as the function definition specifies, and the argument
  // types must also match exactly:
  //
  //  - Value arguments must be bound using FunctionArgument.value, and the
  //    expression in that must yield a value of a type that a function
  //    overload is defined for.
  //  - Type arguments must be bound using FunctionArgument.type, and a
  //    function overload must be defined for that type.
  //  - Enum arguments must be bound using FunctionArgument.enum followed by
  //    Enum.specified, with a string that case-insensitively matches one of
  //    the allowed options.
  //  - Optional enum arguments must be bound using FunctionArgument.enum
  //    followed by either Enum.specified or Enum.unspecified. If specified,
  //    the string must case-insensitively match one of the allowed options.
  repeated FunctionArgument arguments = 7;

  // Options to specify behavior for corner cases, or to leave behavior
  // unspecified if the consumer does not need specific behavior in these
  // cases.
  repeated FunctionOption options = 8;

  // Must be set to the return type of the function, exactly as derived
  // using the declaration in the extension.
  Type output_type = 5;

  // Describes which part of the aggregation to perform within the context
  // of distributed algorithms. Required. Must be set to INITIAL_TO_RESULT
  // for aggregate functions that are not decomposable.
  AggregationPhase phase = 4;

  // If specified, the aggregated records are ordered according to this list
  // before they are aggregated. The first sort field has the highest
  // priority; the next field is consulted only when a sort field determines
  // two records to be equivalent. This field is optional.
  repeated SortField sorts = 3;

  // Specifies whether equivalent records are merged before being
  // aggregated. Optional, defaults to AGGREGATION_INVOCATION_ALL.
  AggregationInvocation invocation = 6;

  // Deprecated; use `arguments` instead.
  repeated Expression args = 2 [deprecated = true];
}