flex/utils/proto/physical.proto (313 lines of code) (raw):

/**
 * Copyright 2020 Alibaba Group Holding Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";
package physical;
option java_package = "com.alibaba.graphscope.gaia.proto";
option java_outer_classname = "GraphAlgebraPhysical";

import "common.proto";
import "expr.proto";
import "schema.proto";
import "type.proto";
import "algebra.proto";
import "stored_procedure.proto";
import "google/protobuf/wrappers.proto";
import "cypher_write.proto";

// To project a relation on certain attributes or further their properties
message Project {
  // One [expression -> alias] pair of the projection.
  message ExprAlias {
    // The expression to be aliased
    common.Expression expr = 1;
    // The alias that is either given by the query or by the system
    google.protobuf.Int32Value alias = 2;
  }
  // A list of [expression -> alias] to be projected from the graph relation.
  // e.g., project_{a.birthday / 100 % 100 as birth_month }
  repeated ExprAlias mappings = 1;
  // An indicator to tell the runtime whether the projected value is appended to, or replaces,
  // the existing relation.
  bool is_append = 2;
}

// Groups a relation by a list of keys and applies aggregate functions to each group.
message GroupBy {
  // To define the function to be applied on each group after performing grouping. For example,
  // GROUPBY({a, b}, COUNT_DISTINCT(c, d) as e), one shall count the distinct occurrences of the
  // pairs (c, d), and alias the count as e.
  message AggFunc {
    // The kinds of aggregate functions that can be requested.
    enum Aggregate {
      SUM = 0;
      MIN = 1;
      MAX = 2;
      COUNT = 3;
      COUNT_DISTINCT = 4;
      TO_LIST = 5;
      TO_SET = 6;
      AVG = 7;
      FIRST = 8;
    }
    // The variables to apply this aggregation
    repeated common.Variable vars = 1;
    // The aggregate function
    Aggregate aggregate = 2;
    // The alias for the aggregated value
    google.protobuf.Int32Value alias = 3;
  }
  // A grouping key together with the alias of that key.
  message KeyAlias {
    // The key to perform grouping
    common.Variable key = 1;
    // The alias for the key
    google.protobuf.Int32Value alias = 2;
  }
  // A collection of key-alias mappings
  repeated KeyAlias mappings = 1;
  // The grouping functions
  repeated AggFunc functions = 2;
}

// Unfolds the elements of a collection-typed data item.
message Unfold {
  // The tag of a graph relation that **must** refer to a data of collection type
  google.protobuf.Int32Value tag = 1;
  // The alias tag for the elements that are unfolded from the collection
  google.protobuf.Int32Value alias = 2;
}

// Carries a list of sub-plans.
// NOTE(review): presumably the outputs of all sub-plans are unioned; confirm against the runtime.
message Union {
  repeated PhysicalPlan sub_plans = 1;
}

// Carries a list of sub-plans and an integer key.
// NOTE(review): presumably the sub-plan outputs are intersected on the item identified by `key`
// (likely a tag/alias id); confirm against the runtime.
message Intersect {
  repeated PhysicalPlan sub_plans = 1;
  int32 key = 2;
}

// Joins the results of two plans on the given keys.
message Join {
  enum JoinKind {
    // Inner join
    INNER = 0;
    // Left outer join
    LEFT_OUTER = 1;
    // Right outer join
    RIGHT_OUTER = 2;
    // Full outer join
    FULL_OUTER = 3;
    // Left semi-join, right alternative can be naturally adapted
    SEMI = 4;
    // Left anti-join, right alternative can be naturally adapted
    ANTI = 5;
    // aka. Cartesian product
    TIMES = 6;
  }
  // The key to perform Join (on results output by left_plan)
  repeated common.Variable left_keys = 1;
  // The key to perform Join (on results output by right_plan)
  repeated common.Variable right_keys = 2;
  // The kind of join to perform
  JoinKind join_kind = 3;
  // The plan producing the left side of the join
  PhysicalPlan left_plan = 4;
  // The plan producing the right side of the join
  PhysicalPlan right_plan = 5;
}

// Applies a sub-plan (subtask) and joins its result back to the original relation.
message Apply {
  // Define how to join the result of the subtask back to the original relation
  Join.JoinKind join_kind = 1;
  // The variables to perform grouping, or segmenting.
  // If not specified, it is an Apply; otherwise, it is a SegmentApply
  repeated common.Variable keys = 2;
  // The subtask, which is performed on certain tags as keys of the input relation
  PhysicalPlan sub_plan = 3;
  // Define the alias of output from `Apply`. Optional Field.
  google.protobuf.Int32Value alias = 4;
}

// Scan is an operator that transforms the source data format (defined by the database)
// into internal data format (defined/used by runtime)
message Scan {
  enum ScanOpt {
    // Scanning the vertices
    VERTEX = 0;
    // Scanning the edges
    EDGE = 1;
    // Scanning a relational table
    TABLE = 2;
  }
  // What kind of data to scan
  ScanOpt scan_opt = 1;
  // The alias of the item if it is a vertex or edge
  google.protobuf.Int32Value alias = 2;
  // The required parameters for the scan
  algebra.QueryParams params = 3;
  // The optional filtering predicate for the fields that have been indexed
  algebra.IndexPredicate idx_predicate = 4;
  // The flag that indicates to SCAN + COUNT
  bool is_count_only = 5;
}

// It is typical to use the operator when:
// 1. The input entry is an edge entry, and to obtain some properties (or filter) on the adjacent vertex of the input edge
// 2. The input entry is a vertex entry, and directly obtain some properties (or filter) on the input vertex
message GetV {
  enum VOpt {
    // The case when getting the start vertex of the edge
    START = 0;
    // The case when getting the end vertex of the edge/path
    END = 1;
    // The case when getting the other vertex of the edge.
    // We are calibrating to Gremlin's bothE.otherV semantics
    OTHER = 2;
    // The case when getting both vertices of the edge
    BOTH = 3;
    // The case when tag refers to vertices
    ITSELF = 4;
  }
  // The tag that refers to the edge/path where the end vertex will be retrieved
  google.protobuf.Int32Value tag = 1;
  // Determine what vertex to get from the tagged object
  VOpt opt = 2;
  // The query parameters of getting vertices
  algebra.QueryParams params = 3;
  // The alias of this vertex
  google.protobuf.Int32Value alias = 4;
}

// Expands from a vertex along its adjacent edges.
message EdgeExpand {
  // The direction in which edges are expanded.
  enum Direction {
    OUT = 0;
    IN = 1;
    BOTH = 2;
  }
  // What the expansion produces: vertices, edges, or a degree.
  enum ExpandOpt {
    VERTEX = 0;
    EDGE = 1;
    DEGREE = 2;
  }
  // The tag that refers to the starting vertex
  google.protobuf.Int32Value v_tag = 1;
  // The direction of the expanded edge
  Direction direction = 2;
  // The query parameters define the condition that the edge/end vertex must satisfy.
  // Note that whether the query parameters apply to the edge or end vertex depends on
  // the expand option (`expand_opt`), with one exception that the `tables` parameter in
  // `EdgeExpand` **always** applies to the edge for now (may be fixed later).
  // NOTE(review): this comment used to refer to an `is_edge` indicator, which is not a field
  // of this message; `expand_opt` is presumably what was meant - confirm.
  algebra.QueryParams params = 3;
  // An optional alias for the object of the expansion
  google.protobuf.Int32Value alias = 4;
  // Expand option, i.e., expand vertices/edges/degree.
  ExpandOpt expand_opt = 5;
  // Whether the expand is optional, if true, the expand will return a `None` if the edge does not exist
  bool is_optional = 6;
}

// Expands a multi-hop path starting from a vertex.
message PathExpand {
  // The expand base of PathExpand
  message ExpandBase {
    // Can either be a single EdgeExpand (with ExpandOpt = Vertex), or EdgeExpand (with ExpandOpt = Edge) + GetV
    EdgeExpand edge_expand = 1;
    GetV get_V = 2;
  }
  enum PathOpt {
    // an arbitrary path, in which both vertex/edge may duplicate
    ARBITRARY = 0;
    // a path without vertex duplications
    SIMPLE = 1;
    // a path without edge duplications
    TRAIL = 2;
    // The two shortest options below are defined according to the GQL standard.
    // Selects one path with shortest length, Non-deterministic.
    ANY_SHORTEST = 3;
    // Selects all paths that have the minimal length. Deterministic.
    ALL_SHORTEST = 4;
  }
  // Define what result is required for this path: the end vertex only (`END_V`), all vertices
  // (`ALL_V`), or all vertices and edges (`ALL_V_E`).
  enum ResultOpt {
    // only end vertex is required for this expansion
    END_V = 0;
    // all vertices of this path are required for this expansion.
    ALL_V = 1;
    // all vertices and edges of this path are required for this expansion.
    ALL_V_E = 2;
  }
  // A path expansion has a base of edge expansion
  ExpandBase base = 1;
  // The tag that refers to the starting vertex
  google.protobuf.Int32Value start_tag = 2;
  // An optional alias for the object of the expansion
  google.protobuf.Int32Value alias = 3;
  // The range that defines the minimum and maximum number of hops of the path expansion.
  // This is optional, if not set, the minimum number is 1, and the maximum is INT_MAX
  algebra.Range hop_range = 4;
  // Path option, including to expand an arbitrary path or a simple path
  PathOpt path_opt = 5;
  // Result option, i.e., which of the `ResultOpt` values is required for this expansion
  ResultOpt result_opt = 6;
  // A condition formulated as an expression predicate
  common.Expression condition = 7;
  // Whether the path expand is optional, if true, the path expand will return a `None` if the path does not exist
  bool is_optional = 8;
}

// Sinks (outputs) selected columns to a target.
message Sink {
  // Wraps the tag of one column to sink.
  message OptTag {
    google.protobuf.Int32Value tag = 1;
  }
  // Define the tags of columns to sink. If no tags given, sink all **tagged** columns by default.
  repeated OptTag tags = 1;
  // Define the target of sink, e.g., to Client as default, to Graph such as Vineyard etc.
  algebra.Sink.SinkTarget sink_target = 2;
}

// Selects how data is redistributed: exactly one of the strategies below is set.
message Repartition {
  // Strategy carrying a shuffle key.
  // NOTE(review): presumably data is routed by the item tagged with `shuffle_key`; confirm.
  message Shuffle {
    google.protobuf.Int32Value shuffle_key = 1;
  }
  // Strategy without parameters.
  message Broadcast {}
  oneof Strategy {
    Shuffle to_another = 1;
    Broadcast to_others = 2;
  }
}

// A dummy node to delegate a source opr for multiple scan cases.
message Root {}

// Wraps the query of a stored procedure to call.
message ProcedureCall {
  procedure.Query query = 1;
}

// A single node of a physical plan: one operator plus the meta data of its outputs.
message PhysicalOpr {
  // The operator carried by this node; exactly one member of `op_kind` is set.
  message Operator {
    oneof op_kind {
      Project project = 1;
      algebra.Select select = 2;
      GroupBy group_by = 3;
      algebra.OrderBy order_by = 4;
      algebra.Dedup dedup = 5;
      Unfold unfold = 6;
      algebra.Limit limit = 7;
      Scan scan = 8;
      Sink sink = 9;
      Apply apply = 10;
      Join join = 11;
      Union union = 12;
      Intersect intersect = 13;
      Repartition repartition = 14;
      // NOTE(review): number 15 is not assigned. If it belonged to a removed field, consider
      // adding `reserved 15;` to the enclosing `Operator` message - confirm the history first.
      Root root = 16;
      algebra.Sample sample = 17;
      // Saving the room for relational operators (numbers 18-29 are left unassigned)
      GetV vertex = 30;
      EdgeExpand edge = 31;
      PathExpand path = 32;
      ProcedureCall procedure_call = 33;
      // write operators
      cypher.Set set = 34;
      cypher.Load load = 35;
      cypher.Delete delete = 36;
    }
  }
  // Describes one output of the operator: its data type and an integer alias.
  message MetaData {
    common.IrDataType type = 1;
    int32 alias = 2;
  }
  // The node's operator
  Operator opr = 1;
  // The meta data of outputs of operator
  repeated MetaData meta_data = 2;
}

// A physical plan: an identifier plus the list of operator nodes that make up the plan.
message PhysicalPlan {
  int32 plan_id = 1;
  repeated PhysicalOpr plan = 2;
}