// lib/protocol/tablestore_search.proto (849 lines of code) (raw)
syntax = "proto2";
package search.proto;
// Type tag stored in Aggregation.type and AggregationResult.type.
// Numbering starts at 1; no zero value is declared.
enum AggregationType {
  AGG_AVG = 1;
  AGG_MAX = 2;
  AGG_MIN = 3;
  AGG_SUM = 4;
  AGG_COUNT = 5;
  AGG_DISTINCT_COUNT = 6;
  AGG_TOP_ROWS = 7;
  AGG_PERCENTILES = 8;
}
// Type tag stored in GroupBy.type and GroupByResult.type.
// Numbering starts at 1; no zero value is declared.
enum GroupByType {
  GROUP_BY_FIELD = 1;
  GROUP_BY_RANGE = 2;
  GROUP_BY_FILTER = 3;
  GROUP_BY_GEO_DISTANCE = 4;
  GROUP_BY_HISTOGRAM = 5;
  GROUP_BY_DATE_HISTOGRAM = 6;
  GROUP_BY_GEO_GRID = 7;
  GROUP_BY_COMPOSITE = 8;
}
// agg & group by
// Envelope for a single aggregation: a name, a type tag and an opaque body.
// NOTE(review): `body` presumably carries the serialized XxxAggregation
// message selected by `type` — confirm against the encoder.
message Aggregation {
  optional string name = 1;
  optional AggregationType type = 2;
  optional bytes body = 3;
}
// List wrapper around zero or more Aggregation entries.
message Aggregations {
  repeated Aggregation aggs = 1;
}
// Envelope for a single group-by: a name, a type tag and an opaque body.
// NOTE(review): `body` presumably carries the serialized GroupByXxx message
// selected by `type` — confirm against the encoder.
message GroupBy {
  optional string name = 1;
  optional GroupByType type = 2;
  optional bytes body = 3;
}
// List wrapper around zero or more GroupBy entries.
message GroupBys {
  repeated GroupBy group_bys = 1;
}
// single agg
// Settings for an average aggregation (presumably paired with AGG_AVG).
message AvgAggregation {
  optional string field_name = 1;
  // Encoded as SQLVariant, check in SearchProxy.
  optional bytes missing = 2;
}
// Settings for a max aggregation (presumably paired with AGG_MAX).
message MaxAggregation {
  optional string field_name = 1;
  // NOTE(review): encoding not stated here; likely SQLVariant as noted on
  // AvgAggregation.missing — confirm.
  optional bytes missing = 2;
}
// Settings for a min aggregation (presumably paired with AGG_MIN).
message MinAggregation {
  optional string field_name = 1;
  // NOTE(review): encoding not stated here; likely SQLVariant as noted on
  // AvgAggregation.missing — confirm.
  optional bytes missing = 2;
}
// Settings for a sum aggregation (presumably paired with AGG_SUM).
message SumAggregation {
  optional string field_name = 1;
  // NOTE(review): encoding not stated here; likely SQLVariant as noted on
  // AvgAggregation.missing — confirm.
  optional bytes missing = 2;
}
// Settings for a count aggregation (presumably paired with AGG_COUNT).
// Unlike the other single-field aggregations it declares no `missing` field.
message CountAggregation {
  optional string field_name = 1;
}
// Settings for a distinct-count aggregation (presumably paired with
// AGG_DISTINCT_COUNT).
message DistinctCountAggregation {
  optional string field_name = 1;
  // NOTE(review): encoding not stated here; likely SQLVariant as noted on
  // AvgAggregation.missing — confirm.
  optional bytes missing = 2;
}
// Settings for a top-rows aggregation (presumably paired with AGG_TOP_ROWS):
// a row limit plus a Sort specification.
message TopRowsAggregation {
  optional int32 limit = 1;
  optional Sort sort = 2;
}
// Settings for a percentiles aggregation (presumably paired with
// AGG_PERCENTILES).
message PercentilesAggregation {
  optional string field_name = 1;
  // Requested percentile points.
  // NOTE(review): valid range (0-100 vs 0-1) is not shown here — confirm.
  repeated double percentiles = 2;
  // NOTE(review): encoding not stated here; likely SQLVariant as noted on
  // AvgAggregation.missing — confirm.
  optional bytes missing = 3;
}
// group agg
// A min/max pair of opaque byte values, used as `field_range` by the
// histogram group-bys.
// NOTE(review): the byte encoding is not documented in this file.
message FieldRange {
  optional bytes min = 1;
  optional bytes max = 2;
}
// Settings for a date-histogram group-by (presumably paired with
// GROUP_BY_DATE_HISTOGRAM). Interval and offset are DateTimeValue
// (value + unit) pairs.
message GroupByDateHistogram {
  optional string field_name = 1;
  optional DateTimeValue interval = 2;
  optional FieldRange field_range = 3;
  optional bytes missing = 4;
  optional int64 min_doc_count = 5;
  optional string time_zone = 6;
  optional GroupBySort sort = 7;
  // Nested aggregations / group-bys attached to this group-by.
  optional Aggregations sub_aggs = 8;
  optional GroupBys sub_group_bys = 9;
  optional DateTimeValue offset = 10;
}
// Settings for a numeric histogram group-by (presumably paired with
// GROUP_BY_HISTOGRAM). Interval, missing and offset are opaque bytes.
// NOTE(review): their encoding is not documented in this file.
message GroupByHistogram {
  optional string field_name = 1;
  optional bytes interval = 2;
  optional bytes missing = 3;
  optional int64 min_doc_count = 4;
  optional GroupBySort sort = 5;
  optional FieldRange field_range = 6;
  // Nested aggregations / group-bys attached to this group-by.
  optional Aggregations sub_aggs = 7;
  optional GroupBys sub_group_bys = 8;
  optional bytes offset = 9;
}
// Sort variant carrying only a direction; used as
// GroupBySorter.group_key_sort.
message GroupKeySort {
  optional SortOrder order = 1;
}
// Sort variant carrying only a direction; used as
// GroupBySorter.row_count_sort.
message RowCountSort {
  optional SortOrder order = 1;
}
// Sort variant naming a sub-aggregation plus a direction; used as
// GroupBySorter.sub_agg_sort.
message SubAggSort {
  optional string sub_agg_name = 1;
  optional SortOrder order = 2;
}
// One sort criterion for group-by buckets, offered as three optional
// variants.
// NOTE(review): presumably exactly one variant is set per sorter; the schema
// itself does not enforce this (no oneof) — confirm.
message GroupBySorter {
  optional GroupKeySort group_key_sort = 1;
  optional RowCountSort row_count_sort = 2;
  optional SubAggSort sub_agg_sort = 3;
}
// List of GroupBySorter criteria.
message GroupBySort {
  repeated GroupBySorter sorters = 1;
}
// Settings for a composite group-by (presumably paired with
// GROUP_BY_COMPOSITE): several source group-bys combined, with a paging token.
// Field number 3 is not used by this definition.
message GroupByComposite {
  optional GroupBys sources = 1;
  optional int32 size = 2;
  optional Aggregations sub_aggs = 4;
  optional GroupBys sub_group_bys = 5;
  optional string next_token = 6;
}
// One entry of a composite group-by result: a list of keys, a row count and
// nested results.
message GroupByCompositeResultItem {
  repeated string keys = 1;
  optional int64 row_count = 2;
  optional AggregationsResult sub_aggs_result = 3;
  optional GroupBysResult sub_group_bys_result = 4;
}
// Result of a composite group-by: the items, the names of the source
// group-bys, and a token.
// NOTE(review): `next_token` is presumably fed back via
// GroupByComposite.next_token to fetch the next page — confirm.
message GroupByCompositeResult {
  repeated GroupByCompositeResultItem group_by_composite_result_items = 1;
  repeated string source_group_by_names = 2;
  optional string next_token = 3;
}
// Settings for a field group-by (presumably paired with GROUP_BY_FIELD).
message GroupByField {
  optional string field_name = 1;
  optional int32 size = 2;
  optional GroupBySort sort = 3;
  // Nested aggregations / group-bys attached to this group-by.
  optional Aggregations sub_aggs = 4;
  optional GroupBys sub_group_bys = 5;
  optional int64 min_doc_count = 6;
}
// A numeric interval given as two doubles; used by GroupByRange and
// GroupByGeoDistance.
// NOTE(review): inclusive/exclusive bounds are not specified here.
message Range {
  optional double from = 1;
  optional double to = 2;
}
// Settings for a range group-by (presumably paired with GROUP_BY_RANGE):
// a field plus a list of Range intervals.
message GroupByRange {
  optional string field_name = 1;
  repeated Range ranges = 2;
  optional Aggregations sub_aggs = 3;
  optional GroupBys sub_group_bys = 4;
}
// Settings for a filter group-by (presumably paired with GROUP_BY_FILTER):
// a list of Query filters.
message GroupByFilter {
  repeated Query filters = 1;
  optional Aggregations sub_aggs = 2;
  optional GroupBys sub_group_bys = 3;
}
// A geographic point as a latitude/longitude pair of doubles.
message GeoPoint {
  optional double lat = 1;
  optional double lon = 2;
}
// Settings for a geo-distance group-by (presumably paired with
// GROUP_BY_GEO_DISTANCE): an origin point and a list of Range intervals.
// NOTE(review): the distance unit of `ranges` is not stated here.
message GroupByGeoDistance {
  optional string field_name = 1;
  optional GeoPoint origin = 2;
  repeated Range ranges = 3;
  optional Aggregations sub_aggs = 4;
  optional GroupBys sub_group_bys = 5;
}
// Settings for a geo-grid group-by (presumably paired with
// GROUP_BY_GEO_GRID).
message GroupByGeoGrid {
  optional string field_name = 1;
  // meter
  // NOTE(review): the original note says "meter", yet GeoHashPrecision value
  // names use KM/M/CM/MM — confirm what the note refers to.
  optional GeoHashPrecision precision = 2;
  optional int32 size = 3;
  optional Aggregations sub_aggs = 4;
  optional GroupBys sub_group_bys = 5;
}
// Precision levels 1-12 for GroupByGeoGrid.precision.
// NOTE(review): each name appears to encode the approximate cell dimensions
// followed by the level number (e.g. 156KM x 156KM at level 3) — confirm.
enum GeoHashPrecision {
  GHP_5009KM_4992KM_1 = 1;
  GHP_1252KM_624KM_2 = 2;
  GHP_156KM_156KM_3 = 3;
  GHP_39KM_19KM_4 = 4;
  GHP_4900M_4900M_5 = 5;
  GHP_1200M_609M_6 = 6;
  GHP_152M_152M_7 = 7;
  GHP_38M_19M_8 = 8;
  GHP_480CM_480CM_9 = 9;
  GHP_120CM_595MM_10 = 10;
  GHP_149MM_149MM_11 = 11;
  GHP_37MM_19MM_12 = 12;
}
// A rectangular cell described by its top-left and bottom-right GeoPoint
// corners; returned in GroupByGeoGridResultItem.geo_grid.
message GeoGrid {
  optional GeoPoint top_left = 1;
  optional GeoPoint bottom_right = 2;
}
//single agg result
// Result payload of an average aggregation: a single double.
message AvgAggregationResult {
  optional double value = 1;
}
// Result payload of a top-rows aggregation: a list of opaque row blobs.
// NOTE(review): the row encoding is not documented in this file.
message TopRowsAggregationResult {
  repeated bytes rows = 1;
}
// One percentile entry: a double key and an opaque value.
// NOTE(review): `key` is presumably the requested percentile and `value` the
// encoded field value at that percentile — confirm.
message PercentilesAggregationItem {
  optional double key = 1;
  optional bytes value = 2;
}
// Result payload of a percentiles aggregation: a list of
// PercentilesAggregationItem entries.
message PercentilesAggregationResult {
  repeated PercentilesAggregationItem percentiles_aggregation_items = 1;
}
// Result payload of a distinct-count aggregation: a single int64.
message DistinctCountAggregationResult {
  optional int64 value = 1;
}
// Result payload of a max aggregation: a single double.
message MaxAggregationResult {
  optional double value = 1;
}
// Result payload of a min aggregation: a single double.
message MinAggregationResult {
  optional double value = 1;
}
// Result payload of a sum aggregation: a single double.
message SumAggregationResult {
  optional double value = 1;
}
// Result payload of a count aggregation: a single int64.
message CountAggregationResult {
  optional int64 value = 1;
}
// Envelope for one aggregation result: name, type tag and an opaque payload.
message AggregationResult {
  optional string name = 1;
  optional AggregationType type = 2;
  // Encoded by XxxAggregationResult.
  optional bytes agg_result = 3;
}
// List wrapper around zero or more AggregationResult entries.
message AggregationsResult {
  repeated AggregationResult agg_results = 1;
}
//group by result
// One entry of a field group-by result: a string key, a row count and
// nested results.
message GroupByFieldResultItem {
  optional string key = 1;
  optional int64 row_count = 2;
  optional AggregationsResult sub_aggs_result = 3;
  optional GroupBysResult sub_group_bys_result = 4;
}
// Result payload of a field group-by: a list of GroupByFieldResultItem.
message GroupByFieldResult {
  repeated GroupByFieldResultItem group_by_field_result_items = 1;
}
// One entry of a range group-by result: the from/to bounds, a row count and
// nested results.
message GroupByRangeResultItem {
  optional double from = 1;
  optional double to = 2;
  optional int64 row_count = 3;
  optional AggregationsResult sub_aggs_result = 4;
  optional GroupBysResult sub_group_bys_result = 5;
}
// Result payload of a range group-by: a list of GroupByRangeResultItem.
message GroupByRangeResult {
  repeated GroupByRangeResultItem group_by_range_result_items = 1;
}
// One entry of a geo-distance group-by result: the from/to bounds, a row
// count and nested results.
message GroupByGeoDistanceResultItem {
  optional double from = 1;
  optional double to = 2;
  optional int64 row_count = 3;
  optional AggregationsResult sub_aggs_result = 4;
  optional GroupBysResult sub_group_bys_result = 5;
}
// Result payload of a geo-distance group-by: a list of
// GroupByGeoDistanceResultItem.
message GroupByGeoDistanceResult {
  repeated GroupByGeoDistanceResultItem group_by_geo_distance_result_items = 1;
}
// One entry of a filter group-by result: a row count and nested results.
// NOTE(review): items presumably line up positionally with
// GroupByFilter.filters, since no key is carried — confirm.
message GroupByFilterResultItem {
  optional int64 row_count = 1;
  optional AggregationsResult sub_aggs_result = 2;
  optional GroupBysResult sub_group_bys_result = 3;
}
// Result payload of a filter group-by: a list of GroupByFilterResultItem.
message GroupByFilterResult {
  repeated GroupByFilterResultItem group_by_filter_result_items = 1;
}
// Envelope for one group-by result: name, type tag and an opaque payload.
message GroupByResult {
  optional string name = 1;
  optional GroupByType type = 2;
  // Encoded by XxxGroupByResult.
  optional bytes group_by_result = 3;
}
// List wrapper around zero or more GroupByResult entries.
message GroupBysResult {
  // Sibling pipeline / parallel group bys.
  repeated GroupByResult group_by_results = 1;
}
// One entry of a histogram group-by result: an opaque key, an int64 value
// and nested results.
// NOTE(review): `value` presumably holds the bucket's row count (sibling
// item types name the equivalent field `row_count`) — confirm.
message GroupByHistogramItem {
  optional bytes key = 1;
  optional int64 value = 2;
  optional AggregationsResult sub_aggs_result = 3;
  optional GroupBysResult sub_group_bys_result = 4;
}
// Result payload of a histogram group-by: a list of GroupByHistogramItem.
message GroupByHistogramResult {
  repeated GroupByHistogramItem group_by_histogram_items = 1;
}
// One entry of a date-histogram group-by result: an int64 timestamp, a row
// count and nested results.
// NOTE(review): the timestamp unit/epoch is not stated in this file.
message GroupByDateHistogramItem {
  optional int64 timestamp = 1;
  optional int64 row_count = 2;
  optional AggregationsResult sub_aggs_result = 3;
  optional GroupBysResult sub_group_bys_result = 4;
}
// Result payload of a date-histogram group-by: a list of
// GroupByDateHistogramItem.
message GroupByDateHistogramResult {
  repeated GroupByDateHistogramItem group_by_date_histogram_items = 1;
}
// One entry of a geo-grid group-by result: a string key, the cell's GeoGrid
// bounds, a row count and nested results.
message GroupByGeoGridResultItem {
  optional string key = 1;
  optional GeoGrid geo_grid = 2;
  optional int64 row_count = 3;
  optional AggregationsResult sub_aggs_result = 4;
  optional GroupBysResult sub_group_bys_result = 5;
}
// Result payload of a geo-grid group-by: a list of GroupByGeoGridResultItem.
message GroupByGeoGridResult {
  repeated GroupByGeoGridResultItem group_by_geo_grid_result_items = 1;
}
// Type tag stored in Query.type. Numbering starts at 1; no zero value is
// declared. Note the two similarly named values FUNCTION_SCORE_QUERY (8) and
// FUNCTIONS_SCORE_QUERY (18).
enum QueryType {
  MATCH_QUERY = 1;
  MATCH_PHRASE_QUERY = 2;
  TERM_QUERY = 3;
  RANGE_QUERY = 4;
  PREFIX_QUERY = 5;
  BOOL_QUERY = 6;
  CONST_SCORE_QUERY = 7;
  FUNCTION_SCORE_QUERY = 8;
  NESTED_QUERY = 9;
  WILDCARD_QUERY = 10;
  MATCH_ALL_QUERY = 11;
  GEO_BOUNDING_BOX_QUERY = 12;
  GEO_DISTANCE_QUERY = 13;
  GEO_POLYGON_QUERY = 14;
  TERMS_QUERY = 15;
  EXISTS_QUERY = 16;
  KNN_VECTOR_QUERY = 17;
  FUNCTIONS_SCORE_QUERY = 18;
}
// Boolean operator used by MatchQuery.operator.
enum QueryOperator {
  OR = 1;
  AND = 2;
}
// Body of a match query (presumably paired with MATCH_QUERY): text matched
// against a field, with an optional operator and weight.
message MatchQuery {
  optional string field_name = 1;
  optional string text = 2;
  optional int32 minimum_should_match = 3;
  optional QueryOperator operator = 4;
  optional float weight = 5;
}
// Body of a match-phrase query (presumably paired with MATCH_PHRASE_QUERY).
message MatchPhraseQuery {
  optional string field_name = 1;
  optional string text = 2;
  optional float weight = 3;
}
// Body of a match-all query (presumably paired with MATCH_ALL_QUERY).
// Intentionally declares no fields.
message MatchAllQuery {}
// Body of a term query (presumably paired with TERM_QUERY): a single opaque
// term value for one field.
// NOTE(review): the encoding of `term` is not documented in this file.
message TermQuery {
  optional string field_name = 1;
  optional bytes term = 2;
  optional float weight = 3;
}
// Body of a terms query (presumably paired with TERMS_QUERY): several opaque
// term values for one field.
// NOTE(review): the encoding of `terms` is not documented in this file.
message TermsQuery {
  optional string field_name = 1;
  repeated bytes terms = 2;
  optional float weight = 3;
}
// Body of a range query (presumably paired with RANGE_QUERY): lower/upper
// bounds with explicit inclusiveness flags.
message RangeQuery {
  optional string field_name = 1;
  // Variant value.
  optional bytes range_from = 2;
  // Variant value.
  optional bytes range_to = 3;
  optional bool include_lower = 4;
  optional bool include_upper = 5;
}
// Body of a prefix query (presumably paired with PREFIX_QUERY).
message PrefixQuery {
  optional string field_name = 1;
  optional string prefix = 2;
  optional float weight = 3;
}
// Body of a wildcard query (presumably paired with WILDCARD_QUERY).
// NOTE(review): the wildcard syntax accepted in `value` is not stated here.
message WildcardQuery {
  optional string field_name = 1;
  optional string value = 2;
  optional float weight = 3;
}
// Body of a bool query (presumably paired with BOOL_QUERY): four lists of
// sub-queries plus a minimum-should-match count.
message BoolQuery {
  repeated Query must_queries = 1;
  repeated Query must_not_queries = 2;
  repeated Query filter_queries = 3;
  repeated Query should_queries = 4;
  optional int32 minimum_should_match = 5;
}
// Body of a const-score query (presumably paired with CONST_SCORE_QUERY):
// wraps a single filter Query.
message ConstScoreQuery {
  optional Query filter = 1;
}
// Names the field used by FunctionScoreQuery.field_value_factor.
message FieldValueFactor {
  optional string field_name = 1;
}
// Body of a function-score query (presumably paired with
// FUNCTION_SCORE_QUERY): a Query plus one FieldValueFactor.
message FunctionScoreQuery {
  optional Query query = 1;
  optional FieldValueFactor field_value_factor = 2;
}
// Body of a functions-score query (presumably paired with
// FUNCTIONS_SCORE_QUERY): a Query plus a list of scoring Functions, the
// score/combine modes and min/max score bounds.
message FunctionsScoreQuery {
  optional Query query = 1;
  repeated Function functions = 2;
  optional FunctionScoreMode score_mode = 3;
  optional FunctionCombineMode combine_mode = 4;
  optional float min_score = 5;
  optional float max_score = 6;
}
// One scoring function inside FunctionsScoreQuery.functions. Offers three
// optional function kinds plus a weight and a filter Query.
// NOTE(review): presumably only one of field_value_factor / random / decay
// is set at a time; the schema does not enforce this (no oneof) — confirm.
message Function {
  optional FieldValueFactorFunction field_value_factor = 1;
  optional RandomScoreFunction random = 2;
  optional DecayFunction decay = 3;
  optional float weight = 4;
  optional Query filter = 5;
}
// Field-value-factor scoring function: a field, a float factor, a modifier
// and a fallback value for documents missing the field.
message FieldValueFactorFunction {
  optional string field_name = 1;
  optional float factor = 2;
  optional FunctionModifier modifier = 3;
  optional double missing = 4;
}
// Modifier selectable in FieldValueFactorFunction.modifier.
// Numbering starts at 1; no zero value is declared.
enum FunctionModifier {
  FM_NONE = 1;
  FM_LOG = 2;
  FM_LOG1P = 3;
  FM_LOG2P = 4;
  FM_LN = 5;
  FM_LN1P = 6;
  FM_LN2P = 7;
  FM_SQUARE = 8;
  FM_SQRT = 9;
  FM_RECIPROCAL = 10;
}
// Decay scoring function: a field, the decay curve, a typed parameter blob,
// the decay value and a multi-value mode.
message DecayFunction {
  optional string field_name = 1;
  optional DecayMathFunction math_function = 2;
  optional DecayFuncParamType param_type = 3;
  // NOTE(review): presumably the serialized DecayFuncDateParam /
  // DecayFuncNumericParam / DecayFuncGeoParam chosen by `param_type` —
  // confirm against the encoder.
  optional bytes param = 4;
  optional double decay = 5;
  optional MultiValueMode multi_value_mode = 6;
}
// Type tag stored in DecayFunction.param_type.
enum DecayFuncParamType {
  DF_DATE_PARAM = 1;
  DF_NUMERIC_PARAM = 2;
  DF_GEO_PARAM = 3;
}
// Date-flavoured decay parameters. The origin can be given as an int64 or as
// a string; scale and offset are DateTimeValue (value + unit) pairs.
// NOTE(review): precedence between origin_long and origin_string, and the
// unit of origin_long, are not stated here.
message DecayFuncDateParam {
  optional int64 origin_long = 1;
  optional string origin_string = 2;
  optional DateTimeValue scale = 3;
  optional DateTimeValue offset = 4;
}
// Numeric decay parameters: origin, scale and offset as doubles.
message DecayFuncNumericParam {
  optional double origin = 1;
  optional double scale = 2;
  optional double offset = 3;
}
// Geo decay parameters: a string origin with double scale and offset.
// NOTE(review): the string format of `origin` and the distance unit of
// scale/offset are not stated here.
message DecayFuncGeoParam {
  optional string origin = 1;
  optional double scale = 2;
  optional double offset = 3;
}
// Decay curve selectable in DecayFunction.math_function.
enum DecayMathFunction {
  GAUSS = 1;
  EXP = 2;
  LINEAR = 3;
}
// Mode selectable in DecayFunction.multi_value_mode.
enum MultiValueMode {
  MVM_MAX = 1;
  MVM_MIN = 2;
  MVM_SUM = 3;
  MVM_AVG = 4;
}
// Random scoring function used as Function.random. Declares no fields.
message RandomScoreFunction {}
// Mode selectable in FunctionsScoreQuery.score_mode.
enum FunctionScoreMode {
  FSM_AVG = 1;
  FSM_MAX = 2;
  FSM_SUM = 3;
  FSM_MIN = 4;
  FSM_MULTIPLY = 5;
  FSM_FIRST = 6;
}
// Mode selectable in FunctionsScoreQuery.combine_mode.
enum FunctionCombineMode {
  FCM_MULTIPLY = 1;
  FCM_AVG = 2;
  FCM_MAX = 3;
  FCM_SUM = 4;
  FCM_MIN = 5;
  FCM_REPLACE = 6;
}
// Mode selectable in NestedQuery.score_mode.
enum ScoreMode {
  SCORE_MODE_NONE = 1;
  SCORE_MODE_AVG = 2;
  SCORE_MODE_MAX = 3;
  SCORE_MODE_TOTAL = 4;
  SCORE_MODE_MIN = 5;
}
// Body of a nested query (presumably paired with NESTED_QUERY): a path, the
// inner Query, a score mode, a weight and optional inner-hits settings.
message NestedQuery {
  optional string path = 1;
  optional Query query = 2;
  optional ScoreMode score_mode = 3;
  optional float weight = 4;
  optional InnerHits inner_hits = 5;
}
// Inner-hits settings attached to NestedQuery.inner_hits: sort, paging
// (offset/limit) and highlighting.
message InnerHits {
  optional Sort sort = 1;
  optional int32 offset = 2;
  optional int32 limit = 3;
  optional Highlight highlight = 4;
}
// Body of a geo bounding-box query (presumably paired with
// GEO_BOUNDING_BOX_QUERY). Corners are strings.
// NOTE(review): the corner string format (e.g. "lat,lon") is not stated here.
message GeoBoundingBoxQuery {
  optional string field_name = 1;
  optional string top_left = 2;
  optional string bottom_right = 3;
}
// Body of a geo-distance query (presumably paired with GEO_DISTANCE_QUERY).
// NOTE(review): the center_point string format and the unit of `distance`
// are not stated here.
message GeoDistanceQuery {
  optional string field_name = 1;
  optional string center_point = 2;
  optional double distance = 3;
}
// Body of a geo-polygon query (presumably paired with GEO_POLYGON_QUERY):
// a field plus a list of point strings.
// NOTE(review): the point string format is not stated here.
message GeoPolygonQuery {
  optional string field_name = 1;
  repeated string points = 2;
}
// Body of an exists query (presumably paired with EXISTS_QUERY): only a
// field name.
message ExistsQuery {
  optional string field_name = 1;
}
// Body of a k-NN vector query (presumably paired with KNN_VECTOR_QUERY):
// a field, top_k, the float query vector, an optional filter and a weight.
// Field number 3 is not used by this definition.
message KnnVectorQuery {
  optional string field_name = 1;
  optional int32 top_k = 2;
  repeated float float32_query_vector = 4;
  optional Query filter = 5;
  optional float weight = 6;
}
// Envelope for any query: a type tag plus an opaque body.
// NOTE(review): `query` presumably carries the serialized XxxQuery message
// selected by `type` — confirm against the encoder.
message Query {
  optional QueryType type = 1;
  optional bytes query = 2;
}
// Collapse setting for SearchQuery.collapse: names a single field.
message Collapse {
  optional string field_name = 1;
}
// A path plus a filter Query; used by FieldSort and GeoDistanceSort as
// `nested_filter`.
message NestedFilter {
  optional string path = 1;
  optional Query filter = 2;
}
// Sort direction. Numbering starts at 0, so SORT_ORDER_ASC is the first
// declared value (the proto2 default when no explicit default is given).
enum SortOrder {
  SORT_ORDER_ASC = 0;
  SORT_ORDER_DESC = 1;
}
// Sort mode used by FieldSort.mode and GeoDistanceSort.mode.
// Numbering starts at 0 (SORT_MODE_MIN).
enum SortMode {
  SORT_MODE_MIN = 0;
  SORT_MODE_MAX = 1;
  SORT_MODE_AVG = 2;
}
// Sorter variant carrying only a direction; used as Sorter.score_sort.
message ScoreSort {
  optional SortOrder order = 1;
}
// Sorter variant that sorts on a named field; used as Sorter.field_sort.
message FieldSort {
  optional string field_name = 1;
  optional SortOrder order = 2;
  optional SortMode mode = 3;
  optional NestedFilter nested_filter = 4;
  // Encoded as SQLVariant.
  optional bytes missing_value = 5;
  optional string missing_field = 6;
}
// Sorter variant carrying only a direction; used as Sorter.doc_sort.
message DocSort {
  optional SortOrder order = 1;
}
// Distance computation type for GeoDistanceSort.distance_type.
// Numbering starts at 0 (GEO_DISTANCE_ARC).
enum GeoDistanceType {
  GEO_DISTANCE_ARC = 0;
  GEO_DISTANCE_PLANE = 1;
}
// Sorter variant that sorts by distance from one or more points; used as
// Sorter.geo_distance_sort.
// NOTE(review): the point string format is not stated here.
message GeoDistanceSort {
  optional string field_name = 1;
  repeated string points = 2;
  optional SortOrder order = 3;
  optional SortMode mode = 4;
  optional GeoDistanceType distance_type = 5;
  optional NestedFilter nested_filter = 6;
}
// Sorter variant carrying only a direction; used as Sorter.pk_sort.
message PrimaryKeySort {
  optional SortOrder order = 1;
}
// One sort criterion, offered as five optional variants.
// NOTE(review): presumably exactly one variant is set per sorter; the schema
// does not enforce this (no oneof) — confirm.
message Sorter {
  optional FieldSort field_sort = 1;
  optional GeoDistanceSort geo_distance_sort = 2;
  optional ScoreSort score_sort = 3;
  optional PrimaryKeySort pk_sort = 4;
  optional DocSort doc_sort = 5;
}
// List of Sorter criteria. The repeated field is named `sorter` (singular).
message Sort {
  repeated Sorter sorter = 1;
}
// The search query proper: paging, the root Query, collapse, sort, token,
// aggregations, group-bys and highlighting.
// Field numbers 3 and 7 are not used by this definition.
message SearchQuery {
  optional int32 offset = 1;
  optional int32 limit = 2;
  optional Query query = 4;
  optional Collapse collapse = 5;
  optional Sort sort = 6;
  // Note: camelCase name, unlike the snake_case used elsewhere in this file.
  optional bool getTotalCount = 8;
  optional bytes token = 9;
  optional Aggregations aggs = 10;
  optional GroupBys group_bys = 11;
  optional Highlight highlight = 12;
}
// Highlighting settings: per-field parameters plus an encoder, which defaults
// to PLAIN_MODE.
message Highlight {
  repeated HighlightParameter highlight_parameters = 1;
  optional HighlightEncoder highlight_encoder = 2 [default = PLAIN_MODE];
}
// Highlighting parameters for one field: fragment count and size, the
// pre/post tags, and fragment ordering (defaults to TEXT_SEQUENCE).
message HighlightParameter {
  optional string field_name = 1;
  optional int32 number_of_fragments = 2;
  optional int32 fragment_size = 3;
  optional string pre_tag = 4;
  optional string post_tag = 5;
  optional HighlightFragmentOrder fragments_order = 6 [default = TEXT_SEQUENCE];
}
// Ordering options for HighlightParameter.fragments_order.
enum HighlightFragmentOrder {
  TEXT_SEQUENCE = 1;
  SCORE = 2;
}
// Encoder options for Highlight.highlight_encoder.
enum HighlightEncoder {
  PLAIN_MODE = 1;
  HTML_MODE = 2;
}
// Selects which columns are returned; used by ColumnsToGet.return_type.
enum ColumnReturnType {
  RETURN_ALL = 1;
  RETURN_SPECIFIED = 2;
  RETURN_NONE = 3;
  RETURN_ALL_FROM_INDEX = 4;
}
// Column selection: a return type plus an explicit list of column names.
// NOTE(review): `column_names` is presumably only consulted for
// RETURN_SPECIFIED — confirm.
message ColumnsToGet {
  optional ColumnReturnType return_type = 1;
  repeated string column_names = 2;
}
// Search request: target table and index, column selection, the query as
// opaque bytes, routing values and a timeout in milliseconds.
message SearchRequest {
  optional string table_name = 1;
  optional string index_name = 2;
  optional ColumnsToGet columns_to_get = 3;
  // NOTE(review): presumably a serialized SearchQuery — confirm.
  optional bytes search_query = 4;
  repeated bytes routing_values = 5;
  optional int32 timeout_ms = 6;
}
/**
* Response section:
**/
// Search response: hit count, row blobs, a success flag, per-hit details, a
// paging token, opaque aggregation / group-by results and capacity figures.
// Field number 4 is not used by this definition.
// NOTE(review): ConsumedCapacity is not declared in this file; it must be
// supplied by another .proto loaded alongside — confirm.
message SearchResponse {
  optional int64 total_hits = 1;
  repeated bytes rows = 2;
  optional bool is_all_succeeded = 3;
  repeated SearchHit search_hits = 5;
  optional bytes next_token = 6;
  // NOTE(review): presumably a serialized AggregationsResult — confirm.
  optional bytes aggs = 7;
  // NOTE(review): presumably a serialized GroupBysResult — confirm.
  optional bytes group_bys = 8;
  optional ConsumedCapacity consumed = 9;
  optional ConsumedCapacity reserved_consumed = 10;
}
// Per-hit details: score, highlight result, inner hits and a nested-document
// offset. Field numbers 1 and 2 are not used by this definition.
message SearchHit {
  optional double score = 3;
  optional HighlightResult highlight_result = 4;
  repeated SearchInnerHit search_inner_hits = 5;
  optional int32 nested_doc_offset = 6;
}
// Inner hits for one path: the path plus its list of SearchHit entries.
// SearchHit and SearchInnerHit reference each other recursively.
message SearchInnerHit {
  optional string path = 1;
  repeated SearchHit search_hits = 2;
}
// Highlight output for one hit: a list of HighlightField entries.
message HighlightResult {
  repeated HighlightField highlight_fields = 1;
}
// Highlight output for one field: the field name and its fragments.
message HighlightField {
  optional string field_name = 1;
  repeated string field_fragments = 2;
}
/* Create Search Index */
// Index option levels used by FieldSchema.index_options.
enum IndexOptions {
  DOCS = 1;
  FREQS = 2;
  POSITIONS = 3;
  OFFSETS = 4;
}
// Field data types used by FieldSchema.field_type.
enum FieldType {
  LONG = 1;
  DOUBLE = 2;
  BOOLEAN = 3;
  KEYWORD = 4;
  TEXT = 5;
  NESTED = 6;
  GEO_POINT = 7;
  DATE = 8;
  VECTOR = 9;
}
// Time units used by DateTimeValue.unit.
enum DateTimeUnit {
  YEAR = 1;
  // One quarter.
  QUARTER_YEAR = 2;
  MONTH = 3;
  WEEK = 4;
  DAY = 5;
  HOUR = 6;
  MINUTE = 7;
  SECOND = 8;
  MILLISECOND = 9;
}
// A time quantity expressed as an int32 count of a DateTimeUnit.
message DateTimeValue {
  optional int32 value = 1;
  optional DateTimeUnit unit = 2;
}
// Analyzer parameters with two boolean switches.
// NOTE(review): presumably serialized into FieldSchema.analyzer_parameter
// for the single-word analyzer — confirm.
message SingleWordAnalyzerParameter {
  optional bool case_sensitive = 1;
  optional bool delimit_word = 2;
}
// Analyzer parameters carrying a delimiter string.
// NOTE(review): presumably serialized into FieldSchema.analyzer_parameter
// for the split analyzer — confirm.
message SplitAnalyzerParameter {
  optional string delimiter = 1;
}
// Analyzer parameters carrying min/max character counts.
// NOTE(review): presumably serialized into FieldSchema.analyzer_parameter
// for the fuzzy analyzer — confirm.
message FuzzyAnalyzerParameter {
  optional int32 min_chars = 1;
  optional int32 max_chars = 2;
}
// Schema of one index field. Recursive: nested fields carry their own child
// FieldSchema list.
message FieldSchema {
  optional string field_name = 1;
  optional FieldType field_type = 2;
  optional IndexOptions index_options = 3;
  optional string analyzer = 4;
  optional bool index = 5;
  optional bool doc_values = 6;
  optional bool store = 7;
  // Only for nested type.
  repeated FieldSchema field_schemas = 8;
  optional bool is_array = 9;
  // NOTE(review): presumably a serialized XxxAnalyzerParameter matching
  // `analyzer` — confirm.
  optional bytes analyzer_parameter = 10;
  optional bool is_virtual_field = 11;
  repeated string source_field_names = 12;
  repeated string date_formats = 13;
  optional bool enable_highlighting = 14;
  optional VectorOptions vector_options = 15;
}
// Element type for VectorOptions.data_type. Only one value is declared, and
// it is numbered 2.
enum VectorDataType {
  VD_FLOAT_32 = 2;
}
// Vector field options: element data type, dimension and metric type.
message VectorOptions {
  optional VectorDataType data_type = 1;
  optional int32 dimension = 2;
  optional VectorMetricType metric_type = 3;
}
// Distance metric for VectorOptions.metric_type.
// Numbering starts at 0 (VM_EUCLIDEAN).
enum VectorMetricType {
  VM_EUCLIDEAN = 0;
  VM_COSINE = 1;
  VM_DOT_PRODUCT = 2;
}
// Schema of a search index: its fields, index settings and index sort.
message IndexSchema {
  repeated FieldSchema field_schemas = 1;
  optional IndexSetting index_setting = 2;
  optional Sort index_sort = 3;
}
// Index-level settings: shard count, routing fields and routing partition
// size.
message IndexSetting {
  optional int32 number_of_shards = 1;
  repeated string routing_fields = 2;
  optional int32 routing_partition_size = 3;
}
// Request to create a search index. table_name and index_name are the only
// `required` fields in this file; in proto2 a message missing a required
// field fails to serialize/parse, so the labels are kept as declared.
message CreateSearchIndexRequest {
  required string table_name = 1;
  required string index_name = 2;
  optional IndexSchema schema = 3;
  optional string source_index_name = 4;
  // Unit is seconds.
  optional int32 time_to_live = 5;
}
// Response to CreateSearchIndexRequest. Declares no fields.
message CreateSearchIndexResponse {}
// Request to update a search index; the only updatable value declared here
// is time_to_live. Field numbers 3 and 4 are not used by this definition.
message UpdateSearchIndexRequest {
  optional string table_name = 1;
  optional string index_name = 2;
  // Unit is seconds.
  optional int32 time_to_live = 5;
}
// Response to UpdateSearchIndexRequest. Declares no fields.
message UpdateSearchIndexResponse {}
/* List Search Index */
// Identifies one search index by table name and index name.
message IndexInfo {
  optional string table_name = 1;
  optional string index_name = 2;
}
// Request to list search indexes; carries an optional table name.
message ListSearchIndexRequest {
  optional string table_name = 1;
}
// Response to ListSearchIndexRequest: a list of IndexInfo entries.
message ListSearchIndexResponse {
  repeated IndexInfo indices = 1;
}
/* Delete Search Index */
// Request to delete a search index, identified by table and index name.
message DeleteSearchIndexRequest {
  optional string table_name = 1;
  optional string index_name = 2;
}
// Response to DeleteSearchIndexRequest. Declares no fields.
message DeleteSearchIndexResponse {}
/* Describe Search Index */
// Synchronization phase reported in SyncStat.sync_phase.
// NOTE(review): FULL / INCR presumably mean full vs. incremental sync.
enum SyncPhase {
  FULL = 1;
  INCR = 2;
}
// Synchronization status of an index: the phase plus a progress timestamp.
message SyncStat {
  optional SyncPhase sync_phase = 1;
  // Sync progress; see TunnelService.
  optional int64 current_sync_timestamp = 2;
}
// Metering figures for an index: storage size, row count, reserved read CU
// and a timestamp.
// NOTE(review): units of storage_size and timestamp are not stated here.
message MeteringInfo {
  optional int64 storage_size = 1;
  optional int64 row_count = 2;
  optional int64 reserved_read_cu = 3;
  optional int64 timestamp = 4;
}
// Request to describe a search index, identified by table and index name.
message DescribeSearchIndexRequest {
  optional string table_name = 1;
  optional string index_name = 2;
}
// Response to DescribeSearchIndexRequest: schema, sync status, metering,
// a related index name, creation time, TTL and index status.
// Field number 5 is not used by this definition.
message DescribeSearchIndexResponse {
  optional IndexSchema schema = 1;
  optional SyncStat sync_stat = 2;
  optional MeteringInfo metering_info = 3;
  optional string brother_index_name = 4;
  // NOTE(review): unit/epoch of create_time is not stated here.
  optional int64 create_time = 6;
  // NOTE(review): presumably seconds, matching
  // CreateSearchIndexRequest.time_to_live — confirm.
  optional int32 time_to_live = 7;
  optional IndexStatus index_status = 8;
}
// Index status: an enum state plus a free-text description.
message IndexStatus {
  optional IndexStatusEnum status = 1;
  optional string status_description = 2;
}
// States reported in IndexStatus.status.
enum IndexStatusEnum {
  PENDING = 1;
  FAILED = 2;
  RUNNING = 3;
}
// Request to compute splits for a table, with search-index specific options.
message ComputeSplitsRequest {
  optional string table_name = 1;
  optional SearchIndexSplitsOptions search_index_splits_options = 2;
}
// Options for ComputeSplitsRequest: names the search index.
message SearchIndexSplitsOptions {
  optional string index_name = 1;
}
// Response to ComputeSplitsRequest: an opaque session id and a splits count.
// NOTE(review): session_id is presumably passed on via
// ParallelScanRequest.session_id — confirm.
message ComputeSplitsResponse {
  optional bytes session_id = 1;
  optional int32 splits_size = 2;
}
// Query for a parallel scan: the Query, a limit, a keep-alive time, a token
// and the parallel-slot coordinates.
message ScanQuery {
  optional Query query = 1;
  optional int32 limit = 2;
  // Unit is second.
  optional int32 alive_time = 3;
  optional bytes token = 4;
  optional int32 current_parallel_id = 5;
  optional int32 max_parallel = 6;
}
// Parallel scan request: target table and index, column selection, session
// id, the ScanQuery and a timeout in milliseconds.
message ParallelScanRequest {
  optional string table_name = 1;
  optional string index_name = 2;
  optional ColumnsToGet columns_to_get = 3;
  optional bytes session_id = 4;
  optional ScanQuery scan_query = 5;
  optional int32 timeout_ms = 6;
}
// Parallel scan response: row blobs plus a token.
// NOTE(review): next_token is presumably fed back via ScanQuery.token to
// continue the scan — confirm.
message ParallelScanResponse {
  repeated bytes rows = 1;
  optional bytes next_token = 2;
}