protobuf/api/search_service.proto (612 lines of code) (raw):

/*
 * Copyright 2021 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Copyright 2010 Google Inc. All Rights Reserved.
//
// This protocol buffer defines the search service used by clients to
// build indexes and to search those indexes.
// LINT: ALLOW_GROUPS

syntax = "proto2";

package java.apphosting;

import "document.proto";

option java_package = "com.google.appengine.api.search.proto";
option java_outer_classname = "SearchServicePb";

// A protocol buffer which contains an enumeration of errors.
message SearchServiceError {
  // Errors that are generated for each document contained in an
  // IndexDocumentRequest or DeleteDocumentRequest.
  enum ErrorCode {
    // Not an error.
    OK = 0;

    // The client made an invalid request, something about the request
    // needs to be changed.
    INVALID_REQUEST = 1;

    // A transient problem occurred. Try again later.
    TRANSIENT_ERROR = 2;

    // An internal error occurred.
    INTERNAL_ERROR = 3;

    // Client does not have permission to make the request.
    PERMISSION_DENIED = 4;

    // Operation could not finish within the deadline.
    TIMEOUT = 5;

    // Multiple updates to the same document at the same time.
    CONCURRENT_TRANSACTION = 6;
  }
}

// The processed status of a request or element of a request.
message RequestStatus {
  // An error that occurred on processing a request or element
  // of a request.
  required SearchServiceError.ErrorCode code = 1;

  // Any detail about the error.
  optional string error_detail = 2;

  // Canonical error code.
  optional int32 canonical_code = 3;
}

// Full specification of an index. An index is uniquely identified by source,
// application name (not part of index spec), namespace, and name of the index.
// The index consistency impacts how documents are added to the index and how
// searches are performed on the index. The mode defines how urgent a given
// operation is and currently only impacts addition and deletion.
message IndexSpec {
  // The name of the index.
  required string name = 1;

  // The consistency requirement on the index.
  enum Consistency {
    // The index is globally consistent.
    GLOBAL = 0;

    // The index is only consistent on a document level.
    PER_DOCUMENT = 1;
  }

  // The consistency mode (either per-document or global) for this index.
  // Once set, the consistency is fixed for the life of the index.
  // It is set on the first indexing call and only checked subsequently.
  optional Consistency consistency = 2 [default = PER_DOCUMENT];

  // The namespace associated with this index; if not specified we
  // assume the default namespace.
  optional string namespace = 3;

  // The version of the index on which we wish to operate; if
  // not specified we assume the active version. NOT USED.
  optional int32 version = 4;

  // The source of documents being indexed.
  enum Source {
    // The documents were indexed using the Search API.
    SEARCH = 0;

    // The documents are automatic conversions of Datastore Entities.
    DATASTORE = 1;

    // The documents are automatic conversions of Cloud Storage Objects.
    CLOUD_STORAGE = 2;
  }

  // The source of the documents being indexed.
  optional Source source = 5 [default = SEARCH];

  // The mode in which the index is to operate. This currently
  // impacts only add and remove operations on the index.
  enum Mode {
    // Indicates that index operations should be executed as promptly as
    // possible. For GLOBAL indexes, this means that adding or removing
    // documents should block until the changes are visible in search.
    // For PER_DOCUMENT indexes tokenization is started by the time the
    // Index.add() method returns and finishes in time proportional
    // to the complexity of the task, rather than the number of free
    // cycles available to the backend.
    PRIORITY = 0;

    // Returns as soon as possible. This mode does not offer any guarantees
    // as to how fast the backend commits the results of the operation. In
    // particular, there may be a significant delay between adding a document
    // to an index and that document being returned in search results.
    BACKGROUND = 1;
  }

  // The mode in which index operations are committed. The mode is not
  // persisted and can be changed from one RPC call to the next.
  optional Mode mode = 6 [default = PRIORITY];
}

// Extended information about an index.
message IndexMetadata {
  // Index specification (fully qualified name).
  required IndexSpec index_spec = 1;

  // Index schema. Can be optionally returned in response to ListIndexesRequest.
  // It will be ignored in other requests for now.
  repeated storage_onestore_v3.FieldTypes field = 2;

  // Amount of storage currently used by this index. Note that in
  // unusual circumstances the amount_used could be more than the
  // limit, either because the quota enforcement mechanism sacrifices
  // accuracy for throughput, or in a case where the limit was
  // initially high but then was subsequently changed to become lower.
  message Storage {
    // Amount of storage currently used. The unit is not stated in this
    // file -- NOTE(review): presumably bytes; confirm with the backend.
    optional int64 amount_used = 1;

    // Storage limit for the index, in the same unit as amount_used.
    optional int64 limit = 2;
  }

  // Storage usage and limit for this index.
  optional Storage storage = 3;

  // The index state could be modified upon a user request or a scanner.
  // When user issues a delete request, the state is set to SOFT_DELETED,
  // while it is still possible for user to cancel that deletion.
  // When the scanner finds out that index_delete_time has passed,
  // or a Dexter instance finds out index_delete_time has passed upon an RPC,
  // it sets the index's state to PURGING by writing to Megastore.
  // User is now not able to cancel the deletion.
  // When the scanner sees that all traces of the index are deleted,
  // it removes the index metadata row.
  //
  // We transition between states as follows:
  //
  //                          Index
  //            +--------+   Created   +---------+
  // start----->|  Non-  |------------>| ACTIVE  |
  //            |existent|             |         |
  // Scanner    +--------+             +---------+
  // finds out      ^                     ^   |
  // all deletes    |        User cancels |   | User issues
  // completed      |      index deletion |   | index deletion
  //                |                     |   v
  //            +--------+             +---------+
  //            | PURGE- |<------------| SOFT_   |
  //            |  ING   |  Scanner/   | DELETED |
  //            +--------+  Dexter     +---------+
  //                        finds out
  //                        index_delete_time
  //                        passed
  enum IndexState {
    // Index is active.
    ACTIVE = 0;

    // Index seen as marked deleted; purge trigger time not yet seen.
    // The deletion could still be cancelled at this point.
    SOFT_DELETED = 1;

    // Purge trigger time seen, purge processes should run.
    PURGING = 2;
  }

  // The current lifecycle state of the index (see IndexState).
  optional IndexState index_state = 4 [default = ACTIVE];

  // The scheduled deletion time, in milliseconds from 1970-01-01T00:00:00Z.
  // Present iff index_state is SOFT_DELETED or PURGING.
  optional int64 index_delete_time = 5;

  // Number of shards in index.
  optional int32 num_shards = 6 [default = 1];
}

// Write some documents to a named index, update existing documents.
message IndexDocumentParams {
  // The documents to write to the index.
  repeated storage_onestore_v3.Document document = 1;

  // When to write the documents to the index.
  enum Freshness {
    // Blocks until documents are searchable.
    SYNCHRONOUSLY = 0;

    // Inserts documents when convenient for the back end.
    WHEN_CONVENIENT = 1;
  }

  // When the documents will be searchable. Deprecated.
  optional Freshness freshness = 2
      [default = SYNCHRONOUSLY, deprecated = true];

  // The specification of the index.
  required IndexSpec index_spec = 3;
}

// A request to index specific documents.
message IndexDocumentRequest {
  // The parameters describing the documents to index.
  required IndexDocumentParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 3;
}

// A response to an index document request.
message IndexDocumentResponse {
  // The status of an individual document index request.
  repeated RequestStatus status = 1;

  // The id of the document indexed. If the id was missing,
  // then will be the id chosen by the service for the Document.
  repeated string doc_id = 2;
}

// Delete the specified documents from a named index.
message DeleteDocumentParams {
  // The identifiers of the documents to delete.
  repeated string doc_id = 1;

  // The specification of the index.
  required IndexSpec index_spec = 2;
}

// The request to delete given set of documents.
message DeleteDocumentRequest {
  // The parameters describing documents to delete.
  required DeleteDocumentParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 3;
}

// A response to a delete document request.
message DeleteDocumentResponse {
  // The status of a document delete.
  repeated RequestStatus status = 1;
}

// The parameters for listing documents in an index.
message ListDocumentsParams {
  // The index from which to list documents.
  required IndexSpec index_spec = 1;

  // The doc_id from which to start listing documents. Only documents with
  // ids greater than or equal to start_doc_id are returned, and they are
  // returned in doc_id order.
  optional string start_doc_id = 2;

  // Whether or not to include the document with the name
  // specified by the start_doc_id parameter. By default
  // we return the document with the name matching start_doc_id.
  optional bool include_start_doc = 3 [default = true];

  // The maximum number of documents to return.
  optional int32 limit = 4 [default = 100];

  // Specifies whether complete documents are returned, or just their keys.
  optional bool keys_only = 5;
}

// The request to list documents in an index.
message ListDocumentsRequest {
  // The parameters of documents to be listed.
  required ListDocumentsParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 2;
}

// The response to a request to list documents.
message ListDocumentsResponse {
  // The status of the list documents request.
  required RequestStatus status = 1;

  // The document list.
  repeated storage_onestore_v3.Document document = 2;
}

// The parameters for deleting an index.
message DeleteIndexParams {
  // The index to be deleted.
  required IndexSpec index_spec = 1;
}

// The request to delete an index.
message DeleteIndexRequest {
  // The parameters of index to be deleted.
  required DeleteIndexParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 2;
}

// The response to a request to delete an index.
message DeleteIndexResponse {
  // The status of the delete index request.
  required RequestStatus status = 1;
}

// The parameters for undeleting an index.
message CancelDeleteIndexParams {
  // The index to be undeleted.
  required IndexSpec index_spec = 1;
}

// The request to undelete an index.
message CancelDeleteIndexRequest {
  // The parameters of index to be undeleted.
  required CancelDeleteIndexParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 2;
}

// The response to a request to undelete an index.
message CancelDeleteIndexResponse {
  // The status of the undelete index request.
  required RequestStatus status = 1;
}

// Get a list of indexes available.
message ListIndexesParams {
  // Fetch schema of corresponding indexes if the flag is true.
  optional bool fetch_schema = 1;

  // The number of indexes returned. If not specified we return
  // 20 indexes.
  optional int32 limit = 2 [default = 20];

  // The namespace of indexes to be returned. If not specified
  // we return indexes that belong to the default namespace.
  optional string namespace = 3;

  // The name of the first index to be returned by the list
  // indexes operation. If include_start_index is set to false,
  // we return the index with the name following the
  // start_index_name.
  optional string start_index_name = 4;

  // Whether or not to include the index with the name
  // specified via start_index_name parameter. By default
  // we return the index with the name matching start_index_name.
  optional bool include_start_index = 5 [default = true];

  // The prefix of the index name. This parameter can be used to
  // request all indexes with names that have this prefix.
  optional string index_name_prefix = 6;

  // The offset to be used with limit for arbitrary paging.
  optional int32 offset = 7;

  // The source of the documents being indexed.
  optional IndexSpec.Source source = 8 [default = SEARCH];

  // Whether to return indexes belonging to all namespaces instead of only
  // returning indexes belonging to the namespace specified via the namespace
  // parameter. Cannot be combined with index_name_prefix. If true, the
  // namespace parameter is used only to determine the first index to be
  // returned (in conjunction with the start_index_name parameter).
  optional bool all_namespaces = 9;
}

// The request to list all known indexes.
message ListIndexesRequest {
  // The parameters of indexes to be listed.
  required ListIndexesParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 3;
}

// The response to a request to list the indexes available.
message ListIndexesResponse {
  // The status of the list indexes request.
  required RequestStatus status = 1;

  // The information about available indexes.
  repeated IndexMetadata index_metadata = 2;
}

// Delete the schemas from the specified indexes.
message DeleteSchemaParams {
  // The source of the indexes having schemas removed.
  optional IndexSpec.Source source = 1 [default = SEARCH];

  // The specification of indexes to have their schema deleted.
  repeated IndexSpec index_spec = 2;

  // Require the index to be empty (contain no documents) before
  // allowing DeleteSchema. If true and documents exist, DeleteSchema will
  // usually fail. However, due to search's eventual consistency, a successful
  // DeleteSchema with require_empty_index true may leave behind documents. For
  // example, a customer may write new documents in one thread while calling
  // DeleteSchema in another. DeleteSchema will not see a
  // written-but-not-applied document and succeed, leaving the newly-written
  // document in an "orphaned" state (this document can be found later via
  // ListDocuments). This flag's goal is to prevent customers from orphaning
  // large numbers of documents they will get billed for.
  optional bool require_empty_index = 3;
}

// The request to delete schemas from specified indexes.
message DeleteSchemaRequest {
  // The parameters describing schemas to delete.
  required DeleteSchemaParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 3;
}

// A response to a delete schema request.
message DeleteSchemaResponse {
  // The status of a schema delete.
  repeated RequestStatus status = 1;
}

// Sorting specification for a single dimension. Multi-dimensional sorting
// is supported by a collection of SortSpecs.
message SortSpec {
  // An expression that assigns a "sorting score" to a document, i.e.
  // the position in the sorted list. A corresponding default must be
  // provided below.
  required string sort_expression = 1;

  // Indicates whether the search results are returned in "descending order"
  // of their "sorting score".
  optional bool sort_descending = 2 [default = true];

  // Default value to use in case the sort expression evaluation results
  // in a runtime error. Must provide for text sorts.
  optional string default_value_text = 4;

  // Default value to use in case the sort expression evaluation results
  // in a runtime error. Must provide for numeric sorts.
  optional double default_value_numeric = 5;
}

// A specification of the scorer to invoke on a search result.
message ScorerSpec {
  // The types of scorers available.
  enum Scorer {
    // A generic scorer that uses match scoring and rescoring.
    RESCORING_MATCH_SCORER = 0;

    // A scorer that returns a score based on term frequency divided by document
    // frequency.
    MATCH_SCORER = 2;
  }

  // The type of scorer to invoke.
  optional Scorer scorer = 1 [default = MATCH_SCORER];

  // Limit on number of top retrieved results to score.
  optional int32 limit = 2 [default = 1000];

  // Parameters for the match scorer encoded in a string.
  optional string match_scorer_parameters = 9;
}

// Defines which fields will be returned.
message FieldSpec {
  // The name of each field to be returned in documents in the search results,
  // such as "location" or "price" or "title".
  repeated string name = 1;

  // An expression that will be computed for each result returned. For
  // example, if the client wants the total price, the expression might
  // be "Price + Tax" and the name might be TotalPrice. If the client
  // wants a snippet of field named "content" matching a query "good times",
  // the expression would be "snippet(\"good times\", content)"
  // and the name might be snippet.
  repeated group Expression = 2 {
    // The name given to the computed value, e.g. TotalPrice.
    required string name = 3;

    // The expression to evaluate, e.g. "Price + Tax".
    required string expression = 4;
  }
}

// A range of facet values with an optional label.
message FacetRange {
  // Label of the range. Could be (start)...(end) or a custom name.
  optional string name = 1;

  // Start and end of the range. At least one of these must be supplied.
  optional string start = 2;
  optional string end = 3;
}

// Parameters controlling which values are returned for a requested facet.
message FacetRequestParam {
  // How many facet values should be returned for this facet.
  optional int32 value_limit = 1;

  // If provided, return values in range instead of single values.
  // Note: number fields should always have range, otherwise
  // one single range min...max will be created for them.
  repeated FacetRange range = 2;

  // If provided, only count these values. Cannot be used
  // with ranges.
  repeated string value_constraint = 3;
}

// Parameters for facet auto-detection.
message FacetAutoDetectParam {
  // How many facet values should be returned for each facet.
  optional int32 value_limit = 1 [default = 10];
}

// A request for a specific, named facet.
message FacetRequest {
  // Name of the facet.
  required string name = 1;

  // Specify parameters for this facet.
  optional FacetRequestParam params = 2;
}

// A refinement of search results by a facet value or a facet range.
message FacetRefinement {
  // e.g., "genre"
  required string name = 1;

  // e.g., "adventure"
  optional string value = 2;

  // The refinement range.
  // Note: Either value or range should be set. They cannot both
  // be set in the same request.
  message Range {
    // Start of the range, e.g., "1" for "1..100".
    // At least one of start or end value must be supplied.
    optional string start = 1;

    // End of the range, e.g., "100" for "1..100".
    // At least one of start or end value must be supplied.
    optional string end = 2;
  }

  // e.g. "1..100"
  optional Range range = 3;
}

// Parameters to search an index for documents which match a query,
// restricting the document fields to those given, and sorting and scoring the
// results, whilst supporting pagination.
message SearchParams {
  // The full specification of the index to search.
  required IndexSpec index_spec = 1;

  // The query string must be in the search query syntax. These strings can
  // be constructed by hand, or by using the query api.
  required string query = 2;

  // Use a cursor returned from a previous set of search results as
  // a starting point to retrieve the next set of results. This can get you
  // better performance, and also improves the consistency of pagination
  // through index updates.
  optional string cursor = 4;

  // Use the offset if you want to move to an arbitrary result. You can use
  // it to move to a location independent of the last returned results.
  // However, this freedom comes at a price, as the backend reads all
  // documents preceding the given offset and just does not return them.
  optional int32 offset = 11;

  // What sort of cursor should be returned with the results?
  enum CursorType {
    // No cursor will be returned.
    NONE = 0;

    // A single cursor will be returned to continue from the end of the
    // results.
    SINGLE = 1;

    // One cursor will be returned with each search result so you can
    // continue after any result.
    PER_RESULT = 2;
  }

  // The type of the cursor returned with a search result.
  optional CursorType cursor_type = 5 [default = NONE];

  // Limit on number of search results to return. The backend supports
  // pagination. Clients are encouraged to request only those search
  // results needed for display (instead of requesting the typical 1000
  // results and filtering).
  optional int32 limit = 6 [default = 20];

  // Sets the minimum accuracy requirement for SearchResponse.matched_count.
  // If set, matched_count will be accurate up to at least that number.
  // For example, when set to 100, any matched_count <= 100 is accurate.
  // This option may add considerable latency / expense, especially when used
  // with field_spec. By default, accuracy is the same as the limit.
  optional int32 matched_count_accuracy = 7;

  // Multi-dimensional sort the search results.
  repeated SortSpec sort_spec = 8;

  // The client can control scoring on a query-by-query basis.
  optional ScorerSpec scorer_spec = 9;

  // Which fields from the documents the client wants returned.
  // If the field_spec message is not set, then all fields
  // are returned. If the FieldSpec has no field names specified,
  // then no fields are returned.
  optional FieldSpec field_spec = 10;

  // Specifies whether complete documents are returned, or just their keys.
  optional bool keys_only = 12;

  // Possible parsing modes. Currently we only support two modes: strict
  // and relaxed. In strict mode any query that is invalid according to
  // our grammar is rejected. In relaxed mode, we make the best attempt
  // at parsing the query. In the worst case we treat it as a bunch of text.
  enum ParsingMode {
    // Any error causes an exception.
    STRICT = 0;

    // Errors are consumed.
    RELAXED = 1;
  }

  // Whether the query parsing should be strict.
  optional ParsingMode parsing_mode = 13 [default = STRICT];

  // Specify how many facets should be discovered based on the result.
  optional int32 auto_discover_facet_count = 15 [default = 0];

  // Specify which facets are always to be included in the result.
  repeated FacetRequest include_facet = 16;

  // Refine results based on given facets. Refinements would be conjunction if
  // the names are different and disjunction if the names are the same.
  repeated FacetRefinement facet_refinement = 17;

  // Parameters for facet auto-detection.
  optional FacetAutoDetectParam facet_auto_detect_param = 18;

  // Sets the number of search results that are examined to
  // gather facet information.
  // For example, when set to 1000, any document in the
  // first 1000 results will be used to compute facets.
  optional int32 facet_depth = 19 [default = 1000];

  // Whether to enable or disable query rewrite.
  optional bool enable_query_rewrite = 20 [default = false];
}

// Request to search index for the specified parameters.
message SearchRequest {
  // The parameters of the search.
  required SearchParams params = 1;

  // Name of the app_id - only works for admin console.
  optional bytes app_id = 3;
}

// A single value (or range) reported for a facet, together with its count.
message FacetResultValue {
  // Value (constraint) name. Could be a simple value name (e.g.,
  // "red" or "white" for "Wine Type") or a range name (e.g.,
  // "1..100" when requested a number range facet).
  required string name = 1;

  // The count reported for this value.
  // NOTE(review): presumably the number of matching documents carrying this
  // value; confirm against the backend.
  required int32 count = 2;

  // The refinement message that can be used to refine future search requests
  // using this facet result value.
  required FacetRefinement refinement = 3;
}

// The values reported for one facet.
message FacetResult {
  // Facet name (e.g. "Wine Type").
  required string name = 1;

  // List of values/ranges for this facet.
  repeated FacetResultValue value = 2;
}

// A document which matches the search query.
message SearchResult {
  // The document which matches the query.
  required storage_onestore_v3.Document document = 1;

  // A key-value pair containing the result of any extra expressions
  // requested. For example, a request with an expression named TotalPrice
  // defined as "Price + Tax" means a Field with a key named TotalPrice and
  // with value set to the computed sum will be returned in a search result
  // expression.
  // As another example, if a request contains an expression named
  // snippet defined as "snippet(\"good times\", content)" then
  // the returned expression will be a Field named snippet with text value
  // containing a snippet of text from the Field named "content",
  // matching the query "good times".
  repeated storage_onestore_v3.Field expression = 4;

  // Deprecated: score assigned during evaluation.
  // Clients should retrieve scores with a FieldSpec whose Expression
  // uses the special variable name '_score'.
  // NOTE(review): unlike IndexDocumentParams.freshness, this field does not
  // carry the [deprecated = true] option; left as-is to keep generated code
  // unchanged.
  repeated double score = 2;

  // A cursor to be used for issuing a subsequent search that will return
  // elements beginning after this result.
  optional string cursor = 3;
}

// The response to a search request.
message SearchResponse {
  // The documents which matched the search query.
  repeated SearchResult result = 1;

  // The count of documents that matched the query (greater than
  // or equal to the number of documents actually returned).
  // Note that this is an approximation and not an exact count unless
  // it is less than or equal to SearchParams.matched_count_accuracy.
  required int64 matched_count = 2;

  // The status of the search request.
  required RequestStatus status = 3;

  // A cursor to be used for issuing a subsequent search that will return
  // elements beginning after this set of results.
  optional string cursor = 4;

  // A list of facets with their values and counts.
  repeated FacetResult facet_result = 5;

  // The count of documents scored during query execution.
  optional int32 docs_scored = 6;
}