protobuf/api/document.proto (228 lines of code) (raw):
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This protocol buffer defines documents used by indexing
// and searching services.
//
// This file should be kept in sync with:
// google3/googledata/apphosting/megastore/qa/proto/document.proto
// google3/googledata/apphosting/megastore/prod-appengine/proto/document.proto
syntax = "proto2";
package storage_onestore_v3;
option java_package = "com.google.apphosting.api.search";
option java_outer_classname = "DocumentPb";
// The value of a document field.
//
// `type` states how the content is to be interpreted. The payload itself is
// carried in `string_value`, the `Geo` group, or `vector_value`, as described
// on each member below.
message FieldValue {
  // How the content of a field value is to be interpreted.
  enum ContentType {
    // Plain text.
    TEXT = 0;
    // HTML.
    HTML = 1;
    // Indivisible text.
    ATOM = 2;
    // A date.
    DATE = 3;
    // A number.
    NUMBER = 4;
    // A location.
    GEO = 5;
    // An untokenized prefix field.
    UNTOKENIZED_PREFIX = 6;
    // A tokenized prefix field.
    TOKENIZED_PREFIX = 7;
    // A vector of double numbers.
    VECTOR = 8;
  }

  // Content type of this field value. Defaults to TEXT.
  optional ContentType type = 1 [default = TEXT];

  // Language of the content; must be a valid ISO 639-1 code. Defaults to "en".
  optional string language = 2 [default = "en"];

  // Storage for HTML, TEXT, ATOM, DATE, NUMBER, UNTOKENIZED_PREFIX, and
  // TOKENIZED_PREFIX values.
  //   * DATE:   milliseconds since the Unix epoch, formatted as what Java's
  //             Long.parseLong accepts.
  //   * NUMBER: formatted as what Java's Double.parseDouble accepts.
  optional string string_value = 3;

  // Storage for GEO values.
  // NOTE(review): `lat` / `lng` are undocumented in the original; presumably
  // latitude and longitude, units not stated here -- confirm.
  optional group Geo = 4 {
    required double lat = 5;
    required double lng = 6;
  }

  // Storage for VECTOR values. A vector is an ordered set of doubles.
  repeated double vector_value = 7;
}
// A document field: a name together with its value.
message Field {
  // Name of the field.
  required string name = 1;

  // Value of the field.
  required FieldValue value = 2;
}
// Maps one field name to the content types known to be associated with it.
message FieldTypes {
  // Field name, as given by the user.
  required string name = 1;

  // Content types associated with `name`.
  repeated FieldValue.ContentType type = 2;
}
// Shard settings of an index: the current shard count, shard counts from the
// previous configuration, and the replica used for searches.
message IndexShardSettings {
  // Total number of shards in the previous configuration.
  repeated int32 prev_num_shards = 1;

  // Current total number of shards for this index.
  // NOTE(review): the field is `required` yet also declares a default of 1;
  // a serialized message must always carry it explicitly.
  required int32 num_shards = 2 [default = 1];

  // The prev_num_shards values that are not searchable. This set is disjoint
  // from the prev_num_shards list.
  repeated int32 prev_num_shards_search_false = 3;

  // Local replica to use for searches. Defaults to the empty string.
  optional string local_replica = 4 [default = ""];
}
// Metadata of an index: shard settings, lifecycle state, scheduled deletion
// time, size limit, and per-replica deletion progress.
message IndexMetadata {
  // NOTE(review): undocumented in the original; presumably set once the index
  // exceeds a field-number threshold -- confirm. Defaults to false.
  optional bool is_over_field_number_threshold = 1 [default = false];

  // Shard settings of this index.
  optional IndexShardSettings index_shard_settings = 2;

  // Lifecycle state of the index. The state can be changed by a user request
  // or by a scanner:
  //   * When a user issues a delete request the state becomes SOFT_DELETED;
  //     the user can still cancel the deletion.
  //   * When the scanner -- or a Dexter instance handling an RPC -- finds that
  //     index_delete_time has passed, it sets the state to PURGING by writing
  //     to Megastore. The user can no longer cancel the deletion.
  //   * When the scanner sees that all traces of the index are deleted, it
  //     removes the index metadata row.
  //
  // State transitions:
  //
  //                       Index
  //            +--------+ Created      +---------+
  // start----->|  Non-  |------------->| ACTIVE  |
  //            |existent|              |         |
  // Scanner    +--------+              +---------+
  // finds out      ^                     ^    |
  // all deletes    |        User cancels |    | User issues
  // completed      |      index deletion |    | index deletion
  //                |                     |    v
  //            +--------+              +---------+
  //            | PURGE- |<-------------| SOFT_   |
  //            |  ING   |  Scanner/    | DELETED |
  //            +--------+  Dexter      +---------+
  //                        finds out
  //                        index_delete_time
  //                        passed
  enum IndexState {
    // Index is active.
    ACTIVE = 0;
    // Index seen as marked deleted; purge trigger time not yet seen.
    // The deletion can still be cancelled at this point.
    SOFT_DELETED = 1;
    // Purge trigger time seen; purge processes should run.
    PURGING = 2;
  }

  // Current state of the index. Defaults to ACTIVE.
  optional IndexState index_state = 3 [default = ACTIVE];

  // Scheduled deletion time, in milliseconds from 1970-01-01T00:00:00Z.
  // Present iff index_state is SOFT_DELETED or PURGING.
  optional int64 index_delete_time = 4;

  // Maximum size of the index (if a quota override is present).
  // Used to recreate dynamic quota profiles and to purge them during wipeout.
  optional int64 max_index_size_bytes = 5;

  // Start and completion times of one step of the deletion process.
  // NOTE(review): the time unit is not stated here; presumably the same
  // convention as index_delete_time -- confirm. Tags 1 and 2 are not used in
  // this message.
  message DeletionStatus {
    optional int64 started_time = 3;
    optional int64 completed_time = 4;
  }

  // Status of each component taking part in the index deletion process.
  // One IndexDeletionDetails describes one replica.
  message IndexDeletionDetails {
    // Replica these details describe.
    required string replica_name = 1;
    // Tracks whether all Megastore writes have been applied.
    optional DeletionStatus precheck = 2;
    // Tracks ST-BTI data deletion.
    optional DeletionStatus st_bti = 3;
    // Tracks deletion from the Megastore CustomerDocument table.
    optional DeletionStatus ms_docs = 4;
  }

  // Which components have completed for index deletion, one entry per
  // replica.
  repeated IndexDeletionDetails replica_deletion = 6;
}
// The value of a document facet.
message FacetValue {
  // How the content of a facet value is to be interpreted.
  // The numbering is kept consistent with FieldValue.ContentType
  // (ATOM = 2, NUMBER = 4).
  enum ContentType {
    // Indivisible text.
    ATOM = 2;
    // A number.
    NUMBER = 4;
  }

  // Content type of this facet value. Defaults to ATOM.
  optional ContentType type = 1 [default = ATOM];

  // Storage for ATOM and NUMBER values. A NUMBER is formatted as what Java's
  // Double.parseDouble accepts.
  optional string string_value = 3;
}
// A document facet: a name together with its value.
message Facet {
  // Name of the facet.
  required string name = 1;

  // Value of the facet.
  required FacetValue value = 2;
}
// The metadata of a document.
message DocumentMetadata {
  // Version number of this document; it should increment every time the
  // document is modified. DocumentMetadata stays around after deletes, so the
  // version number carries over when a document is deleted and a document of
  // the same name is indexed afterwards.
  optional int64 version = 1;

  // Highest version number of this document that has been written to the ST
  // index.
  optional int64 committed_st_version = 2;
}
// A document is a collection of fields.
//
// NOTE(review): tag 7 is not assigned in this message; if it belonged to a
// removed field, consider declaring it `reserved`.
message Document {
  // Where a document's order id came from.
  enum OrderIdSource {
    // We generated an order ID.
    DEFAULTED = 0;
    // The client provided an explicit order ID.
    SUPPLIED = 1;
  }

  // What sort of storage the index is built on.
  enum Storage {
    // Use a disk-based index.
    DISK = 0;
  }

  // Identifier of the document. If missing, a unique id is chosen
  // automatically for the document.
  optional string id = 1;

  // Language the document is written in; must be a valid ISO 639-1 code.
  // Defaults to "en".
  optional string language = 2 [default = "en"];

  // The fields of the document.
  repeated Field field = 3;

  // An id specified by the client, used to return documents in a defined
  // order in search results. If it is not specified, the number of seconds
  // since 2011/1/1 is used.
  optional int32 order_id = 4;

  // Whether we generated the order id ("rank") or the client provided it.
  // Defaults to SUPPLIED.
  optional OrderIdSource order_id_source = 6 [default = SUPPLIED];

  // Storage type the index is built on. Defaults to DISK.
  optional Storage storage = 5 [default = DISK];

  // The facets/categories the document belongs to.
  repeated Facet facet = 8;
}