common/thrift/Frontend.thrift

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

namespace py impala_thrift_gen.Frontend
namespace cpp impala
namespace java org.apache.impala.thrift

include "Types.thrift"
include "RuntimeProfile.thrift"
include "Descriptors.thrift"
include "Data.thrift"
include "Results.thrift"
include "TCLIService.thrift"
include "Status.thrift"
include "CatalogObjects.thrift"
include "CatalogService.thrift"
include "LineageGraph.thrift"
include "Query.thrift"

// These are supporting structs for JniFrontend.java, which serves as the glue
// between our C++ execution environment and the Java frontend.

// Struct for HiveUdf expr to create the proper execution object in the FE
// java side. See exprs/hive-udf-call.h for how hive Udfs are executed in general.
// TODO: this could be the UdfID, collapsing the first 3 arguments but synchronizing
// the id will not be possible without the catalog service.
struct THiveUdfExecutorCtorParams {
  // The function to execute.
  1: required Types.TFunction fn

  // Local path to the UDF's jar file
  2: required string local_location

  // The byte offset for each argument in the input buffer. The BE will
  // call the Java executor with a buffer for all the inputs.
  // input_byte_offsets[0] is the byte offset in the buffer for the first
  // argument; input_byte_offsets[1] is the second, etc.
  3: required list<i32> input_byte_offsets

  // Native input buffer ptr (cast as i64) for the inputs. The input arguments
  // are written to this buffer directly and read from java with no copies.
  // input_nulls_ptr[i] is true if the i-th input is null.
  // input_buffer_ptr[input_byte_offsets[i]] is the value of the i-th input.
  4: required i64 input_nulls_ptr
  5: required i64 input_buffer_ptr

  // Native output buffer ptr. For non-variable length types, the output is
  // written here and read from the native side with no copies.
  // The UDF should set *output_null_ptr to true, if the result of the UDF is
  // NULL.
  6: required i64 output_null_ptr
  7: required i64 output_buffer_ptr
}

// Arguments to getTableNames, which returns a list of tables that are of specified table
// types and match an optional pattern.
struct TGetTablesParams {
  // If not set, match tables in all DBs
  1: optional string db

  // If not set, match every table
  2: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the tables this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  3: optional Query.TSessionState session

  // This specifies the types of tables that should be returned. If not set, all types of
  // tables are considered when their names are matched against pattern.
  4: optional set<CatalogService.TImpalaTableType> table_types = []
}

// Arguments to getMetadataTableNames, which returns the list of metadata tables of the
// specified table.
struct TGetMetadataTablesParams {
  // Database and table whose metadata tables are listed.
  1: required string db
  2: required string tbl

  // If not set, match every table
  3: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the tables this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  4: optional Query.TSessionState session
}

// getTableNames returns a list of unqualified table names
struct TGetTablesResult {
  1: list<string> tables
}

// getCatalogInfo returns a list of catalog info strings
struct TGetCatalogInfoResult {
  1: list<string> info
}

// Arguments to getTableMetrics, which returns the metrics of a specific table.
struct TGetTableMetricsParams {
  1: required CatalogObjects.TTableName table_name
}

// Response to a getTableMetrics request. The response contains all the collected metrics
// pretty-printed into a string.
struct TGetTableMetricsResponse {
  1: required string metrics
}

// Response from a call to getCatalogMetrics.
struct TGetCatalogMetricsResult {
  1: required i32 num_dbs
  2: required i32 num_tables

  // Following cache metrics are set only in local catalog mode. These map to Guava's
  // CacheStats. Accounts for all the cache requests since the process boot time.
  // NOTE(review): the time unit of the load-time fields and the unit of the entry-size
  // fields are not stated here - confirm against the code that fills them in.
  3: optional i64 cache_eviction_count
  4: optional i64 cache_hit_count
  5: optional i64 cache_load_count
  6: optional i64 cache_load_exception_count
  7: optional i64 cache_load_success_count
  8: optional i64 cache_miss_count
  9: optional i64 cache_request_count
  10: optional i64 cache_total_load_time
  11: optional double cache_avg_load_time
  12: optional double cache_hit_rate
  13: optional double cache_load_exception_rate
  14: optional double cache_miss_rate
  15: optional double cache_entry_median_size
  16: optional double cache_entry_99th_size
}

// Arguments to getDbs, which returns a list of dbs that match an optional pattern
struct TGetDbsParams {
  // If not set, match every database
  1: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the databases this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  2: optional Query.TSessionState session
}

// getDbs returns a list of databases
struct TGetDbsResult {
  1: list<CatalogObjects.TDatabase> dbs
}

// Arguments to getDataSrcsNames, which returns a list of data sources that match an
// optional pattern
struct TGetDataSrcsParams {
  // If not set, match every data source
  1: optional string pattern
}

// getDataSrcsNames returns a list of data source names
// NOTE(review): the four lists are presumably parallel (entry i of each list describes
// the same data source) - confirm against the producer.
struct TGetDataSrcsResult {
  1: required list<string> data_src_names
  2: required list<string> locations
  3: required list<string> class_names
  4: required list<string> api_versions
}

// Used by DESCRIBE DATABASE <db> and DESCRIBE <table> statements to control
// what information is returned and how to format the output.
enum TDescribeOutputStyle {
  // The default output style if no options are specified for
  // DESCRIBE DATABASE <db> and DESCRIBE <table>.
  MINIMAL = 0

  // Output additional information on the database or table.
  // Set for both DESCRIBE DATABASE FORMATTED|EXTENDED <db>
  // and DESCRIBE FORMATTED|EXTENDED <table> statements.
  EXTENDED = 1
  FORMATTED = 2
}

// Arguments to DescribeDb, which returns a list of properties for a given database.
// What information is returned is controlled by the given TDescribeOutputStyle.
// NOTE: This struct should only be used for intra-process communication.
struct TDescribeDbParams {
  1: required string db

  // Controls the output style for this describe command.
  2: required TDescribeOutputStyle output_style
}

// Arguments to DescribeTable, which returns a list of column descriptors and additional
// metadata for a given table. What information is returned is controlled by the
// given TDescribeOutputStyle.
// NOTE: This struct should only be used for intra-process communication.
struct TDescribeTableParams {
  // Controls the output style for this describe command.
  1: required TDescribeOutputStyle output_style

  // Set when describing a table.
  2: optional CatalogObjects.TTableName table_name

  // Set for metadata tables
  3: optional string metadata_table_name

  // Set when describing a path to a nested collection.
  4: optional Types.TColumnType result_struct

  // Session state for the user who initiated this request.
  5: optional Query.TSessionState session
}

// Results of a call to describeDb() and describeTable()
// NOTE: This struct should only be used for intra-process communication.
struct TDescribeResult {
  // Output from a DESCRIBE DATABASE command or a DESCRIBE TABLE command.
  1: required list<Data.TResultRow> results
}

// Parameters for SHOW DATA SOURCES commands
struct TShowDataSrcsParams {
  // Optional pattern to match data source names. If not set, all data sources are
  // returned.
  1: optional string show_pattern
}

// Parameters for SHOW DATABASES commands
struct TShowDbsParams {
  // Optional pattern to match database names. If not set, all databases are returned.
  1: optional string show_pattern
}

// Used by SHOW STATS and SHOW PARTITIONS to control what information is returned.
enum TShowStatsOp {
  TABLE_STATS = 0
  COLUMN_STATS = 1
  PARTITIONS = 2
  RANGE_PARTITIONS = 3
  HASH_SCHEMA = 4
}

// Parameters for SHOW TABLE/COLUMN STATS and SHOW PARTITIONS commands
struct TShowStatsParams {
  1: TShowStatsOp op
  2: CatalogObjects.TTableName table_name
  3: optional bool show_column_minmax_stats
}

// Parameters for DESCRIBE HISTORY command
// NOTE(review): the unit/epoch of the three time fields is not stated here - confirm
// against the code that fills them in.
struct TDescribeHistoryParams {
  1: CatalogObjects.TTableName table_name
  2: optional i64 between_start_time
  3: optional i64 between_end_time
  4: optional i64 from_time
}

// Parameters for SHOW FUNCTIONS commands
struct TShowFunctionsParams {
  // Category of function to show.
  1: Types.TFunctionCategory category

  // Database to use for SHOW FUNCTIONS
  2: optional string db

  // Optional pattern to match function names. If not set, all functions are returned.
  3: optional string show_pattern
}

// Parameters for SHOW TABLES, SHOW METADATA TABLES and SHOW VIEWS commands
struct TShowTablesParams {
  // Database to use for SHOW TABLES
  1: optional string db

  // Set for querying the metadata tables of the given table.
  2: optional string tbl

  // Optional pattern to match tables names. If not set, all tables from the given
  // database are returned.
  3: optional string show_pattern

  // This specifies the types of tables that should be returned. If not set, all types of
  // tables are considered when their names are matched against pattern.
  4: optional set<CatalogService.TImpalaTableType> table_types = []
}

// Parameters for SHOW FILES commands
struct TShowFilesParams {
  1: required CatalogObjects.TTableName table_name

  // An optional partition set. Set if this operation should apply to a list of
  // partitions rather than the base table.
  2: optional list<list<CatalogObjects.TPartitionKeyValue>> partition_set
}

// Parameters for SHOW [CURRENT] ROLES and SHOW ROLE GRANT GROUP <groupName> commands
struct TShowRolesParams {
  // The effective user who submitted this request.
  1: optional string requesting_user

  // True if this operation requires admin privileges on the Sentry Service. This is
  // needed to check for the case where an operation is_user_scope, but the user does
  // not belong to the specified grant_group.
  // REMOVED: 2: required bool is_admin_op

  // True if the statement is "SHOW CURRENT ROLES".
  3: required bool is_show_current_roles

  // Filters roles to the specified grant group. If null or not set, show roles for all
  // groups.
  4: optional string grant_group
}

// Result of a SHOW ROLES command
struct TShowRolesResult {
  1: required list<string> role_names
}

// Represents one row in the DESCRIBE HISTORY command's result.
struct TGetTableHistoryResultItem {
  // Timestamp in millis
  1: required i64 creation_time
  2: required i64 snapshot_id
  3: optional i64 parent_id
  4: required bool is_current_ancestor
}

// Result of the DESCRIBE HISTORY command.
struct TGetTableHistoryResult {
  1: required list<TGetTableHistoryResultItem> result
}

// Parameters for SHOW GRANT ROLE/USER commands
struct TShowGrantPrincipalParams {
  // The effective user who submitted this request.
  1: optional string requesting_user

  // The target name.
  2: required string name

  // The principal type.
  3: required CatalogObjects.TPrincipalType principal_type;

  // True if this operation requires admin privileges on the Sentry Service (when
  // the requesting user has not been granted the target role name).
  // REMOVED: 4: required bool is_admin_op

  // An optional filter to show grants that match a specific privilege spec.
  5: optional CatalogObjects.TPrivilege privilege
}

// Arguments to getFunctions(), which returns a list of non-qualified function
// signatures that match an optional pattern. Parameters for SHOW FUNCTIONS.
struct TGetFunctionsParams {
  1: required Types.TFunctionCategory category

  // Database to use for SHOW FUNCTIONS
  2: optional string db

  // If not set, match every function
  3: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the functions this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  4: optional Query.TSessionState session
}

// getFunctions() returns a list of function signatures
// NOTE(review): the four lists are presumably parallel (entry i of each list describes
// the same function) - confirm against the producer.
struct TGetFunctionsResult {
  1: list<string> fn_signatures
  2: list<string> fn_ret_types
  3: list<string> fn_binary_types
  4: list<string> fn_persistence
}

// Parameters for the USE db command
struct TUseDbParams {
  1: required string db
}

// Results of an EXPLAIN
struct TExplainResult {
  // each line in the explain plan occupies an entry in the list
  1: required list<Data.TResultRow> results
}

// Request for a LOAD DATA statement. LOAD DATA is only supported for HDFS backed tables.
struct TLoadDataReq {
  // Fully qualified table name to load data into.
  1: required CatalogObjects.TTableName table_name

  // The source data file or directory to load into the table.
  2: required string source_path

  // If true, loaded files will overwrite all data in the target table/partition's
  // directory. If false, new files will be added alongside existing files. If there are
  // any file name conflicts, the new files will be uniquified by appending a UUID to the
  // base file name preserving the extension if one exists.
  3: required bool overwrite

  // An optional partition spec. Set if this operation should apply to a specific
  // partition rather than the base table.
  4: optional list<CatalogObjects.TPartitionKeyValue> partition_spec

  // True if the destination table is an Iceberg table, in this case we need to insert
  // data to the Iceberg table based on the given files.
  5: optional bool iceberg_tbl

  // For Iceberg data load. Query template to create a temporary table with its location
  // pointing to the new files. The table location is unknown during planning, these are
  // filled during execution.
  6: optional string create_tmp_tbl_query_template

  // For Iceberg data load. Query to insert into the destination table from the
  // temporary table.
  7: optional string insert_into_dst_tbl_query

  // For Iceberg data load. Query to drop the temporary table.
  8: optional string drop_tmp_tbl_query
}

// Response of a LOAD DATA statement.
struct TLoadDataResp {
  // A result row that contains information on the result of the LOAD operation. This
  // includes details like the number of files moved as part of the request.
  1: required Data.TResultRow load_summary

  // The loaded file paths
  2: required list<string> loaded_files

  // This is needed to issue TUpdateCatalogRequest
  3: string partition_name = ""

  // For Iceberg data load. The query template after the required fields are substituted.
  4: optional string create_tmp_tbl_query

  // For Iceberg data load. The temporary table location, used to restore data in case of
  // query failure.
  5: optional string create_location
}

// The kind of catalog operation a request carries; used as the type of
// TCatalogOpRequest.op_type.
enum TCatalogOpType {
  SHOW_TABLES = 0
  SHOW_DBS = 1
  SHOW_STATS = 2
  USE = 3
  DESCRIBE_TABLE = 4
  DESCRIBE_DB = 5
  SHOW_FUNCTIONS = 6
  RESET_METADATA = 7
  DDL = 8
  SHOW_CREATE_TABLE = 9
  SHOW_DATA_SRCS = 10
  SHOW_ROLES = 11
  SHOW_GRANT_PRINCIPAL = 12
  SHOW_FILES = 13
  SHOW_CREATE_FUNCTION = 14
  DESCRIBE_HISTORY = 15
  SHOW_VIEWS = 16
  SHOW_METADATA_TABLES = 17
}

// TODO: Combine SHOW requests with a single struct that contains a field
// indicating which type of show request it is.
// A catalog operation request: 'op_type' selects the operation and the optional
// per-operation parameter structs below carry its arguments.
struct TCatalogOpRequest {
  1: required TCatalogOpType op_type

  // True if SYNC_DDL is used in the query options
  2: required bool sync_ddl

  // Parameters for USE commands
  3: optional TUseDbParams use_db_params

  // Parameters for DESCRIBE DATABASE db commands
  4: optional TDescribeDbParams describe_db_params

  // Parameters for DESCRIBE table commands
  5: optional TDescribeTableParams describe_table_params

  // Parameters for SHOW DATABASES
  6: optional TShowDbsParams show_dbs_params

  // Parameters for SHOW TABLES
  7: optional TShowTablesParams show_tables_params

  // Parameters for SHOW FUNCTIONS
  8: optional TShowFunctionsParams show_fns_params

  // Parameters for SHOW DATA SOURCES
  9: optional TShowDataSrcsParams show_data_srcs_params

  // Parameters for SHOW ROLES
  10: optional TShowRolesParams show_roles_params

  // Parameters for SHOW GRANT ROLE/USER
  11: optional TShowGrantPrincipalParams show_grant_principal_params

  // Parameters for DDL requests executed using the CatalogServer
  // such as CREATE, ALTER, and DROP. See CatalogService.TDdlExecRequest
  // for details.
  12: optional CatalogService.TDdlExecRequest ddl_params

  // Parameters for RESET/INVALIDATE METADATA, executed using the CatalogServer.
  // See CatalogService.TResetMetadataRequest for more details.
  13: optional CatalogService.TResetMetadataRequest reset_metadata_params

  // Parameters for SHOW TABLE/COLUMN STATS
  14: optional TShowStatsParams show_stats_params

  // Parameters for SHOW CREATE TABLE
  15: optional CatalogObjects.TTableName show_create_table_params

  // Parameters for SHOW FILES
  16: optional TShowFilesParams show_files_params

  // Column lineage graph
  17: optional LineageGraph.TLineageGraph lineage_graph

  // Parameters for SHOW_CREATE_FUNCTION
  18: optional TGetFunctionsParams show_create_function_params

  // Parameters for DESCRIBE HISTORY
  19: optional TDescribeHistoryParams describe_history_params
}

// Query options type
enum TQueryOptionType {
  SET_ONE = 0
  SET_ALL = 1
  UNSET_ALL = 2
}

// Parameters for the SET query option command
struct TSetQueryOptionRequest {
  // Set for "SET key=value", unset for "SET" and "SET ALL" statements.
  1: optional string key
  2: optional string value

  // query option type
  3: optional TQueryOptionType query_option_type
}

// Parameters for the shutdown administrative function; carried in
// TAdminRequest.shutdown_params.
struct TShutdownParams {
  // Set if a backend was specified as an argument to the shutdown function. If not set,
  // the current impala daemon will be shut down. If the port was specified, it is set
  // in 'backend'. If it was not specified, it is 0 and the port configured for this
  // Impala daemon is assumed.
  1: optional Types.TNetworkAddress backend

  // Deadline in seconds for shutting down.
  2: optional i64 deadline_s
}

// The type of administrative function to be executed.
enum TAdminRequestType {
  SHUTDOWN = 0
  EVENT_PROCESSOR = 1
}

// Parameters for administrative function statement. This is essentially a tagged union
// that contains parameters for the type of administrative statement to be executed.
struct TAdminRequest {
  1: required TAdminRequestType type

  // The below member corresponding to 'type' should be set.
  2: optional TShutdownParams shutdown_params
  3: optional CatalogService.TEventProcessorCmdParams event_processor_cmd_params
}

// HiveServer2 Metadata operations (JniFrontend.hiveServer2MetadataOperation)
enum TMetadataOpcode {
  GET_TYPE_INFO = 0
  GET_CATALOGS = 1
  GET_SCHEMAS = 2
  GET_TABLES = 3
  GET_TABLE_TYPES = 4
  GET_COLUMNS = 5
  GET_FUNCTIONS = 6
  GET_PRIMARY_KEYS = 7
  GET_CROSS_REFERENCE = 8
}

// Input parameter to JniFrontend.hiveServer2MetadataOperation
// Each request has an opcode and a corresponding TGet*Req input parameter
struct TMetadataOpRequest {
  // opcode
  1: required TMetadataOpcode opcode

  // input parameters
  2: optional TCLIService.TGetInfoReq get_info_req
  3: optional TCLIService.TGetTypeInfoReq get_type_info_req
  4: optional TCLIService.TGetCatalogsReq get_catalogs_req
  5: optional TCLIService.TGetSchemasReq get_schemas_req
  6: optional TCLIService.TGetTablesReq get_tables_req
  7: optional TCLIService.TGetTableTypesReq get_table_types_req
  8: optional TCLIService.TGetColumnsReq get_columns_req
  9: optional TCLIService.TGetFunctionsReq get_functions_req

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the server objects this user has access to will be returned.
  // If not set, access checks will be skipped (used for internal Impala requests)
  10: optional Query.TSessionState session
  11: optional TCLIService.TGetPrimaryKeysReq get_primary_keys_req
  12: optional TCLIService.TGetCrossReferenceReq get_cross_reference_req
}

// Tracks accesses to Catalog objects for use during auditing. This information, paired
// with the current session information, provides a view into what objects a user's
// query accessed
struct TAccessEvent {
  // Fully qualified object name
  1: required string name

  // The object type (ex. DATABASE, VIEW, TABLE)
  2: required CatalogObjects.TCatalogObjectType object_type

  // The requested privilege on the object
  // TODO: Create an enum for this?
  3: required string privilege
}

// Request for "ALTER TABLE ...
// CONVERT TO" statements
struct TConvertTableRequest {
  1: required CatalogObjects.TTableName table_name
  2: required CatalogObjects.TTableName hdfs_table_name
  3: required CatalogObjects.THdfsFileFormat file_format
  4: optional map<string, string> properties

  // NOTE(review): judging by their names, the remaining fields hold the query strings
  // for the individual steps of the conversion - confirm the execution order and
  // semantics against the code that fills them in and runs them.
  5: optional string set_hdfs_table_properties_query
  6: optional string rename_hdfs_table_to_temporary_query
  7: optional string refresh_temporary_hdfs_table_query
  8: optional string reset_table_name_query
  9: optional string create_iceberg_table_query
  10: optional string invalidate_metadata_query
  11: optional string post_create_alter_table_query
  12: optional string drop_temporary_hdfs_table_query
}

// Request for a KILL QUERY statement.
struct TKillQueryReq {
  1: required Types.TUniqueId query_id

  // The effective user who submitted this request.
  2: required string requesting_user;

  // True if the requesting_user is an admin.
  3: required bool is_admin;
}

// Result of call to createExecRequest()
struct TExecRequest {
  1: required Types.TStmtType stmt_type = TStmtType.UNKNOWN

  // Copied from the corresponding TClientRequest
  2: required Query.TQueryOptions query_options

  // TQueryExecRequest for the backend
  // Set iff stmt_type is QUERY or DML
  3: optional Query.TQueryExecRequest query_exec_request

  // Set if stmt_type is DDL
  4: optional TCatalogOpRequest catalog_op_request

  // Metadata of the query result set (not set for DML)
  5: optional Results.TResultSetMetadata result_set_metadata

  // Result of EXPLAIN. Set iff stmt_type is EXPLAIN
  6: optional TExplainResult explain_result

  // Request for LOAD DATA statements.
  7: optional TLoadDataReq load_data_request

  // List of catalog objects accessed by this request. May be empty in the
  // case that the query did not access any Catalog objects.
  8: optional list<TAccessEvent> access_events

  // List of warnings that were generated during analysis. May be empty.
  9: required list<string> analysis_warnings

  // Set if stmt_type is SET
  10: optional TSetQueryOptionRequest set_query_option_request

  // Timeline of planner's operation, for profiling
  // TODO(todd): should integrate this with the 'profile' member instead.
  11: optional RuntimeProfile.TEventSequence timeline

  // If false, the user that runs this statement doesn't have access to the runtime
  // profile. For example, a user can't access the runtime profile of a query
  // that has a view for which the user doesn't have access to the underlying tables.
  12: optional bool user_has_profile_access

  // Set iff stmt_type is ADMIN_FN.
  13: optional TAdminRequest admin_request

  // Profile information from the planning process.
  14: optional RuntimeProfile.TRuntimeProfileNode profile

  // Set iff stmt_type is TESTCASE
  15: optional string testcase_data_path

  // Coordinator time when plan was submitted by external frontend
  16: optional i64 remote_submit_time

  // Additional profile nodes to be displayed nested right under 'profile' field.
  17: optional list<RuntimeProfile.TRuntimeProfileNode> profile_children

  // True if request pool is set by Frontend rather than user specifically setting it via
  // REQUEST_POOL query option.
  18: optional bool request_pool_set_by_frontend = false

  // Request for "ALTER TABLE ... CONVERT TO" statements.
  19: optional TConvertTableRequest convert_table_request

  // Tables referenced in the query.
  20: optional list<CatalogObjects.TTableName> tables

  // Columns referenced in a select list.
  21: optional list<string> select_columns

  // Columns referenced in a where clause.
  22: optional list<string> where_columns

  // Columns referenced in a join clause.
  23: optional list<string> join_columns

  // Columns referenced in an aggregation.
  24: optional list<string> aggregate_columns

  // Columns referenced in an order by clause.
  25: optional list<string> orderby_columns

  // Request for "KILL QUERY" statements.
  26: optional TKillQueryReq kill_query_request
}

// Parameters to FeSupport.cacheJar().
struct TCacheJarParams {
  // HDFS URI for the jar
  1: required string hdfs_location
}

// Result from FeSupport.cacheJar().
struct TCacheJarResult {
  1: required Status.TStatus status

  // Local path for the jar. Set only if status is OK.
  2: optional string local_path
}

// A UDF may include optional prepare and close functions in addition to the main
// evaluation function. This enum distinguishes between these when doing a symbol lookup.
enum TSymbolType {
  UDF_EVALUATE = 0
  UDF_PREPARE = 1
  UDF_CLOSE = 2
}

// Parameters to pass to validate that the binary contains the symbol. If the
// symbol is fully specified (i.e. full mangled name), we validate that the
// mangled name is correct. If only the function name is specified, we try
// to find the fully mangled name in the binary.
// The result is returned in TSymbolLookupResult.
struct TSymbolLookupParams {
  // HDFS path for the function binary. This binary must exist at the time the
  // function is created.
  1: required string location

  // This can either be a mangled symbol or the function name before mangling.
  2: required string symbol

  // Type of the udf. e.g. hive, native, ir
  3: required Types.TFunctionBinaryType fn_binary_type

  // The types of the arguments to the function
  4: required list<Types.TColumnType> arg_types

  // If true, this function takes var args.
  5: required bool has_var_args

  // If set this function needs to have a return out argument of this type.
  6: optional Types.TColumnType ret_arg_type

  // Determines the signature of the mangled symbol
  7: required TSymbolType symbol_type

  // Does the lookup require the backend lib-cache entry be refreshed?
  // If so, the file system is checked for a newer version of the file
  // referenced by 'location'. If not, the entry in the lib-cache is used
  // if present, otherwise the file is read from file-system.
  8: required bool needs_refresh
}

// Outcome of a symbol lookup; reported in TSymbolLookupResult.result_code.
enum TSymbolLookupResultCode {
  SYMBOL_FOUND = 0
  BINARY_NOT_FOUND = 1
  SYMBOL_NOT_FOUND = 2
}

// Result of a symbol lookup requested with TSymbolLookupParams.
struct TSymbolLookupResult {
  // The result of the symbol lookup.
  1: required TSymbolLookupResultCode result_code

  // The symbol that was found. set if result_code == SYMBOL_FOUND.
  2: optional string symbol

  // The error message if the symbol could not be found.
  3: optional string error_msg

  // Last modified time in backend lib-cache entry for the file referenced by 'location'.
  4: optional i64 last_modified_time
}

// Sent from the impalad BE to FE with the results of each CatalogUpdate heartbeat.
// The catalog object updates are passed separately via NativeGetCatalogUpdate() callback.
struct TUpdateCatalogCacheRequest {
  // True if update only contains entries changed from the previous update. Otherwise,
  // contains the entire topic.
  1: required bool is_delta

  // The Catalog Service ID this update came from. A request should have either this
  // field set or a Catalog typed catalog object in the update list.
  2: optional Types.TUniqueId catalog_service_id

  // New or modified items. Empty list if no items were updated. Deprecated after
  // IMPALA-5990.
  3: optional list<CatalogObjects.TCatalogObject> updated_objects_deprecated

  // Empty if no items were removed or is_delta is false. Deprecated after IMPALA-5990.
  4: optional list<CatalogObjects.TCatalogObject> removed_objects_deprecated

  // The native ptr for calling back NativeGetCatalogUpdate().
  5: required i64 native_iterator_ptr
}

// Response from a TUpdateCatalogCacheRequest.
struct TUpdateCatalogCacheResponse {
  // The catalog service id this version is from.
  1: required Types.TUniqueId catalog_service_id

  // The lower bound of catalog object versions after CatalogUpdate() was processed.
  2: required i64 catalog_object_version_lower_bound

  // The updated catalog version needed by the backend.
  3: required i64 new_catalog_version
}

// Types of executor groups
struct TExecutorGroupSet {
  // The current max number of executors among all healthy groups of this group set.
  1: i32 curr_num_executors = 0

  // The expected size of the executor groups. Can be used to plan queries when
  // no healthy executor groups are present(curr_num_executors is 0).
  2: i32 expected_num_executors = 0

  // The name of the request pool associated with this executor group type. All
  // executor groups that match this prefix will be included as a part of this set.
  // Note: this will be empty when 'default' executor group is used or
  // 'expected_executor_group_sets' startup flag is not specified.
  3: string exec_group_name_prefix

  // The optional max_mem_limit to determine which executor group set to run for a query.
  // The max_mem_limit value is set to the max_query_mem_limit attribute of the group set
  // with name prefix 'exec_group_name_prefix' from the pool service. For each query,
  // the frontend computes the per host estimated-memory after a compilation with a
  // number of executor nodes from this group set and compares it with this variable.
  4: optional i64 max_mem_limit

  // The optional num_cores_per_executor is used to determine which executor group set to
  // run for a query. The num_cores_per_executor value is set to
  // max_query_cpu_core_per_node_limit attribute of the group set with name prefix
  // 'exec_group_name_prefix' from the pool service.
  // The total number of CPU cores among all executors in this executor group equals
  // num_cores_per_executor * curr_num_executors if curr_num_executors is greater than 0,
  // otherwise it equals num_cores_per_executor * expected_num_executors.
  // For each query, the frontend computes the estimated total CPU core count required
  // for a query to run efficiently after a compilation with a number of executor nodes
  // from this group set and compares it with the total number of CPU cores in this
  // executor group.
  5: optional i32 num_cores_per_executor
}

// Sent from the impalad BE to FE with the latest membership snapshot of the
// executors on the cluster resulting from the Membership heartbeat.
struct TUpdateExecutorMembershipRequest {
  // The hostnames of the executor nodes.
  // Note: There can be multiple executors running on the same host.
  1: required set<string> hostnames

  // The ip addresses of the executor nodes.
  // Note: There can be multiple executors running on the same ip addresses.
  2: required set<string> ip_addresses

  // Info about existing executor group sets.
  3: list<TExecutorGroupSet> exec_group_sets
}

// Contains all interesting statistics from a single 'memory pool' in the JVM.
// All numeric values are measured in bytes.
struct TJvmMemoryPool {
  // Memory committed by the operating system to this pool (i.e. not just virtual address
  // space)
  1: required i64 committed

  // The initial amount of memory committed to this pool
  2: required i64 init

  // The maximum amount of memory this pool will use.
  3: required i64 max

  // The amount of memory currently in use by this pool (will be <= committed).
  4: required i64 used

  // Maximum committed memory over time
  5: required i64 peak_committed

  // Should be always == init
  6: required i64 peak_init

  // Peak maximum memory over time (usually will not change)
  7: required i64 peak_max

  // Peak consumed memory over time
  8: required i64 peak_used

  // Name of this pool, defined by the JVM
  9: required string name
}

// Response from JniUtil::GetJvmMemoryMetrics()
struct TGetJvmMemoryMetricsResponse {
  // One entry for every pool tracked by the Jvm, plus a synthetic aggregate pool called
  // 'total'
  1: required list<TJvmMemoryPool> memory_pools

  // Metrics from JvmPauseMonitor, measuring how much time is spent
  // pausing, presumably because of Garbage Collection. These
  // names are consistent with Hadoop's metric names.
  2: required i64 gc_num_warn_threshold_exceeded
  3: required i64 gc_num_info_threshold_exceeded
  4: required i64 gc_total_extra_sleep_time_millis

  // Metrics for JVM Garbage Collection, from the management beans;
  // these are cumulative across all types of GCs.
  5: required i64 gc_count
  6: required i64 gc_time_millis
}

// Contains information about a JVM thread
struct TJvmThreadInfo {
  // Summary of a JVM thread. Includes stacktraces, locked monitors
  // and synchronizers.
  1: required string summary

  // The total CPU time for this thread in nanoseconds
  2: required i64 cpu_time_in_ns

  // The CPU time that this thread has executed in user mode in nanoseconds
  3: required i64 user_time_in_ns

  // The number of times this thread blocked to enter or reenter a monitor
  4: required i64 blocked_count

  // Approximate accumulated elapsed time (in milliseconds) that this thread has blocked
  // to enter or reenter a monitor
  5: required i64 blocked_time_in_ms

  // True if this thread is executing native code via the Java Native Interface (JNI)
  6: required bool is_in_native
}

// Request to get information about JVM threads
struct TGetJvmThreadsInfoRequest {
  // If set, return complete info about JVM threads. Otherwise, return only
  // the total number of live JVM threads.
  1: required bool get_complete_info
}

// Response to a TGetJvmThreadsInfoRequest.
struct TGetJvmThreadsInfoResponse {
  // The current number of live threads including both daemon and non-daemon threads
  1: required i32 total_thread_count

  // The current number of live daemon threads
  2: required i32 daemon_thread_count

  // The peak live thread count since the Java virtual machine started
  3: required i32 peak_thread_count

  // Information about JVM threads. It is not included when
  // TGetJvmThreadsInfoRequest.get_complete_info is false.
  4: optional list<TJvmThreadInfo> threads
}

// Response carrying the JVM's JMX data as a JSON string.
struct TGetJMXJsonResponse {
  // JMX of the JVM serialized to a json string.
  1: required string jmx_json
}

// Request for the value of a single config property, looked up by name.
struct TGetHadoopConfigRequest {
  // The value of the <name> in the config <property>
  1: required string name
}

// Response to a TGetHadoopConfigRequest.
struct TGetHadoopConfigResponse {
  // The corresponding value if one exists
  1: optional string value
}

// NOTE(review): presumably maps config property names to their values - confirm
// against the producer.
struct TGetAllHadoopConfigsResponse {
  1: optional map<string, string> configs;
}

// Request for the groups that a user belongs to.
struct TGetHadoopGroupsRequest {
  // The user name to get the groups from.
  1: required string user
}

// Response to a TGetHadoopGroupsRequest.
struct TGetHadoopGroupsResponse {
  // The list of groups that the user belongs to.
  1: required list<string> groups
}

// For creating a test descriptor table. The tuples and their memory layout are computed
// in the FE.
struct TBuildTestDescriptorTableParams {
  // Every entry describes the slot types of one tuple.
  1: required list<list<Types.TColumnType>> slot_types
}

// Output format for generating a testcase for a given query_stmt. The resulting bytes
// are compressed before writing to a file.
// TODO: Add the EXPLAIN string from the source cluster on which the testcase was
// collected.
struct TTestCaseData {
  // Query statemnt for which this test case data is generated.
  1: required string query_stmt

  // All referenced table and view defs.
  2: optional list<CatalogObjects.TTable> tables_and_views

  // All databases referenced in the query.
  3: optional list<CatalogObjects.TDatabase> dbs

  // Output path
  4: required string testcase_data_path

  // Impala version that was used to generate this testcase.
  // TODO: How to deal with version incompatibilities? E.g: A testcase collected on
  // Impala version v1 may or may not be compatible to Impala version v2 if the
  // underlying thrift layout changes.
5: required string impala_version } // Information about a query sent to the FE QueryEventHooks // after query execution struct TQueryCompleteContext { // the serialized lineage graph of the query, with optional BE-populated information // // this is an experimental feature and the format will likely change // in a future version 1: required string lineage_string } // Contains all information from a HTTP request. // Currently used to pass from BE to FE to do SAML authentication in Java. struct TWrappedHttpRequest { 1: required string method // Currently only POST is used. // The following members come from parsing the URL: // server_name:server_port/path?params... 2: required string server_name 3: required i32 server_port 4: required string path 5: required map<string, string> params // Headers and cookies come from parsing the HTTP header. 6: required map<string, string> headers 7: required map<string, string> cookies // Filling the content is optional to allow inspecting the header in FE and // continue processing the request in BE. 8: optional string content 9: required string remote_ip 10: required bool secure // True if TLS/SSL was used. } // Contains all information needed to respond to a HTTP request. // Currently used to pass from FE to BE to do SAML authentication in Java. struct TWrappedHttpResponse { 1: required i16 status_code 2: required string status_text 3: required map<string, string> headers 4: required map<string, string> cookies 5: optional string content 6: optional string content_type } // Captures civil time - local time in a specific time zone - mirroring // cctz::civil_second. Used to serialize Java timezone conversions back to C++ code. 
// Omits subsecond measurements because
// - matches cctz::civil_second; no known timezone libraries have subsecond adjustments
// - Java timezone conversion is only accurate to milliseconds, but we use nanoseconds
struct TCivilTime {
  1: required i32 year
  2: required i32 month
  3: required i32 day
  4: required i32 hour
  5: required i32 minute
  6: required i32 second
}

common/thrift/Frontend.thrift (490 lines of code) (raw):