# python/pyspark/sql/connect/proto/relations_pb2.pyi

# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # """ @generated by mypy-protobuf. Do not edit manually! isort:skip_file Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import builtins import collections.abc import google.protobuf.any_pb2 import google.protobuf.descriptor import google.protobuf.internal.containers import google.protobuf.internal.enum_type_wrapper import google.protobuf.message import pyspark.sql.connect.proto.catalog_pb2 import pyspark.sql.connect.proto.expressions_pb2 import pyspark.sql.connect.proto.types_pb2 import sys import typing if sys.version_info >= (3, 10): import typing as typing_extensions else: import typing_extensions DESCRIPTOR: google.protobuf.descriptor.FileDescriptor class Relation(google.protobuf.message.Message): """The main [[Relation]] type. Fundamentally, a relation is a typed container that has exactly one explicit relation type set. When adding new relation types, they have to be registered here. 
""" DESCRIPTOR: google.protobuf.descriptor.Descriptor COMMON_FIELD_NUMBER: builtins.int READ_FIELD_NUMBER: builtins.int PROJECT_FIELD_NUMBER: builtins.int FILTER_FIELD_NUMBER: builtins.int JOIN_FIELD_NUMBER: builtins.int SET_OP_FIELD_NUMBER: builtins.int SORT_FIELD_NUMBER: builtins.int LIMIT_FIELD_NUMBER: builtins.int AGGREGATE_FIELD_NUMBER: builtins.int SQL_FIELD_NUMBER: builtins.int LOCAL_RELATION_FIELD_NUMBER: builtins.int SAMPLE_FIELD_NUMBER: builtins.int OFFSET_FIELD_NUMBER: builtins.int DEDUPLICATE_FIELD_NUMBER: builtins.int RANGE_FIELD_NUMBER: builtins.int SUBQUERY_ALIAS_FIELD_NUMBER: builtins.int REPARTITION_FIELD_NUMBER: builtins.int TO_DF_FIELD_NUMBER: builtins.int WITH_COLUMNS_RENAMED_FIELD_NUMBER: builtins.int SHOW_STRING_FIELD_NUMBER: builtins.int DROP_FIELD_NUMBER: builtins.int TAIL_FIELD_NUMBER: builtins.int WITH_COLUMNS_FIELD_NUMBER: builtins.int HINT_FIELD_NUMBER: builtins.int UNPIVOT_FIELD_NUMBER: builtins.int TO_SCHEMA_FIELD_NUMBER: builtins.int REPARTITION_BY_EXPRESSION_FIELD_NUMBER: builtins.int MAP_PARTITIONS_FIELD_NUMBER: builtins.int COLLECT_METRICS_FIELD_NUMBER: builtins.int PARSE_FIELD_NUMBER: builtins.int GROUP_MAP_FIELD_NUMBER: builtins.int CO_GROUP_MAP_FIELD_NUMBER: builtins.int WITH_WATERMARK_FIELD_NUMBER: builtins.int APPLY_IN_PANDAS_WITH_STATE_FIELD_NUMBER: builtins.int HTML_STRING_FIELD_NUMBER: builtins.int CACHED_LOCAL_RELATION_FIELD_NUMBER: builtins.int CACHED_REMOTE_RELATION_FIELD_NUMBER: builtins.int COMMON_INLINE_USER_DEFINED_TABLE_FUNCTION_FIELD_NUMBER: builtins.int FILL_NA_FIELD_NUMBER: builtins.int DROP_NA_FIELD_NUMBER: builtins.int REPLACE_FIELD_NUMBER: builtins.int SUMMARY_FIELD_NUMBER: builtins.int CROSSTAB_FIELD_NUMBER: builtins.int DESCRIBE_FIELD_NUMBER: builtins.int COV_FIELD_NUMBER: builtins.int CORR_FIELD_NUMBER: builtins.int APPROX_QUANTILE_FIELD_NUMBER: builtins.int FREQ_ITEMS_FIELD_NUMBER: builtins.int SAMPLE_BY_FIELD_NUMBER: builtins.int CATALOG_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int UNKNOWN_FIELD_NUMBER: builtins.int @property def common(self) -> global___RelationCommon: ... @property def read(self) -> global___Read: ... @property def project(self) -> global___Project: ... @property def filter(self) -> global___Filter: ... @property def join(self) -> global___Join: ... @property def set_op(self) -> global___SetOperation: ... @property def sort(self) -> global___Sort: ... @property def limit(self) -> global___Limit: ... @property def aggregate(self) -> global___Aggregate: ... @property def sql(self) -> global___SQL: ... @property def local_relation(self) -> global___LocalRelation: ... @property def sample(self) -> global___Sample: ... @property def offset(self) -> global___Offset: ... @property def deduplicate(self) -> global___Deduplicate: ... @property def range(self) -> global___Range: ... @property def subquery_alias(self) -> global___SubqueryAlias: ... @property def repartition(self) -> global___Repartition: ... @property def to_df(self) -> global___ToDF: ... @property def with_columns_renamed(self) -> global___WithColumnsRenamed: ... @property def show_string(self) -> global___ShowString: ... @property def drop(self) -> global___Drop: ... @property def tail(self) -> global___Tail: ... @property def with_columns(self) -> global___WithColumns: ... @property def hint(self) -> global___Hint: ... @property def unpivot(self) -> global___Unpivot: ... @property def to_schema(self) -> global___ToSchema: ... @property def repartition_by_expression(self) -> global___RepartitionByExpression: ... 
@property def map_partitions(self) -> global___MapPartitions: ... @property def collect_metrics(self) -> global___CollectMetrics: ... @property def parse(self) -> global___Parse: ... @property def group_map(self) -> global___GroupMap: ... @property def co_group_map(self) -> global___CoGroupMap: ... @property def with_watermark(self) -> global___WithWatermark: ... @property def apply_in_pandas_with_state(self) -> global___ApplyInPandasWithState: ... @property def html_string(self) -> global___HtmlString: ... @property def cached_local_relation(self) -> global___CachedLocalRelation: ... @property def cached_remote_relation(self) -> global___CachedRemoteRelation: ... @property def common_inline_user_defined_table_function( self, ) -> global___CommonInlineUserDefinedTableFunction: ... @property def fill_na(self) -> global___NAFill: """NA functions""" @property def drop_na(self) -> global___NADrop: ... @property def replace(self) -> global___NAReplace: ... @property def summary(self) -> global___StatSummary: """stat functions""" @property def crosstab(self) -> global___StatCrosstab: ... @property def describe(self) -> global___StatDescribe: ... @property def cov(self) -> global___StatCov: ... @property def corr(self) -> global___StatCorr: ... @property def approx_quantile(self) -> global___StatApproxQuantile: ... @property def freq_items(self) -> global___StatFreqItems: ... @property def sample_by(self) -> global___StatSampleBy: ... @property def catalog(self) -> pyspark.sql.connect.proto.catalog_pb2.Catalog: """Catalog API (experimental / unstable)""" @property def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done. """ @property def unknown(self) -> global___Unknown: ... 
def __init__( self, *, common: global___RelationCommon | None = ..., read: global___Read | None = ..., project: global___Project | None = ..., filter: global___Filter | None = ..., join: global___Join | None = ..., set_op: global___SetOperation | None = ..., sort: global___Sort | None = ..., limit: global___Limit | None = ..., aggregate: global___Aggregate | None = ..., sql: global___SQL | None = ..., local_relation: global___LocalRelation | None = ..., sample: global___Sample | None = ..., offset: global___Offset | None = ..., deduplicate: global___Deduplicate | None = ..., range: global___Range | None = ..., subquery_alias: global___SubqueryAlias | None = ..., repartition: global___Repartition | None = ..., to_df: global___ToDF | None = ..., with_columns_renamed: global___WithColumnsRenamed | None = ..., show_string: global___ShowString | None = ..., drop: global___Drop | None = ..., tail: global___Tail | None = ..., with_columns: global___WithColumns | None = ..., hint: global___Hint | None = ..., unpivot: global___Unpivot | None = ..., to_schema: global___ToSchema | None = ..., repartition_by_expression: global___RepartitionByExpression | None = ..., map_partitions: global___MapPartitions | None = ..., collect_metrics: global___CollectMetrics | None = ..., parse: global___Parse | None = ..., group_map: global___GroupMap | None = ..., co_group_map: global___CoGroupMap | None = ..., with_watermark: global___WithWatermark | None = ..., apply_in_pandas_with_state: global___ApplyInPandasWithState | None = ..., html_string: global___HtmlString | None = ..., cached_local_relation: global___CachedLocalRelation | None = ..., cached_remote_relation: global___CachedRemoteRelation | None = ..., common_inline_user_defined_table_function: global___CommonInlineUserDefinedTableFunction | None = ..., fill_na: global___NAFill | None = ..., drop_na: global___NADrop | None = ..., replace: global___NAReplace | None = ..., summary: global___StatSummary | None = ..., crosstab: global___StatCrosstab | None = ..., describe: global___StatDescribe | None = ..., cov: global___StatCov | None = ..., corr: global___StatCorr | None = ..., approx_quantile: global___StatApproxQuantile | None = ..., freq_items: global___StatFreqItems | None = ..., sample_by: global___StatSampleBy | None = ..., catalog: pyspark.sql.connect.proto.catalog_pb2.Catalog | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., unknown: global___Unknown | None = ..., ) -> None: ... 
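# Illustrative sketch: building a Relation that wraps a Read of a named table.
# A minimal example using only classes declared in this module; the table name
# and the option key/value are made-up placeholders.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     read = relations_pb2.Read(
#         named_table=relations_pb2.Read.NamedTable(
#             unparsed_identifier="main.people",     # placeholder table name
#             options={"someOption": "someValue"},   # map key is case insensitive
#         )
#     )
#     rel = relations_pb2.Relation(read=read)
#
#     # Exactly one field of the `rel_type` oneof is set at a time; setting a
#     # different relation type (e.g. `rel.sql.CopyFrom(...)`) clears `read`.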
def HasField( self, field_name: typing_extensions.Literal[ "aggregate", b"aggregate", "apply_in_pandas_with_state", b"apply_in_pandas_with_state", "approx_quantile", b"approx_quantile", "cached_local_relation", b"cached_local_relation", "cached_remote_relation", b"cached_remote_relation", "catalog", b"catalog", "co_group_map", b"co_group_map", "collect_metrics", b"collect_metrics", "common", b"common", "common_inline_user_defined_table_function", b"common_inline_user_defined_table_function", "corr", b"corr", "cov", b"cov", "crosstab", b"crosstab", "deduplicate", b"deduplicate", "describe", b"describe", "drop", b"drop", "drop_na", b"drop_na", "extension", b"extension", "fill_na", b"fill_na", "filter", b"filter", "freq_items", b"freq_items", "group_map", b"group_map", "hint", b"hint", "html_string", b"html_string", "join", b"join", "limit", b"limit", "local_relation", b"local_relation", "map_partitions", b"map_partitions", "offset", b"offset", "parse", b"parse", "project", b"project", "range", b"range", "read", b"read", "rel_type", b"rel_type", "repartition", b"repartition", "repartition_by_expression", b"repartition_by_expression", "replace", b"replace", "sample", b"sample", "sample_by", b"sample_by", "set_op", b"set_op", "show_string", b"show_string", "sort", b"sort", "sql", b"sql", "subquery_alias", b"subquery_alias", "summary", b"summary", "tail", b"tail", "to_df", b"to_df", "to_schema", b"to_schema", "unknown", b"unknown", "unpivot", b"unpivot", "with_columns", b"with_columns", "with_columns_renamed", b"with_columns_renamed", "with_watermark", b"with_watermark", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "aggregate", b"aggregate", "apply_in_pandas_with_state", b"apply_in_pandas_with_state", "approx_quantile", b"approx_quantile", "cached_local_relation", b"cached_local_relation", "cached_remote_relation", b"cached_remote_relation", "catalog", b"catalog", "co_group_map", b"co_group_map", "collect_metrics", b"collect_metrics", "common", b"common", "common_inline_user_defined_table_function", b"common_inline_user_defined_table_function", "corr", b"corr", "cov", b"cov", "crosstab", b"crosstab", "deduplicate", b"deduplicate", "describe", b"describe", "drop", b"drop", "drop_na", b"drop_na", "extension", b"extension", "fill_na", b"fill_na", "filter", b"filter", "freq_items", b"freq_items", "group_map", b"group_map", "hint", b"hint", "html_string", b"html_string", "join", b"join", "limit", b"limit", "local_relation", b"local_relation", "map_partitions", b"map_partitions", "offset", b"offset", "parse", b"parse", "project", b"project", "range", b"range", "read", b"read", "rel_type", b"rel_type", "repartition", b"repartition", "repartition_by_expression", b"repartition_by_expression", "replace", b"replace", "sample", b"sample", "sample_by", b"sample_by", "set_op", b"set_op", "show_string", b"show_string", "sort", b"sort", "sql", b"sql", "subquery_alias", b"subquery_alias", "summary", b"summary", "tail", b"tail", "to_df", b"to_df", "to_schema", b"to_schema", "unknown", b"unknown", "unpivot", b"unpivot", "with_columns", b"with_columns", "with_columns_renamed", b"with_columns_renamed", "with_watermark", b"with_watermark", ], ) -> None: ... 
def WhichOneof( self, oneof_group: typing_extensions.Literal["rel_type", b"rel_type"] ) -> typing_extensions.Literal[ "read", "project", "filter", "join", "set_op", "sort", "limit", "aggregate", "sql", "local_relation", "sample", "offset", "deduplicate", "range", "subquery_alias", "repartition", "to_df", "with_columns_renamed", "show_string", "drop", "tail", "with_columns", "hint", "unpivot", "to_schema", "repartition_by_expression", "map_partitions", "collect_metrics", "parse", "group_map", "co_group_map", "with_watermark", "apply_in_pandas_with_state", "html_string", "cached_local_relation", "cached_remote_relation", "common_inline_user_defined_table_function", "fill_na", "drop_na", "replace", "summary", "crosstab", "describe", "cov", "corr", "approx_quantile", "freq_items", "sample_by", "catalog", "extension", "unknown", ] | None: ... global___Relation = Relation class Unknown(google.protobuf.message.Message): """Used for testing purposes only.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor def __init__( self, ) -> None: ... global___Unknown = Unknown class RelationCommon(google.protobuf.message.Message): """Common metadata of all relations.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor SOURCE_INFO_FIELD_NUMBER: builtins.int PLAN_ID_FIELD_NUMBER: builtins.int source_info: builtins.str """(Required) Shared relation metadata.""" plan_id: builtins.int """(Optional) A per-client globally unique id for a given connect plan.""" def __init__( self, *, source_info: builtins.str = ..., plan_id: builtins.int | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id", "plan_id", b"plan_id"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_plan_id", b"_plan_id", "plan_id", b"plan_id", "source_info", b"source_info" ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_plan_id", b"_plan_id"] ) -> typing_extensions.Literal["plan_id"] | None: ... global___RelationCommon = RelationCommon class SQL(google.protobuf.message.Message): """Relation that uses a SQL query to generate the output.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor class ArgsEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor KEY_FIELD_NUMBER: builtins.int VALUE_FIELD_NUMBER: builtins.int key: builtins.str @property def value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal: ... def __init__( self, *, key: builtins.str = ..., value: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["value", b"value"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] ) -> None: ... 
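# Illustrative sketch: a parameterized SQL relation. It assumes that
# Expression.Literal in expressions_pb2 (not defined in this file) exposes an
# `integer` literal field; the query itself is a placeholder.
#
#     from pyspark.sql.connect.proto import relations_pb2, expressions_pb2
#
#     sql_rel = relations_pb2.SQL(
#         query="SELECT * FROM people WHERE age > :minAge",
#         args={"minAge": expressions_pb2.Expression.Literal(integer=21)},
#     )
#     rel = relations_pb2.Relation(sql=sql_rel)
#
#     # Positional parameters would instead use `pos_args` together with `?`
#     # placeholders in the query text.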
QUERY_FIELD_NUMBER: builtins.int ARGS_FIELD_NUMBER: builtins.int POS_ARGS_FIELD_NUMBER: builtins.int query: builtins.str """(Required) The SQL query.""" @property def args( self, ) -> google.protobuf.internal.containers.MessageMap[ builtins.str, pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ]: """(Optional) A map of parameter names to literal expressions.""" @property def pos_args( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ]: """(Optional) A sequence of literal expressions for positional parameters in the SQL query text.""" def __init__( self, *, query: builtins.str = ..., args: collections.abc.Mapping[ builtins.str, pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ] | None = ..., pos_args: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ] | None = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ "args", b"args", "pos_args", b"pos_args", "query", b"query" ], ) -> None: ... global___SQL = SQL class Read(google.protobuf.message.Message): """Relation that reads from a file / table or other data source. Does not have additional inputs. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor class NamedTable(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor class OptionsEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor KEY_FIELD_NUMBER: builtins.int VALUE_FIELD_NUMBER: builtins.int key: builtins.str value: builtins.str def __init__( self, *, key: builtins.str = ..., value: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] ) -> None: ... UNPARSED_IDENTIFIER_FIELD_NUMBER: builtins.int OPTIONS_FIELD_NUMBER: builtins.int unparsed_identifier: builtins.str """(Required) Unparsed identifier for the table.""" @property def options( self, ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]: """Options for the named table. The map key is case insensitive.""" def __init__( self, *, unparsed_identifier: builtins.str = ..., options: collections.abc.Mapping[builtins.str, builtins.str] | None = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ "options", b"options", "unparsed_identifier", b"unparsed_identifier" ], ) -> None: ... class DataSource(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor class OptionsEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor KEY_FIELD_NUMBER: builtins.int VALUE_FIELD_NUMBER: builtins.int key: builtins.str value: builtins.str def __init__( self, *, key: builtins.str = ..., value: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] ) -> None: ... FORMAT_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int OPTIONS_FIELD_NUMBER: builtins.int PATHS_FIELD_NUMBER: builtins.int PREDICATES_FIELD_NUMBER: builtins.int format: builtins.str """(Optional) Supported formats include: parquet, orc, text, json, parquet, csv, avro. If not set, the value from SQL conf 'spark.sql.sources.default' will be used. """ schema: builtins.str """(Optional) If not set, Spark will infer the schema. This schema string should be either DDL-formatted or JSON-formatted. 
""" @property def options( self, ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]: """Options for the data source. The context of this map varies based on the data source format. This options could be empty for valid data source format. The map key is case insensitive. """ @property def paths( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) A list of path for file-system backed data sources.""" @property def predicates( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Condition in the where clause for each partition. This is only supported by the JDBC data source. """ def __init__( self, *, format: builtins.str | None = ..., schema: builtins.str | None = ..., options: collections.abc.Mapping[builtins.str, builtins.str] | None = ..., paths: collections.abc.Iterable[builtins.str] | None = ..., predicates: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_format", b"_format", "_schema", b"_schema", "format", b"format", "schema", b"schema", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_format", b"_format", "_schema", b"_schema", "format", b"format", "options", b"options", "paths", b"paths", "predicates", b"predicates", "schema", b"schema", ], ) -> None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_format", b"_format"] ) -> typing_extensions.Literal["format"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_schema", b"_schema"] ) -> typing_extensions.Literal["schema"] | None: ... NAMED_TABLE_FIELD_NUMBER: builtins.int DATA_SOURCE_FIELD_NUMBER: builtins.int IS_STREAMING_FIELD_NUMBER: builtins.int @property def named_table(self) -> global___Read.NamedTable: ... @property def data_source(self) -> global___Read.DataSource: ... is_streaming: builtins.bool """(Optional) Indicates if this is a streaming read.""" def __init__( self, *, named_table: global___Read.NamedTable | None = ..., data_source: global___Read.DataSource | None = ..., is_streaming: builtins.bool = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "data_source", b"data_source", "named_table", b"named_table", "read_type", b"read_type" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "data_source", b"data_source", "is_streaming", b"is_streaming", "named_table", b"named_table", "read_type", b"read_type", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["read_type", b"read_type"] ) -> typing_extensions.Literal["named_table", "data_source"] | None: ... global___Read = Read class Project(google.protobuf.message.Message): """Projection of a bag of expressions for a given input relation. The input relation must be specified. The projected expression can be an arbitrary expression. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int EXPRESSIONS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Optional) Input relation is optional for Project. For example, `SELECT ABS(-1)` is valid plan without an input plan. 
""" @property def expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) A Project requires at least one expression.""" def __init__( self, *, input: global___Relation | None = ..., expressions: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["expressions", b"expressions", "input", b"input"], ) -> None: ... global___Project = Project class Filter(google.protobuf.message.Message): """Relation that applies a boolean expression `condition` on each row of `input` to produce the output result. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int CONDITION_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a Filter.""" @property def condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: """(Required) A Filter must have a condition expression.""" def __init__( self, *, input: global___Relation | None = ..., condition: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["condition", b"condition", "input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["condition", b"condition", "input", b"input"] ) -> None: ... global___Filter = Filter class Join(google.protobuf.message.Message): """Relation of type [[Join]]. `left` and `right` must be present. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor class _JoinType: ValueType = typing.NewType("ValueType", builtins.int) V: typing_extensions.TypeAlias = ValueType class _JoinTypeEnumTypeWrapper( google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Join._JoinType.ValueType], builtins.type, ): # noqa: F821 DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor JOIN_TYPE_UNSPECIFIED: Join._JoinType.ValueType # 0 JOIN_TYPE_INNER: Join._JoinType.ValueType # 1 JOIN_TYPE_FULL_OUTER: Join._JoinType.ValueType # 2 JOIN_TYPE_LEFT_OUTER: Join._JoinType.ValueType # 3 JOIN_TYPE_RIGHT_OUTER: Join._JoinType.ValueType # 4 JOIN_TYPE_LEFT_ANTI: Join._JoinType.ValueType # 5 JOIN_TYPE_LEFT_SEMI: Join._JoinType.ValueType # 6 JOIN_TYPE_CROSS: Join._JoinType.ValueType # 7 class JoinType(_JoinType, metaclass=_JoinTypeEnumTypeWrapper): ... JOIN_TYPE_UNSPECIFIED: Join.JoinType.ValueType # 0 JOIN_TYPE_INNER: Join.JoinType.ValueType # 1 JOIN_TYPE_FULL_OUTER: Join.JoinType.ValueType # 2 JOIN_TYPE_LEFT_OUTER: Join.JoinType.ValueType # 3 JOIN_TYPE_RIGHT_OUTER: Join.JoinType.ValueType # 4 JOIN_TYPE_LEFT_ANTI: Join.JoinType.ValueType # 5 JOIN_TYPE_LEFT_SEMI: Join.JoinType.ValueType # 6 JOIN_TYPE_CROSS: Join.JoinType.ValueType # 7 class JoinDataType(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor IS_LEFT_STRUCT_FIELD_NUMBER: builtins.int IS_RIGHT_STRUCT_FIELD_NUMBER: builtins.int is_left_struct: builtins.bool """If the left data type is a struct.""" is_right_struct: builtins.bool """If the right data type is a struct.""" def __init__( self, *, is_left_struct: builtins.bool = ..., is_right_struct: builtins.bool = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ "is_left_struct", b"is_left_struct", "is_right_struct", b"is_right_struct" ], ) -> None: ... 
LEFT_FIELD_NUMBER: builtins.int RIGHT_FIELD_NUMBER: builtins.int JOIN_CONDITION_FIELD_NUMBER: builtins.int JOIN_TYPE_FIELD_NUMBER: builtins.int USING_COLUMNS_FIELD_NUMBER: builtins.int JOIN_DATA_TYPE_FIELD_NUMBER: builtins.int @property def left(self) -> global___Relation: """(Required) Left input relation for a Join.""" @property def right(self) -> global___Relation: """(Required) Right input relation for a Join.""" @property def join_condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: """(Optional) The join condition. Could be unset when `using_columns` is utilized. This field does not co-exist with using_columns. """ join_type: global___Join.JoinType.ValueType """(Required) The join type.""" @property def using_columns( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """Optional. using_columns provides a list of columns that should present on both sides of the join inputs that this Join will join on. For example A JOIN B USING col_name is equivalent to A JOIN B on A.col_name = B.col_name. This field does not co-exist with join_condition. """ @property def join_data_type(self) -> global___Join.JoinDataType: """(Optional) Only used by joinWith. Set the left and right join data types.""" def __init__( self, *, left: global___Relation | None = ..., right: global___Relation | None = ..., join_condition: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ..., join_type: global___Join.JoinType.ValueType = ..., using_columns: collections.abc.Iterable[builtins.str] | None = ..., join_data_type: global___Join.JoinDataType | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_join_data_type", b"_join_data_type", "join_condition", b"join_condition", "join_data_type", b"join_data_type", "left", b"left", "right", b"right", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_join_data_type", b"_join_data_type", "join_condition", b"join_condition", "join_data_type", b"join_data_type", "join_type", b"join_type", "left", b"left", "right", b"right", "using_columns", b"using_columns", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_join_data_type", b"_join_data_type"] ) -> typing_extensions.Literal["join_data_type"] | None: ... global___Join = Join class SetOperation(google.protobuf.message.Message): """Relation of type [[SetOperation]]""" DESCRIPTOR: google.protobuf.descriptor.Descriptor class _SetOpType: ValueType = typing.NewType("ValueType", builtins.int) V: typing_extensions.TypeAlias = ValueType class _SetOpTypeEnumTypeWrapper( google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[ SetOperation._SetOpType.ValueType ], builtins.type, ): # noqa: F821 DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor SET_OP_TYPE_UNSPECIFIED: SetOperation._SetOpType.ValueType # 0 SET_OP_TYPE_INTERSECT: SetOperation._SetOpType.ValueType # 1 SET_OP_TYPE_UNION: SetOperation._SetOpType.ValueType # 2 SET_OP_TYPE_EXCEPT: SetOperation._SetOpType.ValueType # 3 class SetOpType(_SetOpType, metaclass=_SetOpTypeEnumTypeWrapper): ... 
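# Illustrative sketch: an inner Join on a shared column, i.e. `A JOIN B USING (id)`.
# `left_rel` and `right_rel` are placeholders for previously built Relation messages.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     join = relations_pb2.Join(
#         left=left_rel,
#         right=right_rel,
#         join_type=relations_pb2.Join.JOIN_TYPE_INNER,
#         using_columns=["id"],    # mutually exclusive with join_condition
#     )
#     rel = relations_pb2.Relation(join=join)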
SET_OP_TYPE_UNSPECIFIED: SetOperation.SetOpType.ValueType # 0 SET_OP_TYPE_INTERSECT: SetOperation.SetOpType.ValueType # 1 SET_OP_TYPE_UNION: SetOperation.SetOpType.ValueType # 2 SET_OP_TYPE_EXCEPT: SetOperation.SetOpType.ValueType # 3 LEFT_INPUT_FIELD_NUMBER: builtins.int RIGHT_INPUT_FIELD_NUMBER: builtins.int SET_OP_TYPE_FIELD_NUMBER: builtins.int IS_ALL_FIELD_NUMBER: builtins.int BY_NAME_FIELD_NUMBER: builtins.int ALLOW_MISSING_COLUMNS_FIELD_NUMBER: builtins.int @property def left_input(self) -> global___Relation: """(Required) Left input relation for a Set operation.""" @property def right_input(self) -> global___Relation: """(Required) Right input relation for a Set operation.""" set_op_type: global___SetOperation.SetOpType.ValueType """(Required) The Set operation type.""" is_all: builtins.bool """(Optional) If to remove duplicate rows. True to preserve all results. False to remove duplicate rows. """ by_name: builtins.bool """(Optional) If to perform the Set operation based on name resolution. Only UNION supports this option. """ allow_missing_columns: builtins.bool """(Optional) If to perform the Set operation and allow missing columns. Only UNION supports this option. """ def __init__( self, *, left_input: global___Relation | None = ..., right_input: global___Relation | None = ..., set_op_type: global___SetOperation.SetOpType.ValueType = ..., is_all: builtins.bool | None = ..., by_name: builtins.bool | None = ..., allow_missing_columns: builtins.bool | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_allow_missing_columns", b"_allow_missing_columns", "_by_name", b"_by_name", "_is_all", b"_is_all", "allow_missing_columns", b"allow_missing_columns", "by_name", b"by_name", "is_all", b"is_all", "left_input", b"left_input", "right_input", b"right_input", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_allow_missing_columns", b"_allow_missing_columns", "_by_name", b"_by_name", "_is_all", b"_is_all", "allow_missing_columns", b"allow_missing_columns", "by_name", b"by_name", "is_all", b"is_all", "left_input", b"left_input", "right_input", b"right_input", "set_op_type", b"set_op_type", ], ) -> None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_allow_missing_columns", b"_allow_missing_columns"], ) -> typing_extensions.Literal["allow_missing_columns"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_by_name", b"_by_name"] ) -> typing_extensions.Literal["by_name"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_is_all", b"_is_all"] ) -> typing_extensions.Literal["is_all"] | None: ... global___SetOperation = SetOperation class Limit(google.protobuf.message.Message): """Relation of type [[Limit]] that is used to `limit` rows from the input relation.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int LIMIT_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a Limit.""" limit: builtins.int """(Required) the limit.""" def __init__( self, *, input: global___Relation | None = ..., limit: builtins.int = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["input", b"input", "limit", b"limit"] ) -> None: ... 
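# Illustrative sketch: a UNION ALL by name, roughly mirroring
# DataFrame.unionByName(..., allowMissingColumns=True). `left_rel` and
# `right_rel` are placeholders for previously built Relation messages.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     union = relations_pb2.SetOperation(
#         left_input=left_rel,
#         right_input=right_rel,
#         set_op_type=relations_pb2.SetOperation.SET_OP_TYPE_UNION,
#         is_all=True,                  # keep duplicate rows (UNION ALL)
#         by_name=True,
#         allow_missing_columns=True,
#     )
#     rel = relations_pb2.Relation(set_op=union)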
global___Limit = Limit class Offset(google.protobuf.message.Message): """Relation of type [[Offset]] that is used to read rows staring from the `offset` on the input relation. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int OFFSET_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for an Offset.""" offset: builtins.int """(Required) the limit.""" def __init__( self, *, input: global___Relation | None = ..., offset: builtins.int = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["input", b"input", "offset", b"offset"] ) -> None: ... global___Offset = Offset class Tail(google.protobuf.message.Message): """Relation of type [[Tail]] that is used to fetch `limit` rows from the last of the input relation.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int LIMIT_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for an Tail.""" limit: builtins.int """(Required) the limit.""" def __init__( self, *, input: global___Relation | None = ..., limit: builtins.int = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["input", b"input", "limit", b"limit"] ) -> None: ... global___Tail = Tail class Aggregate(google.protobuf.message.Message): """Relation of type [[Aggregate]].""" DESCRIPTOR: google.protobuf.descriptor.Descriptor class _GroupType: ValueType = typing.NewType("ValueType", builtins.int) V: typing_extensions.TypeAlias = ValueType class _GroupTypeEnumTypeWrapper( google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Aggregate._GroupType.ValueType], builtins.type, ): # noqa: F821 DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor GROUP_TYPE_UNSPECIFIED: Aggregate._GroupType.ValueType # 0 GROUP_TYPE_GROUPBY: Aggregate._GroupType.ValueType # 1 GROUP_TYPE_ROLLUP: Aggregate._GroupType.ValueType # 2 GROUP_TYPE_CUBE: Aggregate._GroupType.ValueType # 3 GROUP_TYPE_PIVOT: Aggregate._GroupType.ValueType # 4 class GroupType(_GroupType, metaclass=_GroupTypeEnumTypeWrapper): ... GROUP_TYPE_UNSPECIFIED: Aggregate.GroupType.ValueType # 0 GROUP_TYPE_GROUPBY: Aggregate.GroupType.ValueType # 1 GROUP_TYPE_ROLLUP: Aggregate.GroupType.ValueType # 2 GROUP_TYPE_CUBE: Aggregate.GroupType.ValueType # 3 GROUP_TYPE_PIVOT: Aggregate.GroupType.ValueType # 4 class Pivot(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor COL_FIELD_NUMBER: builtins.int VALUES_FIELD_NUMBER: builtins.int @property def col(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: """(Required) The column to pivot""" @property def values( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ]: """(Optional) List of values that will be translated to columns in the output DataFrame. Note that if it is empty, the server side will immediately trigger a job to collect the distinct values of the column. """ def __init__( self, *, col: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ..., values: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ] | None = ..., ) -> None: ... 
def HasField( self, field_name: typing_extensions.Literal["col", b"col"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["col", b"col", "values", b"values"] ) -> None: ... INPUT_FIELD_NUMBER: builtins.int GROUP_TYPE_FIELD_NUMBER: builtins.int GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int AGGREGATE_EXPRESSIONS_FIELD_NUMBER: builtins.int PIVOT_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a RelationalGroupedDataset.""" group_type: global___Aggregate.GroupType.ValueType """(Required) How the RelationalGroupedDataset was built.""" @property def grouping_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) Expressions for grouping keys""" @property def aggregate_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) List of values that will be translated to columns in the output DataFrame.""" @property def pivot(self) -> global___Aggregate.Pivot: """(Optional) Pivots a column of the current `DataFrame` and performs the specified aggregation.""" def __init__( self, *, input: global___Relation | None = ..., group_type: global___Aggregate.GroupType.ValueType = ..., grouping_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., aggregate_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., pivot: global___Aggregate.Pivot | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input", "pivot", b"pivot"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "aggregate_expressions", b"aggregate_expressions", "group_type", b"group_type", "grouping_expressions", b"grouping_expressions", "input", b"input", "pivot", b"pivot", ], ) -> None: ... global___Aggregate = Aggregate class Sort(google.protobuf.message.Message): """Relation of type [[Sort]].""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int ORDER_FIELD_NUMBER: builtins.int IS_GLOBAL_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a Sort.""" @property def order( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression.SortOrder ]: """(Required) The ordering expressions""" is_global: builtins.bool """(Optional) if this is a global sort.""" def __init__( self, *, input: global___Relation | None = ..., order: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression.SortOrder ] | None = ..., is_global: builtins.bool | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_is_global", b"_is_global", "input", b"input", "is_global", b"is_global" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_is_global", b"_is_global", "input", b"input", "is_global", b"is_global", "order", b"order", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_is_global", b"_is_global"] ) -> typing_extensions.Literal["is_global"] | None: ... 
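# Illustrative sketch: a GROUP BY aggregate, roughly df.groupBy("country").agg(count("id")).
# It assumes the Expression.UnresolvedAttribute / UnresolvedFunction field names
# from expressions_pb2 (not defined in this file); `source` is a placeholder for
# a previously built Relation.
#
#     from pyspark.sql.connect.proto import relations_pb2, expressions_pb2
#
#     country = expressions_pb2.Expression(
#         unresolved_attribute=expressions_pb2.Expression.UnresolvedAttribute(
#             unparsed_identifier="country"))
#     count_id = expressions_pb2.Expression(
#         unresolved_function=expressions_pb2.Expression.UnresolvedFunction(
#             function_name="count",
#             arguments=[expressions_pb2.Expression(
#                 unresolved_attribute=expressions_pb2.Expression.UnresolvedAttribute(
#                     unparsed_identifier="id"))]))
#
#     agg = relations_pb2.Aggregate(
#         input=source,
#         group_type=relations_pb2.Aggregate.GROUP_TYPE_GROUPBY,
#         grouping_expressions=[country],
#         aggregate_expressions=[count_id],
#     )
#     rel = relations_pb2.Relation(aggregate=agg)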
global___Sort = Sort class Drop(google.protobuf.message.Message): """Drop specified columns.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLUMNS_FIELD_NUMBER: builtins.int COLUMN_NAMES_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def columns( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) columns to drop.""" @property def column_names( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) names of columns to drop.""" def __init__( self, *, input: global___Relation | None = ..., columns: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., column_names: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "column_names", b"column_names", "columns", b"columns", "input", b"input" ], ) -> None: ... global___Drop = Drop class Deduplicate(google.protobuf.message.Message): """Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only the subset of columns or all the columns. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLUMN_NAMES_FIELD_NUMBER: builtins.int ALL_COLUMNS_AS_KEYS_FIELD_NUMBER: builtins.int WITHIN_WATERMARK_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a Deduplicate.""" @property def column_names( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Deduplicate based on a list of column names. This field does not co-use with `all_columns_as_keys`. """ all_columns_as_keys: builtins.bool """(Optional) Deduplicate based on all the columns of the input relation. This field does not co-use with `column_names`. """ within_watermark: builtins.bool """(Optional) Deduplicate within the time range of watermark.""" def __init__( self, *, input: global___Relation | None = ..., column_names: collections.abc.Iterable[builtins.str] | None = ..., all_columns_as_keys: builtins.bool | None = ..., within_watermark: builtins.bool | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_all_columns_as_keys", b"_all_columns_as_keys", "_within_watermark", b"_within_watermark", "all_columns_as_keys", b"all_columns_as_keys", "input", b"input", "within_watermark", b"within_watermark", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_all_columns_as_keys", b"_all_columns_as_keys", "_within_watermark", b"_within_watermark", "all_columns_as_keys", b"all_columns_as_keys", "column_names", b"column_names", "input", b"input", "within_watermark", b"within_watermark", ], ) -> None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_all_columns_as_keys", b"_all_columns_as_keys"], ) -> typing_extensions.Literal["all_columns_as_keys"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_within_watermark", b"_within_watermark"] ) -> typing_extensions.Literal["within_watermark"] | None: ... 
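# Illustrative sketch: dropping duplicate rows, either on a subset of columns
# or across all columns. `source` is a placeholder for a previously built Relation.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     # dropDuplicates(["name", "age"])
#     dedup_subset = relations_pb2.Deduplicate(
#         input=source, column_names=["name", "age"])
#
#     # dropDuplicates() over every column; not to be combined with column_names
#     dedup_all = relations_pb2.Deduplicate(
#         input=source, all_columns_as_keys=True)
#
#     rel = relations_pb2.Relation(deduplicate=dedup_subset)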
global___Deduplicate = Deduplicate class LocalRelation(google.protobuf.message.Message): """A relation that does not need to be qualified by name.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor DATA_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int data: builtins.bytes """(Optional) Local collection data serialized into Arrow IPC streaming format which contains the schema of the data. """ schema: builtins.str """(Optional) The schema of local data. It should be either a DDL-formatted type string or a JSON string. The server side will update the column names and data types according to this schema. If the 'data' is not provided, then this schema will be required. """ def __init__( self, *, data: builtins.bytes | None = ..., schema: builtins.str | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_data", b"_data", "_schema", b"_schema", "data", b"data", "schema", b"schema" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_data", b"_data", "_schema", b"_schema", "data", b"data", "schema", b"schema" ], ) -> None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_data", b"_data"] ) -> typing_extensions.Literal["data"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_schema", b"_schema"] ) -> typing_extensions.Literal["schema"] | None: ... global___LocalRelation = LocalRelation class CachedLocalRelation(google.protobuf.message.Message): """A local relation that has been cached already.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor USERID_FIELD_NUMBER: builtins.int SESSIONID_FIELD_NUMBER: builtins.int HASH_FIELD_NUMBER: builtins.int userId: builtins.str """(Required) An identifier of the user which created the local relation""" sessionId: builtins.str """(Required) An identifier of the Spark SQL session in which the user created the local relation.""" hash: builtins.str """(Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation.""" def __init__( self, *, userId: builtins.str = ..., sessionId: builtins.str = ..., hash: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ "hash", b"hash", "sessionId", b"sessionId", "userId", b"userId" ], ) -> None: ... global___CachedLocalRelation = CachedLocalRelation class CachedRemoteRelation(google.protobuf.message.Message): """Represents a remote relation that has been cached on server.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor RELATION_ID_FIELD_NUMBER: builtins.int relation_id: builtins.str """(Required) ID of the remote related (assigned by the service).""" def __init__( self, *, relation_id: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal["relation_id", b"relation_id"] ) -> None: ... 
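# Illustrative sketch: an empty LocalRelation described only by a DDL schema
# string, plus one way a client might derive the CachedLocalRelation hash.
# The hex-encoded sha-256 shown here is an assumption; this file only states
# that the hash is a sha-256 of the serialized LocalRelation proto.
#
#     import hashlib
#     from pyspark.sql.connect.proto import relations_pb2
#
#     local = relations_pb2.LocalRelation(schema="id BIGINT, name STRING")
#     digest = hashlib.sha256(local.SerializeToString()).hexdigest()
#     cached = relations_pb2.CachedLocalRelation(
#         userId="user-1", sessionId="session-1", hash=digest)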
global___CachedRemoteRelation = CachedRemoteRelation class Sample(google.protobuf.message.Message): """Relation of type [[Sample]] that samples a fraction of the dataset.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int LOWER_BOUND_FIELD_NUMBER: builtins.int UPPER_BOUND_FIELD_NUMBER: builtins.int WITH_REPLACEMENT_FIELD_NUMBER: builtins.int SEED_FIELD_NUMBER: builtins.int DETERMINISTIC_ORDER_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a Sample.""" lower_bound: builtins.float """(Required) lower bound.""" upper_bound: builtins.float """(Required) upper bound.""" with_replacement: builtins.bool """(Optional) Whether to sample with replacement.""" seed: builtins.int """(Optional) The random seed.""" deterministic_order: builtins.bool """(Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it. This flag is true when invoking `dataframe.randomSplit` to randomly splits DataFrame with the provided weights. Otherwise, it is false. """ def __init__( self, *, input: global___Relation | None = ..., lower_bound: builtins.float = ..., upper_bound: builtins.float = ..., with_replacement: builtins.bool | None = ..., seed: builtins.int | None = ..., deterministic_order: builtins.bool = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_seed", b"_seed", "_with_replacement", b"_with_replacement", "input", b"input", "seed", b"seed", "with_replacement", b"with_replacement", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_seed", b"_seed", "_with_replacement", b"_with_replacement", "deterministic_order", b"deterministic_order", "input", b"input", "lower_bound", b"lower_bound", "seed", b"seed", "upper_bound", b"upper_bound", "with_replacement", b"with_replacement", ], ) -> None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_seed", b"_seed"] ) -> typing_extensions.Literal["seed"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_with_replacement", b"_with_replacement"] ) -> typing_extensions.Literal["with_replacement"] | None: ... global___Sample = Sample class Range(google.protobuf.message.Message): """Relation of type [[Range]] that generates a sequence of integers.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor START_FIELD_NUMBER: builtins.int END_FIELD_NUMBER: builtins.int STEP_FIELD_NUMBER: builtins.int NUM_PARTITIONS_FIELD_NUMBER: builtins.int start: builtins.int """(Optional) Default value = 0""" end: builtins.int """(Required)""" step: builtins.int """(Required)""" num_partitions: builtins.int """Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if it is set, or 2) spark default parallelism. """ def __init__( self, *, start: builtins.int | None = ..., end: builtins.int = ..., step: builtins.int = ..., num_partitions: builtins.int | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_num_partitions", b"_num_partitions", "_start", b"_start", "num_partitions", b"num_partitions", "start", b"start", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_num_partitions", b"_num_partitions", "_start", b"_start", "end", b"end", "num_partitions", b"num_partitions", "start", b"start", "step", b"step", ], ) -> None: ... 
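# Illustrative sketch: the equivalent of spark.range(0, 10, step=2, numPartitions=4),
# using only classes declared in this module.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     rng = relations_pb2.Range(start=0, end=10, step=2, num_partitions=4)
#     rel = relations_pb2.Relation(range=rng)
#
#     rng.HasField("start")                                   # True: set explicitly
#     relations_pb2.Range(end=10, step=1).HasField("start")   # False: defaults to 0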
@typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_num_partitions", b"_num_partitions"] ) -> typing_extensions.Literal["num_partitions"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_start", b"_start"] ) -> typing_extensions.Literal["start"] | None: ... global___Range = Range class SubqueryAlias(google.protobuf.message.Message): """Relation alias.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int ALIAS_FIELD_NUMBER: builtins.int QUALIFIER_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation of SubqueryAlias.""" alias: builtins.str """(Required) The alias.""" @property def qualifier( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Qualifier of the alias.""" def __init__( self, *, input: global___Relation | None = ..., alias: builtins.str = ..., qualifier: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "alias", b"alias", "input", b"input", "qualifier", b"qualifier" ], ) -> None: ... global___SubqueryAlias = SubqueryAlias class Repartition(google.protobuf.message.Message): """Relation repartition.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int NUM_PARTITIONS_FIELD_NUMBER: builtins.int SHUFFLE_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation of Repartition.""" num_partitions: builtins.int """(Required) Must be positive.""" shuffle: builtins.bool """(Optional) Default value is false.""" def __init__( self, *, input: global___Relation | None = ..., num_partitions: builtins.int = ..., shuffle: builtins.bool | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_shuffle", b"_shuffle", "input", b"input", "shuffle", b"shuffle" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_shuffle", b"_shuffle", "input", b"input", "num_partitions", b"num_partitions", "shuffle", b"shuffle", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_shuffle", b"_shuffle"] ) -> typing_extensions.Literal["shuffle"] | None: ... global___Repartition = Repartition class ShowString(google.protobuf.message.Message): """Compose the string representing rows for output. It will invoke 'Dataset.showString' to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int NUM_ROWS_FIELD_NUMBER: builtins.int TRUNCATE_FIELD_NUMBER: builtins.int VERTICAL_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" num_rows: builtins.int """(Required) Number of rows to show.""" truncate: builtins.int """(Required) If set to more than 0, truncates strings to `truncate` characters and all cells will be aligned right. """ vertical: builtins.bool """(Required) If set to true, prints output rows vertically (one line per column value).""" def __init__( self, *, input: global___Relation | None = ..., num_rows: builtins.int = ..., truncate: builtins.int = ..., vertical: builtins.bool = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... 
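# Illustrative sketch: the plan a client could send for df.show(5, truncate=20).
# `source` is a placeholder for a previously built Relation; the server computes
# and returns the rendered string.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     show = relations_pb2.ShowString(
#         input=source,
#         num_rows=5,
#         truncate=20,      # 0 disables truncation
#         vertical=False,
#     )
#     rel = relations_pb2.Relation(show_string=show)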
def ClearField( self, field_name: typing_extensions.Literal[ "input", b"input", "num_rows", b"num_rows", "truncate", b"truncate", "vertical", b"vertical", ], ) -> None: ... global___ShowString = ShowString class HtmlString(google.protobuf.message.Message): """Compose the string representing rows for output. It will invoke 'Dataset.htmlString' to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int NUM_ROWS_FIELD_NUMBER: builtins.int TRUNCATE_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" num_rows: builtins.int """(Required) Number of rows to show.""" truncate: builtins.int """(Required) If set to more than 0, truncates strings to `truncate` characters and all cells will be aligned right. """ def __init__( self, *, input: global___Relation | None = ..., num_rows: builtins.int = ..., truncate: builtins.int = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "input", b"input", "num_rows", b"num_rows", "truncate", b"truncate" ], ) -> None: ... global___HtmlString = HtmlString class StatSummary(google.protobuf.message.Message): """Computes specified statistics for numeric and string columns. It will invoke 'Dataset.summary' (same as 'StatFunctions.summary') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int STATISTICS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def statistics( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Statistics from to be computed. Available statistics are: count mean stddev min max arbitrary approximate percentiles specified as a percentage (e.g. 75%) count_distinct approx_count_distinct If no statistics are given, this function computes 'count', 'mean', 'stddev', 'min', 'approximate quartiles' (percentiles at 25%, 50%, and 75%), and 'max'. """ def __init__( self, *, input: global___Relation | None = ..., statistics: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["input", b"input", "statistics", b"statistics"] ) -> None: ... global___StatSummary = StatSummary class StatDescribe(google.protobuf.message.Message): """Computes basic statistics for numeric and string columns, including count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or string columns. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def cols( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Columns to compute statistics on.""" def __init__( self, *, input: global___Relation | None = ..., cols: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["cols", b"cols", "input", b"input"] ) -> None: ... 
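# Illustrative sketch: requesting summary statistics, roughly
# df.summary("count", "mean", "max"). `source` is a placeholder for a
# previously built Relation.
#
#     from pyspark.sql.connect.proto import relations_pb2
#
#     summary = relations_pb2.StatSummary(
#         input=source, statistics=["count", "mean", "max"])
#     rel = relations_pb2.Relation(summary=summary)
#
#     # Leaving `statistics` empty falls back to the default set described above.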
global___StatDescribe = StatDescribe class StatCrosstab(google.protobuf.message.Message): """Computes a pair-wise frequency table of the given columns. Also known as a contingency table. It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COL1_FIELD_NUMBER: builtins.int COL2_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" col1: builtins.str """(Required) The name of the first column. Distinct items will make the first item of each row. """ col2: builtins.str """(Required) The name of the second column. Distinct items will make the column names of the DataFrame. """ def __init__( self, *, input: global___Relation | None = ..., col1: builtins.str = ..., col2: builtins.str = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["col1", b"col1", "col2", b"col2", "input", b"input"], ) -> None: ... global___StatCrosstab = StatCrosstab class StatCov(google.protobuf.message.Message): """Calculate the sample covariance of two numerical columns of a DataFrame. It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COL1_FIELD_NUMBER: builtins.int COL2_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" col1: builtins.str """(Required) The name of the first column.""" col2: builtins.str """(Required) The name of the second column.""" def __init__( self, *, input: global___Relation | None = ..., col1: builtins.str = ..., col2: builtins.str = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["col1", b"col1", "col2", b"col2", "input", b"input"], ) -> None: ... global___StatCov = StatCov class StatCorr(google.protobuf.message.Message): """Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as 'StatFunctions.pearsonCorrelation') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COL1_FIELD_NUMBER: builtins.int COL2_FIELD_NUMBER: builtins.int METHOD_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" col1: builtins.str """(Required) The name of the first column.""" col2: builtins.str """(Required) The name of the second column.""" method: builtins.str """(Optional) Default value is 'pearson'. Currently only supports the Pearson Correlation Coefficient. """ def __init__( self, *, input: global___Relation | None = ..., col1: builtins.str = ..., col2: builtins.str = ..., method: builtins.str | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_method", b"_method", "input", b"input", "method", b"method" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_method", b"_method", "col1", b"col1", "col2", b"col2", "input", b"input", "method", b"method", ], ) -> None: ... 
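# Illustrative sketch (hypothetical `rel` Relation and column names): col1/col2 name the two
# columns, and `method` may be omitted since it defaults to 'pearson' as documented above.
#   corr = StatCorr(input=rel, col1="height", col2="weight", method="pearson")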
def WhichOneof( self, oneof_group: typing_extensions.Literal["_method", b"_method"] ) -> typing_extensions.Literal["method"] | None: ... global___StatCorr = StatCorr class StatApproxQuantile(google.protobuf.message.Message): """Calculates the approximate quantiles of numerical columns of a DataFrame. It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLS_FIELD_NUMBER: builtins.int PROBABILITIES_FIELD_NUMBER: builtins.int RELATIVE_ERROR_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def cols( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Required) The names of the numerical columns.""" @property def probabilities( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: """(Required) A list of quantile probabilities. Each number must belong to [0, 1]. For example 0 is the minimum, 0.5 is the median, 1 is the maximum. """ relative_error: builtins.float """(Required) The relative target precision to achieve (greater than or equal to 0). If set to zero, the exact quantiles are computed, which could be very expensive. Note that values greater than 1 are accepted but give the same result as 1. """ def __init__( self, *, input: global___Relation | None = ..., cols: collections.abc.Iterable[builtins.str] | None = ..., probabilities: collections.abc.Iterable[builtins.float] | None = ..., relative_error: builtins.float = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "cols", b"cols", "input", b"input", "probabilities", b"probabilities", "relative_error", b"relative_error", ], ) -> None: ... global___StatApproxQuantile = StatApproxQuantile class StatFreqItems(google.protobuf.message.Message): """Finding frequent items for columns, possibly with false positives. It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLS_FIELD_NUMBER: builtins.int SUPPORT_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def cols( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Required) The names of the columns to search frequent items in.""" support: builtins.float """(Optional) The minimum frequency for an item to be considered `frequent`. Should be greater than 1e-4. """ def __init__( self, *, input: global___Relation | None = ..., cols: collections.abc.Iterable[builtins.str] | None = ..., support: builtins.float | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_support", b"_support", "input", b"input", "support", b"support" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_support", b"_support", "cols", b"cols", "input", b"input", "support", b"support" ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_support", b"_support"] ) -> typing_extensions.Literal["support"] | None: ... 
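# Illustrative sketch (hypothetical `rel` Relation): per the docs above, each probability must
# lie in [0, 1] and relative_error must be >= 0 (0 computes exact quantiles, which can be
# costly); the optional `support` for frequent items should be greater than 1e-4.
#   quantiles = StatApproxQuantile(input=rel, cols=["age"], probabilities=[0.25, 0.5, 0.75], relative_error=0.05)
#   freq = StatFreqItems(input=rel, cols=["city"], support=0.01)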
global___StatFreqItems = StatFreqItems class StatSampleBy(google.protobuf.message.Message): """Returns a stratified sample without replacement based on the fraction given on each stratum. It will invoke 'Dataset.stat.sampleBy' to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor class Fraction(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor STRATUM_FIELD_NUMBER: builtins.int FRACTION_FIELD_NUMBER: builtins.int @property def stratum(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal: """(Required) The stratum.""" fraction: builtins.float """(Required) The fraction value. Must be in [0, 1].""" def __init__( self, *, stratum: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ..., fraction: builtins.float = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["stratum", b"stratum"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["fraction", b"fraction", "stratum", b"stratum"], ) -> None: ... INPUT_FIELD_NUMBER: builtins.int COL_FIELD_NUMBER: builtins.int FRACTIONS_FIELD_NUMBER: builtins.int SEED_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def col(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: """(Required) The column that defines strata.""" @property def fractions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___StatSampleBy.Fraction ]: """(Required) Sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as zero. """ seed: builtins.int """(Optional) The random seed.""" def __init__( self, *, input: global___Relation | None = ..., col: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ..., fractions: collections.abc.Iterable[global___StatSampleBy.Fraction] | None = ..., seed: builtins.int | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_seed", b"_seed", "col", b"col", "input", b"input", "seed", b"seed" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_seed", b"_seed", "col", b"col", "fractions", b"fractions", "input", b"input", "seed", b"seed", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_seed", b"_seed"] ) -> typing_extensions.Literal["seed"] | None: ... global___StatSampleBy = StatSampleBy class NAFill(google.protobuf.message.Message): """Replaces null values. It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results. The following 3 parameter combinations are supported: 1, 'values' only contains 1 item, 'cols' is empty: replaces null values in all type-compatible columns. 2, 'values' only contains 1 item, 'cols' is not empty: replaces null values in specified columns. 3, 'values' contains more than 1 item, then 'cols' is required to have the same length: replaces each specified column with the corresponding value.
""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLS_FIELD_NUMBER: builtins.int VALUES_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def cols( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Optional list of column names to consider.""" @property def values( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ]: """(Required) Values to replace null values with. Should contain at least 1 item. Only 4 data types are supported now: bool, long, double, string """ def __init__( self, *, input: global___Relation | None = ..., cols: collections.abc.Iterable[builtins.str] | None = ..., values: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression.Literal ] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "cols", b"cols", "input", b"input", "values", b"values" ], ) -> None: ... global___NAFill = NAFill class NADrop(google.protobuf.message.Message): """Drop rows containing null values. It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results. """ DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLS_FIELD_NUMBER: builtins.int MIN_NON_NULLS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def cols( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) Optional list of column names to consider. When it is empty, all the columns in the input relation will be considered. """ min_non_nulls: builtins.int """(Optional) The minimum number of non-null and non-NaN values required to keep. When not set, it is equivalent to the number of considered columns, which means a row will be kept only if all columns are non-null. 'how' options ('all', 'any') can be easily converted to this field: - 'all' -> set 'min_non_nulls' 1; - 'any' -> keep 'min_non_nulls' unset; """ def __init__( self, *, input: global___Relation | None = ..., cols: collections.abc.Iterable[builtins.str] | None = ..., min_non_nulls: builtins.int | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_min_non_nulls", b"_min_non_nulls", "input", b"input", "min_non_nulls", b"min_non_nulls", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_min_non_nulls", b"_min_non_nulls", "cols", b"cols", "input", b"input", "min_non_nulls", b"min_non_nulls", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_min_non_nulls", b"_min_non_nulls"] ) -> typing_extensions.Literal["min_non_nulls"] | None: ... global___NADrop = NADrop class NAReplace(google.protobuf.message.Message): """Replaces old values with the corresponding values. It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace') to compute the results. 
""" DESCRIPTOR: google.protobuf.descriptor.Descriptor class Replacement(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor OLD_VALUE_FIELD_NUMBER: builtins.int NEW_VALUE_FIELD_NUMBER: builtins.int @property def old_value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal: """(Required) The old value. Only 4 data types are supported now: null, bool, double, string. """ @property def new_value(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal: """(Required) The new value. Should be of the same data type with the old value. """ def __init__( self, *, old_value: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ..., new_value: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "new_value", b"new_value", "old_value", b"old_value" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "new_value", b"new_value", "old_value", b"old_value" ], ) -> None: ... INPUT_FIELD_NUMBER: builtins.int COLS_FIELD_NUMBER: builtins.int REPLACEMENTS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def cols( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Optional) List of column names to consider. When it is empty, all the type-compatible columns in the input relation will be considered. """ @property def replacements( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ global___NAReplace.Replacement ]: """(Optional) The value replacement mapping.""" def __init__( self, *, input: global___Relation | None = ..., cols: collections.abc.Iterable[builtins.str] | None = ..., replacements: collections.abc.Iterable[global___NAReplace.Replacement] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "cols", b"cols", "input", b"input", "replacements", b"replacements" ], ) -> None: ... global___NAReplace = NAReplace class ToDF(google.protobuf.message.Message): """Rename columns on the input relation by the same length of names.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int COLUMN_NAMES_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation of RenameColumnsBySameLengthNames.""" @property def column_names( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: """(Required) The number of columns of the input relation must be equal to the length of this field. If this is not true, an exception will be returned. """ def __init__( self, *, input: global___Relation | None = ..., column_names: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["column_names", b"column_names", "input", b"input"], ) -> None: ... 
global___ToDF = ToDF class WithColumnsRenamed(google.protobuf.message.Message): """Rename columns on the input relation by a map with name to name mapping.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor class RenameColumnsMapEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor KEY_FIELD_NUMBER: builtins.int VALUE_FIELD_NUMBER: builtins.int key: builtins.str value: builtins.str def __init__( self, *, key: builtins.str = ..., value: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] ) -> None: ... INPUT_FIELD_NUMBER: builtins.int RENAME_COLUMNS_MAP_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def rename_columns_map( self, ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]: """(Required) Renaming column names of the input relation from A to B where A is the map key and B is the map value. This is a no-op if the schema doesn't contain any A. It does not require all input relation column names to be present as keys. Duplicated B values are not allowed. """ def __init__( self, *, input: global___Relation | None = ..., rename_columns_map: collections.abc.Mapping[builtins.str, builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "input", b"input", "rename_columns_map", b"rename_columns_map" ], ) -> None: ... global___WithColumnsRenamed = WithColumnsRenamed class WithColumns(google.protobuf.message.Message): """Adding columns or replacing the existing columns that have the same names.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int ALIASES_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def aliases( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression.Alias ]: """(Required) Given a column name, apply the corresponding expression on the column. If the column name exists in the input relation, then replace the column. If the column name does not exist in the input relation, then add it as a new column. Only one name part is expected from each Expression.Alias. An exception is thrown when duplicated names are present in the mapping. """ def __init__( self, *, input: global___Relation | None = ..., aliases: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression.Alias ] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["aliases", b"aliases", "input", b"input"] ) -> None: ... global___WithColumns = WithColumns class WithWatermark(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int EVENT_TIME_FIELD_NUMBER: builtins.int DELAY_THRESHOLD_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" event_time: builtins.str """(Required) Name of the column containing event time.""" delay_threshold: builtins.str """(Required) How late the data is allowed to arrive, expressed as an interval string (e.g. '10 minutes').""" def __init__( self, *, input: global___Relation | None = ..., event_time: builtins.str = ..., delay_threshold: builtins.str = ..., ) -> None: ...
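# Illustrative sketch (hypothetical `rel` streaming Relation): event_time names the event-time
# column and delay_threshold is an interval string, mirroring DataFrame.withWatermark.
#   wm = WithWatermark(input=rel, event_time="eventTime", delay_threshold="10 minutes")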
def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "delay_threshold", b"delay_threshold", "event_time", b"event_time", "input", b"input" ], ) -> None: ... global___WithWatermark = WithWatermark class Hint(google.protobuf.message.Message): """Specify a hint over a relation. Hint should have a name and optional parameters.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int NAME_FIELD_NUMBER: builtins.int PARAMETERS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" name: builtins.str """(Required) Hint name. Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, SHUFFLE_REPLICATE_NL. Supported partitioning hints include COALESCE, REPARTITION, REPARTITION_BY_RANGE. """ @property def parameters( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) Hint parameters.""" def __init__( self, *, input: global___Relation | None = ..., name: builtins.str = ..., parameters: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "input", b"input", "name", b"name", "parameters", b"parameters" ], ) -> None: ... global___Hint = Hint class Unpivot(google.protobuf.message.Message): """Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor class Values(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor VALUES_FIELD_NUMBER: builtins.int @property def values( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: ... def __init__( self, *, values: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal["values", b"values"] ) -> None: ... INPUT_FIELD_NUMBER: builtins.int IDS_FIELD_NUMBER: builtins.int VALUES_FIELD_NUMBER: builtins.int VARIABLE_COLUMN_NAME_FIELD_NUMBER: builtins.int VALUE_COLUMN_NAME_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def ids( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) Id columns.""" @property def values(self) -> global___Unpivot.Values: """(Optional) Value columns to unpivot.""" variable_column_name: builtins.str """(Required) Name of the variable column.""" value_column_name: builtins.str """(Required) Name of the value column.""" def __init__( self, *, input: global___Relation | None = ..., ids: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., values: global___Unpivot.Values | None = ..., variable_column_name: builtins.str = ..., value_column_name: builtins.str = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_values", b"_values", "input", b"input", "values", b"values" ], ) -> builtins.bool: ... 
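# Illustrative sketch (hypothetical `rel` Relation and Expression messages id_expr/val_expr):
# `ids` keeps identifier columns, the optional Values wrapper lists the columns to unpivot,
# and the two name fields label the resulting variable/value columns, mirroring DataFrame.unpivot.
#   unpivot = Unpivot(input=rel, ids=[id_expr], values=Unpivot.Values(values=[val_expr]), variable_column_name="var", value_column_name="val")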
def ClearField( self, field_name: typing_extensions.Literal[ "_values", b"_values", "ids", b"ids", "input", b"input", "value_column_name", b"value_column_name", "values", b"values", "variable_column_name", b"variable_column_name", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_values", b"_values"] ) -> typing_extensions.Literal["values"] | None: ... global___Unpivot = Unpivot class ToSchema(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def schema(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Required) The user provided schema. The Sever side will update the dataframe with this schema. """ def __init__( self, *, input: global___Relation | None = ..., schema: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input", "schema", b"schema"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal["input", b"input", "schema", b"schema"] ) -> None: ... global___ToSchema = ToSchema class RepartitionByExpression(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int PARTITION_EXPRS_FIELD_NUMBER: builtins.int NUM_PARTITIONS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" @property def partition_exprs( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) The partitioning expressions.""" num_partitions: builtins.int """(Optional) number of partitions, must be positive.""" def __init__( self, *, input: global___Relation | None = ..., partition_exprs: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., num_partitions: builtins.int | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_num_partitions", b"_num_partitions", "input", b"input", "num_partitions", b"num_partitions", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_num_partitions", b"_num_partitions", "input", b"input", "num_partitions", b"num_partitions", "partition_exprs", b"partition_exprs", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_num_partitions", b"_num_partitions"] ) -> typing_extensions.Literal["num_partitions"] | None: ... global___RepartitionByExpression = RepartitionByExpression class MapPartitions(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int FUNC_FIELD_NUMBER: builtins.int IS_BARRIER_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for a mapPartitions-equivalent API: mapInPandas, mapInArrow.""" @property def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction: """(Required) Input user-defined function.""" is_barrier: builtins.bool """(Optional) Whether to use barrier mode execution or not.""" def __init__( self, *, input: global___Relation | None = ..., func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction | None = ..., is_barrier: builtins.bool | None = ..., ) -> None: ... 
def HasField( self, field_name: typing_extensions.Literal[ "_is_barrier", b"_is_barrier", "func", b"func", "input", b"input", "is_barrier", b"is_barrier", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_is_barrier", b"_is_barrier", "func", b"func", "input", b"input", "is_barrier", b"is_barrier", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_is_barrier", b"_is_barrier"] ) -> typing_extensions.Literal["is_barrier"] | None: ... global___MapPartitions = MapPartitions class GroupMap(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int FUNC_FIELD_NUMBER: builtins.int SORTING_EXPRESSIONS_FIELD_NUMBER: builtins.int INITIAL_INPUT_FIELD_NUMBER: builtins.int INITIAL_GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int IS_MAP_GROUPS_WITH_STATE_FIELD_NUMBER: builtins.int OUTPUT_MODE_FIELD_NUMBER: builtins.int TIMEOUT_CONF_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for Group Map API: apply, applyInPandas.""" @property def grouping_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) Expressions for grouping keys.""" @property def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction: """(Required) Input user-defined function.""" @property def sorting_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) Expressions for sorting. Only used by Scala Sorted Group Map API.""" @property def initial_input(self) -> global___Relation: """Below fields are only used by (Flat)MapGroupsWithState (Optional) Input relation for initial State. """ @property def initial_grouping_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) Expressions for grouping keys of the initial state input relation.""" is_map_groups_with_state: builtins.bool """(Optional) True if MapGroupsWithState, false if FlatMapGroupsWithState.""" output_mode: builtins.str """(Optional) The output mode of the function.""" timeout_conf: builtins.str """(Optional) Timeout configuration for groups that do not receive data for a while.""" def __init__( self, *, input: global___Relation | None = ..., grouping_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction | None = ..., sorting_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., initial_input: global___Relation | None = ..., initial_grouping_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., is_map_groups_with_state: builtins.bool | None = ..., output_mode: builtins.str | None = ..., timeout_conf: builtins.str | None = ..., ) -> None: ... 
def HasField( self, field_name: typing_extensions.Literal[ "_is_map_groups_with_state", b"_is_map_groups_with_state", "_output_mode", b"_output_mode", "_timeout_conf", b"_timeout_conf", "func", b"func", "initial_input", b"initial_input", "input", b"input", "is_map_groups_with_state", b"is_map_groups_with_state", "output_mode", b"output_mode", "timeout_conf", b"timeout_conf", ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_is_map_groups_with_state", b"_is_map_groups_with_state", "_output_mode", b"_output_mode", "_timeout_conf", b"_timeout_conf", "func", b"func", "grouping_expressions", b"grouping_expressions", "initial_grouping_expressions", b"initial_grouping_expressions", "initial_input", b"initial_input", "input", b"input", "is_map_groups_with_state", b"is_map_groups_with_state", "output_mode", b"output_mode", "sorting_expressions", b"sorting_expressions", "timeout_conf", b"timeout_conf", ], ) -> None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal[ "_is_map_groups_with_state", b"_is_map_groups_with_state" ], ) -> typing_extensions.Literal["is_map_groups_with_state"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_output_mode", b"_output_mode"] ) -> typing_extensions.Literal["output_mode"] | None: ... @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_timeout_conf", b"_timeout_conf"] ) -> typing_extensions.Literal["timeout_conf"] | None: ... global___GroupMap = GroupMap class CoGroupMap(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int INPUT_GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int OTHER_FIELD_NUMBER: builtins.int OTHER_GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int FUNC_FIELD_NUMBER: builtins.int INPUT_SORTING_EXPRESSIONS_FIELD_NUMBER: builtins.int OTHER_SORTING_EXPRESSIONS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) One input relation for CoGroup Map API - applyInPandas.""" @property def input_grouping_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """Expressions for grouping keys of the first input relation.""" @property def other(self) -> global___Relation: """(Required) The other input relation.""" @property def other_grouping_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """Expressions for grouping keys of the other input relation.""" @property def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction: """(Required) Input user-defined function.""" @property def input_sorting_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API.""" @property def other_sorting_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) Expressions for sorting. 
Only used by Scala Sorted CoGroup Map API.""" def __init__( self, *, input: global___Relation | None = ..., input_grouping_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., other: global___Relation | None = ..., other_grouping_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction | None = ..., input_sorting_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., other_sorting_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "func", b"func", "input", b"input", "other", b"other" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "func", b"func", "input", b"input", "input_grouping_expressions", b"input_grouping_expressions", "input_sorting_expressions", b"input_sorting_expressions", "other", b"other", "other_grouping_expressions", b"other_grouping_expressions", "other_sorting_expressions", b"other_sorting_expressions", ], ) -> None: ... global___CoGroupMap = CoGroupMap class ApplyInPandasWithState(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int GROUPING_EXPRESSIONS_FIELD_NUMBER: builtins.int FUNC_FIELD_NUMBER: builtins.int OUTPUT_SCHEMA_FIELD_NUMBER: builtins.int STATE_SCHEMA_FIELD_NUMBER: builtins.int OUTPUT_MODE_FIELD_NUMBER: builtins.int TIMEOUT_CONF_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for applyInPandasWithState.""" @property def grouping_expressions( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) Expressions for grouping keys.""" @property def func(self) -> pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction: """(Required) Input user-defined function.""" output_schema: builtins.str """(Required) Schema for the output DataFrame.""" state_schema: builtins.str """(Required) Schema for the state.""" output_mode: builtins.str """(Required) The output mode of the function.""" timeout_conf: builtins.str """(Required) Timeout configuration for groups that do not receive data for a while.""" def __init__( self, *, input: global___Relation | None = ..., grouping_expressions: collections.abc.Iterable[ pyspark.sql.connect.proto.expressions_pb2.Expression ] | None = ..., func: pyspark.sql.connect.proto.expressions_pb2.CommonInlineUserDefinedFunction | None = ..., output_schema: builtins.str = ..., state_schema: builtins.str = ..., output_mode: builtins.str = ..., timeout_conf: builtins.str = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["func", b"func", "input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "func", b"func", "grouping_expressions", b"grouping_expressions", "input", b"input", "output_mode", b"output_mode", "output_schema", b"output_schema", "state_schema", b"state_schema", "timeout_conf", b"timeout_conf", ], ) -> None: ... 
global___ApplyInPandasWithState = ApplyInPandasWithState class CommonInlineUserDefinedTableFunction(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor FUNCTION_NAME_FIELD_NUMBER: builtins.int DETERMINISTIC_FIELD_NUMBER: builtins.int ARGUMENTS_FIELD_NUMBER: builtins.int PYTHON_UDTF_FIELD_NUMBER: builtins.int function_name: builtins.str """(Required) Name of the user-defined table function.""" deterministic: builtins.bool """(Optional) Whether the user-defined table function is deterministic.""" @property def arguments( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Optional) Function input arguments. Empty arguments are allowed.""" @property def python_udtf(self) -> global___PythonUDTF: ... def __init__( self, *, function_name: builtins.str = ..., deterministic: builtins.bool = ..., arguments: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., python_udtf: global___PythonUDTF | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "function", b"function", "python_udtf", b"python_udtf" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "arguments", b"arguments", "deterministic", b"deterministic", "function", b"function", "function_name", b"function_name", "python_udtf", b"python_udtf", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["function", b"function"] ) -> typing_extensions.Literal["python_udtf"] | None: ... global___CommonInlineUserDefinedTableFunction = CommonInlineUserDefinedTableFunction class PythonUDTF(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor RETURN_TYPE_FIELD_NUMBER: builtins.int EVAL_TYPE_FIELD_NUMBER: builtins.int COMMAND_FIELD_NUMBER: builtins.int PYTHON_VER_FIELD_NUMBER: builtins.int @property def return_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Optional) Return type of the Python UDTF.""" eval_type: builtins.int """(Required) EvalType of the Python UDTF.""" command: builtins.bytes """(Required) The encoded commands of the Python UDTF.""" python_ver: builtins.str """(Required) Python version being used in the client.""" def __init__( self, *, return_type: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., eval_type: builtins.int = ..., command: builtins.bytes = ..., python_ver: builtins.str = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_return_type", b"_return_type", "return_type", b"return_type" ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "_return_type", b"_return_type", "command", b"command", "eval_type", b"eval_type", "python_ver", b"python_ver", "return_type", b"return_type", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_return_type", b"_return_type"] ) -> typing_extensions.Literal["return_type"] | None: ... 
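# Illustrative sketch (all values hypothetical placeholders): a Python UDTF is shipped as an
# encoded command plus metadata, then wrapped in CommonInlineUserDefinedTableFunction; only the
# python_udtf variant of the `function` oneof is declared in this message.
#   udtf = PythonUDTF(eval_type=300, command=b"<encoded udtf>", python_ver="3.10")  # eval_type/command are illustrative only
#   tvf = CommonInlineUserDefinedTableFunction(function_name="my_udtf", deterministic=True, arguments=[], python_udtf=udtf)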
global___PythonUDTF = PythonUDTF class CollectMetrics(google.protobuf.message.Message): """Collect arbitrary (named) metrics from a dataset.""" DESCRIPTOR: google.protobuf.descriptor.Descriptor INPUT_FIELD_NUMBER: builtins.int NAME_FIELD_NUMBER: builtins.int METRICS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) The input relation.""" name: builtins.str """(Required) Name of the metrics.""" @property def metrics( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ pyspark.sql.connect.proto.expressions_pb2.Expression ]: """(Required) The metric sequence.""" def __init__( self, *, input: global___Relation | None = ..., name: builtins.str = ..., metrics: collections.abc.Iterable[pyspark.sql.connect.proto.expressions_pb2.Expression] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["input", b"input"] ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ "input", b"input", "metrics", b"metrics", "name", b"name" ], ) -> None: ... global___CollectMetrics = CollectMetrics class Parse(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor class _ParseFormat: ValueType = typing.NewType("ValueType", builtins.int) V: typing_extensions.TypeAlias = ValueType class _ParseFormatEnumTypeWrapper( google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[Parse._ParseFormat.ValueType], builtins.type, ): # noqa: F821 DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor PARSE_FORMAT_UNSPECIFIED: Parse._ParseFormat.ValueType # 0 PARSE_FORMAT_CSV: Parse._ParseFormat.ValueType # 1 PARSE_FORMAT_JSON: Parse._ParseFormat.ValueType # 2 class ParseFormat(_ParseFormat, metaclass=_ParseFormatEnumTypeWrapper): ... PARSE_FORMAT_UNSPECIFIED: Parse.ParseFormat.ValueType # 0 PARSE_FORMAT_CSV: Parse.ParseFormat.ValueType # 1 PARSE_FORMAT_JSON: Parse.ParseFormat.ValueType # 2 class OptionsEntry(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor KEY_FIELD_NUMBER: builtins.int VALUE_FIELD_NUMBER: builtins.int key: builtins.str value: builtins.str def __init__( self, *, key: builtins.str = ..., value: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] ) -> None: ... INPUT_FIELD_NUMBER: builtins.int FORMAT_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int OPTIONS_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation to Parse. The input is expected to have single text column.""" format: global___Parse.ParseFormat.ValueType """(Required) The expected format of the text.""" @property def schema(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Optional) DataType representing the schema. If not set, Spark will infer the schema.""" @property def options(self) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]: """Options for the csv/json parser. The map key is case insensitive.""" def __init__( self, *, input: global___Relation | None = ..., format: global___Parse.ParseFormat.ValueType = ..., schema: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., options: collections.abc.Mapping[builtins.str, builtins.str] | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ "_schema", b"_schema", "input", b"input", "schema", b"schema" ], ) -> builtins.bool: ... 
def ClearField( self, field_name: typing_extensions.Literal[ "_schema", b"_schema", "format", b"format", "input", b"input", "options", b"options", "schema", b"schema", ], ) -> None: ... def WhichOneof( self, oneof_group: typing_extensions.Literal["_schema", b"_schema"] ) -> typing_extensions.Literal["schema"] | None: ... global___Parse = Parse
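# Illustrative sketch (hypothetical `rel` Relation holding a single text column): `format`
# selects the CSV or JSON parser, `schema` may be omitted so Spark infers it, and `options`
# mirrors the usual case-insensitive reader options.
#   parse = Parse(input=rel, format=Parse.PARSE_FORMAT_JSON, options={"multiLine": "true"})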