# services/google/dataproc/cluster.yaml (1,842 lines of code) (raw):
# Copyright 2025 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
info:
title: Dataproc/Cluster
description: The Dataproc Cluster resource
x-dcl-struct-name: Cluster
x-dcl-has-iam: true
paths:
get:
description: The function used to get information about a Cluster
parameters:
- name: cluster
required: true
description: A full instance of a Cluster
apply:
description: The function used to apply information about a Cluster
parameters:
- name: cluster
required: true
description: A full instance of a Cluster
delete:
description: The function used to delete a Cluster
parameters:
- name: cluster
required: true
description: A full instance of a Cluster
deleteAll:
description: The function used to delete all Clusters
parameters:
- name: project
required: true
schema:
type: string
- name: location
required: true
schema:
type: string
list:
description: The function used to list information about many Clusters
parameters:
- name: project
required: true
schema:
type: string
- name: location
required: true
schema:
type: string
components:
schemas:
Cluster:
title: Cluster
x-dcl-id: projects/{{project}}/regions/{{location}}/clusters/{{name}}
x-dcl-uses-state-hint: true
x-dcl-parent-container: project
x-dcl-labels: labels
x-dcl-has-create: true
x-dcl-has-iam: true
x-dcl-read-timeout: 0
x-dcl-apply-timeout: 0
x-dcl-delete-timeout: 0
type: object
required:
- project
- name
- location
properties:
clusterUuid:
type: string
x-dcl-go-name: ClusterUuid
readOnly: true
description: Output only. A cluster UUID (Universally Unique Identifier).
Dataproc generates this value when it creates the cluster.
x-kubernetes-immutable: true
config:
type: object
x-dcl-go-name: Config
x-dcl-go-type: ClusterConfig
description: The cluster config. Note that Dataproc may set default values,
and values may change when clusters are updated.
x-kubernetes-immutable: true
properties:
autoscalingConfig:
type: object
x-dcl-go-name: AutoscalingConfig
x-dcl-go-type: ClusterConfigAutoscalingConfig
description: Optional. Autoscaling config for the policy associated
with the cluster. Cluster does not autoscale if this field is unset.
x-kubernetes-immutable: true
properties:
policy:
type: string
x-dcl-go-name: Policy
description: 'Optional. The autoscaling policy used by the cluster.
Only resource names including projectid and location (region)
are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
* `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
Note that the policy must be in the same project and Dataproc
region.'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Dataproc/AutoscalingPolicy
field: name
dataprocMetricConfig:
type: object
x-dcl-go-name: DataprocMetricConfig
x-dcl-go-type: ClusterConfigDataprocMetricConfig
description: Optional. The config for Dataproc metrics.
x-kubernetes-immutable: true
required:
- metrics
properties:
metrics:
type: array
x-dcl-go-name: Metrics
description: Required. Metrics sources to enable.
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigDataprocMetricConfigMetrics
required:
- metricSource
properties:
metricOverrides:
type: array
x-dcl-go-name: MetricOverrides
description: 'Optional. Specify one or more [available OSS
metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
to collect for the metric source (for the `SPARK` metric
source, any [Spark metric](https://spark.apache.org/docs/latest/monitoring.html#metrics)
can be specified). Provide metrics in the following format:
`METRIC_SOURCE:INSTANCE:GROUP:METRIC`. Use camelcase as appropriate.
Examples: ``` yarn:ResourceManager:QueueMetrics:AppsCompleted
spark:driver:DAGScheduler:job.allJobs sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
hiveserver2:JVM:Memory:NonHeapMemoryUsage.used ``` Notes:
* Only the specified overridden metrics will be collected
for the metric source. For example, if one or more `spark:executor`
metrics are listed as metric overrides, other `SPARK` metrics
will not be collected. The collection of the default metrics
for other OSS metric sources is unaffected. For example,
if both `SPARK` and `YARN` metric sources are enabled,
and overrides are provided for Spark metrics only, all default
YARN metrics will be collected.'
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: string
metricSource:
type: string
x-dcl-go-name: MetricSource
x-dcl-go-type: ClusterConfigDataprocMetricConfigMetricsMetricSourceEnum
description: 'Required. Default metrics are collected unless
`metricOverrides` are specified for the metric source (see
[Available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
for more information). Possible values: METRIC_SOURCE_UNSPECIFIED,
MONITORING_AGENT_DEFAULTS, HDFS, SPARK, YARN, SPARK_HISTORY_SERVER,
HIVESERVER2'
x-kubernetes-immutable: true
enum:
- METRIC_SOURCE_UNSPECIFIED
- MONITORING_AGENT_DEFAULTS
- HDFS
- SPARK
- YARN
- SPARK_HISTORY_SERVER
- HIVESERVER2
encryptionConfig:
type: object
x-dcl-go-name: EncryptionConfig
x-dcl-go-type: ClusterConfigEncryptionConfig
description: Optional. Encryption settings for the cluster.
x-kubernetes-immutable: true
properties:
gcePdKmsKeyName:
type: string
x-dcl-go-name: GcePdKmsKeyName
description: Optional. The Cloud KMS key name to use for PD disk
encryption for all instances in the cluster.
x-kubernetes-immutable: true
x-dcl-references:
- resource: Cloudkms/CryptoKey
field: selfLink
endpointConfig:
type: object
x-dcl-go-name: EndpointConfig
x-dcl-go-type: ClusterConfigEndpointConfig
description: Optional. Port/endpoint configuration for this cluster.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
enableHttpPortAccess:
type: boolean
x-dcl-go-name: EnableHttpPortAccess
description: Optional. If true, enable http access to specific ports
on the cluster from external sources. Defaults to false.
x-kubernetes-immutable: true
httpPorts:
type: object
additionalProperties:
type: string
x-dcl-go-name: HttpPorts
readOnly: true
description: Output only. The map of port descriptions to URLs.
Will only be populated if enable_http_port_access is true.
x-kubernetes-immutable: true
gceClusterConfig:
type: object
x-dcl-go-name: GceClusterConfig
x-dcl-go-type: ClusterConfigGceClusterConfig
description: Optional. The shared Compute Engine config settings for
all instances in a cluster.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
confidentialInstanceConfig:
type: object
x-dcl-go-name: ConfidentialInstanceConfig
x-dcl-go-type: ClusterConfigGceClusterConfigConfidentialInstanceConfig
description: Optional. Confidential Instance Config for clusters
using [Confidential VMs](https://cloud.google.com/compute/confidential-vm/docs).
x-kubernetes-immutable: true
properties:
enableConfidentialCompute:
type: boolean
x-dcl-go-name: EnableConfidentialCompute
description: Optional. Defines whether the instance should have
confidential compute enabled.
x-kubernetes-immutable: true
internalIPOnly:
type: boolean
x-dcl-go-name: InternalIPOnly
description: Optional. If true, all instances in the cluster will
only have internal IP addresses. By default, clusters are not
restricted to internal IP addresses, and will have ephemeral external
IP addresses assigned to each instance. This `internal_ip_only`
restriction can only be enabled for subnetwork enabled networks,
and all off-cluster dependencies must be configured to be accessible
without external IP addresses.
x-kubernetes-immutable: true
x-dcl-server-default: true
metadata:
type: object
additionalProperties:
type: string
x-dcl-go-name: Metadata
description: The Compute Engine metadata entries to add to all instances
(see [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
x-kubernetes-immutable: true
network:
type: string
x-dcl-go-name: Network
description: 'Optional. The Compute Engine network to be used for
machine communications. Cannot be specified with subnetwork_uri.
If neither `network_uri` nor `subnetwork_uri` is specified, the
"default" network of the project is used, if it exists. Cannot
be a "Custom Subnet Network" (see [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks)
for more information). A full URL, partial URI, or short name
are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
* `projects/[project_id]/global/networks/default` * `default`'
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-references:
- resource: Compute/Network
field: selfLink
nodeGroupAffinity:
type: object
x-dcl-go-name: NodeGroupAffinity
x-dcl-go-type: ClusterConfigGceClusterConfigNodeGroupAffinity
description: Optional. Node Group Affinity for sole-tenant clusters.
x-kubernetes-immutable: true
required:
- nodeGroup
properties:
nodeGroup:
type: string
x-dcl-go-name: NodeGroup
description: 'Required. The URI of a sole-tenant [node group
resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups)
that the cluster will be created on. A full URL, partial URI,
or node group name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1`
* `projects/[project_id]/zones/us-central1-a/nodeGroups/node-group-1`
* `node-group-1`'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Compute/NodeGroup
field: selfLink
privateIPv6GoogleAccess:
type: string
x-dcl-go-name: PrivateIPv6GoogleAccess
x-dcl-go-type: ClusterConfigGceClusterConfigPrivateIPv6GoogleAccessEnum
description: 'Optional. The type of IPv6 access for a cluster. Possible
values: PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED, INHERIT_FROM_SUBNETWORK,
OUTBOUND, BIDIRECTIONAL'
x-kubernetes-immutable: true
enum:
- PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED
- INHERIT_FROM_SUBNETWORK
- OUTBOUND
- BIDIRECTIONAL
reservationAffinity:
type: object
x-dcl-go-name: ReservationAffinity
x-dcl-go-type: ClusterConfigGceClusterConfigReservationAffinity
description: Optional. Reservation Affinity for consuming Zonal
reservation.
x-kubernetes-immutable: true
properties:
consumeReservationType:
type: string
x-dcl-go-name: ConsumeReservationType
x-dcl-go-type: ClusterConfigGceClusterConfigReservationAffinityConsumeReservationTypeEnum
description: 'Optional. Type of reservation to consume. Possible
values: TYPE_UNSPECIFIED, NO_RESERVATION, ANY_RESERVATION,
SPECIFIC_RESERVATION'
x-kubernetes-immutable: true
enum:
- TYPE_UNSPECIFIED
- NO_RESERVATION
- ANY_RESERVATION
- SPECIFIC_RESERVATION
key:
type: string
x-dcl-go-name: Key
description: Optional. Corresponds to the label key of reservation
resource.
x-kubernetes-immutable: true
values:
type: array
x-dcl-go-name: Values
description: Optional. Corresponds to the label values of reservation
resource.
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: string
serviceAccount:
type: string
x-dcl-go-name: ServiceAccount
description: Optional. The [Dataproc service account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
(also see [VM Data Plane identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
used by Dataproc cluster VM instances to access Google Cloud Platform
services. If not specified, the [Compute Engine default service
account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
is used.
x-kubernetes-immutable: true
x-dcl-references:
- resource: Iam/ServiceAccount
field: email
serviceAccountScopes:
type: array
x-dcl-go-name: ServiceAccountScopes
description: 'Optional. The URIs of service account scopes to be
included in Compute Engine instances. The following base set of
scopes is always included: * https://www.googleapis.com/auth/cloud.useraccounts.readonly
* https://www.googleapis.com/auth/devstorage.read_write * https://www.googleapis.com/auth/logging.write
If no scopes are specified, the following defaults are also provided:
* https://www.googleapis.com/auth/bigquery * https://www.googleapis.com/auth/bigtable.admin.table
* https://www.googleapis.com/auth/bigtable.data * https://www.googleapis.com/auth/devstorage.full_control'
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: string
shieldedInstanceConfig:
type: object
x-dcl-go-name: ShieldedInstanceConfig
x-dcl-go-type: ClusterConfigGceClusterConfigShieldedInstanceConfig
description: Optional. Shielded Instance Config for clusters using
[Compute Engine Shielded VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
x-kubernetes-immutable: true
properties:
enableIntegrityMonitoring:
type: boolean
x-dcl-go-name: EnableIntegrityMonitoring
description: Optional. Defines whether instances have integrity
monitoring enabled.
x-kubernetes-immutable: true
enableSecureBoot:
type: boolean
x-dcl-go-name: EnableSecureBoot
description: Optional. Defines whether instances have Secure
Boot enabled.
x-kubernetes-immutable: true
enableVtpm:
type: boolean
x-dcl-go-name: EnableVtpm
description: Optional. Defines whether instances have the vTPM
enabled.
x-kubernetes-immutable: true
subnetwork:
type: string
x-dcl-go-name: Subnetwork
description: 'Optional. The Compute Engine subnetwork to be used
for machine communications. Cannot be specified with network_uri.
A full URL, partial URI, or short name are valid. Examples: *
`https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/subnetworks/sub0`
* `projects/[project_id]/regions/us-east1/subnetworks/sub0` *
`sub0`'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Compute/Subnetwork
field: selfLink
tags:
type: array
x-dcl-go-name: Tags
description: The Compute Engine tags to add to all instances (see
[Tagging instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: set
items:
type: string
x-dcl-go-type: string
zone:
type: string
x-dcl-go-name: Zone
description: 'Optional. The zone where the Compute Engine cluster
will be located. On a create request, it is required in the "global"
region. If omitted in a non-global Dataproc region, the service
will pick a zone in the corresponding Compute Engine region. On
a get request, zone will always be present. A full URL, partial
URI, or short name are valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
* `projects/[project_id]/zones/[zone]` * `us-central1-f`'
x-kubernetes-immutable: true
initializationActions:
type: array
x-dcl-go-name: InitializationActions
description: 'Optional. Commands to execute on each node after config
is completed. By default, executables are run on master and all worker
nodes. You can test a node''s `role` metadata to run an executable
on a master or worker node, as shown below using `curl` (you can also
use `wget`): ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
if [[ "${ROLE}" == ''Master'' ]]; then ... master specific actions
... else ... worker specific actions ... fi'
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigInitializationActions
required:
- executableFile
properties:
executableFile:
type: string
x-dcl-go-name: ExecutableFile
description: Required. Cloud Storage URI of executable file.
x-kubernetes-immutable: true
executionTimeout:
type: string
x-dcl-go-name: ExecutionTimeout
description: Optional. Amount of time executable has to complete.
Default is 10 minutes (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
Cluster creation fails with an explanatory error message (the
name of the executable that caused the error and the exceeded
timeout period) if the executable is not completed at end of
the timeout period.
x-kubernetes-immutable: true
lifecycleConfig:
type: object
x-dcl-go-name: LifecycleConfig
x-dcl-go-type: ClusterConfigLifecycleConfig
description: Optional. Lifecycle setting for the cluster.
x-kubernetes-immutable: true
properties:
autoDeleteTime:
type: string
format: date-time
x-dcl-go-name: AutoDeleteTime
description: Optional. The time when cluster will be auto-deleted
(see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
x-kubernetes-immutable: true
autoDeleteTtl:
type: string
x-dcl-go-name: AutoDeleteTtl
description: Optional. The lifetime duration of cluster. The cluster
will be auto-deleted at the end of this period. Minimum value
is 10 minutes; maximum value is 14 days (see JSON representation
of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
x-kubernetes-immutable: true
idleDeleteTtl:
type: string
x-dcl-go-name: IdleDeleteTtl
description: Optional. The duration to keep the cluster alive while
idling (when no jobs are running). Passing this threshold will
cause the cluster to be deleted. Minimum value is 5 minutes; maximum
value is 14 days (see JSON representation of [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
x-kubernetes-immutable: true
idleStartTime:
type: string
format: date-time
x-dcl-go-name: IdleStartTime
readOnly: true
description: Output only. The time when cluster became idle (most
recent job finished) and became eligible for deletion due to idleness
(see JSON representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
x-kubernetes-immutable: true
masterConfig:
type: object
x-dcl-go-name: MasterConfig
x-dcl-go-type: ClusterConfigMasterConfig
description: Optional. The Compute Engine config settings for the master
instance in a cluster.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
accelerators:
type: array
x-dcl-go-name: Accelerators
description: Optional. The Compute Engine accelerator configuration
for these instances.
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigMasterConfigAccelerators
properties:
acceleratorCount:
type: integer
format: int64
x-dcl-go-name: AcceleratorCount
description: The number of the accelerator cards of this type
exposed to this instance.
x-kubernetes-immutable: true
acceleratorType:
type: string
x-dcl-go-name: AcceleratorType
description: 'Full URL, partial URI, or short name of the
accelerator type resource to expose to this instance. See
[Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
* `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
* `nvidia-tesla-k80` **Auto Zone Exception**: If you are
using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
feature, you must use the short name of the accelerator
type resource, for example, `nvidia-tesla-k80`.'
x-kubernetes-immutable: true
diskConfig:
type: object
x-dcl-go-name: DiskConfig
x-dcl-go-type: ClusterConfigMasterConfigDiskConfig
description: Optional. Disk option config settings.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
bootDiskSizeGb:
type: integer
format: int64
x-dcl-go-name: BootDiskSizeGb
description: Optional. Size in GB of the boot disk (default
is 500GB).
x-kubernetes-immutable: true
bootDiskType:
type: string
x-dcl-go-name: BootDiskType
description: 'Optional. Type of the boot disk (default is "pd-standard").
Valid values: "pd-balanced" (Persistent Disk Balanced Solid
State Drive), "pd-ssd" (Persistent Disk Solid State Drive),
or "pd-standard" (Persistent Disk Hard Disk Drive). See [Disk
types](https://cloud.google.com/compute/docs/disks#disk-types).'
x-kubernetes-immutable: true
localSsdInterface:
type: string
x-dcl-go-name: LocalSsdInterface
description: 'Optional. Interface type of local SSDs (default
is "scsi"). Valid values: "scsi" (Small Computer System Interface),
"nvme" (Non-Volatile Memory Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
x-kubernetes-immutable: true
numLocalSsds:
type: integer
format: int64
x-dcl-go-name: NumLocalSsds
description: Optional. Number of attached SSDs, from 0 to 4
(default is 0). If SSDs are not attached, the boot disk is
used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
data. If one or more SSDs are attached, this runtime bulk
data is spread across them, and the boot disk contains only
basic config and installed binaries.
x-kubernetes-immutable: true
x-dcl-server-default: true
image:
type: string
x-dcl-go-name: Image
description: 'Optional. The Compute Engine image resource used for
cluster instances. The URI can represent an image or image family.
Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]`
* `projects/[project_id]/global/images/[image-id]` * `image-id`
Image family examples. Dataproc will use the most recent image
from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]`
* `projects/[project_id]/global/images/family/[custom-image-family-name]`
If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version`
or the system default.'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Compute/Image
field: selfLink
instanceNames:
type: array
x-dcl-go-name: InstanceNames
readOnly: true
description: Output only. The list of instance names. Dataproc derives
the names from `cluster_name`, `num_instances`, and the instance
group.
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: string
x-dcl-references:
- resource: Compute/Instance
field: selfLink
instanceReferences:
type: array
x-dcl-go-name: InstanceReferences
readOnly: true
description: Output only. List of references to Compute Engine instances.
x-kubernetes-immutable: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigMasterConfigInstanceReferences
properties:
instanceId:
type: string
x-dcl-go-name: InstanceId
description: The unique identifier of the Compute Engine instance.
x-kubernetes-immutable: true
instanceName:
type: string
x-dcl-go-name: InstanceName
description: The user-friendly name of the Compute Engine
instance.
x-kubernetes-immutable: true
publicEciesKey:
type: string
x-dcl-go-name: PublicEciesKey
description: The public ECIES key used for sharing data with
this instance.
x-kubernetes-immutable: true
publicKey:
type: string
x-dcl-go-name: PublicKey
description: The public RSA key used for sharing data with
this instance.
x-kubernetes-immutable: true
isPreemptible:
type: boolean
x-dcl-go-name: IsPreemptible
readOnly: true
description: Output only. Specifies that this instance group contains
preemptible instances.
x-kubernetes-immutable: true
machineType:
type: string
x-dcl-go-name: MachineType
description: 'Optional. The Compute Engine machine type used for
cluster instances. A full URL, partial URI, or short name are
valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
* `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
* `n1-standard-2` **Auto Zone Exception**: If you are using the
Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
feature, you must use the short name of the machine type resource,
for example, `n1-standard-2`.'
x-kubernetes-immutable: true
managedGroupConfig:
type: object
x-dcl-go-name: ManagedGroupConfig
x-dcl-go-type: ClusterConfigMasterConfigManagedGroupConfig
readOnly: true
description: Output only. The config for Compute Engine Instance
Group Manager that manages this group. This is only used for preemptible
instance groups.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
instanceGroupManagerName:
type: string
x-dcl-go-name: InstanceGroupManagerName
readOnly: true
description: Output only. The name of the Instance Group Manager
for this group.
x-kubernetes-immutable: true
instanceTemplateName:
type: string
x-dcl-go-name: InstanceTemplateName
readOnly: true
description: Output only. The name of the Instance Template
used for the Managed Instance Group.
x-kubernetes-immutable: true
minCpuPlatform:
type: string
x-dcl-go-name: MinCpuPlatform
description: Optional. Specifies the minimum cpu platform for the
Instance Group. See [Dataproc -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
x-kubernetes-immutable: true
x-dcl-server-default: true
numInstances:
type: integer
format: int64
x-dcl-go-name: NumInstances
description: Optional. The number of VM instances in the instance
group. For [HA cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
[master_config](#FIELDS.master_config) groups, **must be set to
3**. For standard cluster [master_config](#FIELDS.master_config)
groups, **must be set to 1**.
x-kubernetes-immutable: true
preemptibility:
type: string
x-dcl-go-name: Preemptibility
x-dcl-go-type: ClusterConfigMasterConfigPreemptibilityEnum
description: 'Optional. Specifies the preemptibility of the instance
group. The default value for master and worker groups is `NON_PREEMPTIBLE`.
This default cannot be changed. The default value for secondary
instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
NON_PREEMPTIBLE, PREEMPTIBLE'
x-kubernetes-immutable: true
enum:
- PREEMPTIBILITY_UNSPECIFIED
- NON_PREEMPTIBLE
- PREEMPTIBLE
metastoreConfig:
type: object
x-dcl-go-name: MetastoreConfig
x-dcl-go-type: ClusterConfigMetastoreConfig
description: Optional. Metastore configuration.
x-kubernetes-immutable: true
required:
- dataprocMetastoreService
properties:
dataprocMetastoreService:
type: string
x-dcl-go-name: DataprocMetastoreService
description: 'Required. Resource name of an existing Dataproc Metastore
service. Example: * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Metastore/Service
field: selfLink
secondaryWorkerConfig:
type: object
x-dcl-go-name: SecondaryWorkerConfig
x-dcl-go-type: ClusterConfigSecondaryWorkerConfig
description: Optional. The Compute Engine config settings for additional
worker instances in a cluster.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
accelerators:
type: array
x-dcl-go-name: Accelerators
description: Optional. The Compute Engine accelerator configuration
for these instances.
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigSecondaryWorkerConfigAccelerators
properties:
acceleratorCount:
type: integer
format: int64
x-dcl-go-name: AcceleratorCount
description: The number of the accelerator cards of this type
exposed to this instance.
x-kubernetes-immutable: true
acceleratorType:
type: string
x-dcl-go-name: AcceleratorType
description: 'Full URL, partial URI, or short name of the
accelerator type resource to expose to this instance. See
[Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
* `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
* `nvidia-tesla-k80` **Auto Zone Exception**: If you are
using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
feature, you must use the short name of the accelerator
type resource, for example, `nvidia-tesla-k80`.'
x-kubernetes-immutable: true
diskConfig:
type: object
x-dcl-go-name: DiskConfig
x-dcl-go-type: ClusterConfigSecondaryWorkerConfigDiskConfig
description: Optional. Disk option config settings.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
bootDiskSizeGb:
type: integer
format: int64
x-dcl-go-name: BootDiskSizeGb
description: Optional. Size in GB of the boot disk (default
is 500GB).
x-kubernetes-immutable: true
bootDiskType:
type: string
x-dcl-go-name: BootDiskType
description: 'Optional. Type of the boot disk (default is "pd-standard").
Valid values: "pd-balanced" (Persistent Disk Balanced Solid
State Drive), "pd-ssd" (Persistent Disk Solid State Drive),
or "pd-standard" (Persistent Disk Hard Disk Drive). See [Disk
types](https://cloud.google.com/compute/docs/disks#disk-types).'
x-kubernetes-immutable: true
localSsdInterface:
type: string
x-dcl-go-name: LocalSsdInterface
description: 'Optional. Interface type of local SSDs (default
is "scsi"). Valid values: "scsi" (Small Computer System Interface),
"nvme" (Non-Volatile Memory Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
x-kubernetes-immutable: true
numLocalSsds:
type: integer
format: int64
x-dcl-go-name: NumLocalSsds
description: Optional. Number of attached SSDs, from 0 to 4
(default is 0). If SSDs are not attached, the boot disk is
used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
data. If one or more SSDs are attached, this runtime bulk
data is spread across them, and the boot disk contains only
basic config and installed binaries.
x-kubernetes-immutable: true
x-dcl-server-default: true
# image: optional Compute Engine image (or image family) used for the
# instances of this group. Accepts a full URL, a partial resource path or
# a bare image id, as enumerated in the description.
# x-dcl-references ties the value to the selfLink of a Compute/Image.
image:
type: string
x-dcl-go-name: Image
description: 'Optional. The Compute Engine image resource used for
cluster instances. The URI can represent an image or image family.
Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]`
* `projects/[project_id]/global/images/[image-id]` * `image-id`
Image family examples. Dataproc will use the most recent image
from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]`
* `projects/[project_id]/global/images/family/[custom-image-family-name]`
If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version`
or the system default.'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Compute/Image
field: selfLink
# instanceNames: output-only (readOnly) list of instance name strings.
# Each item is declared as a reference to the selfLink of a
# Compute/Instance.
instanceNames:
type: array
x-dcl-go-name: InstanceNames
readOnly: true
description: Output only. The list of instance names. Dataproc derives
the names from `cluster_name`, `num_instances`, and the instance
group.
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: string
x-dcl-references:
- resource: Compute/Instance
field: selfLink
# instanceReferences: output-only (readOnly) list describing the Compute
# Engine instances of the secondary worker group (item go type
# ClusterConfigSecondaryWorkerConfigInstanceReferences).
# Each item carries the instance id and name plus two public keys
# (ECIES and RSA) that the descriptions say are used for sharing data
# with the instance.
instanceReferences:
type: array
x-dcl-go-name: InstanceReferences
readOnly: true
description: Output only. List of references to Compute Engine instances.
x-kubernetes-immutable: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigSecondaryWorkerConfigInstanceReferences
properties:
instanceId:
type: string
x-dcl-go-name: InstanceId
description: The unique identifier of the Compute Engine instance.
x-kubernetes-immutable: true
instanceName:
type: string
x-dcl-go-name: InstanceName
description: The user-friendly name of the Compute Engine
instance.
x-kubernetes-immutable: true
publicEciesKey:
type: string
x-dcl-go-name: PublicEciesKey
description: The public ECIES key used for sharing data with
this instance.
x-kubernetes-immutable: true
publicKey:
type: string
x-dcl-go-name: PublicKey
description: The public RSA key used for sharing data with
this instance.
x-kubernetes-immutable: true
# isPreemptible: output-only (readOnly) boolean reporting whether this
# instance group contains preemptible instances.
isPreemptible:
type: boolean
x-dcl-go-name: IsPreemptible
readOnly: true
description: Output only. Specifies that this instance group contains
preemptible instances.
x-kubernetes-immutable: true
# machineType: optional Compute Engine machine type for the instances.
# Full URL, partial URI and short name are all accepted per the
# description, except with Auto Zone Placement, where only the short name
# (e.g. n1-standard-2) is allowed.
machineType:
type: string
x-dcl-go-name: MachineType
description: 'Optional. The Compute Engine machine type used for
cluster instances. A full URL, partial URI, or short name are
valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
* `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
* `n1-standard-2` **Auto Zone Exception**: If you are using the
Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
feature, you must use the short name of the machine type resource,
for example, `n1-standard-2`.'
x-kubernetes-immutable: true
# managedGroupConfig: output-only (readOnly) object naming the Compute
# Engine Instance Group Manager and Instance Template behind this group
# (go type ClusterConfigSecondaryWorkerConfigManagedGroupConfig).
# The description says it is only used for preemptible instance groups.
managedGroupConfig:
type: object
x-dcl-go-name: ManagedGroupConfig
x-dcl-go-type: ClusterConfigSecondaryWorkerConfigManagedGroupConfig
readOnly: true
description: Output only. The config for Compute Engine Instance
Group Manager that manages this group. This is only used for preemptible
instance groups.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
instanceGroupManagerName:
type: string
x-dcl-go-name: InstanceGroupManagerName
readOnly: true
description: Output only. The name of the Instance Group Manager
for this group.
x-kubernetes-immutable: true
instanceTemplateName:
type: string
x-dcl-go-name: InstanceTemplateName
readOnly: true
description: Output only. The name of the Instance Template
used for the Managed Instance Group.
x-kubernetes-immutable: true
# minCpuPlatform: optional minimum CPU platform for the instance group.
# Free-form string; accepted values are documented at the linked page,
# not enumerated in this schema.
minCpuPlatform:
type: string
x-dcl-go-name: MinCpuPlatform
description: Optional. Specifies the minimum cpu platform for the
Instance Group. See [Dataproc -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
x-kubernetes-immutable: true
x-dcl-server-default: true
# numInstances: optional number of VM instances in the instance group
# (int64).
# NOTE(review): the description only spells out constraints for
# master_config groups, and its HA-cluster link is site-relative
# (/dataproc/docs/...) with #FIELDS anchors — presumably inherited from
# shared instance-group API docs; confirm before relying on it here.
numInstances:
type: integer
format: int64
x-dcl-go-name: NumInstances
description: Optional. The number of VM instances in the instance
group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
[master_config](#FIELDS.master_config) groups, **must be set to
3**. For standard cluster [master_config](#FIELDS.master_config)
groups, **must be set to 1**.
x-kubernetes-immutable: true
# preemptibility: optional enum selecting the preemptibility of the
# instance group (go type
# ClusterConfigSecondaryWorkerConfigPreemptibilityEnum).
# Per the description the default for secondary instances is PREEMPTIBLE,
# while master and worker groups are fixed at NON_PREEMPTIBLE.
preemptibility:
type: string
x-dcl-go-name: Preemptibility
x-dcl-go-type: ClusterConfigSecondaryWorkerConfigPreemptibilityEnum
description: 'Optional. Specifies the preemptibility of the instance
group. The default value for master and worker groups is `NON_PREEMPTIBLE`.
This default cannot be changed. The default value for secondary
instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
NON_PREEMPTIBLE, PREEMPTIBLE'
x-kubernetes-immutable: true
enum:
- PREEMPTIBILITY_UNSPECIFIED
- NON_PREEMPTIBLE
- PREEMPTIBLE
# securityConfig: optional security settings for the cluster (go type
# ClusterConfigSecurityConfig). Contains two sub-objects, identityConfig
# and kerberosConfig; every field in this subtree is marked
# x-kubernetes-immutable.
securityConfig:
type: object
x-dcl-go-name: SecurityConfig
x-dcl-go-type: ClusterConfigSecurityConfig
description: Optional. Security settings for the cluster.
x-kubernetes-immutable: true
properties:
# identityConfig: user -> service account mapping used for service-account
# based multi-tenancy. userServiceAccountMapping is required whenever this
# object is supplied.
identityConfig:
type: object
x-dcl-go-name: IdentityConfig
x-dcl-go-type: ClusterConfigSecurityConfigIdentityConfig
description: Optional. Identity related configuration, including
service account based secure multi-tenancy user mappings.
x-kubernetes-immutable: true
required:
- userServiceAccountMapping
properties:
userServiceAccountMapping:
type: object
additionalProperties:
type: string
x-dcl-go-name: UserServiceAccountMapping
description: Required. Map of user to service account.
x-kubernetes-immutable: true
# kerberosConfig: optional Kerberos settings; no field is required.
# Per the descriptions, the *Password fields and kdcDbKey take the Cloud
# Storage URI of a KMS-encrypted file rather than the secret itself;
# keystore/truststore take the Cloud Storage URI of the store file.
kerberosConfig:
type: object
x-dcl-go-name: KerberosConfig
x-dcl-go-type: ClusterConfigSecurityConfigKerberosConfig
description: Optional. Kerberos related configuration.
x-kubernetes-immutable: true
properties:
# crossRealmTrust*: settings for trusting a remote realm (admin server,
# KDC, realm name and shared password).
crossRealmTrustAdminServer:
type: string
x-dcl-go-name: CrossRealmTrustAdminServer
description: Optional. The admin server (IP or hostname) for
the remote trusted realm in a cross realm trust relationship.
x-kubernetes-immutable: true
crossRealmTrustKdc:
type: string
x-dcl-go-name: CrossRealmTrustKdc
description: Optional. The KDC (IP or hostname) for the remote
trusted realm in a cross realm trust relationship.
x-kubernetes-immutable: true
crossRealmTrustRealm:
type: string
x-dcl-go-name: CrossRealmTrustRealm
description: Optional. The remote realm the Dataproc on-cluster
KDC will trust, should the user enable cross realm trust.
x-kubernetes-immutable: true
crossRealmTrustSharedPassword:
type: string
x-dcl-go-name: CrossRealmTrustSharedPassword
description: Optional. The Cloud Storage URI of a KMS encrypted
file containing the shared password between the on-cluster
Kerberos realm and the remote trusted realm, in a cross realm
trust relationship.
x-kubernetes-immutable: true
# enableKerberos: boolean switch; the description gives false as default.
enableKerberos:
type: boolean
x-dcl-go-name: EnableKerberos
description: 'Optional. Flag to indicate whether to Kerberize
the cluster (default: false). Set this field to true to enable
Kerberos on a cluster.'
x-kubernetes-immutable: true
kdcDbKey:
type: string
x-dcl-go-name: KdcDbKey
description: Optional. The Cloud Storage URI of a KMS encrypted
file containing the master key of the KDC database.
x-kubernetes-immutable: true
keyPassword:
type: string
x-dcl-go-name: KeyPassword
description: Optional. The Cloud Storage URI of a KMS encrypted
file containing the password to the user provided key. For
the self-signed certificate, this password is generated by
Dataproc.
x-kubernetes-immutable: true
keystore:
type: string
x-dcl-go-name: Keystore
description: Optional. The Cloud Storage URI of the keystore
file used for SSL encryption. If not provided, Dataproc will
provide a self-signed certificate.
x-kubernetes-immutable: true
keystorePassword:
type: string
x-dcl-go-name: KeystorePassword
description: Optional. The Cloud Storage URI of a KMS encrypted
file containing the password to the user provided keystore.
For the self-signed certificate, this password is generated
by Dataproc.
x-kubernetes-immutable: true
# kmsKey: URI of the KMS key used to encrypt the sensitive files above;
# declared as a reference to the selfLink of a Cloudkms/CryptoKey.
kmsKey:
type: string
x-dcl-go-name: KmsKey
description: Optional. The uri of the KMS key used to encrypt
various sensitive files.
x-kubernetes-immutable: true
x-dcl-references:
- resource: Cloudkms/CryptoKey
field: selfLink
realm:
type: string
x-dcl-go-name: Realm
description: Optional. The name of the on-cluster Kerberos realm.
If not specified, the uppercased domain of hostnames will
be the realm.
x-kubernetes-immutable: true
rootPrincipalPassword:
type: string
x-dcl-go-name: RootPrincipalPassword
description: Optional. The Cloud Storage URI of a KMS encrypted
file containing the root principal password.
x-kubernetes-immutable: true
# tgtLifetimeHours: int64 hours; per the description, unset and 0 both
# resolve to a default of 10.
tgtLifetimeHours:
type: integer
format: int64
x-dcl-go-name: TgtLifetimeHours
description: Optional. The lifetime of the ticket granting ticket,
in hours. If not specified, or user specifies 0, then default
value 10 will be used.
x-kubernetes-immutable: true
truststore:
type: string
x-dcl-go-name: Truststore
description: Optional. The Cloud Storage URI of the truststore
file used for SSL encryption. If not provided, Dataproc will
provide a self-signed certificate.
x-kubernetes-immutable: true
truststorePassword:
type: string
x-dcl-go-name: TruststorePassword
description: Optional. The Cloud Storage URI of a KMS encrypted
file containing the password to the user provided truststore.
For the self-signed certificate, this password is generated
by Dataproc.
x-kubernetes-immutable: true
# softwareConfig: optional settings for the software installed on the
# cluster (go type ClusterConfigSoftwareConfig): image version, optional
# components and daemon config properties.
softwareConfig:
type: object
x-dcl-go-name: SoftwareConfig
x-dcl-go-type: ClusterConfigSoftwareConfig
description: Optional. The config settings for software inside the cluster.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
# imageVersion: Dataproc image version string (for example "1.2",
# "1.2.29" or "preview", per the description).
imageVersion:
type: string
x-dcl-go-name: ImageVersion
description: Optional. The version of software inside the cluster.
It must be one of the supported [Dataproc Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
such as "1.2" (including a subminor version, such as "1.2.29"),
or the ["preview" version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
If unspecified, it defaults to the latest Debian version.
x-kubernetes-immutable: true
# optionalComponents: list of component enum values to activate.
# Flagged x-dcl-send-empty — presumably an explicitly empty list is sent
# to the API rather than dropped; confirm against the DCL generator.
optionalComponents:
type: array
x-dcl-go-name: OptionalComponents
description: Optional. The set of components to activate on the
cluster.
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: ClusterConfigSoftwareConfigOptionalComponentsEnum
enum:
- COMPONENT_UNSPECIFIED
- ANACONDA
- DOCKER
- DRUID
- FLINK
- HBASE
- HIVE_WEBHCAT
- JUPYTER
- KERBEROS
- PRESTO
- RANGER
- SOLR
- ZEPPELIN
- ZOOKEEPER
# properties: string -> string map of daemon config overrides. Keys use
# the `prefix:property` form, where the prefix selects the target config
# file as listed in the description.
properties:
type: object
additionalProperties:
type: string
x-dcl-go-name: Properties
description: 'Optional. The properties to set on daemon config files.
Property keys are specified in `prefix:property` format, for example
`core:hadoop.tmp.dir`. The following are supported prefixes and
their mappings: * capacity-scheduler: `capacity-scheduler.xml`
* core: `core-site.xml` * distcp: `distcp-default.xml` * hdfs:
`hdfs-site.xml` * hive: `hive-site.xml` * mapred: `mapred-site.xml`
* pig: `pig.properties` * spark: `spark-defaults.conf` * yarn:
`yarn-site.xml` For more information, see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).'
x-kubernetes-immutable: true
# stagingBucket: optional Cloud Storage bucket for job dependencies,
# config files and job driver console output.
# The value is a bucket NAME, not a gs:// URI (see the description);
# x-dcl-references accordingly points at the `name` field of a
# Storage/Bucket.
stagingBucket:
type: string
x-dcl-go-name: StagingBucket
description: Optional. A Cloud Storage bucket used to stage job dependencies,
config files, and job driver console output. If you do not specify
a staging bucket, Cloud Dataproc will determine a Cloud Storage location
(US, ASIA, or EU) for your cluster's staging bucket according to the
Compute Engine zone where your cluster is deployed, and then create
and manage this project-level, per-location bucket (see [Dataproc
staging bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
**This field requires a Cloud Storage bucket name, not a URI to a
Cloud Storage bucket.**
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-references:
- resource: Storage/Bucket
field: name
# tempBucket: optional Cloud Storage bucket for ephemeral cluster and job
# data such as Spark and MapReduce history files.
# The value is a bucket NAME, not a URI. Per the description, the
# auto-created default bucket has a 90-day TTL, while a user-supplied
# bucket may use any TTL or none.
tempBucket:
type: string
x-dcl-go-name: TempBucket
description: Optional. A Cloud Storage bucket used to store ephemeral
cluster and jobs data, such as Spark and MapReduce history files.
If you do not specify a temp bucket, Dataproc will determine a Cloud
Storage location (US, ASIA, or EU) for your cluster's temp bucket
according to the Compute Engine zone where your cluster is deployed,
and then create and manage this project-level, per-location bucket.
The default bucket has a TTL of 90 days, but you can use any TTL (or
none) if you specify a bucket. **This field requires a Cloud Storage
bucket name, not a URI to a Cloud Storage bucket.**
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-references:
- resource: Storage/Bucket
field: name
# workerConfig: optional Compute Engine settings for the worker instances
# of the cluster (go type ClusterConfigWorkerConfig).
# User-settable fields: accelerators, diskConfig, image, machineType,
# minCpuPlatform, numInstances, preemptibility.
# Output-only (readOnly) fields: instanceNames, instanceReferences,
# isPreemptible, managedGroupConfig.
workerConfig:
type: object
x-dcl-go-name: WorkerConfig
x-dcl-go-type: ClusterConfigWorkerConfig
description: Optional. The Compute Engine config settings for worker
instances in a cluster.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
# accelerators: list of {acceleratorCount, acceleratorType} entries.
# Flagged both x-dcl-server-default and x-dcl-send-empty.
accelerators:
type: array
x-dcl-go-name: Accelerators
description: Optional. The Compute Engine accelerator configuration
for these instances.
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigWorkerConfigAccelerators
properties:
acceleratorCount:
type: integer
format: int64
x-dcl-go-name: AcceleratorCount
description: The number of the accelerator cards of this type
exposed to this instance.
x-kubernetes-immutable: true
acceleratorType:
type: string
x-dcl-go-name: AcceleratorType
description: 'Full URL, partial URI, or short name of the
accelerator type resource to expose to this instance. See
[Compute Engine AcceleratorTypes](https://cloud.google.com/compute/docs/reference/beta/acceleratorTypes).
Examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
* `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
* `nvidia-tesla-k80` **Auto Zone Exception**: If you are
using the Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
feature, you must use the short name of the accelerator
type resource, for example, `nvidia-tesla-k80`.'
x-kubernetes-immutable: true
# diskConfig: boot disk size/type and local SSD count/interface for the
# worker instances. Value sets for bootDiskType and localSsdInterface are
# documented in the descriptions only, not declared as enums.
diskConfig:
type: object
x-dcl-go-name: DiskConfig
x-dcl-go-type: ClusterConfigWorkerConfigDiskConfig
description: Optional. Disk option config settings.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
bootDiskSizeGb:
type: integer
format: int64
x-dcl-go-name: BootDiskSizeGb
description: Optional. Size in GB of the boot disk (default
is 500GB).
x-kubernetes-immutable: true
bootDiskType:
type: string
x-dcl-go-name: BootDiskType
description: 'Optional. Type of the boot disk (default is "pd-standard").
Valid values: "pd-balanced" (Persistent Disk Balanced Solid
State Drive), "pd-ssd" (Persistent Disk Solid State Drive),
or "pd-standard" (Persistent Disk Hard Disk Drive). See [Disk
types](https://cloud.google.com/compute/docs/disks#disk-types).'
x-kubernetes-immutable: true
localSsdInterface:
type: string
x-dcl-go-name: LocalSsdInterface
description: 'Optional. Interface type of local SSDs (default
is "scsi"). Valid values: "scsi" (Small Computer System Interface),
"nvme" (Non-Volatile Memory Express). See [local SSD performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).'
x-kubernetes-immutable: true
numLocalSsds:
type: integer
format: int64
x-dcl-go-name: NumLocalSsds
description: Optional. Number of attached SSDs, from 0 to 4
(default is 0). If SSDs are not attached, the boot disk is
used to store runtime logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
data. If one or more SSDs are attached, this runtime bulk
data is spread across them, and the boot disk contains only
basic config and installed binaries.
x-kubernetes-immutable: true
x-dcl-server-default: true
# image: Compute Engine image or image family; references the selfLink of
# a Compute/Image.
image:
type: string
x-dcl-go-name: Image
description: 'Optional. The Compute Engine image resource used for
cluster instances. The URI can represent an image or image family.
Image examples: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/[image-id]`
* `projects/[project_id]/global/images/[image-id]` * `image-id`
Image family examples. Dataproc will use the most recent image
from the family: * `https://www.googleapis.com/compute/beta/projects/[project_id]/global/images/family/[custom-image-family-name]`
* `projects/[project_id]/global/images/family/[custom-image-family-name]`
If the URI is unspecified, it will be inferred from `SoftwareConfig.image_version`
or the system default.'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Compute/Image
field: selfLink
# instanceNames: output-only list of instance names; each item references
# the selfLink of a Compute/Instance.
instanceNames:
type: array
x-dcl-go-name: InstanceNames
readOnly: true
description: Output only. The list of instance names. Dataproc derives
the names from `cluster_name`, `num_instances`, and the instance
group.
x-kubernetes-immutable: true
x-dcl-server-default: true
x-dcl-list-type: list
items:
type: string
x-dcl-go-type: string
x-dcl-references:
- resource: Compute/Instance
field: selfLink
# instanceReferences: output-only list of per-instance records (id, name
# and the ECIES/RSA public keys).
instanceReferences:
type: array
x-dcl-go-name: InstanceReferences
readOnly: true
description: Output only. List of references to Compute Engine instances.
x-kubernetes-immutable: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterConfigWorkerConfigInstanceReferences
properties:
instanceId:
type: string
x-dcl-go-name: InstanceId
description: The unique identifier of the Compute Engine instance.
x-kubernetes-immutable: true
instanceName:
type: string
x-dcl-go-name: InstanceName
description: The user-friendly name of the Compute Engine
instance.
x-kubernetes-immutable: true
publicEciesKey:
type: string
x-dcl-go-name: PublicEciesKey
description: The public ECIES key used for sharing data with
this instance.
x-kubernetes-immutable: true
publicKey:
type: string
x-dcl-go-name: PublicKey
description: The public RSA key used for sharing data with
this instance.
x-kubernetes-immutable: true
isPreemptible:
type: boolean
x-dcl-go-name: IsPreemptible
readOnly: true
description: Output only. Specifies that this instance group contains
preemptible instances.
x-kubernetes-immutable: true
machineType:
type: string
x-dcl-go-name: MachineType
description: 'Optional. The Compute Engine machine type used for
cluster instances. A full URL, partial URI, or short name are
valid. Examples: * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
* `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
* `n1-standard-2` **Auto Zone Exception**: If you are using the
Dataproc [Auto Zone Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
feature, you must use the short name of the machine type resource,
for example, `n1-standard-2`.'
x-kubernetes-immutable: true
# managedGroupConfig: output-only names of the Instance Group Manager and
# Instance Template; described as only used for preemptible groups.
managedGroupConfig:
type: object
x-dcl-go-name: ManagedGroupConfig
x-dcl-go-type: ClusterConfigWorkerConfigManagedGroupConfig
readOnly: true
description: Output only. The config for Compute Engine Instance
Group Manager that manages this group. This is only used for preemptible
instance groups.
x-kubernetes-immutable: true
x-dcl-server-default: true
properties:
instanceGroupManagerName:
type: string
x-dcl-go-name: InstanceGroupManagerName
readOnly: true
description: Output only. The name of the Instance Group Manager
for this group.
x-kubernetes-immutable: true
instanceTemplateName:
type: string
x-dcl-go-name: InstanceTemplateName
readOnly: true
description: Output only. The name of the Instance Template
used for the Managed Instance Group.
x-kubernetes-immutable: true
minCpuPlatform:
type: string
x-dcl-go-name: MinCpuPlatform
description: Optional. Specifies the minimum cpu platform for the
Instance Group. See [Dataproc -> Minimum CPU Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
x-kubernetes-immutable: true
x-dcl-server-default: true
# NOTE(review): this description only states constraints for
# master_config groups and uses a site-relative link — presumably shared
# instance-group documentation; confirm what applies to worker groups.
numInstances:
type: integer
format: int64
x-dcl-go-name: NumInstances
description: Optional. The number of VM instances in the instance
group. For [HA cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
[master_config](#FIELDS.master_config) groups, **must be set to
3**. For standard cluster [master_config](#FIELDS.master_config)
groups, **must be set to 1**.
x-kubernetes-immutable: true
# preemptibility: enum; the description says master and worker groups
# default to NON_PREEMPTIBLE and that this default cannot be changed.
preemptibility:
type: string
x-dcl-go-name: Preemptibility
x-dcl-go-type: ClusterConfigWorkerConfigPreemptibilityEnum
description: 'Optional. Specifies the preemptibility of the instance
group. The default value for master and worker groups is `NON_PREEMPTIBLE`.
This default cannot be changed. The default value for secondary
instances is `PREEMPTIBLE`. Possible values: PREEMPTIBILITY_UNSPECIFIED,
NON_PREEMPTIBLE, PREEMPTIBLE'
x-kubernetes-immutable: true
enum:
- PREEMPTIBILITY_UNSPECIFIED
- NON_PREEMPTIBLE
- PREEMPTIBLE
# labels: optional string -> string map of cluster labels. This is the
# field named by the resource-level `x-dcl-labels: labels` marker.
# Unlike most fields in this schema it carries no x-kubernetes-immutable
# flag. The key/value length and RFC 1035 rules and the 32-label cap are
# stated in the description only, not enforced by this schema.
labels:
type: object
additionalProperties:
type: string
x-dcl-go-name: Labels
description: Optional. The labels to associate with this cluster. Label
**keys** must contain 1 to 63 characters, and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
Label **values** may be empty, but, if present, must contain 1 to 63 characters,
and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
No more than 32 labels can be associated with a cluster.
# location: required resource location (listed under the Cluster
# `required` keys), usually a GCP region. It fills the {{location}}
# segment of the resource id template and is flagged x-dcl-parameter.
location:
type: string
x-dcl-go-name: Location
description: The location for the resource, usually a GCP region.
x-kubernetes-immutable: true
x-dcl-parameter: true
# metrics: output-only (readOnly) daemon metrics (go type ClusterMetrics).
# Both members are free-form string -> string maps; the description marks
# the report as a beta feature that may change.
metrics:
type: object
x-dcl-go-name: Metrics
x-dcl-go-type: ClusterMetrics
readOnly: true
description: 'Output only. Contains cluster daemon metrics such as HDFS
and YARN stats. **Beta Feature**: This report is available for testing
purposes only. It may be changed before final release.'
x-kubernetes-immutable: true
properties:
hdfsMetrics:
type: object
additionalProperties:
type: string
x-dcl-go-name: HdfsMetrics
description: The HDFS metrics.
x-kubernetes-immutable: true
yarnMetrics:
type: object
additionalProperties:
type: string
x-dcl-go-name: YarnMetrics
description: The YARN metrics.
x-kubernetes-immutable: true
# name: required cluster name; fills the {{name}} segment of the resource
# id template. Per the description it must be unique within a project,
# and names of deleted clusters can be reused.
name:
type: string
x-dcl-go-name: Name
description: Required. The cluster name. Cluster names within a project
must be unique. Names of deleted clusters can be reused.
x-kubernetes-immutable: true
# project: required Google Cloud project ID owning the cluster; fills the
# {{project}} segment of the resource id template.
# Declared as a parent reference (`parent: true`) to the `name` of a
# Cloudresourcemanager/Project and flagged x-dcl-parameter.
project:
type: string
x-dcl-go-name: Project
description: Required. The Google Cloud Platform project ID that the cluster
belongs to.
x-kubernetes-immutable: true
x-dcl-references:
- resource: Cloudresourcemanager/Project
field: name
parent: true
x-dcl-parameter: true
# status: output-only (readOnly) current cluster status (go type
# ClusterStatus). All four members are readOnly as well: a free-text
# detail, the state enum, the time the state was entered, and a substate
# enum carrying agent-reported information.
status:
type: object
x-dcl-go-name: Status
x-dcl-go-type: ClusterStatus
readOnly: true
description: Output only. Cluster status.
x-kubernetes-immutable: true
properties:
detail:
type: string
x-dcl-go-name: Detail
readOnly: true
description: Optional. Output only. Details of cluster's state.
x-kubernetes-immutable: true
# state: lifecycle state enum (go type ClusterStatusStateEnum).
state:
type: string
x-dcl-go-name: State
x-dcl-go-type: ClusterStatusStateEnum
readOnly: true
description: 'Output only. The cluster''s state. Possible values: UNKNOWN,
CREATING, RUNNING, ERROR, DELETING, UPDATING, STOPPING, STOPPED, STARTING'
x-kubernetes-immutable: true
enum:
- UNKNOWN
- CREATING
- RUNNING
- ERROR
- DELETING
- UPDATING
- STOPPING
- STOPPED
- STARTING
# stateStartTime: date-time formatted string (JSON Timestamp
# representation, per the description).
stateStartTime:
type: string
format: date-time
x-dcl-go-name: StateStartTime
readOnly: true
description: Output only. Time when this state was entered (see JSON
representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
x-kubernetes-immutable: true
substate:
type: string
x-dcl-go-name: Substate
x-dcl-go-type: ClusterStatusSubstateEnum
readOnly: true
description: 'Output only. Additional state information that includes
status reported by the agent. Possible values: UNSPECIFIED, UNHEALTHY,
STALE_STATUS'
x-kubernetes-immutable: true
enum:
- UNSPECIFIED
- UNHEALTHY
- STALE_STATUS
# statusHistory: output-only (readOnly) list of previous cluster statuses
# (item go type ClusterStatusHistory). Each item has the same four fields
# and the same enum values as `status`, but with its own generated enum
# types (ClusterStatusHistoryStateEnum / ClusterStatusHistorySubstateEnum).
statusHistory:
type: array
x-dcl-go-name: StatusHistory
readOnly: true
description: Output only. The previous cluster status.
x-kubernetes-immutable: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterStatusHistory
properties:
detail:
type: string
x-dcl-go-name: Detail
readOnly: true
description: Optional. Output only. Details of cluster's state.
x-kubernetes-immutable: true
state:
type: string
x-dcl-go-name: State
x-dcl-go-type: ClusterStatusHistoryStateEnum
readOnly: true
description: 'Output only. The cluster''s state. Possible values:
UNKNOWN, CREATING, RUNNING, ERROR, DELETING, UPDATING, STOPPING,
STOPPED, STARTING'
x-kubernetes-immutable: true
enum:
- UNKNOWN
- CREATING
- RUNNING
- ERROR
- DELETING
- UPDATING
- STOPPING
- STOPPED
- STARTING
stateStartTime:
type: string
format: date-time
x-dcl-go-name: StateStartTime
readOnly: true
description: Output only. Time when this state was entered (see JSON
representation of [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
x-kubernetes-immutable: true
substate:
type: string
x-dcl-go-name: Substate
x-dcl-go-type: ClusterStatusHistorySubstateEnum
readOnly: true
description: 'Output only. Additional state information that includes
status reported by the agent. Possible values: UNSPECIFIED, UNHEALTHY,
STALE_STATUS'
x-kubernetes-immutable: true
enum:
- UNSPECIFIED
- UNHEALTHY
- STALE_STATUS
virtualClusterConfig:
type: object
x-dcl-go-name: VirtualClusterConfig
x-dcl-go-type: ClusterVirtualClusterConfig
description: Optional. The virtual cluster config is used when creating
a Dataproc cluster that does not directly control the underlying compute
resources, for example, when creating a [Dataproc-on-GKE cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke).
Dataproc may set default values, and values may change when clusters are
updated. Exactly one of config or virtual_cluster_config must be specified.
x-kubernetes-immutable: true
required:
- kubernetesClusterConfig
properties:
# auxiliaryServicesConfig: optional auxiliary services for a virtual
# cluster (go type ClusterVirtualClusterConfigAuxiliaryServicesConfig).
# Two optional sub-objects: a Hive Metastore link and a Spark History
# Server link.
auxiliaryServicesConfig:
type: object
x-dcl-go-name: AuxiliaryServicesConfig
x-dcl-go-type: ClusterVirtualClusterConfigAuxiliaryServicesConfig
description: Optional. Configuration of auxiliary services used by this
cluster.
x-kubernetes-immutable: true
properties:
# metastoreConfig: dataprocMetastoreService is required whenever this
# object is present; it references the selfLink of a Metastore/Service.
metastoreConfig:
type: object
x-dcl-go-name: MetastoreConfig
x-dcl-go-type: ClusterVirtualClusterConfigAuxiliaryServicesConfigMetastoreConfig
description: Optional. The Hive Metastore configuration for this
workload.
x-kubernetes-immutable: true
required:
- dataprocMetastoreService
properties:
dataprocMetastoreService:
type: string
x-dcl-go-name: DataprocMetastoreService
description: 'Required. Resource name of an existing Dataproc
Metastore service. Example: * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Metastore/Service
field: selfLink
# sparkHistoryServerConfig: dataprocCluster is optional and references the
# selfLink of another Dataproc/Cluster (this same resource type).
sparkHistoryServerConfig:
type: object
x-dcl-go-name: SparkHistoryServerConfig
x-dcl-go-type: ClusterVirtualClusterConfigAuxiliaryServicesConfigSparkHistoryServerConfig
description: Optional. The Spark History Server configuration for
the workload.
x-kubernetes-immutable: true
properties:
dataprocCluster:
type: string
x-dcl-go-name: DataprocCluster
description: 'Optional. Resource name of an existing Dataproc
Cluster to act as a Spark History Server for the workload.
Example: * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`'
x-kubernetes-immutable: true
x-dcl-references:
- resource: Dataproc/Cluster
field: selfLink
kubernetesClusterConfig:
type: object
x-dcl-go-name: KubernetesClusterConfig
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfig
description: Required. The configuration for running the Dataproc cluster
on Kubernetes.
x-kubernetes-immutable: true
required:
- gkeClusterConfig
properties:
gkeClusterConfig:
type: object
x-dcl-go-name: GkeClusterConfig
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfig
description: Required. The configuration for running the Dataproc
cluster on GKE.
x-kubernetes-immutable: true
properties:
# gkeClusterTarget: optional target GKE cluster, given as
# projects/{project}/locations/{location}/clusters/{cluster_id}.
# Per the description it must be in the same project and region as the
# Dataproc cluster. References the selfLink of a Container/Cluster.
gkeClusterTarget:
type: string
x-dcl-go-name: GkeClusterTarget
description: 'Optional. A target GKE cluster to deploy to. It
must be in the same project and region as the Dataproc cluster
(the GKE cluster can be zonal or regional). Format: ''projects/{project}/locations/{location}/clusters/{cluster_id}'''
x-kubernetes-immutable: true
x-dcl-references:
- resource: Container/Cluster
field: selfLink
nodePoolTarget:
type: array
x-dcl-go-name: NodePoolTarget
description: Optional. GKE node pools where workloads will be
scheduled. At least one node pool must be assigned the `DEFAULT`
GkeNodePoolTarget.Role. If a `GkeNodePoolTarget` is not specified,
Dataproc constructs a `DEFAULT` `GkeNodePoolTarget`. Each
role can be given to only one `GkeNodePoolTarget`. All node
pools must have the same location settings.
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTarget
required:
- nodePool
- roles
properties:
# nodePool: required target GKE node pool, given as
# projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}.
# References the selfLink of a Container/NodePool.
nodePool:
type: string
x-dcl-go-name: NodePool
description: 'Required. The target GKE node pool. Format:
''projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'''
x-kubernetes-immutable: true
x-dcl-references:
- resource: Container/NodePool
field: selfLink
nodePoolConfig:
type: object
x-dcl-go-name: NodePoolConfig
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfig
description: Input only. The configuration for the GKE
node pool. If specified, Dataproc attempts to create
a node pool with the specified shape. If one with the
same name already exists, it is verified against all
specified fields. If a field differs, the virtual cluster
creation will fail. If omitted, any node pool with the
specified name is used. If a node pool with the specified
name does not exist, Dataproc creates a node pool with
default values. This is an input only field. It will
not be returned by the API.
x-kubernetes-immutable: true
x-dcl-mutable-unreadable: true
properties:
# autoscaling: optional autoscaler bounds for the node pool. Per the
# description, the autoscaler is enabled only when a valid configuration
# is present.
# The ordering rule (0 <= minNodeCount <= maxNodeCount, maxNodeCount > 0)
# is stated in the descriptions only, not as schema constraints.
autoscaling:
type: object
x-dcl-go-name: Autoscaling
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigAutoscaling
description: Optional. The autoscaler configuration
for this node pool. The autoscaler is enabled only
when a valid configuration is present.
x-kubernetes-immutable: true
properties:
maxNodeCount:
type: integer
format: int64
x-dcl-go-name: MaxNodeCount
description: The maximum number of nodes in the
node pool. Must be >= min_node_count, and must
be > 0. **Note:** Quota must be sufficient to
scale up the cluster.
x-kubernetes-immutable: true
minNodeCount:
type: integer
format: int64
x-dcl-go-name: MinNodeCount
description: The minimum number of nodes in the
node pool. Must be >= 0 and <= max_node_count.
x-kubernetes-immutable: true
config:
type: object
x-dcl-go-name: Config
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfig
description: Optional. The node pool configuration.
x-kubernetes-immutable: true
properties:
# accelerators: optional list of hardware accelerators attached to each
# node of the GKE node pool. Each entry gives a card count, the
# accelerator type resource name and an optional GPU partition size.
# Flagged x-dcl-send-empty.
accelerators:
type: array
x-dcl-go-name: Accelerators
description: Optional. A list of [hardware accelerators](https://cloud.google.com/compute/docs/gpus)
to attach to each node.
x-kubernetes-immutable: true
x-dcl-send-empty: true
x-dcl-list-type: list
items:
type: object
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfigAccelerators
properties:
acceleratorCount:
type: integer
format: int64
x-dcl-go-name: AcceleratorCount
description: The number of accelerator cards
exposed to an instance.
x-kubernetes-immutable: true
# Description typo fixed: "resource namename" -> "resource name".
acceleratorType:
type: string
x-dcl-go-name: AcceleratorType
description: The accelerator type resource
name (see GPUs on Compute Engine).
x-kubernetes-immutable: true
gpuPartitionSize:
type: string
x-dcl-go-name: GpuPartitionSize
description: Size of partitions to create
on the GPU. Valid values are described
in the NVIDIA [mig user guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
x-kubernetes-immutable: true
bootDiskKmsKey:
type: string
x-dcl-go-name: BootDiskKmsKey
description: 'Optional. The [Customer Managed
Encryption Key (CMEK)] (https://cloud.google.com/kubernetes-engine/docs/how-to/using-cmek)
used to encrypt the boot disk attached to each
node in the node pool. Specify the key using
the following format: `projects/KEY_PROJECT_ID/locations/LOCATION/keyRings/RING_NAME/cryptoKeys/KEY_NAME`.'
x-kubernetes-immutable: true
ephemeralStorageConfig:
type: object
x-dcl-go-name: EphemeralStorageConfig
x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetNodePoolConfigConfigEphemeralStorageConfig
description: Optional. Parameters for the ephemeral
storage filesystem. If unspecified, ephemeral
storage is backed by the boot disk.
x-kubernetes-immutable: true
properties:
localSsdCount:
type: integer
format: int64
x-dcl-go-name: LocalSsdCount
description: Number of local SSDs to use to
back ephemeral storage. Uses NVMe interfaces.
Each local SSD is 375 GB in size. If zero,
it means to disable using local SSDs as
ephemeral storage.
x-kubernetes-immutable: true
localSsdCount:
type: integer
format: int64
x-dcl-go-name: LocalSsdCount
description: Optional. The number of local SSD
disks to attach to the node, which is limited
by the maximum number of disks allowable per
zone (see [Adding Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
x-kubernetes-immutable: true
machineType:
type: string
x-dcl-go-name: MachineType
description: Optional. The name of a Compute Engine
[machine type](https://cloud.google.com/compute/docs/machine-types).
x-kubernetes-immutable: true
minCpuPlatform:
type: string
x-dcl-go-name: MinCpuPlatform
description: Optional. [Minimum CPU platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
to be used by this instance. The instance may
be scheduled on the specified or a newer CPU
platform. Specify the friendly names of CPU
platforms, such as "Intel Haswell"` or Intel
Sandy Bridge".
x-kubernetes-immutable: true
preemptible:
type: boolean
x-dcl-go-name: Preemptible
description: Optional. Whether the nodes are created
as legacy [preemptible VM instances] (https://cloud.google.com/compute/docs/instances/preemptible).
Also see Spot VMs, preemptible VM instances
without a maximum lifetime. Legacy and Spot
preemptible nodes cannot be used in a node pool
with the `CONTROLLER` [role] (/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
or in the DEFAULT node pool if the CONTROLLER
role is not assigned (the DEFAULT node pool
will assume the CONTROLLER role).
x-kubernetes-immutable: true
spot:
type: boolean
x-dcl-go-name: Spot
description: Optional. Whether the nodes are created
as [Spot VM instances] (https://cloud.google.com/compute/docs/instances/spot).
Spot VMs are the latest update to legacy preemptible
VMs. Spot VMs do not have a maximum lifetime.
Legacy and Spot preemptible nodes cannot be
used in a node pool with the `CONTROLLER` [role](/dataproc/docs/reference/rest/v1/projects.regions.clusters#role)
or in the DEFAULT node pool if the CONTROLLER
role is not assigned (the DEFAULT node pool
will assume the CONTROLLER role).
x-kubernetes-immutable: true
# Zone list for the node pool: an array of plain strings, flagged
# immutable, with x-dcl-send-empty set.
locations:
  type: array
  x-dcl-go-name: Locations
  description: >-
    Optional. The list of Compute Engine
    [zones](https://cloud.google.com/compute/docs/zones#available)
    where node pool nodes associated with a Dataproc on GKE virtual
    cluster will be located. **Note:** All node pools associated with a
    virtual cluster must be located in the same region as the virtual
    cluster, and they must be located in the same zone within that
    region. If a location is not specified during node pool creation,
    Dataproc on GKE will choose the zone.
  x-kubernetes-immutable: true
  x-dcl-send-empty: true
  x-dcl-list-type: list
  items:
    type: string
    x-dcl-go-type: string
# Roles attached to the GKE node pool: an immutable array whose elements
# are restricted to the enum values listed under `items`.
roles:
  type: array
  x-dcl-go-name: Roles
  description: >-
    Required. The roles associated with the GKE node pool.
  x-kubernetes-immutable: true
  x-dcl-send-empty: true
  x-dcl-list-type: list
  items:
    type: string
    x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigGkeClusterConfigNodePoolTargetRolesEnum
    # Closed set of accepted role names.
    enum:
    - ROLE_UNSPECIFIED
    - DEFAULT
    - CONTROLLER
    - SPARK_DRIVER
    - SPARK_EXECUTOR
# Target Kubernetes namespace: an immutable string.
kubernetesNamespace:
  type: string
  x-dcl-go-name: KubernetesNamespace
  description: >-
    Optional. A namespace within the Kubernetes cluster to deploy into.
    If this namespace does not exist, it is created. If it exists,
    Dataproc verifies that another Dataproc VirtualCluster is not
    installed into it. If not specified, the name of the Dataproc
    Cluster is used.
  x-kubernetes-immutable: true
# Software configuration object. Both members are string-to-string maps
# (additionalProperties of type string) and are flagged immutable.
kubernetesSoftwareConfig:
  type: object
  x-dcl-go-name: KubernetesSoftwareConfig
  x-dcl-go-type: ClusterVirtualClusterConfigKubernetesClusterConfigKubernetesSoftwareConfig
  description: >-
    Optional. The software configuration for this Dataproc cluster
    running on Kubernetes.
  x-kubernetes-immutable: true
  properties:
    # Map of component name to version string.
    componentVersion:
      type: object
      additionalProperties:
        type: string
      x-dcl-go-name: ComponentVersion
      description: >-
        The components that should be installed in this Dataproc
        cluster. The key must be a string from the KubernetesComponent
        enumeration. The value is the version of the software to be
        installed. At least one entry must be specified.
      x-kubernetes-immutable: true
    # A schema field literally named "properties" (Go name: Properties),
    # not the schema keyword of the same name one level up.
    properties:
      type: object
      additionalProperties:
        type: string
      x-dcl-go-name: Properties
      description: >-
        The properties to set on daemon config files. Property keys are
        specified in `prefix:property` format, for example
        `spark:spark.kubernetes.container.image`. The following are
        supported prefixes and their mappings: * spark:
        `spark-defaults.conf` For more information, see
        [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
      x-kubernetes-immutable: true
# Staging bucket name: an immutable string that references the `name`
# field of a Storage/Bucket resource (see x-dcl-references).
stagingBucket:
  type: string
  x-dcl-go-name: StagingBucket
  description: >-
    Optional. A Cloud Storage bucket used to stage job dependencies,
    config files, and job driver console output. If you do not specify
    a staging bucket, Cloud Dataproc will determine a Cloud Storage
    location (US, ASIA, or EU) for your cluster's staging bucket
    according to the Compute Engine zone where your cluster is
    deployed, and then create and manage this project-level,
    per-location bucket (see
    [Dataproc staging and temp buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
    **This field requires a Cloud Storage bucket name, not a
    `gs://...` URI to a Cloud Storage bucket.**
  x-kubernetes-immutable: true
  x-dcl-references:
  - resource: Storage/Bucket
    field: name