# config/crds/resources/apiextensions.k8s.io_v1_customresourcedefinition_dataplextasks.dataplex.cnrm.cloud.google.com.yaml
# CustomResourceDefinition for the Config Connector `DataplexTask` kind
# (group dataplex.cnrm.cloud.google.com). The resource is namespaced, serves
# and stores a single version (v1alpha1) and enables the status subresource.
# NOTE(review): the layout and the 0.0.0-dev version annotation suggest this
# manifest is generated — confirm before hand-editing.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    cnrm.cloud.google.com/version: 0.0.0-dev
  creationTimestamp: null
  labels:
    cnrm.cloud.google.com/managed-by-kcc: "true"
    cnrm.cloud.google.com/system: "true"
  name: dataplextasks.dataplex.cnrm.cloud.google.com
spec:
  group: dataplex.cnrm.cloud.google.com
  names:
    categories:
    - gcp
    kind: DataplexTask
    listKind: DataplexTaskList
    plural: dataplextasks
    shortNames:
    - gcpdataplextask
    - gcpdataplextasks
    singular: dataplextask
  preserveUnknownFields: false
  scope: Namespaced
  versions:
  # Printer columns: creation age plus status, reason and last transition
  # time of the 'Ready' condition.
  - additionalPrinterColumns:
    - jsonPath: .metadata.creationTimestamp
      name: Age
      type: date
    - description: When 'True', the most recent reconcile of the resource succeeded
      jsonPath: .status.conditions[?(@.type=='Ready')].status
      name: Ready
      type: string
    - description: The reason for the value in 'Ready'
      jsonPath: .status.conditions[?(@.type=='Ready')].reason
      name: Status
      type: string
    - description: The last transition time for the value in 'Status'
      jsonPath: .status.conditions[?(@.type=='Ready')].lastTransitionTime
      name: Status Age
      type: date
    name: v1alpha1
    schema:
      openAPIV3Schema:
        description: DataplexTask is the Schema for the DataplexTask API
        properties:
          apiVersion:
            description: 'APIVersion defines the versioned schema of this representation
              of an object. Servers should convert recognized schemas to the latest
              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
            type: string
          kind:
            description: 'Kind is a string value representing the REST resource this
              object represents. Servers may infer this from the endpoint the client
              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
            type: string
          metadata:
            type: object
          # Desired state. Only executionSpec and triggerSpec are required by
          # this schema.
          # NOTE(review): lakeRef is not listed under spec.required — confirm
          # whether the parent lake is really optional.
          spec:
            description: DataplexTaskSpec defines the desired state of DataplexTask
            properties:
              description:
                description: Optional. Description of the task.
                type: string
              displayName:
                description: Optional. User friendly display name.
                type: string
              executionSpec:
                description: Required. Spec related to how a task is executed.
                properties:
                  args:
                    additionalProperties:
                      type: string
                    description: 'Optional. The arguments to pass to the task. The
                      args can use placeholders of the format ${placeholder} as part
                      of key/value string. These will be interpolated before passing
                      the args to the driver. Currently supported placeholders: -
                      ${task_id} - ${job_time} To pass positional args, set the key
                      as TASK_ARGS. The value should be a comma-separated string of
                      all the positional arguments. To use a delimiter other than
                      comma, refer to https://cloud.google.com/sdk/gcloud/reference/topic/escaping.
                      In case of other keys being present in the args, then TASK_ARGS
                      will be passed as the last argument.'
                    type: object
                  # Reference pattern used by every *Ref field in this schema:
                  # either `name` (optionally with `namespace`) or `external`
                  # must be set, never both.
                  kmsKeyRef:
                    description: 'Optional. The Cloud KMS key to use for encryption,
                      of the form: `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.'
                    oneOf:
                    - not:
                        required:
                        - external
                      required:
                      - name
                    - not:
                        anyOf:
                        - required:
                          - name
                        - required:
                          - namespace
                      required:
                      - external
                    properties:
                      external:
                        description: A reference to an externally managed KMSCryptoKey.
                          Should be in the format `projects/[kms_project_id]/locations/[region]/keyRings/[key_ring_id]/cryptoKeys/[key]`.
                        type: string
                      name:
                        description: The `name` of a `KMSCryptoKey` resource.
                        type: string
                      namespace:
                        description: The `namespace` of a `KMSCryptoKey` resource.
                        type: string
                    type: object
                  maxJobExecutionLifetime:
                    description: Optional. The maximum duration after which the job
                      execution is expired.
                    type: string
                  project:
                    description: Optional. The project in which jobs are run. By default,
                      the project containing the Lake is used. If a project is provided,
                      the [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account]
                      must belong to this project.
                    type: string
                  # NOTE(review): the description below says a default account
                  # is used "if not provided", yet executionSpec.required lists
                  # serviceAccountRef, so this schema rejects omitting it —
                  # confirm which behavior is intended.
                  serviceAccountRef:
                    description: Required. Service account to use to execute a task.
                      If not provided, the default Compute service account for the
                      project is used.
                    oneOf:
                    - not:
                        required:
                        - external
                      required:
                      - name
                    - not:
                        anyOf:
                        - required:
                          - name
                        - required:
                          - namespace
                      required:
                      - external
                    properties:
                      external:
                        description: The `email` field of an `IAMServiceAccount` resource.
                        type: string
                      name:
                        description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
                        type: string
                      namespace:
                        description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
                        type: string
                    type: object
                required:
                - serviceAccountRef
                type: object
              labels:
                additionalProperties:
                  type: string
                description: Optional. User-defined labels for the task.
                type: object
              lakeRef:
                description: LakeRef defines the resource reference to DataplexLake,
                  which "External" field holds the GCP identifier for the KRM object.
                oneOf:
                - not:
                    required:
                    - external
                  required:
                  - name
                - not:
                    anyOf:
                    - required:
                      - name
                    - required:
                      - namespace
                  required:
                  - external
                properties:
                  external:
                    description: A reference to an externally managed DataplexLake
                      resource. Should be in the format "projects/{{projectID}}/locations/{{location}}/lakes/{{lakeID}}".
                    type: string
                  name:
                    description: The name of a DataplexLake resource.
                    type: string
                  namespace:
                    description: The namespace of a DataplexLake resource.
                    type: string
                type: object
              # NOTE(review): the spark/notebook descriptions say exactly one
              # of the two must be set, but no oneOf in this schema enforces
              # that; presumably it is validated elsewhere — confirm.
              notebook:
                description: Config related to running scheduled Notebooks. Exactly
                  one of spark or notebook must be set.
                properties:
                  archiveURIs:
                    description: 'Optional. Cloud Storage URIs of archives to be extracted
                      into the working directory of each executor. Supported file
                      types: .jar, .tar, .tar.gz, .tgz, and .zip.'
                    items:
                      type: string
                    type: array
                  fileURIs:
                    description: Optional. Cloud Storage URIs of files to be placed
                      in the working directory of each executor.
                    items:
                      type: string
                    type: array
                  infrastructureSpec:
                    description: Optional. Infrastructure specification for the execution.
                    properties:
                      # NOTE(review): the executor-count ranges are stated only
                      # in the descriptions; the schema sets no minimum/maximum.
                      batch:
                        description: Compute resources needed for a Task when using
                          Dataproc Serverless.
                        properties:
                          executorsCount:
                            description: Optional. Total number of job executors.
                              Executor Count should be between 2 and 100. [Default=2]
                            format: int32
                            type: integer
                          maxExecutorsCount:
                            description: Optional. Max configurable executors. If
                              max_executors_count > executors_count, then auto-scaling
                              is enabled. Max Executor Count should be between 2 and
                              1000. [Default=1000]
                            format: int32
                            type: integer
                        type: object
                      containerImage:
                        description: Container Image Runtime Configuration.
                        properties:
                          image:
                            description: Optional. Container image to use.
                            type: string
                          javaJars:
                            description: Optional. A list of Java JARS to add to the
                              classpath. Valid input includes Cloud Storage URIs to
                              Jar binaries. For example, gs://bucket-name/my/path/to/file.jar
                            items:
                              type: string
                            type: array
                          # A field literally named "properties", not a schema
                          # keyword: a string-to-string map.
                          properties:
                            additionalProperties:
                              type: string
                            description: Optional. Override to common configuration
                              of open source components installed on the Dataproc
                              cluster. The properties to set on daemon config files.
                              Property keys are specified in `prefix:property` format,
                              for example `core:hadoop.tmp.dir`. For more information,
                              see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
                            type: object
                          pythonPackages:
                            description: Optional. A list of python packages to be
                              installed. Valid formats include Cloud Storage URI to
                              a PIP installable library. For example, gs://bucket-name/my/path/to/lib.tar.gz
                            items:
                              type: string
                            type: array
                        type: object
                      vpcNetwork:
                        description: Vpc network.
                        properties:
                          network:
                            description: Optional. The Cloud VPC network in which
                              the job is run. By default, the Cloud VPC network named
                              Default within the project is used.
                            type: string
                          networkTags:
                            description: Optional. List of network tags to apply to
                              the job.
                            items:
                              type: string
                            type: array
                          subNetwork:
                            description: Optional. The Cloud VPC sub-network in which
                              the job is run.
                            type: string
                        type: object
                    type: object
                  notebook:
                    description: Required. Path to input notebook. This can be the
                      Cloud Storage URI of the notebook file or the path to a Notebook
                      Content. The execution args are accessible as environment variables
                      (`TASK_key=value`).
                    type: string
                required:
                - notebook
                type: object
              resourceID:
                description: The DataplexTask name. If not given, the metadata.name
                  will be used.
                type: string
              # Same archiveURIs/fileURIs/infrastructureSpec sub-schema as
              # `notebook`, plus the driver fields (mainClass, mainJarFileURI,
              # pythonScriptFile, sqlScript, sqlScriptFile). No field of
              # `spark` is marked required by this schema.
              spark:
                description: Config related to running custom Spark tasks. Exactly
                  one of spark or notebook must be set.
                properties:
                  archiveURIs:
                    description: 'Optional. Cloud Storage URIs of archives to be extracted
                      into the working directory of each executor. Supported file
                      types: .jar, .tar, .tar.gz, .tgz, and .zip.'
                    items:
                      type: string
                    type: array
                  fileURIs:
                    description: Optional. Cloud Storage URIs of files to be placed
                      in the working directory of each executor.
                    items:
                      type: string
                    type: array
                  infrastructureSpec:
                    description: Optional. Infrastructure specification for the execution.
                    properties:
                      batch:
                        description: Compute resources needed for a Task when using
                          Dataproc Serverless.
                        properties:
                          executorsCount:
                            description: Optional. Total number of job executors.
                              Executor Count should be between 2 and 100. [Default=2]
                            format: int32
                            type: integer
                          maxExecutorsCount:
                            description: Optional. Max configurable executors. If
                              max_executors_count > executors_count, then auto-scaling
                              is enabled. Max Executor Count should be between 2 and
                              1000. [Default=1000]
                            format: int32
                            type: integer
                        type: object
                      containerImage:
                        description: Container Image Runtime Configuration.
                        properties:
                          image:
                            description: Optional. Container image to use.
                            type: string
                          javaJars:
                            description: Optional. A list of Java JARS to add to the
                              classpath. Valid input includes Cloud Storage URIs to
                              Jar binaries. For example, gs://bucket-name/my/path/to/file.jar
                            items:
                              type: string
                            type: array
                          properties:
                            additionalProperties:
                              type: string
                            description: Optional. Override to common configuration
                              of open source components installed on the Dataproc
                              cluster. The properties to set on daemon config files.
                              Property keys are specified in `prefix:property` format,
                              for example `core:hadoop.tmp.dir`. For more information,
                              see [Cluster properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
                            type: object
                          pythonPackages:
                            description: Optional. A list of python packages to be
                              installed. Valid formats include Cloud Storage URI to
                              a PIP installable library. For example, gs://bucket-name/my/path/to/lib.tar.gz
                            items:
                              type: string
                            type: array
                        type: object
                      vpcNetwork:
                        description: Vpc network.
                        properties:
                          network:
                            description: Optional. The Cloud VPC network in which
                              the job is run. By default, the Cloud VPC network named
                              Default within the project is used.
                            type: string
                          networkTags:
                            description: Optional. List of network tags to apply to
                              the job.
                            items:
                              type: string
                            type: array
                          subNetwork:
                            description: Optional. The Cloud VPC sub-network in which
                              the job is run.
                            type: string
                        type: object
                    type: object
                  mainClass:
                    description: The name of the driver's main class. The jar file
                      that contains the class must be in the default CLASSPATH or
                      specified in `jar_file_uris`. The execution args are passed
                      in as a sequence of named process arguments (`--key=value`).
                    type: string
                  mainJarFileURI:
                    description: The Cloud Storage URI of the jar file that contains
                      the main class. The execution args are passed in as a sequence
                      of named process arguments (`--key=value`).
                    type: string
                  pythonScriptFile:
                    description: The Cloud Storage URI of the main Python file to
                      use as the driver. Must be a .py file. The execution args are
                      passed in as a sequence of named process arguments (`--key=value`).
                    type: string
                  sqlScript:
                    description: The query text. The execution args are used to declare
                      a set of script variables (`set key="value";`).
                    type: string
                  sqlScriptFile:
                    description: A reference to a query file. This should be the Cloud
                      Storage URI of the query file. The execution args are used to
                      declare a set of script variables (`set key="value";`).
                    type: string
                type: object
              triggerSpec:
                description: Required. Spec related to how often and when a task should
                  be triggered.
                properties:
                  disabled:
                    description: Optional. Prevent the task from executing. This does
                      not cancel already running tasks. It is intended to temporarily
                      disable RECURRING tasks.
                    type: boolean
                  maxRetries:
                    description: Optional. Number of retry attempts before aborting.
                      Set to zero to never attempt to retry a failed task.
                    format: int32
                    type: integer
                  schedule:
                    description: 'Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron)
                      for running tasks periodically. To explicitly set a timezone
                      to the cron tab, apply a prefix in the cron tab: "CRON_TZ=${IANA_TIME_ZONE}"
                      or "TZ=${IANA_TIME_ZONE}". The ${IANA_TIME_ZONE} may only be
                      a valid string from IANA time zone database. For example, `CRON_TZ=America/New_York
                      1 * * * *`, or `TZ=America/New_York 1 * * * *`. This field is
                      required for RECURRING tasks.'
                    type: string
                  startTime:
                    description: Optional. The first run of the task will be after
                      this time. If not specified, the task will run shortly after
                      being submitted if ON_DEMAND and based on the schedule if RECURRING.
                    type: string
                  # A field literally named "type" (the trigger type), distinct
                  # from the schema keyword `type` used everywhere else.
                  type:
                    description: Required. Immutable. Trigger type of the user-specified
                      Task.
                    type: string
                required:
                - type
                type: object
            required:
            - executionSpec
            - triggerSpec
            type: object
          # Observed state: conditions, the external reference, the last
          # observed generation and a mirror of the GCP-side task state.
          status:
            description: DataplexTaskStatus defines the config connector machine state
              of DataplexTask
            properties:
              conditions:
                description: Conditions represent the latest available observations
                  of the object's current state.
                items:
                  properties:
                    lastTransitionTime:
                      description: Last time the condition transitioned from one status
                        to another.
                      type: string
                    message:
                      description: Human-readable message indicating details about
                        last transition.
                      type: string
                    reason:
                      description: Unique, one-word, CamelCase reason for the condition's
                        last transition.
                      type: string
                    status:
                      description: Status is the status of the condition. Can be True,
                        False, Unknown.
                      type: string
                    type:
                      description: Type is the type of the condition.
                      type: string
                  type: object
                type: array
              externalRef:
                description: A unique specifier for the DataplexTask resource in GCP.
                type: string
              observedGeneration:
                description: ObservedGeneration is the generation of the resource
                  that was most recently observed by the Config Connector controller.
                  If this is equal to metadata.generation, then that means that the
                  current reported status reflects the most recent desired state of
                  the resource.
                format: int64
                type: integer
              observedState:
                description: ObservedState is the state of the resource as most recently
                  observed in GCP.
                properties:
                  createTime:
                    description: Output only. The time when the task was created.
                    type: string
                  executionStatus:
                    description: ' Status of the task execution (e.g. Jobs).'
                    properties:
                      latestJob:
                        description: Output only. latest job execution
                        properties:
                          endTime:
                            description: Output only. The time when the job ended.
                            type: string
                          # Unlike spec.executionSpec, the KMS key and service
                          # account appear here as plain strings (kmsKey,
                          # serviceAccount) rather than *Ref objects.
                          executionSpec:
                            description: Output only. Spec related to how a task is
                              executed.
                            properties:
                              args:
                                additionalProperties:
                                  type: string
                                description: 'The arguments to pass to the task. The
                                  args can use placeholders of the format ${placeholder}
                                  as part of key/value string. These will be interpolated
                                  before passing the args to the driver. Currently
                                  supported placeholders: - ${task_id} - ${job_time}
                                  To pass positional args, set the key as TASK_ARGS.
                                  The value should be a comma-separated string of
                                  all the positional arguments. To use a delimiter
                                  other than comma, refer to https://cloud.google.com/sdk/gcloud/reference/topic/escaping.
                                  In case of other keys being present in the args,
                                  then TASK_ARGS will be passed as the last argument.'
                                type: object
                              kmsKey:
                                description: 'The Cloud KMS key to use for encryption,
                                  of the form: `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.'
                                type: string
                              maxJobExecutionLifetime:
                                description: The maximum duration after which the
                                  job execution is expired.
                                type: string
                              project:
                                description: The project in which jobs are run. By
                                  default, the project containing the Lake is used.
                                  If a project is provided, the [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account]
                                  must belong to this project.
                                type: string
                              serviceAccount:
                                description: Service account to use to execute a task.
                                  If not provided, the default Compute service account
                                  for the project is used.
                                type: string
                            required:
                            - serviceAccount
                            type: object
                          labels:
                            additionalProperties:
                              type: string
                            description: Output only. User-defined labels for the
                              task.
                            type: object
                          message:
                            description: Output only. Additional information about
                              the current state.
                            type: string
                          name:
                            description: 'Output only. The relative resource name
                              of the job, of the form: `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}/jobs/{job_id}`.'
                            type: string
                          retryCount:
                            description: Output only. The number of times the job
                              has been retried (excluding the initial attempt).
                            format: int32
                            type: integer
                          service:
                            description: Output only. The underlying service running
                              a job.
                            type: string
                          serviceJob:
                            description: Output only. The full resource name for the
                              job run under a particular service.
                            type: string
                          startTime:
                            description: Output only. The time when the job was started.
                            type: string
                          state:
                            description: Output only. Execution state for the job.
                            type: string
                          trigger:
                            description: Output only. Job execution trigger.
                            type: string
                          uid:
                            description: Output only. System generated globally unique
                              ID for the job.
                            type: string
                        type: object
                      updateTime:
                        description: Output only. Last update time of the status.
                        type: string
                    type: object
                  state:
                    description: Output only. Current state of the task.
                    type: string
                  uid:
                    description: Output only. System generated globally unique ID
                      for the task. This ID will be different if the task is deleted
                      and re-created with the same name.
                    type: string
                  updateTime:
                    description: Output only. The time when the task was last updated.
                    type: string
                type: object
            type: object
        required:
        - spec
        type: object
    served: true
    storage: true
    subresources:
      status: {}