v2/mysql-to-googlecloud/terraform/MySQL_to_BigQuery/dataflow_job.tf

# Autogenerated file. DO NOT EDIT.
#
# Copyright (C) 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#

variable "on_delete" {
  type        = string
  description = "One of \"drain\" or \"cancel\". Specifies the behavior of job deletion during terraform destroy."
}

variable "project" {
  type        = string
  description = "The Google Cloud Project ID within which this module provisions resources."
}

variable "region" {
  type        = string
  description = "The region in which the created job should run."
}

variable "connectionURL" {
  type        = string
  description = "The JDBC connection URL string, for example `jdbc:mysql://some-host:3306/sampledb`. Can be passed in as a string that's Base64-encoded and then encrypted with a Cloud KMS key. (Example: jdbc:mysql://some-host:3306/sampledb)"
}

variable "connectionProperties" {
  type        = string
  description = "Properties string to use for the JDBC connection. The format of the string must be [propertyName=property;]*. (Example: unicode=true;characterEncoding=UTF-8)"
  default     = null
}

variable "username" {
  type        = string
  description = "The username to be used for the JDBC connection. Can be passed in as a Base64-encoded string encrypted with a Cloud KMS key."
  default     = null
}

variable "password" {
  type        = string
  description = "The password to be used for the JDBC connection. Can be passed in as a Base64-encoded string encrypted with a Cloud KMS key."
  default     = null
}

variable "query" {
  type        = string
  description = "The query to be run on the source to extract the data. (Example: select * from sampledb.sample_table)"
  default     = null
}

variable "outputTable" {
  type        = string
  description = "The BigQuery table location to write the output to, in the format `<project>:<dataset>.<table_name>`. The table's schema must match the input objects. (Example: <my-project>:<my-dataset>.<my-table>)"
}

variable "bigQueryLoadingTemporaryDirectory" {
  type        = string
  description = "The temporary directory for the BigQuery loading process. (Example: gs://your-bucket/your-files/temp_dir)"
}

variable "KMSEncryptionKey" {
  type        = string
  description = "Cloud KMS encryption key used to decrypt the username, password, and connection string. If a Cloud KMS key is passed in, the username, password, and connection string must all be passed in encrypted. (Example: projects/your-project/locations/global/keyRings/your-keyring/cryptoKeys/your-key)"
  default     = null
}

variable "useColumnAlias" {
  type        = bool
  description = <<EOT
If enabled (set to true), the pipeline uses the column alias ("AS") instead of the column name to map the rows to BigQuery. Defaults to false.
EOT
  default     = null
}

variable "isTruncate" {
  type        = bool
  description = "If enabled (set to true), the pipeline truncates the table before loading data into BigQuery. Defaults to false, in which case data is only appended."
  default     = null
}

variable "partitionColumn" {
  type        = string
  description = "If this parameter is provided (along with `table`), JdbcIO reads the table in parallel by executing multiple instances of the query on the same table (subquery) using ranges. Currently, only Long partition columns are supported."
  default     = null
}
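# A sketch of how a partitioned read is typically wired up, using the `table`,
# `numPartitions`, `lowerBound`, and `upperBound` variables declared below.
# The table, column, and bounds are hypothetical placeholders; per the
# `numPartitions` description, the [lowerBound, upperBound] range of
# `partitionColumn` is split into evenly sized strides that are read in
# parallel:
#
#   table           = "Person"
#   partitionColumn = "id"
#   numPartitions   = 10
#   lowerBound      = 0
#   upperBound      = 1000000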
variable "table" {
  type        = string
  description = "The table to read from using partitions. This parameter also accepts a subquery in parentheses. (Example: (select id, name from Person) as subq)"
  default     = null
}

variable "numPartitions" {
  type        = number
  description = "The number of partitions. This, along with the lower and upper bounds, forms the partition strides for generated WHERE clause expressions used to split the partition column evenly. When the input is less than 1, the number is set to 1."
  default     = null
}

variable "lowerBound" {
  type        = number
  description = "The lower bound used in the partition scheme. If not provided, it is automatically inferred by Beam (for the supported types)."
  default     = null
}

variable "upperBound" {
  type        = number
  description = "The upper bound used in the partition scheme. If not provided, it is automatically inferred by Beam (for the supported types)."
  default     = null
}

variable "fetchSize" {
  type        = number
  description = "The number of rows to be fetched from the database at a time. Not used for partitioned reads. Defaults to: 50000."
  default     = null
}

variable "createDisposition" {
  type        = string
  description = "The BigQuery CreateDisposition, for example CREATE_IF_NEEDED or CREATE_NEVER. Defaults to: CREATE_NEVER."
  default     = null
}

variable "bigQuerySchemaPath" {
  type        = string
  description = "The Cloud Storage path for the BigQuery JSON schema. If `createDisposition` is set to CREATE_IF_NEEDED, this parameter must be specified. (Example: gs://your-bucket/your-schema.json)"
  default     = null
}

variable "disabledAlgorithms" {
  type        = string
  description = "Comma-separated algorithms to disable. If this value is set to `none`, no algorithm is disabled. Use with care, because the algorithms that are disabled by default are known to have either vulnerabilities or performance issues. (Example: SSLv3, RC4)"
  default     = null
}

variable "extraFilesToStage" {
  type        = string
  description = "Comma-separated Cloud Storage paths or Secret Manager secrets for files to stage in the worker. These files are saved under the `/extra_files` directory in each worker. (Example: gs://your-bucket/file.txt,projects/project-id/secrets/secret-id/versions/version-id)"
  default     = null
}

variable "useStorageWriteApi" {
  type        = bool
  description = "If enabled (set to true), the pipeline uses the Storage Write API when writing the data to BigQuery (see https://cloud.google.com/blog/products/data-analytics/streaming-data-into-bigquery-using-storage-write-api). Defaults to: false."
  default     = null
}

variable "useStorageWriteApiAtLeastOnce" {
  type        = bool
  description = <<EOT
This parameter takes effect only if "Use BigQuery Storage Write API" is enabled. If enabled, at-least-once semantics are used for the Storage Write API; otherwise exactly-once semantics are used. Defaults to: false.
EOT
  default     = null
}

provider "google" {
  project = var.project
}

provider "google-beta" {
  project = var.project
}

variable "additional_experiments" {
  type        = set(string)
  description = "List of experiments that should be used by the job. An example value is 'enable_stackdriver_agent_metrics'."
  default     = null
}

variable "autoscaling_algorithm" {
  type        = string
  description = "The algorithm to use for autoscaling."
  default     = null
}

variable "enable_streaming_engine" {
  type        = bool
  description = "Indicates if the job should use the Streaming Engine feature."
  default     = null
}

variable "ip_configuration" {
  type        = string
  description = "The configuration for VM IPs. Options are 'WORKER_IP_PUBLIC' or 'WORKER_IP_PRIVATE'."
  default     = null
}
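# A minimal sketch of a terraform.tfvars for this module, covering the
# variables declared in this file that have no default plus an extraction
# query. Every value is a hypothetical placeholder, not a default shipped
# with the template:
#
#   project                           = "my-project"
#   region                            = "us-central1"
#   name                              = "mysql-to-bigquery"
#   on_delete                         = "drain"
#   connectionURL                     = "jdbc:mysql://some-host:3306/sampledb"
#   outputTable                       = "my-project:my_dataset.my_table"
#   bigQueryLoadingTemporaryDirectory = "gs://my-bucket/temp_dir"
#   query                             = "select * from sampledb.sample_table"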
variable "kms_key_name" {
  type        = string
  description = "The name for the Cloud KMS key for the job. The key format is: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY"
  default     = null
}

variable "labels" {
  type        = map(string)
  description = "User labels to be specified for the job. Keys and values should follow the restrictions specified in the labeling restrictions page. NOTE: This field is non-authoritative and will only manage the labels present in your configuration. Please refer to the field 'effective_labels' for all of the labels present on the resource."
  default     = null
}

variable "launcher_machine_type" {
  type        = string
  description = "The machine type to use for launching the job. The default is n1-standard-1."
  default     = null
}

variable "machine_type" {
  type        = string
  description = "The machine type to use for the job."
  default     = null
}

variable "max_workers" {
  type        = number
  description = "The maximum number of Google Compute Engine instances to be made available to your pipeline during execution, from 1 to 1000."
  default     = null
}

variable "name" {
  type = string
}

variable "network" {
  type        = string
  description = "The network to which VMs will be assigned. If it is not provided, 'default' will be used."
  default     = null
}

variable "num_workers" {
  type        = number
  description = "The initial number of Google Compute Engine instances for the job."
  default     = null
}

variable "sdk_container_image" {
  type        = string
  description = "Docker registry location of the container image to use for the worker harness. The default is the container for the version of the SDK. Note this field is only valid for portable pipelines."
  default     = null
}

variable "service_account_email" {
  type        = string
  description = "The Service Account email used to create the job."
  default     = null
}

variable "skip_wait_on_job_termination" {
  type        = bool
  description = "If true, treat DRAINING and CANCELLING as terminal job states and do not wait for further changes before removing from terraform state and moving on. WARNING: this will lead to job name conflicts if you do not ensure that the job names are different, e.g. by embedding a release ID or by using a random_id."
  default     = null
}

variable "staging_location" {
  type        = string
  description = "The Cloud Storage path to use for staging files. Must be a valid Cloud Storage URL, beginning with gs://."
  default     = null
}

variable "subnetwork" {
  type        = string
  description = "The subnetwork to which VMs will be assigned. Should be of the form 'regions/REGION/subnetworks/SUBNETWORK'."
  default     = null
}

variable "temp_location" {
  type        = string
  description = "The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://."
  default     = null
}
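# A sketch of the optional runner settings above, assuming a private-IP worker
# pool on a custom subnetwork. The subnetwork path, bucket, and service account
# are hypothetical placeholders:
#
#   max_workers           = 5
#   machine_type          = "n1-standard-2"
#   ip_configuration      = "WORKER_IP_PRIVATE"
#   subnetwork            = "regions/us-central1/subnetworks/my-subnet"
#   service_account_email = "dataflow-runner@my-project.iam.gserviceaccount.com"
#   temp_location         = "gs://my-bucket/temp"
#   staging_location      = "gs://my-bucket/staging"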
resource "google_project_service" "required" {
  service            = "dataflow.googleapis.com"
  disable_on_destroy = false
}

resource "google_dataflow_flex_template_job" "generated" {
  depends_on              = [google_project_service.required]
  provider                = google-beta
  container_spec_gcs_path = "gs://dataflow-templates-${var.region}/latest/flex/MySQL_to_BigQuery"
  parameters = {
    connectionURL                     = var.connectionURL
    connectionProperties              = var.connectionProperties
    username                          = var.username
    password                          = var.password
    query                             = var.query
    outputTable                       = var.outputTable
    bigQueryLoadingTemporaryDirectory = var.bigQueryLoadingTemporaryDirectory
    KMSEncryptionKey                  = var.KMSEncryptionKey
    useColumnAlias                    = tostring(var.useColumnAlias)
    isTruncate                        = tostring(var.isTruncate)
    partitionColumn                   = var.partitionColumn
    table                             = var.table
    numPartitions                     = tostring(var.numPartitions)
    lowerBound                        = tostring(var.lowerBound)
    upperBound                        = tostring(var.upperBound)
    fetchSize                         = tostring(var.fetchSize)
    createDisposition                 = var.createDisposition
    bigQuerySchemaPath                = var.bigQuerySchemaPath
    disabledAlgorithms                = var.disabledAlgorithms
    extraFilesToStage                 = var.extraFilesToStage
    useStorageWriteApi                = tostring(var.useStorageWriteApi)
    useStorageWriteApiAtLeastOnce     = tostring(var.useStorageWriteApiAtLeastOnce)
  }

  additional_experiments       = var.additional_experiments
  autoscaling_algorithm        = var.autoscaling_algorithm
  enable_streaming_engine      = var.enable_streaming_engine
  ip_configuration             = var.ip_configuration
  kms_key_name                 = var.kms_key_name
  labels                       = var.labels
  launcher_machine_type        = var.launcher_machine_type
  machine_type                 = var.machine_type
  max_workers                  = var.max_workers
  name                         = var.name
  network                      = var.network
  num_workers                  = var.num_workers
  sdk_container_image          = var.sdk_container_image
  service_account_email        = var.service_account_email
  skip_wait_on_job_termination = var.skip_wait_on_job_termination
  staging_location             = var.staging_location
  subnetwork                   = var.subnetwork
  temp_location                = var.temp_location
  region                       = var.region
}

output "dataflow_job_url" {
  value = "https://console.cloud.google.com/dataflow/jobs/${var.region}/${google_dataflow_flex_template_job.generated.job_id}"
}
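# As noted in the `skip_wait_on_job_termination` description, reusing the same
# job name across replacements can lead to name conflicts. One way to avoid
# that, sketched here rather than shipped with the template, is to build the
# name from a random_id in the calling configuration:
#
#   resource "random_id" "job_suffix" {
#     byte_length = 4
#   }
#
#   # ...and pass "mysql-to-bigquery-${random_id.job_suffix.hex}" as var.name.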