# v2/googlecloud-to-elasticsearch/terraform/GCS_to_Elasticsearch/dataflow_job.tf
# Autogenerated file. DO NOT EDIT.
#
# Copyright (C) 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
#
# Job lifecycle and placement inputs. None of these declares a default, so the
# caller must supply every one of them.
variable "on_delete" {
  description = "One of \"drain\" or \"cancel\". Specifies behavior of deletion during terraform destroy."
  type        = string
}

variable "project" {
  description = "The Google Cloud Project ID within which this module provisions resources."
  type        = string
}

variable "region" {
  description = "The region in which the created job should run."
  type        = string
}
# Source files and dead-letter destination for the template.
variable "deadletterTable" {
  description = "Messages failed to reach the target for all kind of reasons (e.g., mismatched schema, malformed json) are written to this table. (Example: your-project:your-dataset.your-table-name)"
  type        = string
}

variable "inputFormat" {
  description = "Input file format. Default is: CSV"
  type        = string
  default     = null
}

variable "inputFileSpec" {
  description = "Cloud storage file pattern glob to read from. ex: gs://your-bucket/path/*.csv"
  type        = string
}
# Optional file-parsing settings. Each defaults to null; the "Default" wording
# in the descriptions refers to the template's own fallback value.
variable "containsHeaders" {
  description = "Input CSV files contain a header record (true/false). Only required if reading CSV files. Defaults to: false."
  type        = bool
  default     = null
}

variable "delimiter" {
  description = "The column delimiter of the input text files. Default: use delimiter provided in csvFormat (Example: ,)"
  type        = string
  default     = null
}

variable "csvFormat" {
  description = "CSV format specification to use for parsing records. Default is: Default. See https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.html for more details. Must match format names exactly found at: https://commons.apache.org/proper/commons-csv/apidocs/org/apache/commons/csv/CSVFormat.Predefined.html"
  type        = string
  default     = null
}

variable "jsonSchemaPath" {
  description = "Path to JSON schema. Default: null. (Example: gs://path/to/schema)"
  type        = string
  default     = null
}

variable "largeNumFiles" {
  description = "Set to true if number of files is in the tens of thousands. Defaults to: false."
  type        = bool
  default     = null
}

variable "csvFileEncoding" {
  description = "CSV file character encoding format. Allowed Values are US-ASCII, ISO-8859-1, UTF-8, UTF-16. Defaults to: UTF-8."
  type        = string
  default     = null
}

variable "logDetailedCsvConversionErrors" {
  description = "Set to true to enable detailed error logging when CSV parsing fails. Note that this may expose sensitive data in the logs (e.g., if the CSV file contains passwords). Default: false."
  type        = bool
  default     = null
}
# Elasticsearch endpoint; required (no default).
variable "connectionUrl" {
  description = "Elasticsearch URL in the format https://hostname:[port] or specify CloudID if using Elastic Cloud (Example: https://elasticsearch-host:9200)"
  type        = string
}
# Elasticsearch API key; required (no default).
# Marked sensitive so Terraform redacts the value in plan/apply output instead
# of printing the credential in clear text.
variable "apiKey" {
  type        = string
  description = "Base64 Encoded API Key for access without requiring basic authentication. Refer to: https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html#security-api-create-api-key-request"
  sensitive   = true
}
# Optional basic-auth user name; null when not supplied.
variable "elasticsearchUsername" {
  description = "Username for Elasticsearch endpoint. Overrides ApiKey option if specified"
  type        = string
  default     = null
}
# Optional basic-auth password; null when not supplied.
# Marked sensitive so Terraform redacts the value in plan/apply output instead
# of printing the credential in clear text.
variable "elasticsearchPassword" {
  type        = string
  description = "Password for Elasticsearch endpoint. Overrides ApiKey option if specified"
  default     = null
  sensitive   = true
}
# Target index (required) followed by optional bulk-write batching and retry
# tuning, all of which default to null.
variable "index" {
  description = "The index toward which the requests will be issued (Example: my-index)"
  type        = string
}

variable "batchSize" {
  description = "Batch Size used for batch insertion of messages into Elasticsearch. Defaults to: 1000."
  type        = number
  default     = null
}

variable "batchSizeBytes" {
  description = "Batch Size in bytes used for batch insertion of messages into elasticsearch. Default: 5242880 (5mb)"
  type        = number
  default     = null
}

variable "maxRetryAttempts" {
  description = "Max retry attempts, must be > 0. Default: no retries"
  type        = number
  default     = null
}

variable "maxRetryDuration" {
  description = "Max retry duration in milliseconds, must be > 0. Default: no retries"
  type        = number
  default     = null
}
# Optional sources for the _index and _id bulk-request metadata: either a
# document property or a JavaScript UDF (GCS path + function name).
variable "propertyAsIndex" {
  description = "A property in the document being indexed whose value will specify _index metadata to be included with document in bulk request (takes precedence over an _index UDF)."
  type        = string
  default     = null
}

variable "javaScriptIndexFnGcsPath" {
  description = "Cloud Storage path to JavaScript UDF source for function that will specify _index metadata to be included with document in bulk request."
  type        = string
  default     = null
}

variable "javaScriptIndexFnName" {
  description = "UDF JavaScript Function Name for function that will specify _index metadata to be included with document in bulk request"
  type        = string
  default     = null
}

variable "propertyAsId" {
  description = "A property in the document being indexed whose value will specify _id metadata to be included with document in bulk request (takes precedence over an _id UDF)."
  type        = string
  default     = null
}

variable "javaScriptIdFnGcsPath" {
  description = "Cloud Storage path to JavaScript UDF source for function that will specify _id metadata to be included with document in bulk request."
  type        = string
  default     = null
}

variable "javaScriptIdFnName" {
  description = "UDF JavaScript Function Name for function that will specify _id metadata to be included with document in bulk request."
  type        = string
  default     = null
}
# Optional JavaScript UDFs for the _type metadata and for the delete decision.
# The two "IsDelete" descriptions use heredocs because their text contains
# double quotes; the heredoc bodies must stay unindented.
variable "javaScriptTypeFnGcsPath" {
  description = "Cloud Storage path to JavaScript UDF source for function that will specify _type metadata to be included with document in bulk request."
  type        = string
  default     = null
}

variable "javaScriptTypeFnName" {
  description = "UDF JavaScript Function Name for function that will specify _type metadata to be included with document in bulk request"
  type        = string
  default     = null
}

variable "javaScriptIsDeleteFnGcsPath" {
  type        = string
  default     = null
  description = <<EOT
Cloud Storage path to JavaScript UDF source for function that will determine if document should be deleted rather than inserted or updated, function should return string value "true" or "false".
EOT
}

variable "javaScriptIsDeleteFnName" {
  type        = string
  default     = null
  description = <<EOT
UDF JavaScript Function Name for function that will determine if document should be deleted rather than inserted or updated, function should return string value "true" or "false".
EOT
}
# Optional write-mode and TLS switches; all default to null.
variable "usePartialUpdate" {
  description = "Whether to use partial updates (update rather than create or index, allowing partial docs) with Elasticsearch requests. Defaults to: false."
  type        = bool
  default     = null
}

variable "bulkInsertMethod" {
  description = "Whether to use INDEX (index, allows upsert) or CREATE (create, errors on duplicate _id) with Elasticsearch bulk requests. Defaults to: CREATE."
  type        = string
  default     = null
}

variable "trustSelfSignedCerts" {
  description = "Whether to trust self-signed certificate or not. An Elasticsearch instance installed might have a self-signed certificate, Enable this to True to by-pass the validation on SSL certificate. (default is False)"
  type        = bool
  default     = null
}

variable "disableCertificateValidation" {
  description = "Disable SSL certificate validation (true/false). Default false (validation enabled). If true, all certificates are considered trusted."
  type        = bool
  default     = null
}
# Optional settings that select where the API key comes from (plaintext, KMS
# or Secret Manager); all default to null.
variable "apiKeyKMSEncryptionKey" {
  description = "The Cloud KMS key to decrypt the API key. This parameter must be provided if the apiKeySource is set to KMS. If this parameter is provided, apiKey string should be passed in encrypted. Encrypt parameters using the KMS API encrypt endpoint. The Key should be in the format projects/{gcp_project}/locations/{key_region}/keyRings/{key_ring}/cryptoKeys/{kms_key_name}. See: https://cloud.google.com/kms/docs/reference/rest/v1/projects.locations.keyRings.cryptoKeys/encrypt (Example: projects/your-project-id/locations/global/keyRings/your-keyring/cryptoKeys/your-key-name)"
  type        = string
  default     = null
}

variable "apiKeySecretId" {
  description = "Secret Manager secret ID for the apiKey. This parameter should be provided if the apiKeySource is set to SECRET_MANAGER. Should be in the format projects/{project}/secrets/{secret}/versions/{secret_version}. (Example: projects/your-project-id/secrets/your-secret/versions/your-secret-version)"
  type        = string
  default     = null
}

variable "apiKeySource" {
  description = "Source of the API key. One of PLAINTEXT, KMS or SECRET_MANAGER. This parameter must be provided if secret manager or KMS is used. If apiKeySource is set to KMS, apiKeyKMSEncryptionKey and encrypted apiKey must be provided. If apiKeySource is set to SECRET_MANAGER, apiKeySecretId must be provided. If apiKeySource is set to PLAINTEXT, apiKey must be provided. Defaults to: PLAINTEXT."
  type        = string
  default     = null
}
# Optional text-transform UDF: script location plus the function to call.
variable "javascriptTextTransformGcsPath" {
  description = "The Cloud Storage path pattern for the JavaScript code containing your user-defined functions. (Example: gs://your-bucket/your-function.js)"
  type        = string
  default     = null
}

variable "javascriptTextTransformFunctionName" {
  description = "The name of the function to call from your JavaScript file. Use only letters, digits, and underscores. (Example: 'transform' or 'transform_udf1')"
  type        = string
  default     = null
}
# Both Google providers are configured with the caller-supplied project ID.
# No region is set at provider level.
provider "google" {
  project = var.project
}

provider "google-beta" {
  project = var.project
}
# Optional Dataflow job settings (snake_case names); all default to null.
variable "additional_experiments" {
  description = "List of experiments that should be used by the job. An example value is 'enable_stackdriver_agent_metrics'."
  type        = set(string)
  default     = null
}

variable "autoscaling_algorithm" {
  description = "The algorithm to use for autoscaling"
  type        = string
  default     = null
}

variable "enable_streaming_engine" {
  description = "Indicates if the job should use the streaming engine feature."
  type        = bool
  default     = null
}

variable "ip_configuration" {
  description = "The configuration for VM IPs. Options are 'WORKER_IP_PUBLIC' or 'WORKER_IP_PRIVATE'."
  type        = string
  default     = null
}

variable "kms_key_name" {
  description = "The name for the Cloud KMS key for the job. Key format is: projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY"
  type        = string
  default     = null
}

variable "labels" {
  description = "User labels to be specified for the job. Keys and values should follow the restrictions specified in the labeling restrictions page. NOTE: This field is non-authoritative, and will only manage the labels present in your configuration. Please refer to the field 'effective_labels' for all of the labels present on the resource."
  type        = map(string)
  default     = null
}

variable "launcher_machine_type" {
  description = "The machine type to use for launching the job. The default is n1-standard-1."
  type        = string
  default     = null
}

variable "machine_type" {
  description = "The machine type to use for the job."
  type        = string
  default     = null
}

variable "max_workers" {
  description = "The maximum number of Google Compute Engine instances to be made available to your pipeline during execution, from 1 to 1000."
  type        = number
  default     = null
}
# Job name; required (no default). Passed as the `name` argument of the flex
# template job resource.
variable "name" {
  type        = string
  description = "A unique name for the resource, required by Dataflow."
}
# Optional worker network and initial worker count; both default to null.
variable "network" {
  description = "The network to which VMs will be assigned. If it is not provided, 'default' will be used."
  type        = string
  default     = null
}

variable "num_workers" {
  description = "The initial number of Google Compute Engine instances for the job."
  type        = number
  default     = null
}
# Optional custom worker container image; null when not supplied.
# The description previously had a stray, unbalanced single quote before
# "worker harness"; it has been removed.
variable "sdk_container_image" {
  type        = string
  description = "Docker registry location of container image to use for the worker harness. Default is the container for the version of the SDK. Note this field is only valid for portable pipelines."
  default     = null
}
# Remaining optional job settings: identity, destroy-wait behaviour, Cloud
# Storage staging/temp paths and subnetwork. All default to null.
variable "service_account_email" {
  description = "The Service Account email used to create the job."
  type        = string
  default     = null
}

variable "skip_wait_on_job_termination" {
  description = "If true, treat DRAINING and CANCELLING as terminal job states and do not wait for further changes before removing from terraform state and moving on. WARNING: this will lead to job name conflicts if you do not ensure that the job names are different, e.g. by embedding a release ID or by using a random_id."
  type        = bool
  default     = null
}

variable "staging_location" {
  description = "The Cloud Storage path to use for staging files. Must be a valid Cloud Storage URL, beginning with gs://."
  type        = string
  default     = null
}

variable "subnetwork" {
  description = "The subnetwork to which VMs will be assigned. Should be of the form 'regions/REGION/subnetworks/SUBNETWORK'."
  type        = string
  default     = null
}

variable "temp_location" {
  description = "The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://."
  type        = string
  default     = null
}
# Declares the Dataflow API as a project service. disable_on_destroy is false,
# so destroying this module is not meant to turn the API back off.
resource "google_project_service" "required" {
  disable_on_destroy = false
  service            = "dataflow.googleapis.com"
}
# Launches the GCS_to_Elasticsearch flex template as a Dataflow job.
#
# - Uses the google-beta provider and waits for the Dataflow API service
#   resource before creating the job.
# - The template spec is read from the regional "dataflow-templates-<region>"
#   bucket, so var.region selects both the template location and the job region.
# - camelCase entries under `parameters` are template parameters; bool and
#   number inputs are passed through tostring().
# - snake_case arguments after `parameters` are job-level settings.
resource "google_dataflow_flex_template_job" "generated" {
  depends_on              = [google_project_service.required]
  provider                = google-beta
  container_spec_gcs_path = "gs://dataflow-templates-${var.region}/latest/flex/GCS_to_Elasticsearch"

  parameters = {
    deadletterTable                     = var.deadletterTable
    inputFormat                         = var.inputFormat
    inputFileSpec                       = var.inputFileSpec
    containsHeaders                     = tostring(var.containsHeaders)
    delimiter                           = var.delimiter
    csvFormat                           = var.csvFormat
    jsonSchemaPath                      = var.jsonSchemaPath
    largeNumFiles                       = tostring(var.largeNumFiles)
    csvFileEncoding                     = var.csvFileEncoding
    logDetailedCsvConversionErrors      = tostring(var.logDetailedCsvConversionErrors)
    connectionUrl                       = var.connectionUrl
    apiKey                              = var.apiKey
    elasticsearchUsername               = var.elasticsearchUsername
    elasticsearchPassword               = var.elasticsearchPassword
    index                               = var.index
    batchSize                           = tostring(var.batchSize)
    batchSizeBytes                      = tostring(var.batchSizeBytes)
    maxRetryAttempts                    = tostring(var.maxRetryAttempts)
    maxRetryDuration                    = tostring(var.maxRetryDuration)
    propertyAsIndex                     = var.propertyAsIndex
    javaScriptIndexFnGcsPath            = var.javaScriptIndexFnGcsPath
    javaScriptIndexFnName               = var.javaScriptIndexFnName
    propertyAsId                        = var.propertyAsId
    javaScriptIdFnGcsPath               = var.javaScriptIdFnGcsPath
    javaScriptIdFnName                  = var.javaScriptIdFnName
    javaScriptTypeFnGcsPath             = var.javaScriptTypeFnGcsPath
    javaScriptTypeFnName                = var.javaScriptTypeFnName
    javaScriptIsDeleteFnGcsPath         = var.javaScriptIsDeleteFnGcsPath
    javaScriptIsDeleteFnName            = var.javaScriptIsDeleteFnName
    usePartialUpdate                    = tostring(var.usePartialUpdate)
    bulkInsertMethod                    = var.bulkInsertMethod
    trustSelfSignedCerts                = tostring(var.trustSelfSignedCerts)
    disableCertificateValidation        = tostring(var.disableCertificateValidation)
    apiKeyKMSEncryptionKey              = var.apiKeyKMSEncryptionKey
    apiKeySecretId                      = var.apiKeySecretId
    apiKeySource                        = var.apiKeySource
    javascriptTextTransformGcsPath      = var.javascriptTextTransformGcsPath
    javascriptTextTransformFunctionName = var.javascriptTextTransformFunctionName
  }

  additional_experiments  = var.additional_experiments
  autoscaling_algorithm   = var.autoscaling_algorithm
  enable_streaming_engine = var.enable_streaming_engine
  ip_configuration        = var.ip_configuration
  kms_key_name            = var.kms_key_name
  labels                  = var.labels
  launcher_machine_type   = var.launcher_machine_type
  machine_type            = var.machine_type
  max_workers             = var.max_workers
  name                    = var.name
  network                 = var.network
  num_workers             = var.num_workers

  # Deletion behaviour on terraform destroy ("drain" or "cancel"). The module
  # requires var.on_delete, so it must reach the resource to have any effect.
  on_delete = var.on_delete

  sdk_container_image          = var.sdk_container_image
  service_account_email        = var.service_account_email
  skip_wait_on_job_termination = var.skip_wait_on_job_termination
  staging_location             = var.staging_location
  subnetwork                   = var.subnetwork
  temp_location                = var.temp_location
  region                       = var.region
}
# Console link for the launched job, built from the job's region and the
# job_id attribute exported by the flex template job resource.
output "dataflow_job_url" {
  description = "Google Cloud console URL of the Dataflow job created by this module."
  value       = "https://console.cloud.google.com/dataflow/jobs/${var.region}/${google_dataflow_flex_template_job.generated.job_id}"
}