mmv1/products/bigquery/Dataset.yaml (432 lines of code) (raw):
# Copyright 2024 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Resource definition for the BigQuery `Dataset` resource (API kind
# `bigquery#dataset`), followed by links to the public guide and REST reference.
name: 'Dataset'
kind: 'bigquery#dataset'
description: |
Datasets allow you to organize and control access to your tables.
references:
guides:
'Datasets Intro': 'https://cloud.google.com/bigquery/docs/datasets-intro'
api: 'https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets'
docs:
warning: |
You must specify the role field using the legacy format `OWNER` instead of `roles/bigquery.dataOwner`.
The API accepts both formats, but it always returns the legacy format, which results in Terraform
showing a permanent diff on each plan and apply operation.
# REST paths for the resource. Every path is rooted at the project's
# `datasets` collection; `{{...}}` placeholders name other fields of this resource.
base_url: 'projects/{{project}}/datasets'
self_link: 'projects/{{project}}/datasets/{{dataset_id}}'
has_self_link: true
# Create and update both append `accessPolicyVersion=3` as a query parameter.
# NOTE(review): presumably required for the conditional `access` bindings
# declared further down — confirm against the API reference.
create_url: 'projects/{{project}}/datasets?accessPolicyVersion=3'
update_url: 'projects/{{project}}/datasets/{{dataset_id}}?accessPolicyVersion=3'
# Delete forwards the `delete_contents_on_destroy` virtual field as the
# `deleteContents` query parameter.
delete_url: 'projects/{{project}}/datasets/{{dataset_id}}?deleteContents={{delete_contents_on_destroy}}'
# Accepted import ID format(s).
import_format:
- 'projects/{{project}}/datasets/{{dataset_id}}'
# Per-operation timeouts, in minutes.
timeouts:
insert_minutes: 20
update_minutes: 20
delete_minutes: 20
# Hand-written Go templates attached to this resource (shared constants and a
# pre-read hook); their contents live outside this file.
custom_code:
constants: 'templates/terraform/constants/bigquery_dataset.go.tmpl'
pre_read: 'templates/terraform/pre_read/bigquery_dataset.go.tmpl'
# NOTE(review): presumably opts this resource out of generated test sweepers — confirm.
exclude_sweeper: true
# Example configurations. Each entry has a `name`, the `primary_resource_id`
# of the resource it showcases, and a `vars` map of named values.
# Some entries also set `exclude_docs` or `exclude_test`.
# NOTE(review): those flags presumably keep the example out of generated docs
# or generated tests respectively — inferred from the key names; confirm.
examples:
- name: 'bigquery_dataset_basic'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
account_name: 'bqowner'
- name: 'bigquery_dataset_with_max_time_travel_hours'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
account_name: 'bqowner'
exclude_docs: true
- name: 'bigquery_dataset_cmek'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
key_name: 'example-key'
keyring_name: 'example-keyring'
exclude_test: true
- name: 'bigquery_dataset_authorized_dataset'
primary_resource_id: 'dataset'
vars:
private: 'private'
public: 'public'
account_name: 'bqowner'
- name: 'bigquery_dataset_authorized_routine'
primary_resource_id: 'private'
vars:
private_dataset: 'private_dataset'
public_dataset: 'public_dataset'
public_routine: 'public_routine'
test_env_vars:
service_account: 'SERVICE_ACCT'
- name: 'bigquery_dataset_case_insensitive_names'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
account_name: 'bqowner'
exclude_docs: true
- name: 'bigquery_dataset_default_collation_set'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
account_name: 'bqowner'
exclude_docs: true
- name: 'bigquery_dataset_external_reference_aws'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
exclude_test: true
- name: 'bigquery_dataset_external_catalog_dataset_options'
primary_resource_id: 'dataset'
vars:
dataset_id: 'example_dataset'
# The only example that overrides `primary_resource_name`, using a Go expression.
- name: 'bigquery_dataset_resource_tags'
primary_resource_id: 'dataset'
primary_resource_name: 'fmt.Sprintf("tf_test_dataset%s", context["random_suffix"])'
vars:
dataset_id: 'dataset'
tag_key1: 'tag_key1'
tag_value1: 'tag_value1'
tag_key2: 'tag_key2'
tag_value2: 'tag_value2'
exclude_docs: true
# Fields declared under `virtual_fields` rather than `properties`.
# `delete_contents_on_destroy` is interpolated into `delete_url` as the
# `deleteContents` query parameter and defaults to false.
virtual_fields:
- name: 'delete_contents_on_destroy'
description: |
If set to `true`, delete all the tables in the
dataset when destroying the resource; otherwise,
destroying the resource will fail if tables are present.
type: Boolean
default_value: false
parameters:
properties:
# Time travel window in hours (48 to 168), declared with type String.
# NOTE(review): `default_from_api: true` presumably means the server-chosen
# value is adopted when the field is unset — confirm.
- name: 'maxTimeTravelHours'
type: String
description:
'Defines the time travel window in hours. The value can be from 48 to 168
hours (2 to 7 days).'
default_from_api: true
# Access entries for the dataset, declared as a set (`is_set: true`) of nested
# objects. An entry can carry a `role`, a grantee selector (domain, group,
# user, special group, IAM member), an authorized view / dataset / routine,
# and a CEL `condition`.
- name: 'access'
  type: Array
  description:
    'An array of objects that define dataset access for one or more entities.'
  is_set: true
  default_from_api: true
  item_type:
    type: NestedObject
    properties:
      - name: 'domain'
        type: String
        description: |
          A domain to grant access to. Any users signed in with the
          domain specified will be granted the specified access.
      - name: 'groupByEmail'
        type: String
        description: An email address of a Google Group to grant access to.
      - name: 'role'
        type: String
        description: |
          Describes the rights granted to the user specified by the other
          member of the access object. Basic, predefined, and custom roles
          are supported. Predefined roles that have equivalent basic roles
          are swapped by the API to their basic counterparts. See
          [official docs](https://cloud.google.com/bigquery/docs/access-control).
      - name: 'specialGroup'
        type: String
        description: |
          A special group to grant access to. Possible values include:
          * `projectOwners`: Owners of the enclosing project.
          * `projectReaders`: Readers of the enclosing project.
          * `projectWriters`: Writers of the enclosing project.
          * `allAuthenticatedUsers`: All authenticated BigQuery users.
      - name: 'iamMember'
        type: String
        description: |
          Some other type of member that appears in the IAM Policy but isn't a user,
          group, domain, or special group. For example: `allUsers`
      - name: 'userByEmail'
        type: String
        description: |
          An email address of a user to grant access to. For example:
          fred@example.com
      # Authorized view: all three reference parts are required.
      - name: 'view'
        type: NestedObject
        description: |
          A view from a different dataset to grant access to. Queries
          executed against that view will have read access to tables in
          this dataset. The role field is not required when this field is
          set. If that view is updated by any user, access to the view
          needs to be granted again via an update operation.
        properties:
          - name: 'datasetId'
            type: String
            description: The ID of the dataset containing this table.
            required: true
          - name: 'projectId'
            type: String
            description: The ID of the project containing this table.
            required: true
          - name: 'tableId'
            type: String
            description: |
              The ID of the table. The ID must contain only letters (a-z,
              A-Z), numbers (0-9), or underscores (_). The maximum length
              is 1,024 characters.
            required: true
      # Authorized dataset: a dataset reference plus the resource types it covers.
      - name: 'dataset'
        type: NestedObject
        description: |
          Grants all resources of particular types in a particular dataset read access to the current dataset.
        properties:
          - name: 'dataset'
            type: NestedObject
            description: |
              The dataset this entry applies to
            required: true
            properties:
              # These two descriptions previously referred to "this table";
              # the reference here points at a dataset, not a table.
              - name: 'datasetId'
                type: String
                description: The ID of the dataset this entry applies to.
                required: true
              - name: 'projectId'
                type: String
                description: The ID of the project containing this dataset.
                required: true
          - name: 'targetTypes'
            type: Array
            description: |
              Which resources in the dataset this entry applies to. Currently, only views are supported,
              but additional target types may be added in the future. Possible values: VIEWS
            required: true
            item_type:
              type: String
      # Authorized routine: all three reference parts are required.
      - name: 'routine'
        type: NestedObject
        description: |
          A routine from a different dataset to grant access to. Queries
          executed against that routine will have read access to tables in
          this dataset. The role field is not required when this field is
          set. If that routine is updated by any user, access to the routine
          needs to be granted again via an update operation.
        properties:
          # These two descriptions previously referred to "this table";
          # the reference here points at a routine.
          - name: 'datasetId'
            type: String
            description: The ID of the dataset containing this routine.
            required: true
          - name: 'projectId'
            type: String
            description: The ID of the project containing this routine.
            required: true
          - name: 'routineId'
            type: String
            description: |
              The ID of the routine. The ID must contain only letters (a-z,
              A-Z), numbers (0-9), or underscores (_). The maximum length
              is 256 characters.
            required: true
      # Optional CEL condition on the binding; only `expression` is required.
      # Names are quoted here to match every other property in this file.
      - name: 'condition'
        type: NestedObject
        description: |
          Condition for the binding. If CEL expression in this field is true, this
          access binding will be considered.
        properties:
          - name: 'expression'
            type: String
            required: true
            description: |
              Textual representation of an expression in Common Expression Language syntax.
          - name: 'title'
            type: String
            description: |
              Title for the expression, i.e. a short string describing its purpose.
              This can be used e.g. in UIs which allow entering the expression.
          - name: 'description'
            type: String
            description: |
              Description of the expression. This is a longer text which describes the expression,
              e.g. when hovered over it in a UI.
          - name: 'location'
            type: String
            description: |
              String indicating the location of the expression for error reporting, e.g. a file
              name and a position in the file.
# Creation timestamp in milliseconds since the epoch; declared `output: true`.
- name: 'creationTime'
type: Integer
description: |
The time when this dataset was created, in milliseconds since the
epoch.
output: true
# Identifies the dataset. Required and immutable, with `flatten_object: true`.
# The nested `datasetId` is checked by `validateDatasetId`, which is referenced
# by name only and defined outside this file.
- name: 'datasetReference'
type: NestedObject
description: 'A reference that identifies the dataset.'
required: true
immutable: true
flatten_object: true
properties:
- name: 'datasetId'
type: String
description: |
A unique ID for this dataset, without the project name. The ID
must contain only letters (a-z, A-Z), numbers (0-9), or
underscores (_). The maximum length is 1,024 characters.
required: true
immutable: true
validation:
function: 'validateDatasetId'
# Default table lifetime in milliseconds (minimum one hour per the description).
# Validated by `validateDefaultTableExpirationMs`, referenced by name only and
# defined outside this file.
- name: 'defaultTableExpirationMs'
type: Integer
description: |
The default lifetime of all tables in the dataset, in milliseconds.
The minimum value is 3600000 milliseconds (one hour).
Once this property is set, all newly-created tables in the dataset
will have an `expirationTime` property set to the creation time plus
the value in this property, and changing the value will only affect
new tables, not existing ones. When the `expirationTime` for a given
table is reached, that table will be deleted automatically.
If a table's `expirationTime` is modified or removed before the
table expires, or if you provide an explicit `expirationTime` when
creating a table, that value takes precedence over the default
expiration time indicated by this property.
validation:
function: 'validateDefaultTableExpirationMs'
# Default partition expiration in milliseconds for partitioned tables.
# Unlike `defaultTableExpirationMs`, no validation function is attached here.
- name: 'defaultPartitionExpirationMs'
type: Integer
description: |
The default partition expiration for all partitioned tables in
the dataset, in milliseconds.
Once this property is set, all newly-created partitioned tables in
the dataset will have an `expirationMs` property in the `timePartitioning`
settings set to this value, and changing the value will only
affect new tables, not existing ones. The storage in a partition will
have an expiration time of its partition time plus this value.
Setting this property overrides the use of `defaultTableExpirationMs`
for partitioned tables: only one of `defaultTableExpirationMs` and
`defaultPartitionExpirationMs` will be used for any new partitioned
table. If you provide an explicit `timePartitioning.expirationMs` when
creating or updating a partitioned table, that value takes precedence
over the default partition expiration time indicated by this property.
# Free-text description of the dataset; no extra flags are set on this field.
- name: 'description'
type: String
description: A user-friendly description of the dataset
# Hash of the resource; declared `output: true`.
- name: 'etag'
type: String
description: |
A hash of the resource.
output: true
# Reference to the external metadata storage backing the dataset.
# The object itself and both of its required sub-fields are marked immutable.
- name: 'externalDatasetReference'
type: NestedObject
description: |
Information about the external metadata storage where the dataset is defined.
immutable: true
properties:
- name: 'externalSource'
type: String
description: |
External source that backs this dataset.
required: true
immutable: true
- name: 'connection'
type: String
description: |
The connection id that is used to access the externalSource.
Format: projects/{projectId}/locations/{locationId}/connections/{connectionId}
required: true
immutable: true
# Human-readable name for the dataset.
# NOTE(review): `send_empty_value: true` presumably lets an empty string be
# sent so the name can be cleared — confirm.
- name: 'friendlyName'
type: String
description: A descriptive name for the dataset
send_empty_value: true
# Labels for organizing and grouping datasets; uses the `KeyValueLabels` type.
- name: 'labels'
type: KeyValueLabels
description: |
The labels associated with this dataset. You can use these to
organize and group your datasets.
# Last-modified timestamp (milliseconds since the epoch) for the dataset or
# any of its tables; declared `output: true`.
- name: 'lastModifiedTime'
type: Integer
description: |
The date when this dataset or any of its tables was last modified, in
milliseconds since the epoch.
output: true
# Geographic location of the dataset. Immutable, with a declared default of "US".
# Diffs go through `tpgresource.CaseDiffSuppress` and reads go through a custom
# flatten template; both are defined outside this file.
# NOTE(review): the suppress function presumably ignores case-only differences
# (e.g. `us` vs `US`) — confirm.
- name: 'location'
type: String
description: |
The geographic location where the dataset should reside.
See [official docs](https://cloud.google.com/bigquery/docs/dataset-locations).
There are two types of locations, regional or multi-regional. A regional
location is a specific geographic place, such as Tokyo, and a multi-regional
location is a large geographic area, such as the United States, that
contains at least two geographic places.
The default value is multi-regional location `US`.
Changing this forces a new resource to be created.
immutable: true
diff_suppress_func: 'tpgresource.CaseDiffSuppress'
custom_flatten: 'templates/terraform/custom_flatten/bigquery_dataset_location.go.tmpl'
default_value: "US"
# Default Cloud KMS key for tables in the dataset. `kmsKeyName` is required
# whenever this object is present.
# The description previously limited the default to "partitioned tables",
# contradicting its own first sentence ("all tables"); that word is removed.
- name: 'defaultEncryptionConfiguration'
  type: NestedObject
  description: |
    The default encryption key for all tables in the dataset. Once this property is set,
    all newly-created tables in the dataset will have their encryption key set to
    this value, unless the table creation request (or query) overrides the key.
  properties:
    - name: 'kmsKeyName'
      type: String
      description: |
        Describes the Cloud KMS encryption key that will be used to protect destination
        BigQuery table. The BigQuery Service Account associated with your project requires
        access to this encryption key.
      required: true
# Whether dataset and table names are case-insensitive; `default_from_api: true` is set.
- name: 'isCaseInsensitive'
type: Boolean
description: |
TRUE if the dataset and its table names are case-insensitive, otherwise FALSE.
By default, this is FALSE, which means the dataset and its table names are
case-sensitive. This field does not affect routine references.
default_from_api: true
# Default collation inherited by tables created later in the dataset.
# The description lists two supported values: 'und:ci' and the empty string.
# `default_from_api: true` is set.
- name: 'defaultCollation'
type: String
description: |
Defines the default collation specification of future tables created
in the dataset. If a table is created in this dataset without table-level
default collation, then the table inherits the dataset default collation,
which is applied to the string fields that do not have explicit collation
specified. A change to this field affects only tables created afterwards,
and does not alter the existing tables.
The following values are supported:
- 'und:ci': undetermined locale, case insensitive.
- '': empty string. Default to case-sensitive behavior.
default_from_api: true
# Storage billing model, LOGICAL or PHYSICAL per the description;
# `default_from_api: true` is set.
- name: 'storageBillingModel'
type: String
description: |
Specifies the storage billing model for the dataset.
Set this flag value to LOGICAL to use logical bytes for storage billing,
or to PHYSICAL to use physical bytes instead.
LOGICAL is the default if this flag isn't specified.
default_from_api: true
# Resource tags attached to the dataset, as a key/value map.
# Keys use the namespaced form and values use the tag value's short name.
# The description previously said "this table"; this resource is a dataset.
- name: 'resourceTags'
  type: KeyValuePairs
  description: |
    The tags attached to this dataset. Tag keys are globally unique. Tag key is expected to be
    in the namespaced format, for example "123456789012/environment" where 123456789012 is the
    ID of the parent organization or project resource for this tag key. Tag value is expected
    to be the short name, for example "Production". See [Tag definitions](https://cloud.google.com/iam/docs/tags-access-control#definitions)
    for more details.
# Options for open-source-compatible datasets in the BigQuery catalog:
# a free-form `parameters` map and a default storage location URI.
# Neither sub-field is marked required.
# NOTE(review): "2Mib" in the `parameters` description presumably means 2 MiB — confirm.
- name: 'externalCatalogDatasetOptions'
type: NestedObject
description: |
Options defining open source compatible datasets living in the BigQuery catalog. Contains
metadata of open source database, schema or namespace represented by the current dataset.
properties:
- name: 'parameters'
type: KeyValuePairs
description: |
A map of key value pairs defining the parameters and properties of the open source schema.
Maximum size of 2Mib.
- name: 'defaultStorageLocationUri'
type: String
description: |
The storage location URI for all tables in the dataset. Equivalent to hive metastore's
database locationUri. Maximum length of 1024 characters.