# tools/asset-inventory/gae/config.yaml
# Copyright 2019 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Where the export is written to: a GCS path like gs://my-bucket-name/asset_export.
# See import_pipeline_runtime_environment for another location where this value can be overridden.
gcs_destination: <ENTER-BUCKET-URL>
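# For example (commented out; the bucket name is a placeholder):
# gcs_destination: gs://my-bucket-name/asset_export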
# Organization number (organizations/123) or project id (projects/id) or number (projects/123)
export_parent: <ENTER-PARENT>
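# For example, one of the following forms (commented out; 123 and my-project-id are placeholders):
# export_parent: organizations/123
# export_parent: projects/my-project-id
# export_parent: projects/123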
# BigQuery dataset to load to.
import_dataset: <ENTER-DATASET>
# Location to write intermediate data to load from: a GCS path like gs://my-bucket-name/asset_export/stage
import_stage: <ENTER-STAGE>
# Project id to run the dataflow job in when using the template or dataflow runner.
import_dataflow_project: <ENTER-PROJECT>
# When using the template runner, supply values as a JSON representation of
# RuntimeEnvironment:
#
# https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
#
# If you don't get a warning about firewall rules preventing shuffle operations,
# it's safe to set maxWorkers higher. To increase the machine size for faster
# loads, try "machineType": "n1-standard-8", and to use a Shared VPC supply the
# network and subnetwork values:
# "network": "https://www.googleapis.com/compute/v1/projects/<project>/networks/<network>"
# "subnetwork": "https://www.googleapis.com/compute/v1/projects/<project>/regions/<region>/subnetworks/<subnetwork>"
# Or try the new (faster) shuffler by adding "additionalExperiments": ["shuffle_mode=service"].
import_pipeline_runtime_environment: >
  {
    "tempLocation": "<ENTER-BUCKET-URL>/dataflow_temp",
    "maxWorkers": 1
  }
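# A fuller, commented-out example combining the options described above; the worker
# count, machine type, project, region, network and subnetwork values are illustrative
# placeholders only:
#
# import_pipeline_runtime_environment: >
#   {
#     "tempLocation": "<ENTER-BUCKET-URL>/dataflow_temp",
#     "maxWorkers": 4,
#     "machineType": "n1-standard-8",
#     "network": "https://www.googleapis.com/compute/v1/projects/<project>/networks/<network>",
#     "subnetwork": "https://www.googleapis.com/compute/v1/projects/<project>/regions/<region>/subnetworks/<subnetwork>",
#     "additionalExperiments": ["shuffle_mode=service"]
#   }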
# When using the direct or dataflow runner, these are the other arguments to supply to the import pipeline.
import_pipeline_arguments: --max_num_workers=1 --temp_location <ENTER-BUCKET-URL>/dataflow_temp
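# For example, a dataflow-runner invocation with a larger machine type might look like the
# commented line below (standard Apache Beam Dataflow pipeline options; values are illustrative):
# import_pipeline_arguments: --max_num_workers=10 --temp_location <ENTER-BUCKET-URL>/dataflow_temp --worker_machine_type n1-standard-8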
# Export resources, IAM policies, or both (a commented example follows the list).
export_content_types:
- RESOURCE
- IAM_POLICY
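# For example, to skip IAM policies and export only resource metadata (commented out):
# export_content_types:
# - RESOURCE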
# List of asset types to export, such as google.compute.Firewall and google.compute.HealthCheck.
# The default is '*' for everything (a commented example follows the list).
export_asset_types:
- '*'
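# For example, to export only the asset types named above (commented out):
# export_asset_types:
# - google.compute.Firewall
# - google.compute.HealthCheck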
# Can be direct, dataflow, or template (note that App Engine Standard doesn't support the direct or dataflow runner).
import_pipeline_runner: template
# Region to run the Dataflow pipeline in; must be a valid Dataflow region as listed here:
#
# https://cloud.google.com/dataflow/docs/concepts/regional-endpoints
#
# This doesn't have to be the same region the workers run in, but it should be close.
import_template_region: us-central1
# If running a template, this is the template location.
import_template_location: gs://professional-services-tools-asset-inventory/latest/import_pipeline
# How to group exported resources into BigQuery tables: either NONE, ASSET_TYPE_VERSION, or ASSET_TYPE.
import_group_by: ASSET_TYPE
# If tables should be overwritten (WRITE_EMPTY) or appended to (WRITE_APPEND).
import_write_disposition: WRITE_APPEND
# Number of shards to use per asset type. If you have a large number of a
# particular asset type, like BigQuery tables, you can speed up the pipeline by
# sharding. For example:
# num_shards: *=1,resource=100,google.cloud.bigquery.Table=100
import_num_shards: "*=1"
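# For example, to shard the resource and BigQuery table assets as described above (commented out):
# import_num_shards: "*=1,resource=100,google.cloud.bigquery.Table=100"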
# If the load date [YYYYMMDD] is added as a table suffix.
import_add_load_date_suffix: False
# If we are running on App Engine and, for security reasons, only want to be invoked by cron tasks.
restrict_to_cron_tasks: True