datasets/libraries_io/pipelines/repository_dependencies/pipeline.yaml (1,159 lines of code) (raw):
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Infrastructure resources declared for this pipeline.
resources:
  # The table_id matches the table named in the load tasks'
  # destination_project_dataset_table (libraries_io.repository_dependencies).
  - type: 'bigquery_table'
    table_id: 'repository_dependencies'
    # No table description is provided; the empty value is spelled out as null.
    description: null
dag:
airflow_version: 2
# DAG construction settings for the repository_dependencies DAG.
# NOTE(review): nesting is reconstructed on the assumption that owner,
# depends_on_past and start_date belong to default_args and the remaining
# keys are DAG-level arguments - confirm against the deployed file.
initialize:
  dag_id: 'repository_dependencies'
  default_args:
    owner: 'Google'
    depends_on_past: false
    # Kept as a quoted string so the parser does not turn it into a date.
    start_date: '2022-11-15'
  # Only one DAG run may be active at a time.
  max_active_runs: 1
  schedule_interval: '@daily'
  catchup: false
  default_view: 'graph'
tasks:
# Stages the Libraries.io repository_dependencies CSV and splits it into
# chunk files for the downstream transform tasks.
#
# Steps performed by the script:
#   1. If the release tarball is not already present, download it from Zenodo
#      and extract it under /home/airflow/gcs/data/libraries_io/.
#   2. Copy the extracted repository_dependencies CSV into its own
#      sub-directory and split it into files of 37,000,000 lines each.
#      With an empty file-name prefix and GNU split's default two-letter
#      suffixes, the chunks are named aa.csv, ab.csv, ac.csv, ...
#   3. Remove the unsplit copy so only the chunk files remain.
#
# Changes from the previous version:
#   * `mkdir -p` is used so reruns do not error on existing directories.
#   * The copy/split/rm steps, previously duplicated in both branches of the
#     `if`, now run once after the optional download.
#   * `set -e` plus `curl --fail` make a failed download or extraction fail the
#     task immediately; a partially written tarball is removed so the
#     `test -f` guard does not treat it as a valid cached download next time.
#
# NOTE(review): the tarball is saved as lib-1.6.0.tar.gz although the
# downloaded release is 1.4.0; the file name is kept unchanged here because
# other tasks or pipelines may depend on it - confirm before renaming.
- operator: "BashOperator"
  description: "Fetch data gcs - gcs"
  args:
    task_id: "bash_gcs_to_gcs"
    bash_command: |
      set -e
      if ! test -f /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz;
      then
        mkdir -p /home/airflow/gcs/data/libraries_io/
        curl --fail -o /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz -L https://zenodo.org/record/2536573/files/Libraries.io-open-data-1.4.0.tar.gz || { rm -f /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz; exit 1; }
        tar -xf /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz -C /home/airflow/gcs/data/libraries_io/
      fi
      mkdir -p /home/airflow/gcs/data/libraries_io/repository_dependencies/
      cp /home/airflow/gcs/data/libraries_io/libraries-1.4.0-2018-12-22/repository_dependencies-1.4.0-2018-12-22.csv /home/airflow/gcs/data/libraries_io/repository_dependencies/repository_dependencies.csv
      split -l 37000000 --additional-suffix=.csv /home/airflow/gcs/data/libraries_io/repository_dependencies/repository_dependencies.csv /home/airflow/gcs/data/libraries_io/repository_dependencies/
      rm /home/airflow/gcs/data/libraries_io/repository_dependencies/repository_dependencies.csv
# Creates the GKE cluster that the GKEStartPodOperator transform tasks in this
# DAG reference through cluster_name.
- operator: 'GKECreateClusterOperator'
  args:
    task_id: 'create_cluster'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    # Cluster definition passed to the operator.
    body:
      name: 'pdp-libraries-io-repository-dependencies'
      initial_node_count: 1
      network: '{{ var.value.vpc_network }}'
      node_config:
        machine_type: 'e2-standard-16'
        # Scopes granted to the node: GCS read/write and cloud-platform.
        oauth_scopes:
          - 'https://www.googleapis.com/auth/devstorage.read_write'
          - 'https://www.googleapis.com/auth/cloud-platform'
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk aa.csv - the first file produced by the split in
# bash_gcs_to_gcs - and at data_repository_dependencies_1.csv as its target.
- operator: 'GKEStartPodOperator'
  description: 'Run CSV transform within kubernetes pod'
  args:
    task_id: 'transform_repository_dependencies'
    startup_timeout_seconds: 600
    name: 'repository_dependencies'
    namespace: 'default'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    cluster_name: 'pdp-libraries-io-repository-dependencies'
    image_pull_policy: 'Always'
    image: '{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}'
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      SOURCE_GCS_OBJECT: 'data/libraries_io/repository_dependencies/aa.csv'
      SOURCE_FILE: 'files/repository_dependencies.csv'
      TARGET_FILE: 'files/data_repository_dependencies.csv'
      TARGET_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      TARGET_GCS_PATH: 'data/libraries_io/repository_dependencies/data_repository_dependencies_1.csv'
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: '100000'
      PIPELINE_NAME: 'repository_dependencies'
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: '16Gi'
      cpu:
        request: '1'
      ephemeral-storage:
        request: '10Gi'
# Loads data_repository_dependencies_1.csv from the Composer bucket into
# libraries_io.repository_dependencies. This is the first chunk load, so it
# uses WRITE_TRUNCATE to replace whatever the table held before.
- operator: 'GoogleCloudStorageToBigQueryOperator'
  description: 'Task to load CSV data to a BigQuery table'
  args:
    task_id: 'load_repository_dependencies_to_bq'
    bucket: '{{ var.value.composer_bucket }}'
    source_objects:
      - 'data/libraries_io/repository_dependencies/data_repository_dependencies_1.csv'
    source_format: 'CSV'
    destination_project_dataset_table: 'libraries_io.repository_dependencies'
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: 'WRITE_TRUNCATE'
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: 'id'
        type: 'integer'
        mode: 'nullable'
        description: 'The unique primary key of the repository dependency in the Libraries.io database.'
      - name: 'host_type'
        type: 'string'
        mode: 'nullable'
        description: 'Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket.'
      - name: 'repository_name_with_owner'
        type: 'string'
        mode: 'nullable'
        description: 'The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io.'
      - name: 'repository_id'
        type: 'integer'
        mode: 'nullable'
        description: 'The unique primary key of the repository for this dependency in the Libraries.io database.'
      - name: 'manifest_platform'
        type: 'string'
        mode: 'nullable'
        description: 'Which package manager the dependency listed in the manifest should use.'
      - name: 'manifest_filepath'
        type: 'string'
        mode: 'nullable'
        description: 'Path to the file where the dependency is declared within the repository.'
      - name: 'git_branch'
        type: 'string'
        mode: 'nullable'
        description: 'Which branch was the manifest loaded from the repository.'
      - name: 'manifest_kind'
        type: 'string'
        mode: 'nullable'
        description: 'Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree.'
      - name: 'optional'
        type: 'string'
        mode: 'nullable'
        description: 'Is the dependency optional?.'
      - name: 'dependency_project_name'
        type: 'string'
        mode: 'nullable'
        description: 'The name of the project that the dependency specifies.'
      - name: 'dependency_requirements'
        type: 'string'
        mode: 'nullable'
        description: 'The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific.'
      - name: 'dependency_kind'
        type: 'string'
        mode: 'nullable'
        description: 'The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build.'
      - name: 'dependency_project_id'
        type: 'integer'
        mode: 'nullable'
        description: 'The unique primary key of the project for this dependency in the Libraries.io database.'
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk ab.csv and at data_repository_dependencies_2.csv as
# its target.
- operator: 'GKEStartPodOperator'
  description: 'Run CSV transform within kubernetes pod'
  args:
    task_id: 'transform_repository_dependencies_2'
    startup_timeout_seconds: 600
    name: 'repository_dependencies'
    namespace: 'default'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    cluster_name: 'pdp-libraries-io-repository-dependencies'
    image_pull_policy: 'Always'
    image: '{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}'
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      SOURCE_GCS_OBJECT: 'data/libraries_io/repository_dependencies/ab.csv'
      SOURCE_FILE: 'files/repository_dependencies.csv'
      TARGET_FILE: 'files/data_repository_dependencies.csv'
      TARGET_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      TARGET_GCS_PATH: 'data/libraries_io/repository_dependencies/data_repository_dependencies_2.csv'
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: '100000'
      PIPELINE_NAME: 'repository_dependencies'
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: '16Gi'
      cpu:
        request: '1'
      ephemeral-storage:
        request: '10Gi'
# Loads data_repository_dependencies_2.csv from the Composer bucket into
# libraries_io.repository_dependencies, appending to the rows already in the
# table (WRITE_APPEND).
- operator: 'GoogleCloudStorageToBigQueryOperator'
  description: 'Task to load CSV data to a BigQuery table'
  args:
    task_id: 'load_repository_dependencies_to_bq_2'
    bucket: '{{ var.value.composer_bucket }}'
    source_objects:
      - 'data/libraries_io/repository_dependencies/data_repository_dependencies_2.csv'
    source_format: 'CSV'
    destination_project_dataset_table: 'libraries_io.repository_dependencies'
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: 'WRITE_APPEND'
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: 'id'
        type: 'integer'
        mode: 'nullable'
        description: 'The unique primary key of the repository dependency in the Libraries.io database.'
      - name: 'host_type'
        type: 'string'
        mode: 'nullable'
        description: 'Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket.'
      - name: 'repository_name_with_owner'
        type: 'string'
        mode: 'nullable'
        description: 'The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io.'
      - name: 'repository_id'
        type: 'integer'
        mode: 'nullable'
        description: 'The unique primary key of the repository for this dependency in the Libraries.io database.'
      - name: 'manifest_platform'
        type: 'string'
        mode: 'nullable'
        description: 'Which package manager the dependency listed in the manifest should use.'
      - name: 'manifest_filepath'
        type: 'string'
        mode: 'nullable'
        description: 'Path to the file where the dependency is declared within the repository.'
      - name: 'git_branch'
        type: 'string'
        mode: 'nullable'
        description: 'Which branch was the manifest loaded from the repository.'
      - name: 'manifest_kind'
        type: 'string'
        mode: 'nullable'
        description: 'Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree.'
      - name: 'optional'
        type: 'string'
        mode: 'nullable'
        description: 'Is the dependency optional?.'
      - name: 'dependency_project_name'
        type: 'string'
        mode: 'nullable'
        description: 'The name of the project that the dependency specifies.'
      - name: 'dependency_requirements'
        type: 'string'
        mode: 'nullable'
        description: 'The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific.'
      - name: 'dependency_kind'
        type: 'string'
        mode: 'nullable'
        description: 'The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build.'
      - name: 'dependency_project_id'
        type: 'integer'
        mode: 'nullable'
        description: 'The unique primary key of the project for this dependency in the Libraries.io database.'
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk ac.csv and at data_repository_dependencies_3.csv as
# its target.
- operator: 'GKEStartPodOperator'
  description: 'Run CSV transform within kubernetes pod'
  args:
    task_id: 'transform_repository_dependencies_3'
    startup_timeout_seconds: 600
    name: 'repository_dependencies'
    namespace: 'default'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    cluster_name: 'pdp-libraries-io-repository-dependencies'
    image_pull_policy: 'Always'
    image: '{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}'
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      SOURCE_GCS_OBJECT: 'data/libraries_io/repository_dependencies/ac.csv'
      SOURCE_FILE: 'files/repository_dependencies.csv'
      TARGET_FILE: 'files/data_repository_dependencies.csv'
      TARGET_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      TARGET_GCS_PATH: 'data/libraries_io/repository_dependencies/data_repository_dependencies_3.csv'
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: '100000'
      PIPELINE_NAME: 'repository_dependencies'
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: '16Gi'
      cpu:
        request: '1'
      ephemeral-storage:
        request: '10Gi'
# Loads data_repository_dependencies_3.csv from the Composer bucket into
# libraries_io.repository_dependencies.
#
# This is a follow-up chunk of the same split source file, and every load task
# in this DAG writes to the same destination table. The write disposition is
# therefore WRITE_APPEND: the previous WRITE_TRUNCATE emptied the table before
# loading, discarding the rows loaded from the earlier chunks.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_3"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_3.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: "WRITE_APPEND"
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk ad.csv and at data_repository_dependencies_4.csv as
# its target.
- operator: "GKEStartPodOperator"
  description: "Run CSV transform within kubernetes pod"
  args:
    task_id: "transform_repository_dependencies_4"
    startup_timeout_seconds: 600
    name: "repository_dependencies"
    namespace: "default"
    project_id: "{{ var.value.gcp_project }}"
    location: "us-central1-c"
    cluster_name: pdp-libraries-io-repository-dependencies
    image_pull_policy: "Always"
    image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      SOURCE_GCS_OBJECT: "data/libraries_io/repository_dependencies/ad.csv"
      SOURCE_FILE: "files/repository_dependencies.csv"
      TARGET_FILE: "files/data_repository_dependencies.csv"
      TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      TARGET_GCS_PATH: "data/libraries_io/repository_dependencies/data_repository_dependencies_4.csv"
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: "100000"
      # Previously missing from this task only; every other transform task in
      # this DAG passes PIPELINE_NAME to the same image.
      PIPELINE_NAME: "repository_dependencies"
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: "16Gi"
      cpu:
        request: "1"
      ephemeral-storage:
        request: "10Gi"
# Loads data_repository_dependencies_4.csv from the Composer bucket into
# libraries_io.repository_dependencies.
#
# This is a follow-up chunk of the same split source file, and every load task
# in this DAG writes to the same destination table. The write disposition is
# therefore WRITE_APPEND: the previous WRITE_TRUNCATE emptied the table before
# loading, discarding the rows loaded from the earlier chunks.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_4"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_4.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: "WRITE_APPEND"
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk ae.csv and at data_repository_dependencies_5.csv as
# its target.
- operator: 'GKEStartPodOperator'
  description: 'Run CSV transform within kubernetes pod'
  args:
    task_id: 'transform_repository_dependencies_5'
    startup_timeout_seconds: 600
    name: 'repository_dependencies'
    namespace: 'default'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    cluster_name: 'pdp-libraries-io-repository-dependencies'
    image_pull_policy: 'Always'
    image: '{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}'
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      SOURCE_GCS_OBJECT: 'data/libraries_io/repository_dependencies/ae.csv'
      SOURCE_FILE: 'files/repository_dependencies.csv'
      TARGET_FILE: 'files/data_repository_dependencies.csv'
      TARGET_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      TARGET_GCS_PATH: 'data/libraries_io/repository_dependencies/data_repository_dependencies_5.csv'
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: '100000'
      PIPELINE_NAME: 'repository_dependencies'
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: '16Gi'
      cpu:
        request: '1'
      ephemeral-storage:
        request: '10Gi'
# Loads data_repository_dependencies_5.csv from the Composer bucket into
# libraries_io.repository_dependencies.
#
# This is a follow-up chunk of the same split source file, and every load task
# in this DAG writes to the same destination table. The write disposition is
# therefore WRITE_APPEND: the previous WRITE_TRUNCATE emptied the table before
# loading, discarding the rows loaded from the earlier chunks.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_5"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_5.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: "WRITE_APPEND"
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk af.csv and at data_repository_dependencies_6.csv as
# its target.
- operator: 'GKEStartPodOperator'
  description: 'Run CSV transform within kubernetes pod'
  args:
    task_id: 'transform_repository_dependencies_6'
    startup_timeout_seconds: 600
    name: 'repository_dependencies'
    namespace: 'default'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    cluster_name: 'pdp-libraries-io-repository-dependencies'
    image_pull_policy: 'Always'
    image: '{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}'
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      SOURCE_GCS_OBJECT: 'data/libraries_io/repository_dependencies/af.csv'
      SOURCE_FILE: 'files/repository_dependencies.csv'
      TARGET_FILE: 'files/data_repository_dependencies.csv'
      TARGET_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      TARGET_GCS_PATH: 'data/libraries_io/repository_dependencies/data_repository_dependencies_6.csv'
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: '100000'
      PIPELINE_NAME: 'repository_dependencies'
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: '16Gi'
      cpu:
        request: '1'
      ephemeral-storage:
        request: '10Gi'
# Loads data_repository_dependencies_6.csv from the Composer bucket into
# libraries_io.repository_dependencies.
#
# This is a follow-up chunk of the same split source file, and every load task
# in this DAG writes to the same destination table. The write disposition is
# therefore WRITE_APPEND: the previous WRITE_TRUNCATE emptied the table before
# loading, discarding the rows loaded from the earlier chunks.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_6"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_6.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: "WRITE_APPEND"
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Runs the CSV transform image in a pod on the
# pdp-libraries-io-repository-dependencies cluster. The container is pointed
# (via env_vars) at chunk ag.csv and at data_repository_dependencies_7.csv as
# its target.
- operator: 'GKEStartPodOperator'
  description: 'Run CSV transform within kubernetes pod'
  args:
    task_id: 'transform_repository_dependencies_7'
    startup_timeout_seconds: 600
    name: 'repository_dependencies'
    namespace: 'default'
    project_id: '{{ var.value.gcp_project }}'
    location: 'us-central1-c'
    cluster_name: 'pdp-libraries-io-repository-dependencies'
    image_pull_policy: 'Always'
    image: '{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}'
    env_vars:
      # Source and target both live in the Composer bucket.
      SOURCE_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      SOURCE_GCS_OBJECT: 'data/libraries_io/repository_dependencies/ag.csv'
      SOURCE_FILE: 'files/repository_dependencies.csv'
      TARGET_FILE: 'files/data_repository_dependencies.csv'
      TARGET_GCS_BUCKET: '{{ var.value.composer_bucket }}'
      TARGET_GCS_PATH: 'data/libraries_io/repository_dependencies/data_repository_dependencies_7.csv'
      # Passed as a string; presumably the row batch size used by the
      # transform script - confirm against the container code.
      CHUNKSIZE: '100000'
      PIPELINE_NAME: 'repository_dependencies'
      # JSON object mapping the source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of the snake_case column names, in the same order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
    # Resource requests for the transform container.
    container_resources:
      memory:
        request: '16Gi'
      cpu:
        request: '1'
      ephemeral-storage:
        request: '10Gi'
# Loads data_repository_dependencies_7.csv from the Composer bucket into
# libraries_io.repository_dependencies.
#
# This is a follow-up chunk of the same split source file, and every load task
# in this DAG writes to the same destination table. The write disposition is
# therefore WRITE_APPEND: the previous WRITE_TRUNCATE emptied the table before
# loading, discarding the rows loaded from the earlier chunks.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_7"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_7.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped - confirm the transformed
    # file really starts with two non-data rows.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    write_disposition: "WRITE_APPEND"
    # Column order follows the CSV_HEADERS list given to the transform task.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Transform split piece "ah.csv" (produced by the split step in
# bash_gcs_to_gcs) into data_repository_dependencies_8.csv inside a GKE pod.
- operator: "GKEStartPodOperator"
  description: "Run CSV transform within kubernetes pod"
  args:
    task_id: "transform_repository_dependencies_8"
    name: "repository_dependencies"
    namespace: "default"
    startup_timeout_seconds: 600
    # Target cluster; the same name is used by the delete_cluster task.
    project_id: "{{ var.value.gcp_project }}"
    location: "us-central1-c"
    cluster_name: "pdp-libraries-io-repository-dependencies"
    # Container image is resolved from the libraries_io Airflow variable.
    image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
    image_pull_policy: "Always"
    container_resources:
      cpu:
        request: "1"
      memory:
        request: "16Gi"
      ephemeral-storage:
        request: "10Gi"
    # Settings passed to the container as environment variables; how they are
    # interpreted is defined by the image, not by this file.
    env_vars:
      PIPELINE_NAME: "repository_dependencies"
      CHUNKSIZE: "100000"
      # Input: chunk 8 of the split source CSV.
      SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      SOURCE_GCS_OBJECT: "data/libraries_io/repository_dependencies/ah.csv"
      SOURCE_FILE: "files/repository_dependencies.csv"
      # Output: object later read by load_repository_dependencies_to_bq_8.
      TARGET_FILE: "files/data_repository_dependencies.csv"
      TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      TARGET_GCS_PATH: "data/libraries_io/repository_dependencies/data_repository_dependencies_8.csv"
      # JSON object mapping source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of output column names, in BigQuery schema order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
# Load transformed chunk 8 from the Composer bucket into the shared
# libraries_io.repository_dependencies BigQuery table.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_8"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_8.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped; confirm the transformed
    # chunk really starts with two non-data rows, otherwise one record per
    # chunk is silently dropped.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    # Several load tasks write to this same table in one DAG run, so this
    # task appends; WRITE_TRUNCATE here would discard rows already loaded
    # from other chunks.
    # NOTE(review): assumes exactly one load resets the table per run and is
    # ordered ahead of this task - confirm against graph_paths.
    write_disposition: "WRITE_APPEND"
    # Explicit schema; column order follows CSV_HEADERS of the transform.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Transform split piece "ai.csv" (produced by the split step in
# bash_gcs_to_gcs) into data_repository_dependencies_9.csv inside a GKE pod.
- operator: "GKEStartPodOperator"
  description: "Run CSV transform within kubernetes pod"
  args:
    task_id: "transform_repository_dependencies_9"
    name: "repository_dependencies"
    namespace: "default"
    startup_timeout_seconds: 600
    # Target cluster; the same name is used by the delete_cluster task.
    project_id: "{{ var.value.gcp_project }}"
    location: "us-central1-c"
    cluster_name: "pdp-libraries-io-repository-dependencies"
    # Container image is resolved from the libraries_io Airflow variable.
    image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
    image_pull_policy: "Always"
    container_resources:
      cpu:
        request: "1"
      memory:
        request: "16Gi"
      ephemeral-storage:
        request: "10Gi"
    # Settings passed to the container as environment variables; how they are
    # interpreted is defined by the image, not by this file.
    env_vars:
      PIPELINE_NAME: "repository_dependencies"
      CHUNKSIZE: "100000"
      # Input: chunk 9 of the split source CSV.
      SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      SOURCE_GCS_OBJECT: "data/libraries_io/repository_dependencies/ai.csv"
      SOURCE_FILE: "files/repository_dependencies.csv"
      # Output: object later read by load_repository_dependencies_to_bq_9.
      TARGET_FILE: "files/data_repository_dependencies.csv"
      TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      TARGET_GCS_PATH: "data/libraries_io/repository_dependencies/data_repository_dependencies_9.csv"
      # JSON object mapping source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of output column names, in BigQuery schema order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
# Load transformed chunk 9 from the Composer bucket into the shared
# libraries_io.repository_dependencies BigQuery table.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_9"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_9.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped; confirm the transformed
    # chunk really starts with two non-data rows, otherwise one record per
    # chunk is silently dropped.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    # Several load tasks write to this same table in one DAG run, so this
    # task appends; WRITE_TRUNCATE here would discard rows already loaded
    # from other chunks.
    # NOTE(review): assumes exactly one load resets the table per run and is
    # ordered ahead of this task - confirm against graph_paths.
    write_disposition: "WRITE_APPEND"
    # Explicit schema; column order follows CSV_HEADERS of the transform.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Transform split piece "ak.csv" (produced by the split step in
# bash_gcs_to_gcs) into data_repository_dependencies_11.csv inside a GKE pod.
- operator: "GKEStartPodOperator"
  description: "Run CSV transform within kubernetes pod"
  args:
    task_id: "transform_repository_dependencies_11"
    name: "repository_dependencies"
    namespace: "default"
    startup_timeout_seconds: 600
    # Target cluster; the same name is used by the delete_cluster task.
    project_id: "{{ var.value.gcp_project }}"
    location: "us-central1-c"
    cluster_name: "pdp-libraries-io-repository-dependencies"
    # Container image is resolved from the libraries_io Airflow variable.
    image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
    image_pull_policy: "Always"
    container_resources:
      cpu:
        request: "1"
      memory:
        request: "16Gi"
      ephemeral-storage:
        request: "10Gi"
    # Settings passed to the container as environment variables; how they are
    # interpreted is defined by the image, not by this file.
    env_vars:
      PIPELINE_NAME: "repository_dependencies"
      CHUNKSIZE: "100000"
      # Input: chunk 11 of the split source CSV.
      SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      SOURCE_GCS_OBJECT: "data/libraries_io/repository_dependencies/ak.csv"
      SOURCE_FILE: "files/repository_dependencies.csv"
      # Output: object later read by load_repository_dependencies_to_bq_11.
      TARGET_FILE: "files/data_repository_dependencies.csv"
      TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      TARGET_GCS_PATH: "data/libraries_io/repository_dependencies/data_repository_dependencies_11.csv"
      # JSON object mapping source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of output column names, in BigQuery schema order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
# Load transformed chunk 11 from the Composer bucket into the shared
# libraries_io.repository_dependencies BigQuery table.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_11"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_11.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped; confirm the transformed
    # chunk really starts with two non-data rows, otherwise one record per
    # chunk is silently dropped.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    # Several load tasks write to this same table in one DAG run, so this
    # task appends; WRITE_TRUNCATE here would discard rows already loaded
    # from other chunks.
    # NOTE(review): assumes exactly one load resets the table per run and is
    # ordered ahead of this task - confirm against graph_paths.
    write_disposition: "WRITE_APPEND"
    # Explicit schema; column order follows CSV_HEADERS of the transform.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Transform split piece "aj.csv" (produced by the split step in
# bash_gcs_to_gcs) into data_repository_dependencies_10.csv inside a GKE pod.
- operator: "GKEStartPodOperator"
  description: "Run CSV transform within kubernetes pod"
  args:
    task_id: "transform_repository_dependencies_10"
    name: "repository_dependencies"
    namespace: "default"
    startup_timeout_seconds: 600
    # Target cluster; the same name is used by the delete_cluster task.
    project_id: "{{ var.value.gcp_project }}"
    location: "us-central1-c"
    cluster_name: "pdp-libraries-io-repository-dependencies"
    # Container image is resolved from the libraries_io Airflow variable.
    image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
    image_pull_policy: "Always"
    container_resources:
      cpu:
        request: "1"
      memory:
        request: "16Gi"
      ephemeral-storage:
        request: "10Gi"
    # Settings passed to the container as environment variables; how they are
    # interpreted is defined by the image, not by this file.
    env_vars:
      PIPELINE_NAME: "repository_dependencies"
      CHUNKSIZE: "100000"
      # Input: chunk 10 of the split source CSV.
      SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      SOURCE_GCS_OBJECT: "data/libraries_io/repository_dependencies/aj.csv"
      SOURCE_FILE: "files/repository_dependencies.csv"
      # Output: object later read by load_repository_dependencies_to_bq_10.
      TARGET_FILE: "files/data_repository_dependencies.csv"
      TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      TARGET_GCS_PATH: "data/libraries_io/repository_dependencies/data_repository_dependencies_10.csv"
      # JSON object mapping source column titles to snake_case names.
      RENAME_MAPPINGS: >-
        {"ID":"id","Host Type":"host_type","Repository Name with Owner":"repository_name_with_owner","Repository ID":"repository_id",
        "Manifest Platform":"manifest_platform","Manifest Filepath":"manifest_filepath","Git branch":"git_branch",
        "Manifest kind":"manifest_kind","Optional":"optional","Dependency Project Name":"dependency_project_name",
        "Dependency Requirements":"dependency_requirements","Dependency Kind":"dependency_kind","Dependency Project ID":"dependency_project_id"}
      # JSON array of output column names, in BigQuery schema order.
      CSV_HEADERS: >-
        ["id","host_type","repository_name_with_owner","repository_id","manifest_platform","manifest_filepath","git_branch",
        "manifest_kind","optional","dependency_project_name","dependency_requirements","dependency_kind","dependency_project_id"]
# Load transformed chunk 10 from the Composer bucket into the shared
# libraries_io.repository_dependencies BigQuery table.
- operator: "GoogleCloudStorageToBigQueryOperator"
  description: "Task to load CSV data to a BigQuery table"
  args:
    task_id: "load_repository_dependencies_to_bq_10"
    bucket: "{{ var.value.composer_bucket }}"
    source_objects:
      - "data/libraries_io/repository_dependencies/data_repository_dependencies_10.csv"
    source_format: "CSV"
    destination_project_dataset_table: "libraries_io.repository_dependencies"
    # NOTE(review): two leading rows are skipped; confirm the transformed
    # chunk really starts with two non-data rows, otherwise one record per
    # chunk is silently dropped.
    skip_leading_rows: 2
    allow_quoted_newlines: true
    # Several load tasks write to this same table in one DAG run, so this
    # task appends; WRITE_TRUNCATE here would discard rows already loaded
    # from other chunks.
    # NOTE(review): assumes exactly one load resets the table per run and is
    # ordered ahead of this task - confirm against graph_paths.
    write_disposition: "WRITE_APPEND"
    # Explicit schema; column order follows CSV_HEADERS of the transform.
    schema_fields:
      - name: "id"
        type: "integer"
        description: "The unique primary key of the repository dependency in the Libraries.io database."
        mode: "nullable"
      - name: "host_type"
        type: "string"
        description: "Which website the dependencys repository is hosted on, either GitHub, GitLab or Bitbucket."
        mode: "nullable"
      - name: "repository_name_with_owner"
        type: "string"
        description: "The repository name and owner seperated by a slash, also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
        mode: "nullable"
      - name: "repository_id"
        type: "integer"
        description: "The unique primary key of the repository for this dependency in the Libraries.io database."
        mode: "nullable"
      - name: "manifest_platform"
        type: "string"
        description: "Which package manager the dependency listed in the manifest should use."
        mode: "nullable"
      - name: "manifest_filepath"
        type: "string"
        description: "Path to the file where the dependency is declared within the repository."
        mode: "nullable"
      - name: "git_branch"
        type: "string"
        description: "Which branch was the manifest loaded from the repository."
        mode: "nullable"
      - name: "manifest_kind"
        type: "string"
        description: "Either manifest or lockfile, manifests are written by humans, lockfiles contain full resolved dependency tree."
        mode: "nullable"
      - name: "optional"
        type: "string"
        description: "Is the dependency optional?."
        mode: "nullable"
      - name: "dependency_project_name"
        type: "string"
        description: "The name of the project that the dependency specifies."
        mode: "nullable"
      - name: "dependency_requirements"
        type: "string"
        description: "The version or range of versions that the dependency specifies, resolution of that to a particular version is package manager specific."
        mode: "nullable"
      - name: "dependency_kind"
        type: "string"
        description: "The type of dependency, often declared for the phase of usage, e.g. runtime, test, development, build."
        mode: "nullable"
      - name: "dependency_project_id"
        type: "integer"
        description: "The unique primary key of the project for this dependency in the Libraries.io database."
        mode: "nullable"
# Delete the GKE cluster named in the transform tasks' cluster_name.
# graph_paths places this task after the transform tasks.
# NOTE(review): no trigger_rule is set, so with Airflow's default rule this
# task would not run if an upstream transform fails - confirm that leaving the
# cluster up in that case is acceptable.
- operator: "GKEDeleteClusterOperator"
  args:
    task_id: "delete_cluster"
    name: "pdp-libraries-io-repository-dependencies"
    project_id: "{{ var.value.gcp_project }}"
    location: "us-central1-c"
# Task ordering for the DAG:
#   1. stage/split the source CSV, then create the GKE cluster;
#   2. run all chunk transforms in parallel on that cluster;
#   3. delete the cluster once the transforms are done;
#   4. run the un-suffixed first load by itself, then the remaining loads in
#      parallel.
# Every load task writes to the same BigQuery table, so the first load is
# sequenced ahead of the others: a load that truncates the table must not race
# with loads of the other chunks.
# NOTE(review): assumes load_repository_dependencies_to_bq is the task that
# resets the table for each run - confirm its write_disposition.
graph_paths:
  - "bash_gcs_to_gcs >> create_cluster >> [transform_repository_dependencies,transform_repository_dependencies_2,transform_repository_dependencies_3,transform_repository_dependencies_4,transform_repository_dependencies_5,transform_repository_dependencies_6,transform_repository_dependencies_7,transform_repository_dependencies_8,transform_repository_dependencies_9,transform_repository_dependencies_10,transform_repository_dependencies_11] >> delete_cluster >> load_repository_dependencies_to_bq >> [load_repository_dependencies_to_bq_2, load_repository_dependencies_to_bq_3, load_repository_dependencies_to_bq_4, load_repository_dependencies_to_bq_5, load_repository_dependencies_to_bq_6, load_repository_dependencies_to_bq_7, load_repository_dependencies_to_bq_8, load_repository_dependencies_to_bq_9, load_repository_dependencies_to_bq_10, load_repository_dependencies_to_bq_11]"