datasets/libraries_io/pipelines/repositories/pipeline.yaml (717 lines of code) (raw):

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pipeline overview (task order is defined in `graph_paths` at the bottom):
#   1. bash_gcs_to_gcs          - stage the repositories CSV and split it.
#   2. create_cluster           - create the GKE cluster for the transforms.
#   3. transform_repositories*  - one transform pod per split part.
#   4. delete_cluster           - delete the GKE cluster.
#   5. load_repositories_to_bq* - load the transformed parts into
#                                 libraries_io.repositories, one at a time.

---
resources:
  - type: bigquery_table
    table_id: repositories
    # NOTE(review): description is left empty here (parses as null).
    description:

dag:
  airflow_version: 2
  initialize:
    dag_id: repositories
    default_args:
      owner: "Google"
      depends_on_past: False
      start_date: "2022-11-15"
    max_active_runs: 1
    schedule_interval: "@daily"
    catchup: False
    default_view: graph

  tasks:
    # Stage the source CSV. If the tarball is already present, only the
    # copy/split step runs; otherwise the archive is downloaded and extracted
    # first. `split` cuts the CSV into parts of 13,000,000 lines each; the
    # transform tasks below read the parts named aa.csv, ab.csv and ac.csv.
    # NOTE(review): the local tarball is named lib-1.6.0.tar.gz although the
    # download URL and extracted paths refer to release 1.4.0 - confirm.
    - operator: "BashOperator"
      description: "Fetch data gcs - gcs"
      args:
        task_id: "bash_gcs_to_gcs"
        bash_command: |
          if test -f /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz;
          then
              mkdir -p /home/airflow/gcs/data/libraries_io/repositories/
              cp /home/airflow/gcs/data/libraries_io/libraries-1.4.0-2018-12-22/repositories-1.4.0-2018-12-22.csv /home/airflow/gcs/data/libraries_io/repositories/repositories.csv
              split -l 13000000 --additional-suffix=.csv /home/airflow/gcs/data/libraries_io/repositories/repositories.csv /home/airflow/gcs/data/libraries_io/repositories/
              rm /home/airflow/gcs/data/libraries_io/repositories/repositories.csv
          else
              mkdir -p /home/airflow/gcs/data/libraries_io/
              curl -o /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz -L https://zenodo.org/record/2536573/files/Libraries.io-open-data-1.4.0.tar.gz
              tar -xf /home/airflow/gcs/data/libraries_io/lib-1.6.0.tar.gz -C /home/airflow/gcs/data/libraries_io/
              mkdir -p /home/airflow/gcs/data/libraries_io/repositories/
              cp /home/airflow/gcs/data/libraries_io/libraries-1.4.0-2018-12-22/repositories-1.4.0-2018-12-22.csv /home/airflow/gcs/data/libraries_io/repositories/repositories.csv
              split -l 13000000 --additional-suffix=.csv /home/airflow/gcs/data/libraries_io/repositories/repositories.csv /home/airflow/gcs/data/libraries_io/repositories/
              rm /home/airflow/gcs/data/libraries_io/repositories/repositories.csv
          fi

    # GKE cluster shared by the three transform pods.
    - operator: "GKECreateClusterOperator"
      args:
        task_id: "create_cluster"
        project_id: "{{ var.value.gcp_project }}"
        location: "us-central1-c"
        body:
          name: pdp-libraries-io-repositories
          initial_node_count: 1
          network: "{{ var.value.vpc_network }}"
          node_config:
            machine_type: e2-standard-16
            oauth_scopes:
              - https://www.googleapis.com/auth/devstorage.read_write
              - https://www.googleapis.com/auth/cloud-platform

    # Transform for part 1 (aa.csv -> data_repositories_1.csv).
    # NOTE(review): the RENAME_MAPPINGS keys "Issues enable" and "SCM typ"
    # look truncated next to "Wiki enabled" / "Pages enabled" - confirm
    # against the source CSV header before changing them.
    - operator: "GKEStartPodOperator"
      description: "Run CSV transform within kubernetes pod"
      args:
        task_id: "transform_repositories"
        startup_timeout_seconds: 600
        name: "repositories"
        namespace: "default"
        project_id: "{{ var.value.gcp_project }}"
        location: "us-central1-c"
        cluster_name: pdp-libraries-io-repositories
        image_pull_policy: "Always"
        image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
        env_vars:
          SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
          SOURCE_GCS_OBJECT: "data/libraries_io/repositories/aa.csv"
          SOURCE_FILE: "files/repositories.csv"
          TARGET_FILE: "files/data_repositories.csv"
          TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
          TARGET_GCS_PATH: "data/libraries_io/repositories/data_repositories_1.csv"
          CHUNKSIZE: "100000"
          PIPELINE_NAME: "repositories"
          RENAME_MAPPINGS: >-
            {"ID":"id","Host Type":"host_type","Name with Owner":"name_with_owner","Description":"description","Fork":"fork",
            "Created Timestamp":"created_timestamp","Updated Timestamp":"updated_timestamp","Last pushed Timestamp":"last_pushed_timestamp",
            "Homepage URL":"homepage_url","Size":"size","Stars Count":"stars_count","Language":"language","Issues enable":"issues_enabled",
            "Wiki enabled":"wiki_enabled","Pages enabled":"pages_enabled","Forks Count":"forks_count","Mirror URL":"mirror_url",
            "Open Issues Count":"open_issues_count","Default branch":"default_branch","Watchers Count":"watchers_count","UUID":"uuid",
            "Fork Source Name with Owner":"fork_source_name_with_owner","License":"license","Contributors Count":"contributors_count",
            "Readme filename":"readme_filename","Changelog filename":"changelog_filename","Contributing guidelines filename":"contributing_guidelines_filename",
            "License filename":"license_filename","Code of Conduct filename":"code_of_conduct_filename",
            "Security Threat Model filename":"security_threat_model_filename","Security Audit filename":"security_audit_filename",
            "Status":"status","Last Synced Timestamp":"last_synced_timestamp","SourceRank":"sourcerank","Display Name":"display_name",
            "SCM typ":"scm_type","Pull requests enabled":"pull_requests_enabled","Logo URL":"logo_url","Keywords":"keywords","39":"an"}
          CSV_HEADERS: >-
            ["id","host_type","name_with_owner","description","fork","created_timestamp","updated_timestamp","last_pushed_timestamp",
            "homepage_url","size","stars_count","language","issues_enabled","wiki_enabled","pages_enabled","forks_count","mirror_url",
            "open_issues_count","default_branch","watchers_count","uuid","fork_source_name_with_owner","license","contributors_count",
            "readme_filename","changelog_filename","contributing_guidelines_filename","license_filename","code_of_conduct_filename",
            "security_threat_model_filename","security_audit_filename","status","last_synced_timestamp","sourcerank","display_name",
            "scm_type","pull_requests_enabled","logo_url","keywords","an"]
        container_resources:
          memory:
            request: "16Gi"
          cpu:
            request: "1"
          ephemeral-storage:
            request: "10Gi"

    # Load of part 1. This is the only load that uses WRITE_TRUNCATE, so it
    # must run before the other two loads (see graph_paths).
    # NOTE(review): skip_leading_rows is 2 on every load - confirm that the
    # transformed files really carry two non-data rows at the top.
    - operator: "GoogleCloudStorageToBigQueryOperator"
      description: "Task to load CSV data to a BigQuery table"
      args:
        task_id: "load_repositories_to_bq"
        bucket: "{{ var.value.composer_bucket }}"
        source_objects: ["data/libraries_io/repositories/data_repositories_1.csv"]
        source_format: "CSV"
        destination_project_dataset_table: "libraries_io.repositories"
        skip_leading_rows: 2
        allow_quoted_newlines: True
        write_disposition: "WRITE_TRUNCATE"
        schema_fields:
          - name: "id"
            type: "integer"
            description: "he unique primary key of the repository in the Libraries.io database."
            mode: "nullable"
          - name: "host_type"
            type: "string"
            description: "Which website the repository is hosted on either GitHub GitLab or Bitbucket."
            mode: "nullable"
          - name: "name_with_owner"
            type: "string"
            description: "The repository name and owner seperated by a slash also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
            mode: "nullable"
          - name: "description"
            type: "string"
            description: "Description of repository."
            mode: "nullable"
          - name: "fork"
            type: "boolean"
            description: "Is the repository a fork of another."
            mode: "nullable"
          - name: "created_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was created on the host."
            mode: "nullable"
          - name: "updated_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was last saved by Libraries.io."
            mode: "nullable"
          - name: "last_pushed_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was last pushed to only available for GitHub repositories."
            mode: "nullable"
          - name: "homepage_url"
            type: "string"
            description: "URL of a declared homepage or other website for the repository."
            mode: "nullable"
          - name: "size"
            type: "integer"
            description: "Size of the repository in kilobytes only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "stars_count"
            type: "integer"
            description: "Number of stars on the repository only available for GitHub and GitLab."
            mode: "nullable"
          - name: "language"
            type: "string"
            description: "Primary programming language the project is written in only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "issues_enabled"
            type: "boolean"
            description: "Is the bug tracker enabled for this repository?."
            mode: "nullable"
          - name: "wiki_enabled"
            type: "boolean"
            description: "Is the wiki enabled for this repository?."
            mode: "nullable"
          - name: "pages_enabled"
            type: "boolean"
            description: "Is GitHub pages enabled for this repository? only possible for GitHub."
            mode: "nullable"
          - name: "forks_count"
            type: "integer"
            description: "Number of forks of this repository."
            mode: "nullable"
          - name: "mirror_url"
            type: "string"
            description: "URL of the repositroy of which this is a mirror of only present if this repository is a mirror of another."
            mode: "nullable"
          - name: "open_issues_count"
            type: "integer"
            description: "Number of open issues on the repository bug tracker only available for GitHub and GitLab."
            mode: "nullable"
          - name: "default_branch"
            type: "string"
            description: "Primary branch of the repository."
            mode: "nullable"
          - name: "watchers_count"
            type: "integer"
            description: "Number of subscribers to all notifications for the repository only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "uuid"
            type: "string"
            description: "ID of the repository on the remote host not unique between GitLab and GitHub repositories."
            mode: "nullable"
          - name: "fork_source_name_with_owner"
            type: "string"
            description: "If the repository is a fork the repository name and owner seperated by a slash of the repository if was forked from."
            mode: "nullable"
          - name: "license"
            type: "string"
            description: "SPDX identifier of the license of the repository only available for GitHub repositories."
            mode: "nullable"
          - name: "contributors_count"
            type: "integer"
            description: "Number of unique contributors that have committed to the default branch."
            mode: "nullable"
          - name: "readme_filename"
            type: "string"
            description: "If a readme file has been detected the full name of the readme file e.g README.md."
            mode: "nullable"
          - name: "changelog_filename"
            type: "string"
            description: "If a changelog file has been detected the full name of the changelog file e.g changelog.txt."
            mode: "nullable"
          - name: "contributing_guidelines_filename"
            type: "string"
            description: "If a contributing guidelines file has been detected the full name of the contributing guidelines file e.g contributing.md."
            mode: "nullable"
          - name: "license_filename"
            type: "string"
            description: "If a license file has been detected the full name of the license file e.g LICENSE."
            mode: "nullable"
          - name: "code_of_conduct_filename"
            type: "string"
            description: "If a code of conduct file has been detected the full name of the code of conduct file e.g code_of_conduct.md."
            mode: "nullable"
          - name: "security_threat_model_filename"
            type: "string"
            description: "If a Security Threat Model file has been detected the full name of the Security Threat Model file e.g threatmodel.md."
            mode: "nullable"
          - name: "security_audit_filename"
            type: "string"
            description: "If a Security Audit file has been detected the full name of the Security Audit file e.g security.md."
            mode: "nullable"
          - name: "status"
            type: "string"
            description: "Either Active Deprecated Unmaintained Help Wanted Removed no value also means active. Updated when detected by Libraries.io or su. manually by Libraries.io user via \"repo suggection\" feature."
            mode: "nullable"
          - name: "last_synced_timestamp"
            type: "timestamp"
            description: "Timestamp of when Libraries.io last synced the repository from the host API."
            mode: "nullable"
          - name: "sourcerank"
            type: "integer"
            description: "Libraries.io defined score based on quality popularity and community metrics."
            mode: "nullable"
          - name: "display_name"
            type: "string"
            description: "Display name for the repository only available for GitLab repositories."
            mode: "nullable"
          - name: "scm_type"
            type: "string"
            description: "Type of source control repository uses always \"git\" for GitHub and GitLab."
            mode: "nullable"
          - name: "pull_requests_enabled"
            type: "string"
            description: "Are pull requests enabled for this repository? Only available for GitLab repositories."
            mode: "nullable"
          - name: "logo_url"
            type: "string"
            description: "Custom logo url for repository only available for GitLab repositories."
            mode: "nullable"
          - name: "keywords"
            type: "string"
            description: "Comma separated array of keywords called \"topics\" on GitHub only available for GitHub and GitLab."
            mode: "nullable"
          - name: "an"
            type: "string"
            description: ""
            mode: "nullable"

    # Transform for part 2 (ab.csv -> data_repositories_2.csv). Same
    # settings as transform_repositories apart from the object paths.
    - operator: "GKEStartPodOperator"
      description: "Run CSV transform within kubernetes pod"
      args:
        task_id: "transform_repositories_2"
        startup_timeout_seconds: 600
        name: "repositories"
        namespace: "default"
        project_id: "{{ var.value.gcp_project }}"
        location: "us-central1-c"
        cluster_name: pdp-libraries-io-repositories
        image_pull_policy: "Always"
        image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
        env_vars:
          SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
          SOURCE_GCS_OBJECT: "data/libraries_io/repositories/ab.csv"
          SOURCE_FILE: "files/repositories.csv"
          TARGET_FILE: "files/data_repositories.csv"
          TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
          TARGET_GCS_PATH: "data/libraries_io/repositories/data_repositories_2.csv"
          CHUNKSIZE: "100000"
          PIPELINE_NAME: "repositories"
          RENAME_MAPPINGS: >-
            {"ID":"id","Host Type":"host_type","Name with Owner":"name_with_owner","Description":"description","Fork":"fork",
            "Created Timestamp":"created_timestamp","Updated Timestamp":"updated_timestamp","Last pushed Timestamp":"last_pushed_timestamp",
            "Homepage URL":"homepage_url","Size":"size","Stars Count":"stars_count","Language":"language","Issues enable":"issues_enabled",
            "Wiki enabled":"wiki_enabled","Pages enabled":"pages_enabled","Forks Count":"forks_count","Mirror URL":"mirror_url",
            "Open Issues Count":"open_issues_count","Default branch":"default_branch","Watchers Count":"watchers_count","UUID":"uuid",
            "Fork Source Name with Owner":"fork_source_name_with_owner","License":"license","Contributors Count":"contributors_count",
            "Readme filename":"readme_filename","Changelog filename":"changelog_filename","Contributing guidelines filename":"contributing_guidelines_filename",
            "License filename":"license_filename","Code of Conduct filename":"code_of_conduct_filename",
            "Security Threat Model filename":"security_threat_model_filename","Security Audit filename":"security_audit_filename",
            "Status":"status","Last Synced Timestamp":"last_synced_timestamp","SourceRank":"sourcerank","Display Name":"display_name",
            "SCM typ":"scm_type","Pull requests enabled":"pull_requests_enabled","Logo URL":"logo_url","Keywords":"keywords","39":"an"}
          CSV_HEADERS: >-
            ["id","host_type","name_with_owner","description","fork","created_timestamp","updated_timestamp","last_pushed_timestamp",
            "homepage_url","size","stars_count","language","issues_enabled","wiki_enabled","pages_enabled","forks_count","mirror_url",
            "open_issues_count","default_branch","watchers_count","uuid","fork_source_name_with_owner","license","contributors_count",
            "readme_filename","changelog_filename","contributing_guidelines_filename","license_filename","code_of_conduct_filename",
            "security_threat_model_filename","security_audit_filename","status","last_synced_timestamp","sourcerank","display_name",
            "scm_type","pull_requests_enabled","logo_url","keywords","an"]
        container_resources:
          memory:
            request: "16Gi"
          cpu:
            request: "1"
          ephemeral-storage:
            request: "10Gi"

    # Load of part 2: appended to the rows written by load_repositories_to_bq.
    - operator: "GoogleCloudStorageToBigQueryOperator"
      description: "Task to load CSV data to a BigQuery table"
      args:
        task_id: "load_repositories_to_bq_2"
        bucket: "{{ var.value.composer_bucket }}"
        source_objects: ["data/libraries_io/repositories/data_repositories_2.csv"]
        source_format: "CSV"
        destination_project_dataset_table: "libraries_io.repositories"
        skip_leading_rows: 2
        allow_quoted_newlines: True
        write_disposition: "WRITE_APPEND"
        schema_fields:
          - name: "id"
            type: "integer"
            description: "he unique primary key of the repository in the Libraries.io database."
            mode: "nullable"
          - name: "host_type"
            type: "string"
            description: "Which website the repository is hosted on either GitHub GitLab or Bitbucket."
            mode: "nullable"
          - name: "name_with_owner"
            type: "string"
            description: "The repository name and owner seperated by a slash also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
            mode: "nullable"
          - name: "description"
            type: "string"
            description: "Description of repository."
            mode: "nullable"
          - name: "fork"
            type: "boolean"
            description: "Is the repository a fork of another."
            mode: "nullable"
          - name: "created_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was created on the host."
            mode: "nullable"
          - name: "updated_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was last saved by Libraries.io."
            mode: "nullable"
          - name: "last_pushed_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was last pushed to only available for GitHub repositories."
            mode: "nullable"
          - name: "homepage_url"
            type: "string"
            description: "URL of a declared homepage or other website for the repository."
            mode: "nullable"
          - name: "size"
            type: "integer"
            description: "Size of the repository in kilobytes only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "stars_count"
            type: "integer"
            description: "Number of stars on the repository only available for GitHub and GitLab."
            mode: "nullable"
          - name: "language"
            type: "string"
            description: "Primary programming language the project is written in only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "issues_enabled"
            type: "boolean"
            description: "Is the bug tracker enabled for this repository?."
            mode: "nullable"
          - name: "wiki_enabled"
            type: "boolean"
            description: "Is the wiki enabled for this repository?."
            mode: "nullable"
          - name: "pages_enabled"
            type: "boolean"
            description: "Is GitHub pages enabled for this repository? only possible for GitHub."
            mode: "nullable"
          - name: "forks_count"
            type: "integer"
            description: "Number of forks of this repository."
            mode: "nullable"
          - name: "mirror_url"
            type: "string"
            description: "URL of the repositroy of which this is a mirror of only present if this repository is a mirror of another."
            mode: "nullable"
          - name: "open_issues_count"
            type: "integer"
            description: "Number of open issues on the repository bug tracker only available for GitHub and GitLab."
            mode: "nullable"
          - name: "default_branch"
            type: "string"
            description: "Primary branch of the repository."
            mode: "nullable"
          - name: "watchers_count"
            type: "integer"
            description: "Number of subscribers to all notifications for the repository only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "uuid"
            type: "string"
            description: "ID of the repository on the remote host not unique between GitLab and GitHub repositories."
            mode: "nullable"
          - name: "fork_source_name_with_owner"
            type: "string"
            description: "If the repository is a fork the repository name and owner seperated by a slash of the repository if was forked from."
            mode: "nullable"
          - name: "license"
            type: "string"
            description: "SPDX identifier of the license of the repository only available for GitHub repositories."
            mode: "nullable"
          - name: "contributors_count"
            type: "integer"
            description: "Number of unique contributors that have committed to the default branch."
            mode: "nullable"
          - name: "readme_filename"
            type: "string"
            description: "If a readme file has been detected the full name of the readme file e.g README.md."
            mode: "nullable"
          - name: "changelog_filename"
            type: "string"
            description: "If a changelog file has been detected the full name of the changelog file e.g changelog.txt."
            mode: "nullable"
          - name: "contributing_guidelines_filename"
            type: "string"
            description: "If a contributing guidelines file has been detected the full name of the contributing guidelines file e.g contributing.md."
            mode: "nullable"
          - name: "license_filename"
            type: "string"
            description: "If a license file has been detected the full name of the license file e.g LICENSE."
            mode: "nullable"
          - name: "code_of_conduct_filename"
            type: "string"
            description: "If a code of conduct file has been detected the full name of the code of conduct file e.g code_of_conduct.md."
            mode: "nullable"
          - name: "security_threat_model_filename"
            type: "string"
            description: "If a Security Threat Model file has been detected the full name of the Security Threat Model file e.g threatmodel.md."
            mode: "nullable"
          - name: "security_audit_filename"
            type: "string"
            description: "If a Security Audit file has been detected the full name of the Security Audit file e.g security.md."
            mode: "nullable"
          - name: "status"
            type: "string"
            description: "Either Active Deprecated Unmaintained Help Wanted Removed no value also means active. Updated when detected by Libraries.io or su. manually by Libraries.io user via \"repo suggection\" feature."
            mode: "nullable"
          - name: "last_synced_timestamp"
            type: "timestamp"
            description: "Timestamp of when Libraries.io last synced the repository from the host API."
            mode: "nullable"
          - name: "sourcerank"
            type: "integer"
            description: "Libraries.io defined score based on quality popularity and community metrics."
            mode: "nullable"
          - name: "display_name"
            type: "string"
            description: "Display name for the repository only available for GitLab repositories."
            mode: "nullable"
          - name: "scm_type"
            type: "string"
            description: "Type of source control repository uses always \"git\" for GitHub and GitLab."
            mode: "nullable"
          - name: "pull_requests_enabled"
            type: "string"
            description: "Are pull requests enabled for this repository? Only available for GitLab repositories."
            mode: "nullable"
          - name: "logo_url"
            type: "string"
            description: "Custom logo url for repository only available for GitLab repositories."
            mode: "nullable"
          - name: "keywords"
            type: "string"
            description: "Comma separated array of keywords called \"topics\" on GitHub only available for GitHub and GitLab."
            mode: "nullable"
          - name: "an"
            type: "string"
            description: ""
            mode: "nullable"

    # Transform for part 3 (ac.csv -> data_repositories_3.csv). Same
    # settings as transform_repositories apart from the object paths.
    - operator: "GKEStartPodOperator"
      description: "Run CSV transform within kubernetes pod"
      args:
        task_id: "transform_repositories_3"
        startup_timeout_seconds: 600
        name: "repositories"
        namespace: "default"
        project_id: "{{ var.value.gcp_project }}"
        location: "us-central1-c"
        cluster_name: pdp-libraries-io-repositories
        image_pull_policy: "Always"
        image: "{{ var.json.libraries_io.container_registry.run_csv_transform_kub }}"
        env_vars:
          SOURCE_GCS_BUCKET: "{{ var.value.composer_bucket }}"
          SOURCE_GCS_OBJECT: "data/libraries_io/repositories/ac.csv"
          SOURCE_FILE: "files/repositories.csv"
          TARGET_FILE: "files/data_repositories.csv"
          TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
          TARGET_GCS_PATH: "data/libraries_io/repositories/data_repositories_3.csv"
          CHUNKSIZE: "100000"
          PIPELINE_NAME: "repositories"
          RENAME_MAPPINGS: >-
            {"ID":"id","Host Type":"host_type","Name with Owner":"name_with_owner","Description":"description","Fork":"fork",
            "Created Timestamp":"created_timestamp","Updated Timestamp":"updated_timestamp","Last pushed Timestamp":"last_pushed_timestamp",
            "Homepage URL":"homepage_url","Size":"size","Stars Count":"stars_count","Language":"language","Issues enable":"issues_enabled",
            "Wiki enabled":"wiki_enabled","Pages enabled":"pages_enabled","Forks Count":"forks_count","Mirror URL":"mirror_url",
            "Open Issues Count":"open_issues_count","Default branch":"default_branch","Watchers Count":"watchers_count","UUID":"uuid",
            "Fork Source Name with Owner":"fork_source_name_with_owner","License":"license","Contributors Count":"contributors_count",
            "Readme filename":"readme_filename","Changelog filename":"changelog_filename","Contributing guidelines filename":"contributing_guidelines_filename",
            "License filename":"license_filename","Code of Conduct filename":"code_of_conduct_filename",
            "Security Threat Model filename":"security_threat_model_filename","Security Audit filename":"security_audit_filename",
            "Status":"status","Last Synced Timestamp":"last_synced_timestamp","SourceRank":"sourcerank","Display Name":"display_name",
            "SCM typ":"scm_type","Pull requests enabled":"pull_requests_enabled","Logo URL":"logo_url","Keywords":"keywords","39":"an"}
          CSV_HEADERS: >-
            ["id","host_type","name_with_owner","description","fork","created_timestamp","updated_timestamp","last_pushed_timestamp",
            "homepage_url","size","stars_count","language","issues_enabled","wiki_enabled","pages_enabled","forks_count","mirror_url",
            "open_issues_count","default_branch","watchers_count","uuid","fork_source_name_with_owner","license","contributors_count",
            "readme_filename","changelog_filename","contributing_guidelines_filename","license_filename","code_of_conduct_filename",
            "security_threat_model_filename","security_audit_filename","status","last_synced_timestamp","sourcerank","display_name",
            "scm_type","pull_requests_enabled","logo_url","keywords","an"]
        container_resources:
          memory:
            request: "16Gi"
          cpu:
            request: "1"
          ephemeral-storage:
            request: "10Gi"

    # Load of part 3: must append. WRITE_TRUNCATE here would discard the
    # rows already loaded from parts 1 and 2.
    - operator: "GoogleCloudStorageToBigQueryOperator"
      description: "Task to load CSV data to a BigQuery table"
      args:
        task_id: "load_repositories_to_bq_3"
        bucket: "{{ var.value.composer_bucket }}"
        source_objects: ["data/libraries_io/repositories/data_repositories_3.csv"]
        source_format: "CSV"
        destination_project_dataset_table: "libraries_io.repositories"
        skip_leading_rows: 2
        allow_quoted_newlines: True
        write_disposition: "WRITE_APPEND"
        schema_fields:
          - name: "id"
            type: "integer"
            description: "he unique primary key of the repository in the Libraries.io database."
            mode: "nullable"
          - name: "host_type"
            type: "string"
            description: "Which website the repository is hosted on either GitHub GitLab or Bitbucket."
            mode: "nullable"
          - name: "name_with_owner"
            type: "string"
            description: "The repository name and owner seperated by a slash also maps to the url slug on the given repository host e.g. librariesio/libraries.io."
            mode: "nullable"
          - name: "description"
            type: "string"
            description: "Description of repository."
            mode: "nullable"
          - name: "fork"
            type: "boolean"
            description: "Is the repository a fork of another."
            mode: "nullable"
          - name: "created_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was created on the host."
            mode: "nullable"
          - name: "updated_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was last saved by Libraries.io."
            mode: "nullable"
          - name: "last_pushed_timestamp"
            type: "timestamp"
            description: "Timestamp of when the repository was last pushed to only available for GitHub repositories."
            mode: "nullable"
          - name: "homepage_url"
            type: "string"
            description: "URL of a declared homepage or other website for the repository."
            mode: "nullable"
          - name: "size"
            type: "integer"
            description: "Size of the repository in kilobytes only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "stars_count"
            type: "integer"
            description: "Number of stars on the repository only available for GitHub and GitLab."
            mode: "nullable"
          - name: "language"
            type: "string"
            description: "Primary programming language the project is written in only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "issues_enabled"
            type: "boolean"
            description: "Is the bug tracker enabled for this repository?."
            mode: "nullable"
          - name: "wiki_enabled"
            type: "boolean"
            description: "Is the wiki enabled for this repository?."
            mode: "nullable"
          - name: "pages_enabled"
            type: "boolean"
            description: "Is GitHub pages enabled for this repository? only possible for GitHub."
            mode: "nullable"
          - name: "forks_count"
            type: "integer"
            description: "Number of forks of this repository."
            mode: "nullable"
          - name: "mirror_url"
            type: "string"
            description: "URL of the repositroy of which this is a mirror of only present if this repository is a mirror of another."
            mode: "nullable"
          - name: "open_issues_count"
            type: "integer"
            description: "Number of open issues on the repository bug tracker only available for GitHub and GitLab."
            mode: "nullable"
          - name: "default_branch"
            type: "string"
            description: "Primary branch of the repository."
            mode: "nullable"
          - name: "watchers_count"
            type: "integer"
            description: "Number of subscribers to all notifications for the repository only available for GitHub and Bitbucket."
            mode: "nullable"
          - name: "uuid"
            type: "string"
            description: "ID of the repository on the remote host not unique between GitLab and GitHub repositories."
            mode: "nullable"
          - name: "fork_source_name_with_owner"
            type: "string"
            description: "If the repository is a fork the repository name and owner seperated by a slash of the repository if was forked from."
            mode: "nullable"
          - name: "license"
            type: "string"
            description: "SPDX identifier of the license of the repository only available for GitHub repositories."
            mode: "nullable"
          - name: "contributors_count"
            type: "integer"
            description: "Number of unique contributors that have committed to the default branch."
            mode: "nullable"
          - name: "readme_filename"
            type: "string"
            description: "If a readme file has been detected the full name of the readme file e.g README.md."
            mode: "nullable"
          - name: "changelog_filename"
            type: "string"
            description: "If a changelog file has been detected the full name of the changelog file e.g changelog.txt."
            mode: "nullable"
          - name: "contributing_guidelines_filename"
            type: "string"
            description: "If a contributing guidelines file has been detected the full name of the contributing guidelines file e.g contributing.md."
            mode: "nullable"
          - name: "license_filename"
            type: "string"
            description: "If a license file has been detected the full name of the license file e.g LICENSE."
            mode: "nullable"
          - name: "code_of_conduct_filename"
            type: "string"
            description: "If a code of conduct file has been detected the full name of the code of conduct file e.g code_of_conduct.md."
            mode: "nullable"
          - name: "security_threat_model_filename"
            type: "string"
            description: "If a Security Threat Model file has been detected the full name of the Security Threat Model file e.g threatmodel.md."
            mode: "nullable"
          - name: "security_audit_filename"
            type: "string"
            description: "If a Security Audit file has been detected the full name of the Security Audit file e.g security.md."
            mode: "nullable"
          - name: "status"
            type: "string"
            description: "Either Active Deprecated Unmaintained Help Wanted Removed no value also means active. Updated when detected by Libraries.io or su. manually by Libraries.io user via \"repo suggection\" feature."
            mode: "nullable"
          - name: "last_synced_timestamp"
            type: "timestamp"
            description: "Timestamp of when Libraries.io last synced the repository from the host API."
            mode: "nullable"
          - name: "sourcerank"
            type: "integer"
            description: "Libraries.io defined score based on quality popularity and community metrics."
            mode: "nullable"
          - name: "display_name"
            type: "string"
            description: "Display name for the repository only available for GitLab repositories."
            mode: "nullable"
          - name: "scm_type"
            type: "string"
            description: "Type of source control repository uses always \"git\" for GitHub and GitLab."
            mode: "nullable"
          - name: "pull_requests_enabled"
            type: "string"
            description: "Are pull requests enabled for this repository? Only available for GitLab repositories."
            mode: "nullable"
          - name: "logo_url"
            type: "string"
            description: "Custom logo url for repository only available for GitLab repositories."
            mode: "nullable"
          - name: "keywords"
            type: "string"
            description: "Comma separated array of keywords called \"topics\" on GitHub only available for GitHub and GitLab."
            mode: "nullable"
          - name: "an"
            type: "string"
            description: ""
            mode: "nullable"

    # Delete the GKE cluster created by create_cluster.
    - operator: "GKEDeleteClusterOperator"
      args:
        task_id: "delete_cluster"
        project_id: "{{ var.value.gcp_project }}"
        location: "us-central1-c"
        name: pdp-libraries-io-repositories

  # The transforms run in parallel; the loads are chained one after another.
  # All three loads write libraries_io.repositories, and the first one
  # truncates it, so running them concurrently could drop appended rows.
  graph_paths:
    - "bash_gcs_to_gcs >> create_cluster >> [transform_repositories,transform_repositories_2,transform_repositories_3] >> delete_cluster >> load_repositories_to_bq >> load_repositories_to_bq_2 >> load_repositories_to_bq_3"