cicd-deployers/iam_metadata_extractor.py (62 lines of code) (raw):

import argparse
import collections
import collections.abc
import json
import logging
import re
import sys
from typing import Optional

# Matches the `//iam_metadata: { ... }` comment block. The lazy `.*?` ends the
# capture at the FIRST closing brace, so the payload between the braces must be
# brace-free JSON (for example a list of strings).
_IAM_METADATA_RE = re.compile(r"//iam_metadata:\s*{(.*?)}", re.DOTALL)

# Matches the first `name: "..."` entry anywhere in the file content.
_TABLE_NAME_RE = re.compile(r"name:\s*\"([^\"]+)\"")

# Matches the `//` marker (and indentation before it) at the start of a line.
_LINE_COMMENT_PREFIX_RE = re.compile(r"^[ \t]*//", re.MULTILINE)

# Captures `<owner>/<repo>` from HTTPS, SSH or bare `owner/repo` references,
# ignoring an optional `.git` suffix and trailing slash.
_REPO_URL_RE = re.compile(r"(?:^|[:/])([^/:\s]+)/([^/\s]+?)(?:\.git)?/?$")

# Exact text a .sqlx file must contain to be treated as a DDL definition.
_DDL_TAG_MARKER = 'tags: ["ddl"]'

# Repository looked up under the authenticated user when the URL is unusable.
_FALLBACK_REPO_NAME = "aef-sample-dataform-repo"


def _parse_metadata_block(raw_block):
    """Decode the text captured between the braces of an iam_metadata block.

    Only the leading `//` of each line is stripped first, so whitespace and
    `//` inside JSON string values survive. If that does not yield valid JSON,
    the historical behaviour (dropping every `//` and all whitespace) is used.

    Raises:
        json.JSONDecodeError: If neither form is valid JSON.
    """
    uncommented = _LINE_COMMENT_PREFIX_RE.sub("", raw_block)
    try:
        return json.loads(uncommented)
    except json.JSONDecodeError:
        return json.loads(re.sub(r"//|\s", "", raw_block))


def _repo_full_name_from_url(remote_repo_url):
    """Return `owner/repo` parsed from a repository URL, or None if not found."""
    match = _REPO_URL_RE.search(remote_repo_url.strip())
    if match is None:
        return None
    return f"{match.group(1)}/{match.group(2)}"


def extract_iam_metadata(file_content: str) -> Optional[str]:
    """Extract the IAM metadata block of a .sqlx file and format it as JSON.

    The block is a `//iam_metadata: { ... }` comment. Everything between the
    opening brace and the first closing brace is decoded as JSON, so the
    payload itself cannot contain `}`.

    Args:
        file_content: The content of the .sqlx file.

    Returns:
        A JSON string (indent=2) with the keys "table" (the first
        `name: "..."` value in the file, or null when absent) and
        "iam_metadata" (the decoded payload). None if the file has no
        iam_metadata block or its payload is not valid JSON.
    """
    block_match = _IAM_METADATA_RE.search(file_content)
    if block_match is None:
        return None

    name_match = _TABLE_NAME_RE.search(file_content)
    table_name = name_match.group(1) if name_match else None
    raw_block = block_match.group(1)

    # Diagnostics go through logging so stdout carries only the final result.
    logging.debug("iam_metadata table name: %s", table_name)
    logging.debug("iam_metadata raw block: %s", raw_block)

    try:
        iam_metadata = _parse_metadata_block(raw_block)
    except json.JSONDecodeError as e:
        logging.error("Error decoding iam metadata JSON from .sqlx file: %s", e)
        return None

    return json.dumps({"table": table_name, "iam_metadata": iam_metadata}, indent=2)


def list_sqlx_files(repo):
    """Print the IAM metadata of every 'ddl'-tagged .sqlx file in a repository.

    The repository tree is walked breadth-first from the root. Each file whose
    name ends in ".sqlx" is downloaded, and when it contains the literal text
    `tags: ["ddl"]` its metadata is extracted with extract_iam_metadata().

    Args:
        repo: The GitHub repository object; only get_contents(path) is used.

    Returns:
        The list of metadata JSON strings that was printed to stdout.
    """
    all_metadata = []
    pending = collections.deque(repo.get_contents(""))
    while pending:
        entry = pending.popleft()
        if entry.type == "dir":
            pending.extend(repo.get_contents(entry.path))
            continue
        if not entry.name.endswith(".sqlx"):
            continue

        sqlx_text = repo.get_contents(entry.path).decoded_content.decode()
        if _DDL_TAG_MARKER not in sqlx_text:
            continue

        metadata = extract_iam_metadata(sqlx_text)
        if metadata:
            all_metadata.append(metadata)

    # Each entry is already a JSON string, so the printed document is a JSON
    # array of JSON-encoded strings; kept as-is for existing consumers.
    print(json.dumps(all_metadata, indent=2))
    return all_metadata


def main(args: collections.abc.Sequence[str]) -> int:
    """Parse command-line arguments and print the repository's IAM metadata.

    Example:
        python iam_metadata_extractor.py \
            --remote_repo_url https://github.com/<owner>/<repo>.git \
            --dataform_repositories_git_token <token>

    Args:
        args: Command-line arguments, excluding the program name.

    Returns:
        0 on success.
    """
    parser = argparse.ArgumentParser(description="IAM metadata extractor from dataform repository")
    parser.add_argument("--remote_repo_url", type=str, required=True, help="The github repository URL.")
    # NOTE(review): a token passed on the command line is visible in process
    # listings; consider reading it from an environment variable instead.
    parser.add_argument(
        "--dataform_repositories_git_token",
        type=str,
        required=True,
        help="The GitHub access token used to read the Dataform repository.",
    )
    params = parser.parse_args(args)

    # PyGithub is imported here so the parsing functions above stay importable
    # (and unit-testable) without the third-party package installed.
    from github import Auth
    from github import Github

    client = Github(auth=Auth.Token(params.dataform_repositories_git_token))

    repo_full_name = _repo_full_name_from_url(params.remote_repo_url)
    if repo_full_name:
        repo = client.get_repo(repo_full_name)
    else:
        logging.warning(
            "Could not derive owner/repo from %r; falling back to %s of the authenticated user.",
            params.remote_repo_url,
            _FALLBACK_REPO_NAME,
        )
        repo = client.get_user().get_repo(_FALLBACK_REPO_NAME)

    list_sqlx_files(repo)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))