bigquery_etl/public_data/publish_public_data_views.py (35 lines of code) (raw):
"""
Generate and publish views for publicly available tables.
Generate view definitions for queries that are written to the
public data project and execute them. Views are published to
an internal project so that data is also accessible in private
datasets.
"""
from argparse import ArgumentParser
from google.cloud import bigquery
from ..config import ConfigLoader
from ..util import standard_args
from ..util.bigquery_tables import get_tables_matching_patterns
# Pattern applied when no positional table patterns are given on the command line.
DEFAULT_PATTERN = "mozilla-public-data:*.*"

# Command line interface; the module docstring doubles as the --help description.
parser = ArgumentParser(description=__doc__)

# Project in which the views get created. The default is looked up through
# ConfigLoader and falls back to "moz-fx-data-shared-prod".
parser.add_argument(
    "--target-project",
    help="Create views in the target project",
    default=ConfigLoader.get("default", "project", fallback="moz-fx-data-shared-prod"),
)

# Zero or more table patterns; an empty list of positionals yields
# [DEFAULT_PATTERN].
parser.add_argument(
    "patterns",
    nargs="*",
    default=[DEFAULT_PATTERN],
    metavar="[project:]dataset[.table]",
    help=(
        "Table that should have a latest-version view, may use shell-style wildcards, "
        f"defaults to: {DEFAULT_PATTERN}"
    ),
)

# Registers the shared dry-run option (read back as ``args.dry_run`` in main()).
standard_args.add_dry_run(parser)
def generate_and_publish_views(client, tables, target_project, dry_run):
    """Generate view definitions for public data tables and execute them.

    For every table ID in ``tables`` a ``CREATE OR REPLACE VIEW`` statement is
    sent through ``client``. The view keeps the dataset and table name of the
    public table, is placed in ``target_project`` and selects all columns from
    the public table.

    Args:
        client: Object providing ``query(sql, job_config=...)``; ``main()``
            passes a ``bigquery.Client``.
        tables: Iterable of table IDs of the form ``project.dataset.table``.
        target_project: ID of the project in which the views are created.
        dry_run: Forwarded to ``bigquery.QueryJobConfig(dry_run=...)``. When
            true, the submitted job is not waited on.

    Raises:
        ValueError: If a table ID does not consist of exactly three
            dot-separated parts.
        Exception: Whatever ``client.query()`` or the job's ``result()``
            raises when submitting or running a statement fails.
    """
    for public_table in tables:
        # Only dataset and table name are reused; the view lives in
        # target_project instead of the table's own project.
        _, dataset, table_name = public_table.split(".")
        full_view_id = f"{target_project}.{dataset}.{table_name}"
        view_sql = (
            "CREATE OR REPLACE VIEW\n"
            f"`{full_view_id}`\n"
            f"AS SELECT * FROM `{public_table}`\n"
        )

        job_config = bigquery.QueryJobConfig(use_legacy_sql=False, dry_run=dry_run)
        job = client.query(view_sql, job_config=job_config)
        if not dry_run:
            # query() only submits the job. Waiting on the result surfaces
            # errors that happen while the statement runs, instead of
            # silently dropping them when the script exits.
            job.result()
def main():
    """Parse the command line and publish views for the matching public tables."""
    options = parser.parse_args()
    destination = options.target_project

    # The same client resolves the table patterns and runs the view statements.
    bq_client = bigquery.Client(destination)
    matching_tables = get_tables_matching_patterns(bq_client, options.patterns)

    generate_and_publish_views(
        client=bq_client,
        tables=matching_tables,
        target_project=destination,
        dry_run=options.dry_run,
    )


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()