projects/hive-bigquery-connector-demo/scripts/main.py (56 lines of code) (raw):

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This script prepares the data for the Hive-BigQuery-Connector demo.

Run this script after the `terraform apply` command, to prepare the data for
the demo.
"""

import argparse
import logging
import sys

import bq_funcs
from logging_funcs import setup_logging
import notebook_funcs
from state import ScriptState

logger = logging.getLogger("main")


def get_parser() -> argparse.ArgumentParser:
    """Build the command line parser for the demo preparation script.

    Returns:
        A parser with a single boolean option, ``--skip-tables-prep`` (and its
        ``--no-skip-tables-prep`` negation), stored on the parsed namespace
        as ``skip_tables`` and defaulting to ``False``.
    """
    parser = argparse.ArgumentParser(
        prog="HiveBQConnectorDemoPrep",
        description=(
            "Script to prepare data for the Hive-BigQuery-Connector demo"
        ),
    )
    parser.add_argument(
        "--skip-tables-prep",
        action=argparse.BooleanOptionalAction,
        dest="skip_tables",
        help="Skip (or not) the BigQuery tables preparation step (takes a long "
        "time)",
    )
    parser.set_defaults(skip_tables=False)
    return parser


def _drop_existing_tables() -> None:
    """Delete every table currently listed in the demo dataset."""
    # Resolve the client once instead of once per API call.
    client = ScriptState.bq_client()
    for table_item in client.list_tables(ScriptState.dataset()):
        # NOTE(review): assumes `client` is a google.cloud.bigquery.Client,
        # whose delete_table() accepts the items yielded by list_tables()
        # directly, so no per-table get_table() round trip is needed.
        client.delete_table(table_item)


def _prepare_tables() -> None:
    """Drop the dataset's current tables, then run each table handler."""
    logger.info(
        "Preparing BQ tables - grab a coffee, this will take a while"
    )
    _drop_existing_tables()
    # Handlers are invoked one after another, in this fixed order.
    table_handlers = (
        bq_funcs.handle_distribution_center,
        bq_funcs.handle_events,
        bq_funcs.handle_inventory_items,
        bq_funcs.handle_order_items,
        bq_funcs.handle_orders,
        bq_funcs.handle_products,
        bq_funcs.handle_users,
    )
    for handler in table_handlers:
        handler()


def run() -> None:
    """Main entry point for the script.

    Sets up logging, parses the command line, compiles the notebook and
    updates it with the staging bucket taken from the Terraform state, and,
    unless ``--skip-tables-prep`` was given, rebuilds the BigQuery tables.
    Finally logs the notebook URL to open next.
    """
    setup_logging()
    args = get_parser().parse_args(sys.argv[1:])

    notebook_full_path = notebook_funcs.compile_notebook()
    notebook_funcs.update_notebook(
        notebook_full_path, ScriptState.tf_state().staging_bucket
    )

    if not args.skip_tables:
        _prepare_tables()

    logger.info(
        "Finished! Open %snotebooks/GCS/notebook.ipynb in your browser to "
        "continue.",
        ScriptState.tf_state().jupyter_url,
    )


if __name__ == "__main__":
    run()