def main()

in utilities/Hive_metastore_migration/src/import_into_datacatalog.py
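Entry point for the metastore import job: it parses command-line arguments, validates the option combination for the chosen mode, sets up the Spark and Glue contexts, and launches either the S3-based or the JDBC-based import into the Glue Data Catalog.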


import argparse
import sys

from awsglue.context import GlueContext

# get_options, validate_options_in_mode, validate_aws_regions, get_spark_env,
# metastore_import_from_s3 and metastore_full_migration are expected to come
# from the companion hive_metastore_migration module in the same directory.
from hive_metastore_migration import *


def main():
    # arguments
    from_s3 = 'from-s3'
    from_jdbc = 'from-jdbc'
    parser = argparse.ArgumentParser(prog=sys.argv[0])
    parser.add_argument('-m', '--mode', required=True, choices=[from_s3, from_jdbc],
                        help='Choose to migrate metastore either from JDBC or from S3')
    parser.add_argument('-c', '--connection-name', required=False,
                        help='Glue Connection name for Hive metastore JDBC connection')
    parser.add_argument('-R', '--region', required=False,
                        help='AWS region of target Glue Data Catalog, defaults to "us-east-1"')
    parser.add_argument('-d', '--database-prefix', required=False,
                        help='Optional prefix for database names in Glue Data Catalog')
    parser.add_argument('-t', '--table-prefix', required=False,
                        help='Optional prefix for table names in Glue Data Catalog')
    parser.add_argument('-D', '--database-input-path', required=False,
                        help='An S3 path containing JSON files of metastore database entities')
    parser.add_argument('-T', '--table-input-path', required=False,
                        help='An S3 path containing JSON files of metastore table entities')
    parser.add_argument('-P', '--partition-input-path', required=False,
                        help='An S3 path containing JSON files of metastore partition entities')

    options = get_options(parser, sys.argv)
    if options['mode'] == from_s3:
        validate_options_in_mode(
            options=options, mode=from_s3,
            required_options=['database_input_path', 'table_input_path', 'partition_input_path'],
            not_allowed_options=['database_prefix', 'table_prefix']
        )
    elif options['mode'] == from_jdbc:
        validate_options_in_mode(
            options=options, mode=from_jdbc,
            required_options=['connection_name'],
            not_allowed_options=['database_input_path', 'table_input_path', 'partition_input_path']
        )
    else:
        raise AssertionError('unknown mode ' + options['mode'])

    validate_aws_regions(options['region'])

    # spark env
    (conf, sc, sql_context) = get_spark_env()
    glue_context = GlueContext(sc)

    # launch job
    if options['mode'] == from_s3:
        metastore_import_from_s3(
            sql_context=sql_context,
            glue_context=glue_context,
            db_input_dir=options['database_input_path'],
            tbl_input_dir=options['table_input_path'],
            parts_input_dir=options['partition_input_path'],
            datacatalog_name='datacatalog',
            region=options.get('region') or 'us-east-1'
        )
    elif options['mode'] == from_jdbc:
        # Pull the JDBC connection settings from the named Glue Connection once
        # and reuse them, instead of extracting them a second time below.
        connection = glue_context.extract_jdbc_conf(options['connection_name'])
        metastore_full_migration(
            sc=sc,
            sql_context=sql_context,
            glue_context=glue_context,
            connection=connection,
            db_prefix=options.get('database_prefix') or '',
            table_prefix=options.get('table_prefix') or '',
            datacatalog_name='datacatalog',
            region=options.get('region') or 'us-east-1'
        )


if __name__ == '__main__':
    main()
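
For reference, a hypothetical invocation in each mode, assuming the script is submitted to a Spark environment where the AWS Glue libraries are available; the bucket paths and connection name below are placeholders, not values from the source:

# S3 mode: read database/table/partition entities previously exported as JSON to S3.
# Prefix options are rejected in this mode by validate_options_in_mode.
spark-submit import_into_datacatalog.py \
    --mode from-s3 \
    --database-input-path s3://example-bucket/databases \
    --table-input-path s3://example-bucket/tables \
    --partition-input-path s3://example-bucket/partitions \
    --region us-east-1

# JDBC mode: connect straight to the Hive metastore through a Glue Connection.
# Input-path options are rejected in this mode; prefixes are optional.
spark-submit import_into_datacatalog.py \
    --mode from-jdbc \
    --connection-name my-hive-metastore-connection \
    --database-prefix hive_ \
    --table-prefix hive_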