in awsglue/scripts/crawler_undo.py
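
The function below relies on `argparse` and on three module-level constants defined elsewhere in this script. A minimal sketch of those definitions, assuming typical values (the `glue` prefix matches the public AWS Glue endpoint pattern `https://glue.<region>.amazonaws.com`; the other two values are assumptions, not taken from this excerpt):

import argparse

DEFAULT_GLUE_ENDPOINT = 'glue'             # assumed: yields https://glue.<region>.amazonaws.com
DEFAULT_CATALOG_ENDPOINT = 'datacatalog'   # assumed catalog endpoint name
DEFAULT_REGION = 'us-east-1'               # assumed default region
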
def crawler_undo_options(args):
    # Parse command-line options; unknown arguments are tolerated.
    parser = argparse.ArgumentParser(description='This script allows you to roll back the effects of a crawler.')
    parser.add_argument('-c', '--crawler-name', required=True, help='Name of the crawler to roll back.')
    parser.add_argument('-b', '--backup-location', required=False, help='Location of the backup to use. If not specified, no backup is used.')
    parser.add_argument('-d', '--database-name', required=False, help='Database to roll back. If not specified, '
                                                                      'the database target of the crawler is used instead.')
    parser.add_argument('-t', '--timestamp', required=False, help='Timestamp to roll back to, in milliseconds since epoch. If not specified, '
                                                                  'the start timestamp of the crawler is used instead.')
    parser.add_argument('-r', '--region', required=False, default=DEFAULT_REGION, help='Optional DataCatalog service endpoint region.')
    options, unknown = parser.parse_known_args(args)

    # If either the database name or the timestamp was not given, look it up
    # from the crawler's own metadata via the AWS Glue API.
    if options.database_name is None or options.timestamp is None:
        import boto3  # Imported here so the script does not fail when boto3 is not required.
        glue = boto3.client('glue', endpoint_url="https://%s.%s.amazonaws.com" % (DEFAULT_GLUE_ENDPOINT, options.region))
        crawler = glue.get_crawler(Name=options.crawler_name)['Crawler']

    if options.database_name is not None:
        database_name = options.database_name
    else:
        database_name = crawler['DatabaseName']

    if options.timestamp is not None:
        timestamp = options.timestamp
    else:
        timestamp = crawler['LastCrawlInfo']['StartTime']

    # Options dictionary consumed by the rollback logic elsewhere in the script.
    return {
        "catalog.name": DEFAULT_CATALOG_ENDPOINT,
        "catalog.region": options.region,
        "catalog.database": database_name,
        "crawler.name": options.crawler_name,
        "s3.backup_location": options.backup_location,
        "timestamp": int(timestamp)  # expected in milliseconds since epoch
    }
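
A rough usage sketch, assuming the script is run directly. `crawler_undo` is a hypothetical stand-in for the rollback routine that consumes this options dictionary; the real consumer is defined elsewhere in the script:

import sys

if __name__ == '__main__':
    # argv[0] is skipped; parse_known_args tolerates extra arguments anyway.
    options = crawler_undo_options(sys.argv[1:])
    print("Rolling back crawler %s in database %s to timestamp %d"
          % (options["crawler.name"], options["catalog.database"], options["timestamp"]))
    # crawler_undo(options)  # hypothetical entry point, defined elsewhere in the script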