testscripts/build-id-list.py (38 lines of code) (raw):
#!/usr/bin/env python3
from typing import Union
from argparse import ArgumentParser
import boto3
import base64
MAX_ID_LENGTH=512
# val initialString = bucket + ":" + key
# if(initialString.length<=maxIdLength){
# encoder.encodeToString(initialString.toCharArray.map(_.toByte))
# } else {
# /* I figure that the best way to get something that should be unique for a long path is to chop out the middle */
# val chunkLength = initialString.length/3
# val stringParts = initialString.grouped(chunkLength).toList
# val midSectionLength = maxIdLength - chunkLength*2 //FIXME: what if chunkLength*2>512??
# val finalString = stringParts.head + stringParts(1).substring(0, midSectionLength) + stringParts(2)
# encoder.encodeToString(finalString.toCharArray.map(_.toByte))
# }
def make_id(bucket:str, entry:dict) -> str:
initial_string = bucket + ":" + entry["Key"]
if len(initial_string)<=MAX_ID_LENGTH:
return base64.b64encode(initial_string.encode("UTF-8")).decode("UTF-8")
else:
chunk_length = len(initial_string) / 3
mid_section_length = MAX_ID_LENGTH - chunk_length*2
final_section_start = len(initial_string) - 2*chunk_length
final_string = initial_string[0:chunk_length] + initial_string[chunk_length:mid_section_length+chunk_length] + initial_string[final_section_start:]
return base64.b64encode(final_string.encode("UTF-8")).decode("UTF-8")
def handle_next_page(bucket:str, prefix:Union[str,None], continuation_token:Union[str,None]):
s3_args = {
"Bucket": bucket,
"MaxKeys": 1000,
}
if prefix:
s3_args["Prefix"] = prefix
if continuation_token:
s3_args["ContinuationToken"] = continuation_token
response = client.list_objects_v2(**s3_args)
for entry in response["Contents"]:
print(make_id(bucket, entry))
if "NextContinuationToken" in response:
handle_next_page(bucket, prefix, response["NextContinuationToken"])
# START MAIN
parser = ArgumentParser()
parser.add_argument("--bucket", dest="bucket", help="bucket name to scan")
parser.add_argument("--prefix", dest="prefix", help="path prefix to output")
args = parser.parse_args()
client = boto3.client("s3")
prefix = None
if args.prefix != "":
prefix = args.prefix
handle_next_page(args.bucket, prefix, None)