in parallel_enumerate_blobs.py [0:0]
def enumerate_prefix(prefix,sas_url,output_folder,get_sizes=False,get_access_tiers=False):
    """
    Enumerate the blobs whose names start with [prefix] and write them to a text file.

    The storage account, container, and SAS token are parsed from [sas_url].
    One line is written per blob to a file in [output_folder] whose name is
    path_utils.clean_filename(prefix).  Each line is the blob name, optionally
    followed by tab-separated size and/or access tier columns.

    This function also reads several names defined outside of it:
    n_blobs_per_page (page size requested from the service), debug_max_files
    (if > 0, stop after writing this many blobs), sleep_time_per_page (if > 0,
    seconds to sleep after each page), and cnt (a counter that is incremented
    by the number of blobs written for each page).

    Args:
        prefix: blob name prefix to enumerate.
        sas_url: container URL, optionally including a SAS token.
        output_folder: folder in which the output file is written.
        get_sizes: if True, append '\\t' + blob size to each line.
        get_access_tiers: if True, append '\\t' + access tier to each line
            ('Unknown' when the service reports no tier).

    Returns:
        None; results are written to the output file.
    """

    account_name = sas_blob_utils.get_account_from_uri(sas_url)
    container_name = sas_blob_utils.get_container_from_uri(sas_url)
    ro_sas_token = sas_blob_utils.get_sas_token_from_uri(sas_url)

    # The token is handed to the client with a leading '?'; the parsed token
    # is expected not to have one already.
    if ro_sas_token is not None:
        assert not ro_sas_token.startswith('?')
        ro_sas_token = '?' + ro_sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

    print('Starting enumeration for prefix {}'.format(prefix))

    # Determine the output file for this prefix
    fn = path_utils.clean_filename(prefix)
    output_file = os.path.join(output_folder,fn)

    # Create the container client (this does not create a container)
    blob_service_client = BlobServiceClient(
        account_url=storage_account_url_blob,
        credential=ro_sas_token)
    container_client = blob_service_client.get_container_client(container_name)

    # Enumerate
    with open(output_file,'w') as output_f:

        # An empty token requests the first page; the loop ends when the
        # pager reports no further continuation token.
        continuation_token = ''
        hit_debug_limit = False

        # Number of blobs actually written so far for this prefix
        i_blob = 0

        while (continuation_token is not None) and (not hit_debug_limit):

            blobs_iter = container_client.list_blobs(
                name_starts_with=prefix,
                results_per_page=n_blobs_per_page).by_page(
                continuation_token=continuation_token)

            # This is a paged list of BlobProperties objects
            blobs = next(blobs_iter)

            # Number of blobs actually written from this page
            n_blobs_this_page = 0

            for blob in blobs:

                # Check the limit *before* counting this blob, so that neither
                # i_blob nor n_blobs_this_page includes a blob we didn't write.
                if (debug_max_files > 0) and (i_blob >= debug_max_files):
                    print('Hit debug path limit for prefix {}'.format(prefix))
                    hit_debug_limit = True
                    break

                line = blob.name

                if get_sizes:
                    line += '\t' + str(blob.size)

                if get_access_tiers:
                    tier = blob.blob_tier
                    # This typically indicates a GPv1 Storage Account, with no tiering support
                    if tier is None:
                        tier = 'Unknown'
                    line += '\t' + tier

                output_f.write(line + '\n')

                i_blob += 1
                n_blobs_this_page += 1

            # ...for each blob on this page

            cnt.increment(n=n_blobs_this_page)

            continuation_token = blobs_iter.continuation_token

            if sleep_time_per_page > 0:
                time.sleep(sleep_time_per_page)

        # ...while we're enumerating

    # ...with open(output_file)

    print('Finished enumerating {} blobs for prefix {}'.format(
        i_blob,prefix))