in parallel_enumerate_containers.py [0:0]
def list_blobs_in_container(container_name,account_name,sas_token,output_folder,prefix=None):
    """
    Enumerate the blobs in one container and write one line per blob to
    <output_folder>/<cleaned container name>.log.

    Each output line is the blob name, optionally followed by a tab and the
    blob size (when the module-level [get_sizes] is set), optionally followed
    by a tab and the access tier (when the module-level [get_access_tiers] is
    set; 'Unknown' is written when the service reports no tier).

    Blobs are fetched one page at a time ([n_blobs_per_page] per page).  After
    each page, the shared counter [cnt] is incremented by the number of blobs
    written from that page, and the function sleeps for [sleep_time_per_page]
    seconds if that value is positive.  If [debug_max_files] is positive,
    enumeration stops after that many blobs have been written.

    Args:
        container_name: name of the container to enumerate
        account_name: storage account name, used to build the blob endpoint URL
        sas_token: SAS token for the account; a leading '?' is added if missing
        output_folder: folder in which the .log file is written
        prefix: if not None, passed to list_blobs as name_starts_with

    Returns:
        None
    """

    if not sas_token.startswith('?'):
        sas_token = '?' + sas_token

    storage_account_url_blob = 'https://' + account_name + '.blob.core.windows.net'

    print('Starting enumeration for container {}'.format(container_name))

    # Build the output file path
    fn = path_utils.clean_filename(container_name) + '.log'
    output_file = os.path.join(output_folder,fn)

    # Create the service and container clients (this does not create a container)
    blob_service_client = BlobServiceClient(
        account_url=storage_account_url_blob,
        credential=sas_token)
    container_client = blob_service_client.get_container_client(container_name)

    # Enumerate
    #
    # NOTE(review): the file is opened with the platform default encoding; blob
    # names containing characters outside that encoding could fail to write.
    # Confirm against whatever reads these logs before forcing utf-8.
    with open(output_file,'w') as output_f:

        # An empty token requests the first page; the token becomes None once
        # the service reports that there are no more pages.
        continuation_token = ''
        hit_debug_limit = False
        i_blob = 0

        while (continuation_token is not None) and (not hit_debug_limit):

            # A fresh pager is created for every page, resuming from the
            # continuation token saved at the end of the previous page.
            blobs_iter = container_client.list_blobs(
                name_starts_with=prefix,
                results_per_page=n_blobs_per_page).by_page(
                continuation_token=continuation_token)
            blobs = next(blobs_iter)

            n_blobs_this_page = 0

            for blob in blobs:

                # Check the debug limit *before* counting this blob, so that
                # neither i_blob nor the per-page count includes a blob that
                # was never written to the output file.
                if (debug_max_files > 0) and (i_blob >= debug_max_files):
                    print('Hit debug path limit for prefix {}'.format(prefix))
                    hit_debug_limit = True
                    break

                i_blob += 1
                n_blobs_this_page += 1

                size_string = ''
                if get_sizes:
                    size_string = '\t' + str(blob.size)

                tier_string = ''
                if get_access_tiers:
                    s = blob.blob_tier
                    # This typically indicates a GPv1 Storage Account, with no tiering support
                    if s is None:
                        s = 'Unknown'
                    tier_string = '\t' + s

                output_f.write(blob.name + size_string + tier_string + '\n')

            # ...for each blob on this page

            # Report only the blobs actually written from this page
            cnt.increment(n=n_blobs_this_page)

            continuation_token = blobs_iter.continuation_token

            if sleep_time_per_page > 0:
                time.sleep(sleep_time_per_page)

        # ...while we're enumerating

    # ...with open(output_file)

    print('Finished enumerating {} blobs for container {}'.format(
        i_blob,container_name))