def traverse_and_create_index()

in directory_listing.py [0:0]


def traverse_and_create_index(dir, sas_url=None, overwrite_files=False,
                              template_fun=create_plain_index, basepath=None):
    '''
    Recursively traverses the local directory *dir* and generates an index
    file (index.html) in each folder, using *template_fun* to generate the
    HTML output.  Hidden files and folders (names starting with '.') are
    excluded from the listings and hidden folders are not traversed.

    Args:
        dir: string, path to the local directory to traverse
        sas_url: optional string, SAS URL of the blob storage location that
            corresponds to *dir*, e.g.
            'https://accname.blob.core.windows.net/bname/path/to/folder?st=...'.
            If given, the content type of the blob matching each written
            index file is set to 'text/html'.  This function only updates
            blob headers, it does not upload anything; presumably *dir* is a
            mounted view of that container (verify against the caller).
        overwrite_files: bool, if False, folders that already contain an
            index.html are skipped entirely (no rewrite, no header update)
        template_fun: function taking four arguments
            (string, list of string, list of string, string or None)
            representing the current root with the *dir* prefix removed, the
            list of folders, the list of files, and the current root relative
            to *basepath* (None if *basepath* is None).
            Should return the HTML source of the index file
        basepath: optional string, path against which the fourth argument of
            *template_fun* is computed

    Return:
        None.  If setting the blob headers raises
        azure.common.AzureMissingResourceHttpError, an error is printed and
        the traversal stops early.
    '''

    print("Traversing {}".format(dir))

    # Make sure we remove the trailing /
    dir = os.path.normpath(dir)

    # If we want to set the content type in blob storage using a SAS URL
    if sas_url:
        account_name, container_name, container_folder, sas_token = \
            _split_sas_url(sas_url)

        # Prepare the storage access
        target_settings = ContentSettings(content_type='text/html')
        blob_service = BlobServiceClient(
            account_url=f'{account_name}.blob.core.windows.net',
            credential=sas_token)

    # Traverse directory and all sub directories, excluding hidden files
    for root, dirs, files in os.walk(dir):

        # Exclude files and folders that are hidden.  *dirs* is modified in
        # place so that os.walk does not descend into hidden folders.
        files = [f for f in files if not f.startswith('.')]
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        # Output is written to file *root*/index.html
        output_file = os.path.join(root, "index.html")

        if not overwrite_files and os.path.isfile(output_file):
            print('Skipping {}, file exists'.format(output_file))
            continue

        print("Generating {}".format(output_file))

        # Generate HTML with template function
        dirname = None
        if basepath is not None:
            dirname = os.path.relpath(root, basepath)
        html = template_fun(root[len(dir):], dirs, files, dirname)

        # Write to file
        with open(output_file, 'wt') as fi:
            fi.write(html)

        # Set content type in blob storage
        if sas_url:
            # Blob names always use '/' as the separator, whereas the local
            # path uses os.sep (a backslash on Windows).  relpath is used
            # rather than slicing off len(dir) + 1 characters so that the
            # result is also correct when *dir* is a filesystem root.
            relative_path = os.path.relpath(output_file, dir).replace(os.sep, '/')
            if container_folder:
                output_blob_path = container_folder + '/' + relative_path
            else:
                output_blob_path = relative_path
            try:
                blob_client = blob_service.get_blob_client(container_name, output_blob_path)
                blob_client.set_http_headers(content_settings=target_settings)
            # NOTE(review): BlobServiceClient looks like the v12 SDK, which
            # reports errors via azure.core.exceptions - confirm that this
            # handler can actually fire.
            except azure.common.AzureMissingResourceHttpError:
                print('ERROR: It seems the SAS URL is incorrect or does not allow setting properties.')
                return


def _split_sas_url(sas_url):
    '''
    Splits a blob storage SAS URL into account, container, folder and token.

    Example: 'https://accname.blob.core.windows.net/bname/path/to/folder?st=...&se=...'
    is split into ('accname', 'bname', 'path/to/folder', 'st=...&se=...').

    Args:
        sas_url: string of the form
            scheme://account.host/container[/folder][?token]

    Return:
        tuple (account_name, container_name, container_folder, sas_token).
        container_folder is '' if the URL points at the container root (any
        trailing '/' is removed), sas_token is None if the URL contains no '?'.
    '''
    # 'https://accname.blob.core.windows.net/bname/path/to/folder' and 'st=...&se=...&sp=...&...'
    base_url, separator, sas_token = sas_url.partition('?')
    if not separator:
        sas_token = None

    # Remove https:// from base url
    # 'accname.blob.core.windows.net/bname/path/to/folder'
    base_url = base_url.split("//", 1)[1]

    # Everything up to the first dot is account name
    # 'accname'
    account_name = base_url.split(".", 1)[0]

    # Get everything after the first /
    # 'bname/path/to/folder'
    container_path = base_url.split("/", 1)[1]

    # Get container name and subfolder
    # 'bname', 'path/to/folder'
    container_name, _, container_folder = container_path.partition("/")

    # A trailing slash would otherwise yield 'folder//index.html' blob paths
    container_folder = container_folder.rstrip('/')

    return account_name, container_name, container_folder, sas_token