def simplify_media_node()

in obelics/utils/simplification_utils.py [0:0]


def simplify_media_node(node, page_url):
    """Build a flat dict describing an ``img``, ``video`` or ``audio`` node.

    The media source is read with ``get_media_src``. A source that does not
    start with ``"http"`` is resolved against ``page_url`` through
    ``format_relative_to_absolute_path`` before being checked with
    ``is_url_valid``.

    Args:
        node: The media node; its ``tag`` and ``attributes`` are read.
        page_url: URL of the document containing the node. Stored under
            ``"document_url"`` and used to resolve non-``http`` sources.

    Returns:
        ``None`` when the node has no source, when the (possibly resolved)
        source is rejected by ``is_url_valid``, or when the tag is not one of
        ``img`` / ``video`` / ``audio``. Otherwise a dict:

        * ``img``: ``document_url``, ``unformatted_src``, ``src`` and, when
          available, ``formatted_filename``, ``alt_text``,
          ``rendered_width``, ``rendered_height``.
        * ``video``: ``document_url``, ``src`` and, when the attributes are
          present, ``width`` and ``height``.
        * ``audio``: ``document_url`` and ``src``.
    """
    raw_src = get_media_src(node)
    if not raw_src:
        return None

    # Anything that does not already look absolute is resolved against the page.
    if raw_src.startswith("http"):
        src = raw_src
    else:
        src = format_relative_to_absolute_path(page_url=page_url, relative_path=raw_src)

    if not is_url_valid(src):
        return None

    attrs = node.attributes
    tag = node.tag

    if tag == "img":
        image = {
            "document_url": page_url,
            "unformatted_src": raw_src,
            "src": src,
        }

        filename = format_filename(src)
        if filename:
            image["formatted_filename"] = filename

        # Only keep a non-empty alt text.
        alt = attrs["alt"] if "alt" in attrs else None
        if alt:
            image["alt_text"] = alt

        # TODO: eventually, image sizes could also be derived from `srcset` /
        # `sizes` when `width` / `height` are missing, e.g.
        # `{'src': 'http://wellbeingteams.org/wp-content/uploads/2017/04/spread600300.jpg',
        # 'width': None, 'height': None, 'alt': None, 'title': 'spread600300',
        # 'class': 'img-responsive wp-image-122',
        # 'srcset': 'https://wellbeingteams.org/wp-content/uploads/2017/04/spread600300-200x100.jpg 200w, https://wellbeingteams.org/wp-content/uploads/2017/04/spread600300-400x200.jpg 400w, https://wellbeingteams.org/wp-content/uploads/2017/04/spread600300.jpg 600w',
        # 'sizes': '(max-width: 800px) 100vw, 400px'}`
        for dimension in ("width", "height"):
            raw_size = attrs[dimension] if dimension in attrs else None
            if raw_size is None:
                continue
            try:
                image[f"rendered_{dimension}"] = format_image_size(raw_size)
            except ValueError:
                # Unrecognized size format (generally malformed markup): skip it.
                continue

        return image

    if tag == "video":
        video = {"document_url": page_url, "src": src}
        # NOTE(review): unlike the image branch, the attribute values are copied
        # as-is (no None filtering, no format_image_size) -- confirm intended.
        for dimension in ("width", "height"):
            if dimension in attrs:
                video[dimension] = attrs[dimension]
        return video

    if tag == "audio":
        return {"document_url": page_url, "src": src}

    # Any other tag is not treated as media.
    return None