def remote_storage_push()

in probe_scraper/remote_storage.py [0:0]


def remote_storage_push(src: Path, dst: str, compress: bool = False, **kwargs):
    """Upload ``src`` to ``dst`` using the sync function selected for ``dst``.

    Without ``compress`` the call is a plain pass-through of ``src``, ``dst``
    and ``kwargs`` to the sync function.

    With ``compress`` every file is gzipped into a temporary directory and
    that copy is uploaded with ``content_encoding="gzip"``:

    * a directory is uploaded with the JSON content type, except for a
      top-level ``INDEX_HTML`` which is excluded from that pass and uploaded
      separately with the HTML content type;
    * a single file is uploaded with the HTML content type when it is named
      ``INDEX_HTML`` and with the JSON content type otherwise.

    Raises:
        NotImplementedError: if ``compress`` is set and ``exclude`` is given
            in ``kwargs``.
    """
    upload = _get_sync_function(dst)

    if not compress:
        upload(src, dst, **kwargs)
        return

    kwargs["content_encoding"] = "gzip"
    if "exclude" in kwargs:
        raise NotImplementedError("exclude is not supported while compressing")

    # CloudFront normally gzips objects on the fly, but skips anything larger
    # than 10 megabytes (https://webmasters.stackexchange.com/a/111734), a
    # size our files sometimes exceed. To cover that case, and Google Cloud
    # Storage as well, the content is gzipped locally into a temporary
    # directory and uploaded with an explicit content encoding.
    with TemporaryDirectory() as tmp_name:
        staging = Path(tmp_name)

        if not src.is_dir():
            # Single file: compress it under the same name and push it alone.
            gz_file = staging / src.name
            gz_file.write_bytes(gzip.compress(src.read_bytes()))
            if src.name == INDEX_HTML:
                content_type = TEXT_HTML
            else:
                content_type = APPLICATION_JSON
            upload(src=gz_file, dst=dst, content_type=content_type, **kwargs)
            return

        # Directory: mirror the tree into the staging area, gzipping each file.
        for original in src.rglob("*"):
            if original.is_dir():
                continue
            gz_file = staging / original.relative_to(src)
            gz_file.parent.mkdir(parents=True, exist_ok=True)
            gz_file.write_bytes(gzip.compress(original.read_bytes()))

        index_gz = staging / INDEX_HTML
        if index_gz.exists():
            # exclude must be a tuple
            tree_options = {**kwargs, "exclude": (INDEX_HTML,)}
        else:
            tree_options = kwargs
        upload(src=staging, dst=dst, content_type=APPLICATION_JSON, **tree_options)

        if not index_gz.exists():
            return

        # A single-file sync can neither delete nor exclude, so both options
        # are neutralised before pushing the index with its own content type.
        index_options = {**tree_options, "exclude": (), "delete": False}
        upload(src=index_gz, dst=dst, content_type=TEXT_HTML, **index_options)