in probe_scraper/remote_storage.py
import gzip
from pathlib import Path
from tempfile import TemporaryDirectory


def remote_storage_push(src: Path, dst: str, compress: bool = False, **kwargs):
    """Upload src to dst in remote storage, optionally gzip-compressing files."""
    sync = _get_sync_function(dst)
    if compress:
        kwargs["content_encoding"] = "gzip"
        if "exclude" in kwargs:
            raise NotImplementedError("exclude is not supported while compressing")
        # cloudfront is supposed to automatically gzip objects, but it won't do
        # that if the object size is > 10 megabytes
        # (https://webmasters.stackexchange.com/a/111734), which our files
        # sometimes are. to work around this, as well as to support google
        # cloud storage, we gzip the contents into a temporary directory and
        # upload that with an explicit content encoding
        with TemporaryDirectory() as tmp_name:
            tmp = Path(tmp_name)
            if src.is_dir():
                # mirror the directory tree into tmp, compressing each file
                for in_file in src.rglob("*"):
                    if not in_file.is_dir():
                        out_file = tmp / in_file.relative_to(src)
                        out_file.parent.mkdir(parents=True, exist_ok=True)
                        out_file.write_bytes(gzip.compress(in_file.read_bytes()))
                index = tmp / INDEX_HTML
                if index.exists():
                    # exclude index.html from the bulk sync so it isn't
                    # uploaded as application/json; exclude must be a tuple
                    kwargs["exclude"] = (INDEX_HTML,)
                sync(
                    src=tmp,
                    dst=dst,
                    content_type=APPLICATION_JSON,
                    **kwargs,
                )
                if index.exists():
                    # upload index.html separately as text/html; cannot
                    # delete or exclude when syncing a single file
                    kwargs["delete"] = False
                    kwargs["exclude"] = ()
                    sync(
                        src=index,
                        dst=dst,
                        content_type=TEXT_HTML,
                        **kwargs,
                    )
            else:
                # single file: compress it and pick the content type by name
                tmp_file = tmp / src.name
                tmp_file.write_bytes(gzip.compress(src.read_bytes()))
                content_type = TEXT_HTML if src.name == INDEX_HTML else APPLICATION_JSON
                sync(
                    src=tmp_file,
                    dst=dst,
                    content_type=content_type,
                    **kwargs,
                )
    else:
        sync(src, dst, **kwargs)
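
The excerpt relies on several module-level names that are defined elsewhere in remote_storage.py: the content-type constants, INDEX_HTML, and _get_sync_function. A minimal sketch of what they plausibly look like, inferred only from how the function above uses them; the sync implementations here are hypothetical stubs, not the project's actual code:

from pathlib import Path

APPLICATION_JSON = "application/json"
TEXT_HTML = "text/html"
INDEX_HTML = "index.html"


def _s3_sync(src: Path, dst: str, **kwargs) -> None:
    """Hypothetical stub; the real version would sync to S3."""
    raise NotImplementedError


def _gcs_sync(src: Path, dst: str, **kwargs) -> None:
    """Hypothetical stub; the real version would sync to Google Cloud Storage."""
    raise NotImplementedError


def _get_sync_function(dst: str):
    # assumed dispatch on the destination URL scheme, based on the comment
    # above mentioning both cloudfront (S3) and google cloud storage
    if dst.startswith("s3://"):
        return _s3_sync
    if dst.startswith("gs://"):
        return _gcs_sync
    raise ValueError(f"unsupported remote storage destination: {dst}")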
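
For context, a hedged usage sketch. The bucket names and local paths are made up, and the delete keyword is an assumption drawn from the function setting kwargs["delete"] = False for the single-file sync:

# upload a directory of JSON files (plus an optional index.html), compressed
remote_storage_push(
    src=Path("out/probe-data"),
    dst="s3://example-bucket/probe-data/",
    compress=True,
    delete=True,  # assumed to be forwarded to the underlying sync
)

# upload a single file uncompressed; kwargs pass straight through to sync
remote_storage_push(Path("out/firefox.json"), "gs://example-bucket/firefox.json")

The design choice: compressing locally and uploading with content_encoding="gzip" means the stored objects are already gzipped and browsers decompress them transparently, which sidesteps CloudFront's refusal to compress objects over 10 MB noted in the comment.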