build_obelics/06_01_create_set_image_urls_in_webdocs.py [59:69]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Download the `idx_job` sub-path of the filtered web-document dataset from
    # S3 into a local folder under `path_save_disk_tmp_files`, then load it.
    path_sync_s3 = os.path.join(args.path_web_document_dataset_filtered, str(args.idx_job))
    path_save_disk_web_document_dataset_filtered = os.path.join(
        path_save_disk_tmp_files, "web_document_dataset_filtered"
    )
    # Create the destination directly instead of shelling out to `mkdir`:
    # this does not error when the directory is already present and is not
    # affected by spaces or shell metacharacters in the path.
    os.makedirs(path_save_disk_web_document_dataset_filtered, exist_ok=True)
    command_sync_s3 = f"aws s3 sync {path_sync_s3} {path_save_disk_web_document_dataset_filtered}"
    # The sync is intentionally issued three times in a row and its exit
    # status is not checked (best-effort).
    # NOTE(review): presumably the repeats pick up objects that an earlier
    # pass failed to transfer -- confirm before reducing the count.
    for _ in range(3):
        os.system(command_sync_s3)

    web_document_dataset_filtered = load_from_disk(path_save_disk_web_document_dataset_filtered)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



build_obelics/06_03_remove_image_duplicates.py [102:112]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Download the `idx_job` sub-path of the filtered web-document dataset from
    # S3 into a local folder under `path_save_disk_tmp_files`, then load it.
    path_sync_s3 = os.path.join(args.path_web_document_dataset_filtered, str(args.idx_job))
    path_save_disk_web_document_dataset_filtered = os.path.join(
        path_save_disk_tmp_files, "web_document_dataset_filtered"
    )
    # Create the destination directly instead of shelling out to `mkdir`:
    # this does not error when the directory is already present and is not
    # affected by spaces or shell metacharacters in the path.
    os.makedirs(path_save_disk_web_document_dataset_filtered, exist_ok=True)
    command_sync_s3 = f"aws s3 sync {path_sync_s3} {path_save_disk_web_document_dataset_filtered}"
    # The sync is intentionally issued three times in a row and its exit
    # status is not checked (best-effort).
    # NOTE(review): presumably the repeats pick up objects that an earlier
    # pass failed to transfer -- confirm before reducing the count.
    for _ in range(3):
        os.system(command_sync_s3)

    web_document_dataset_filtered = load_from_disk(path_save_disk_web_document_dataset_filtered)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



