def urls_to_images()

in obelics/processors/web_document_extractor.py [0:0]


    def urls_to_images(self, reload_files=False):
        """Run the `urls_to_images` step and store its result in `self.dataset`.

        The JSON file at `self.path_save_file_map_url_idx` is always loaded into
        `self.map_url_idx`. The module-level `urls_to_images` function is then
        called with `self.dataset`, `self.dataset_images`, `self.map_url_idx`
        and `self.num_proc_urls_to_images`, and its return value replaces
        `self.dataset`.

        Args:
            reload_files: When true, `self.dataset` and `self.dataset_images`
                are first reloaded with `load_from_disk` from
                `self.path_save_dir_dataset` and
                `self.path_save_dir_dataset_images`. Useful when this method is
                called on its own, without the steps that normally set them.

        Raises:
            AttributeError: If `reload_files` is false and `self.dataset` or
                `self.dataset_images` has not been defined yet.
        """
        # JSON is UTF-8 by specification; being explicit avoids depending on
        # the platform's default locale encoding.
        with open(self.path_save_file_map_url_idx, encoding="utf-8") as f:
            self.map_url_idx = json.load(f)

        if reload_files:
            logger.info("Starting reloading variables for the step urls_to_images")
            self.dataset = load_from_disk(self.path_save_dir_dataset)
            self.dataset_images = load_from_disk(self.path_save_dir_dataset_images)
            logger.info("Finished reloading variables for the step urls_to_images")
        else:
            # `self.map_url_idx` was just assigned above, so only the two
            # datasets can be missing here. Fail with an actionable message
            # instead of printing a hint and then crashing on the call below.
            missing = [name for name in ("dataset", "dataset_images") if not hasattr(self, name)]
            if missing:
                raise AttributeError(
                    f"Missing attribute(s) {missing} for the step urls_to_images. "
                    "Set `reload_files=True` if you're calling this method alone to define the missing variables"
                )

        self.dataset = urls_to_images(
            dataset=self.dataset,
            dataset_images=self.dataset_images,
            map_url_idx=self.map_url_idx,
            num_proc=self.num_proc_urls_to_images,
        )