codegen_sources/preprocessing/dataset_modes/obfuscation_functions_mode.py [153:187]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if executor is None:
            executor = LocalExecutor(folder=self.folder.joinpath("log"))
        # apply BPE with tmp suffix
        _bpe_ext = self.bpe.ext
        self.bpe.ext += TMP_EXT
        super().apply_bpe(executor)
        self.bpe.ext = _bpe_ext
        # restore BPE on obfuscation special tokens
        jobs = []
        to_restore = list(
            chain(
                *[
                    self.folder.glob(f"{lang}.{split}.*{self.bpe.ext}{TMP_EXT}")
                    for split in DATASET_SPLITS
                    for lang in self.languages
                ]
            )
        )
        for f in to_restore:
            job = executor.submit(
                self.bpe.repair_bpe_for_obfuscation_file, f, f.with_suffix("")
            )
            jobs.append(job)
        for job in jobs:
            job.result()
        for f in to_restore:
            assert f.with_suffix("").is_file()
            f.unlink()

    def _get_vocab(self, executor: Executor = None):
        raise Exception(
            "Vocab should not be learnt from obfuscated data. Learn it on monolingual data."
            "Please provide vocab or learn them."
            "To do so, please run pipepline with monolingual mode until get_vocab."
        )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



codegen_sources/preprocessing/dataset_modes/obfuscation_mode.py [124:158]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        if executor is None:
            executor = LocalExecutor(folder=self.folder.joinpath("log"))
        # apply BPE with tmp suffix
        _bpe_ext = self.bpe.ext
        self.bpe.ext += TMP_EXT
        super().apply_bpe(executor)
        self.bpe.ext = _bpe_ext
        # restore BPE on obfuscation special tokens
        jobs = []
        to_restore = list(
            chain(
                *[
                    self.folder.glob(f"{lang}.{split}.*{self.bpe.ext}{TMP_EXT}")
                    for split in DATASET_SPLITS
                    for lang in self.languages
                ]
            )
        )
        for f in to_restore:
            job = executor.submit(
                self.bpe.repair_bpe_for_obfuscation_file, f, f.with_suffix("")
            )
            jobs.append(job)
        for job in jobs:
            job.result()
        for f in to_restore:
            assert f.with_suffix("").is_file()
            f.unlink()

    def _get_vocab(self, executor: Executor = None):
        raise Exception(
            "Vocab should not be learnt from obfuscated data. Learn it on monolingual data."
            "Please provide vocab or learn them."
            "To do so, please run pipepline with monolingual mode until get_vocab."
        )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



