codegen_sources/preprocessing/dataset_modes/obfuscation_functions_mode.py [47:101]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            folder=folder,
            languages=languages,
            bpe=bpe,
            parallel_dataset=True,
            processed_lines=processed_lines,
            nb_train_split=nb_train_split,
            keep_comments=keep_comments,
        )

    def checkpoint(
        self, input_path: str, process_strings: bool
    ) -> submitit.helpers.DelayedSubmission:
        """Build the submission used to resume this job after an interruption.

        A fresh instance of the same class is created from this instance's
        configuration and wrapped, together with the original call arguments,
        in a ``DelayedSubmission``.

        Args:
            input_path: forwarded unchanged to the resumed call.
            process_strings: forwarded unchanged to the resumed call.

        Returns:
            A ``DelayedSubmission`` holding the new instance and the two
            arguments above.
        """
        # keep_comments has to be forwarded explicitly: extract_data_for_line
        # reads self.keep_comments, so leaving it out would make the resumed
        # job tokenize with whatever the constructor falls back to instead of
        # the setting this job was started with.
        # NOTE(review): nb_train_split is not forwarded either; pass it as
        # well if the instance stores it as an attribute -- confirm against
        # the base class.
        resumed_mode = self.__class__(
            self.folder,
            self.languages,
            self.bpe,
            self.processed_lines,
            keep_comments=self.keep_comments,
        )
        return submitit.helpers.DelayedSubmission(
            resumed_mode, input_path, process_strings
        )

    @timeout(60)
    def extract_data_for_line(
        self,
        line_id: int,
        json_line: dict,
        process_strings: bool,
        lang_processor: LangProcessor,
    ):
        default_return = line_id, None, None
        if "content" not in json_line:
            return default_return

        content = json_line["content"]
        for k, v in REPLACE_DICT.items():
            content = content.replace(k, v)

        try:
            obfuscated, dico = lang_processor.obfuscate_code(content)
            tokenized_obfuscated_file = " ".join(
                lang_processor.tokenize_code(
                    obfuscated,
                    process_strings=process_strings,
                    keep_comments=self.keep_comments,
                )
            )
        except NotImplementedError:
            logger.error(
                f"Obfuscate method is not implemented for {lang_processor.__class__.__name__}"
            )
            raise
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logger.warning(f"Error obfuscating content {e} \n")
            return default_return
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


codegen_sources/preprocessing/dataset_modes/obfuscation_mode.py [44:97]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            folder=folder,
            languages=languages,
            bpe=bpe,
            parallel_dataset=True,
            processed_lines=processed_lines,
            nb_train_split=nb_train_split,
            keep_comments=keep_comments,
        )

    def checkpoint(
        self, input_path: str, process_strings: bool
    ) -> submitit.helpers.DelayedSubmission:
        """Build the submission used to resume this job after an interruption.

        A fresh instance of the same class is created from this instance's
        configuration and wrapped, together with the original call arguments,
        in a ``DelayedSubmission``.

        Args:
            input_path: forwarded unchanged to the resumed call.
            process_strings: forwarded unchanged to the resumed call.

        Returns:
            A ``DelayedSubmission`` holding the new instance and the two
            arguments above.
        """
        # keep_comments has to be forwarded explicitly: extract_data_for_line
        # reads self.keep_comments, so leaving it out would make the resumed
        # job tokenize with whatever the constructor falls back to instead of
        # the setting this job was started with.
        # NOTE(review): nb_train_split is not forwarded either; pass it as
        # well if the instance stores it as an attribute -- confirm against
        # the base class.
        resumed_mode = self.__class__(
            self.folder,
            self.languages,
            self.bpe,
            self.processed_lines,
            keep_comments=self.keep_comments,
        )
        return submitit.helpers.DelayedSubmission(
            resumed_mode, input_path, process_strings
        )

    @timeout(60)
    def extract_data_for_line(
        self,
        line_id: int,
        json_line: dict,
        process_strings: bool,
        lang_processor: LangProcessor,
    ):
        default_return = line_id, None, None
        if "content" not in json_line:
            return default_return

        content = json_line["content"]
        for k, v in REPLACE_DICT.items():
            content = content.replace(k, v)
        try:
            obfuscated, dico = lang_processor.obfuscate_code(content)
            tokenized_obfuscated_file = " ".join(
                lang_processor.tokenize_code(
                    obfuscated,
                    process_strings=process_strings,
                    keep_comments=self.keep_comments,
                )
            )
        except NotImplementedError:
            logger.error(
                f"Obfuscate method is not implemented for {lang_processor.__class__.__name__}"
            )
            raise
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logger.warning(f"Error obfuscating content {e} \n")
            return default_return
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -