# codegen_sources/preprocessing/obfuscation/javalang_obfuscator.py
def obfuscate(java_program):
    """Obfuscate identifiers in a Java source string.

    Tokenizes ``java_program`` with javalang, renames declared identifiers to
    generated names (via ``ObfuscatedNamesGenerator``), rewrites the tokens
    that reference those declarations, and re-serializes the token stream.

    Parameters:
        java_program: Java source code as a single string.

    Returns:
        A tuple ``(obfuscated_code, name_dictionary)`` where the first item is
        the obfuscated source (tokens joined by spaces, one output line per
        original source line) and the second is whatever
        ``names_generator.get_dictionary()`` returns (presumably the
        original-to-obfuscated name mapping — confirm against
        ObfuscatedNamesGenerator).

    NOTE(review): the indentation of this function was reconstructed during
    review from a whitespace-stripped copy; the nesting of the replacement
    branches (in particular ``if is_replace_candidate:`` and the empty
    ``previous_declarations`` fallback) should be confirmed against the
    original file.
    """
    tokens = list(javalang.tokenizer.tokenize(java_program))
    declarations, declarations_per_vartype, calls_to_replace = get_variable_usages(
        java_program
    )
    names_generator = ObfuscatedNamesGenerator()
    # Finding the right tokens for declarations first: for each declaration,
    # rename the first token at or after the declaration's position whose text
    # matches the declared name, and record the new name on the declaration.
    for token_name, dec_list in declarations.items():
        for dec_info in dec_list:
            dec_position = dec_info["position"]
            # TODO could make it O(log(n)) with binary search for find first token
            for i, tok in enumerate([t for t in tokens if t.position >= dec_position]):
                if tok.value == token_name:
                    tok.value = names_generator.get_new_name(
                        token_name, dec_info["var_type"]
                    )
                    # TODO: check type for variable definitions?
                    dec_info["new_name"] = tok.value
                    break
    # Second pass over the full token stream: consume the pre-recorded
    # "special" calls/references (calls_to_replace is assumed position-ordered
    # — verify in get_variable_usages), then rewrite ordinary identifier
    # tokens from the declaration tables.
    calls_to_replace_index = 0
    for current_tok_index, tok in enumerate(tokens):
        if calls_to_replace_index < len(calls_to_replace):
            # handle special calls or references to replace
            current_call_to_replace = calls_to_replace[calls_to_replace_index]
            assert current_call_to_replace["var_type"] in ObfuscatedNameType
            relevant_declarations = declarations_per_vartype[
                current_call_to_replace["var_type"]
            ][current_call_to_replace["name"]]
            assert (
                len(relevant_declarations) > 0
            ), "No relevant declarations in special token to replace. It should have been filtered out"
            # Once the scan reaches the recorded position, advance the cursor
            # and look forward for tokens matching the recorded name.
            if tok.position >= current_call_to_replace["position"]:
                calls_to_replace_index += 1
                for advanced_tok_index in range(current_tok_index, len(tokens)):
                    if (
                        tokens[advanced_tok_index].value
                        == current_call_to_replace["name"]
                    ):
                        # NOTE(review): only FUNCTION-typed entries are
                        # rewritten in this branch; other var_types fall
                        # through untouched here — confirm that is intended.
                        if (
                            current_call_to_replace["var_type"]
                            == ObfuscatedNameType.FUNCTION
                        ):
                            if current_call_to_replace["qualifier"] is None:
                                # if there is no qualifier, the method is called directly
                                # (i.e. the token is not preceded by a ".")
                                is_replace_candidate = (
                                    advanced_tok_index == 0
                                    or tokens[advanced_tok_index - 1].value != "."
                                )
                            else:
                                # if there is a qualifier, the qualifier should be before the function call:
                                # walk backwards expecting alternating "." and qualifier-part tokens.
                                qualifier_split = current_call_to_replace[
                                    "qualifier"
                                ].split(".")
                                is_replace_candidate = advanced_tok_index > 2 * len(
                                    qualifier_split
                                )
                                for i, qual in enumerate(qualifier_split[::-1]):
                                    is_replace_candidate = (
                                        is_replace_candidate
                                        and tokens[
                                            advanced_tok_index - (2 * i + 1)
                                        ].value
                                        == "."
                                        and tokens[
                                            advanced_tok_index - (2 * i + 2)
                                        ].value
                                        == qual
                                    )
                            if is_replace_candidate:
                                tokens[
                                    advanced_tok_index
                                ].value = relevant_declarations[0]["new_name"]
        # handle other tokens using the declarations
        if isinstance(tok, Identifier) and tok.value in declarations:
            token_declarations = declarations[tok.value]
            tok_position = tok.position
            # Declarations that precede this token and were already renamed.
            previous_declarations = [
                dec
                for dec in token_declarations
                if dec["position"] < tok_position and "new_name" in dec
            ]
            # A "this.<name>" access can only refer to a field declaration.
            if (
                current_tok_index >= 2
                and tokens[current_tok_index - 1].value == "."
                and tokens[current_tok_index - 2].value == "this"
            ):
                previous_declarations = [
                    dec for dec in previous_declarations if dec["is_field"]
                ]
            if len(previous_declarations) == 0:
                # fields can be declared later in the file or in an inherited class
                previous_declarations = [
                    dec for dec in token_declarations if dec["is_field"]
                ]
            relevant_declaration = None
            if len(previous_declarations) == 0:
                # No matching variable/field declaration: fall back to a class
                # name, then to a function name, of the same identifier.
                class_declarations = declarations_per_vartype[ObfuscatedNameType.CLASS][
                    tok.value
                ]
                if len(class_declarations) > 0:
                    relevant_declaration = class_declarations[0]
                else:
                    func_declarations = declarations_per_vartype[
                        ObfuscatedNameType.FUNCTION
                    ][tok.value]
                    if len(func_declarations) > 0:
                        relevant_declaration = func_declarations[0]
            else:
                # Closest preceding declaration wins (innermost/latest scope).
                relevant_declaration = previous_declarations[-1]
            if relevant_declaration is not None:
                tok.value = relevant_declaration["new_name"]
    # Re-serialize: group tokens by their original source line, then join
    # tokens with spaces and lines with newlines. res_lines[0] stays empty
    # (prev_line starts at 0), so the output begins with a newline.
    res_lines = [[]]
    prev_line = 0
    for tok in tokens:
        if tok.position.line > prev_line:
            res_lines.append([])
            prev_line = tok.position.line
        res_lines[-1].append(tok.value)
    return (
        "\n".join([" ".join(line) for line in res_lines]),
        names_generator.get_dictionary(),
    )