# codegen_sources/preprocessing/obfuscation/javalang_obfuscator.py
def obfuscate(java_program):
    """Obfuscate identifiers in a Java source string.

    Tokenizes ``java_program`` with javalang, renames declared identifiers to
    generated names (via ``ObfuscatedNamesGenerator``), rewrites the tokens
    that reference those declarations, and re-serializes the token stream.

    Parameters:
        java_program: Java source code as a single string.

    Returns:
        A tuple ``(obfuscated_code, name_dictionary)`` where the first item is
        the obfuscated source (tokens joined by spaces, one output line per
        original source line) and the second is whatever
        ``names_generator.get_dictionary()`` returns (presumably the
        original-to-obfuscated name mapping — confirm against
        ObfuscatedNamesGenerator).

    NOTE(review): the indentation of this function was reconstructed during
    review from a whitespace-stripped copy; the nesting of the replacement
    branches (in particular ``if is_replace_candidate:`` and the empty
    ``previous_declarations`` fallback) should be confirmed against the
    original file.
    """
    tokens = list(javalang.tokenizer.tokenize(java_program))
    declarations, declarations_per_vartype, calls_to_replace = get_variable_usages(
        java_program
    )
    names_generator = ObfuscatedNamesGenerator()
    # Finding the right tokens for declarations first: for each declaration,
    # rename the first token at or after the declaration's position whose text
    # matches the declared name, and record the new name on the declaration.
    for token_name, dec_list in declarations.items():
        for dec_info in dec_list:
            dec_position = dec_info["position"]
            # TODO could make it O(log(n)) with binary search for find first token
            for i, tok in enumerate([t for t in tokens if t.position >= dec_position]):
                if tok.value == token_name:
                    tok.value = names_generator.get_new_name(
                        token_name, dec_info["var_type"]
                    )
                    # TODO: check type for variable definitions?
                    dec_info["new_name"] = tok.value
                    break
    # Second pass over the full token stream: consume the pre-recorded
    # "special" calls/references (calls_to_replace is assumed position-ordered
    # — verify in get_variable_usages), then rewrite ordinary identifier
    # tokens from the declaration tables.
    calls_to_replace_index = 0
    for current_tok_index, tok in enumerate(tokens):
        if calls_to_replace_index < len(calls_to_replace):
            # handle special calls or references to replace
            current_call_to_replace = calls_to_replace[calls_to_replace_index]
            assert current_call_to_replace["var_type"] in ObfuscatedNameType
            relevant_declarations = declarations_per_vartype[
                current_call_to_replace["var_type"]
            ][current_call_to_replace["name"]]
            assert (
                len(relevant_declarations) > 0
            ), "No relevant declarations in special token to replace. It should have been filtered out"
            # Once the scan reaches the recorded position, advance the cursor
            # and look forward for tokens matching the recorded name.
            if tok.position >= current_call_to_replace["position"]:
                calls_to_replace_index += 1
                for advanced_tok_index in range(current_tok_index, len(tokens)):
                    if (
                        tokens[advanced_tok_index].value
                        == current_call_to_replace["name"]
                    ):
                        # NOTE(review): only FUNCTION-typed entries are
                        # rewritten in this branch; other var_types fall
                        # through untouched here — confirm that is intended.
                        if (
                            current_call_to_replace["var_type"]
                            == ObfuscatedNameType.FUNCTION
                        ):
                            if current_call_to_replace["qualifier"] is None:
                                # if there is no qualifier, the method is called directly
                                # (i.e. the token is not preceded by a ".")
                                is_replace_candidate = (
                                    advanced_tok_index == 0
                                    or tokens[advanced_tok_index - 1].value != "."
                                )
                            else:
                                # if there is a qualifier, the qualifier should be before the function call:
                                # walk backwards expecting alternating "." and qualifier-part tokens.
                                qualifier_split = current_call_to_replace[
                                    "qualifier"
                                ].split(".")
                                is_replace_candidate = advanced_tok_index > 2 * len(
                                    qualifier_split
                                )
                                for i, qual in enumerate(qualifier_split[::-1]):
                                    is_replace_candidate = (
                                        is_replace_candidate
                                        and tokens[
                                            advanced_tok_index - (2 * i + 1)
                                        ].value
                                        == "."
                                        and tokens[
                                            advanced_tok_index - (2 * i + 2)
                                        ].value
                                        == qual
                                    )
                            if is_replace_candidate:
                                tokens[
                                    advanced_tok_index
                                ].value = relevant_declarations[0]["new_name"]
        # handle other tokens using the declarations
        if isinstance(tok, Identifier) and tok.value in declarations:
            token_declarations = declarations[tok.value]
            tok_position = tok.position
            # Declarations that precede this token and were already renamed.
            previous_declarations = [
                dec
                for dec in token_declarations
                if dec["position"] < tok_position and "new_name" in dec
            ]
            # A "this.<name>" access can only refer to a field declaration.
            if (
                current_tok_index >= 2
                and tokens[current_tok_index - 1].value == "."
                and tokens[current_tok_index - 2].value == "this"
            ):
                previous_declarations = [
                    dec for dec in previous_declarations if dec["is_field"]
                ]
            if len(previous_declarations) == 0:
                # fields can be declared later in the file or in an inherited class
                previous_declarations = [
                    dec for dec in token_declarations if dec["is_field"]
                ]
            relevant_declaration = None
            if len(previous_declarations) == 0:
                # No matching variable/field declaration: fall back to a class
                # name, then to a function name, of the same identifier.
                class_declarations = declarations_per_vartype[ObfuscatedNameType.CLASS][
                    tok.value
                ]
                if len(class_declarations) > 0:
                    relevant_declaration = class_declarations[0]
                else:
                    func_declarations = declarations_per_vartype[
                        ObfuscatedNameType.FUNCTION
                    ][tok.value]
                    if len(func_declarations) > 0:
                        relevant_declaration = func_declarations[0]
            else:
                # Closest preceding declaration wins (innermost/latest scope).
                relevant_declaration = previous_declarations[-1]
            if relevant_declaration is not None:
                tok.value = relevant_declaration["new_name"]
    # Re-serialize: group tokens by their original source line, then join
    # tokens with spaces and lines with newlines. res_lines[0] stays empty
    # (prev_line starts at 0), so the output begins with a newline.
    res_lines = [[]]
    prev_line = 0
    for tok in tokens:
        if tok.position.line > prev_line:
            res_lines.append([])
            prev_line = tok.position.line
        res_lines[-1].append(tok.value)
    return (
        "\n".join([" ".join(line) for line in res_lines]),
        names_generator.get_dictionary(),
    )