in preprocessing/src/code_tokenizer.py [0:0]
def _read_java_function(tokens, i):
    """Collect the tokens of the function whose parameter list closes at ``tokens[i.i]``.

    The cursor ``i`` is first moved backwards to the token that precedes the
    declaration (``;``, ``}``, ``{``, or the end of a comment), then forwards
    up to the brace that closes the function body.  A comment that directly
    precedes the declaration is kept as part of the function text.

    Args:
        tokens: list of token strings being scanned.
        i: cursor object exposing ``i`` (current index), ``next()`` and
            ``prev()``; it is advanced in place.

    Returns:
        A ``(function_tokens, is_static)`` tuple.  ``is_static`` is True when
        ``static`` appears in the declaration, i.e. between the end of the
        optional leading comment and the brace opening the body.

    Raises:
        Whatever ``i.prev()``, ``i.next()`` or indexing ``tokens`` raise when
        the scan leaves the token list, except while reading the body (see
        the comment in the last loop).
    """
    token = tokens[i.i]

    # Walk backwards to whatever ends the previous statement/block/comment.
    while token not in (';', '}', '{', '*/', 'ENDCOM'):
        i.prev()
        token = tokens[i.i]

    # Map a comment terminator to the token that opens that kind of comment.
    comment_open = {'*/': '/*', 'ENDCOM': '//'}.get(token)
    if comment_open is None:
        # Plain boundary: the declaration starts on the following token.
        i.next()
        token = tokens[i.i]
        function = [token]
        declaration_start = 0
    else:
        # Rewind to the start of the comment, then replay it forwards so
        # that the whole comment is part of the collected function.
        comment_close = token
        while token != comment_open:
            i.prev()
            token = tokens[i.i]
        function = [token]
        while token != comment_close:
            i.next()
            token = tokens[i.i]
            function.append(token)
        declaration_start = len(function)

    # Copy the declaration up to and including the brace opening the body.
    while token != '{':
        i.next()
        token = tokens[i.i]
        function.append(token)

    # Only the declaration is inspected: words or braces inside the leading
    # comment must not influence the static / non-static classification.
    is_static = 'static' in function[declaration_start:-1]

    # Copy the body, tracking brace depth until the opening brace is matched.
    depth = 1
    while depth > 0:
        try:
            i.next()
        except StopIteration:
            # The cursor signalled the end of the input inside the body:
            # keep what was collected so far (truncated function).
            break
        token = tokens[i.i]
        if token == '{':
            depth += 1
        elif token == '}':
            depth -= 1
        function.append(token)

    return function, is_static


def extract_functions_java(s):
    """Extract Java functions from a whitespace-separated token string.

    A function is detected at a ``)`` token that is immediately followed by
    ``{``, or by ``throws`` with ``{`` two tokens later (so exactly one token
    between ``throws`` and the body; longer ``throws`` lists are not
    detected).  Each detected function is joined with single spaces and
    passed through ``remove_java_annotation``.

    Args:
        s: tokenized Java source; tokens are obtained with ``s.split()``.

    Returns:
        A ``(functions_standalone, functions_class)`` tuple of lists of
        strings.  Functions whose declaration contains ``static`` go to the
        first list, all others to the second.  Both lists are empty when the
        first token cannot be read (e.g. empty input).

    Note:
        Extraction is best effort: any ``Exception`` raised while scanning
        (including the cursor running off either end of the token list)
        stops the scan and the functions collected so far are returned.
        ``KeyboardInterrupt`` and ``SystemExit`` are never swallowed.
    """
    tokens = s.split()
    i = ind_iter(len(tokens))
    functions_standalone = []
    functions_class = []
    try:
        token = tokens[i.i]
    except Exception:
        # Nothing to scan.
        return [], []

    while True:
        try:
            # detect function
            if token == ')' and (
                tokens[i.i + 1] == '{'
                or (tokens[i.i + 1] == 'throws' and tokens[i.i + 3] == '{')
            ):
                function, is_static = _read_java_function(tokens, i)
                target = functions_standalone if is_static else functions_class
                target.append(remove_java_annotation(' '.join(function)))
            i.next()
            token = tokens[i.i]
        except Exception:
            # End of input (or any scanning error): stop and return what
            # has been extracted so far.
            break
    return functions_standalone, functions_class