in preprocess/extract_obj.py [0:0]
def _read_solution(solution_path):
    """Return the text of *solution_path*, decoded as UTF-8 with an ISO-8859-1 fallback."""
    with open(solution_path, 'rb') as f:
        content = f.read()
    try:
        return content.decode('utf-8')
    except UnicodeDecodeError:
        # ISO-8859-1 maps every byte value to a code point, so this cannot fail.
        return content.decode('iso-8859-1')


def _is_single_lowercase(name):
    """Return True when *name* is exactly one lowercase character."""
    return name.islower() and len(name) == 1


def _placeholder_macros(raw):
    """Collect the identifiers that will receive a ``#define <name> 100``.

    Returns a ``(macros, declared)`` pair: ``declared`` is the list of names
    split out of the ``check_var`` matches, and ``macros`` is the set of
    candidate names (always seeded with ``'LEN'``) minus everything in
    ``declared``.
    """
    declared = []
    for match in check_var(raw):
        declared += list(filter(None, re.split('[, = [;]', match[1])))

    macros = {'LEN'}
    for match in check_malloc_def(raw):
        if not _is_single_lowercase(match[0]):
            macros.add(match[0])
    for match in check_arr_def(raw):
        if not _is_single_lowercase(match[2]):
            macros.add(match[2])
        dims = check_arr_def_dim(match[3])
        if dims and not _is_single_lowercase(dims[0][1]):
            macros.add(dims[0][1])
    return macros.difference(declared), declared


def _patch_source(raw):
    """Apply the textual fix-ups that are independent of the target architecture."""
    macros, declared = _placeholder_macros(raw)
    if 'PI' not in declared:
        raw = "\n#define PI 3.14\n" + raw
    raw = check_sense(raw)
    # Fix-1: lowercase / capitalised "null" -> NULL (plain substring replacement).
    raw = raw.replace('null', 'NULL').replace('Null', 'NULL')
    # Fix-2: give every undeclared size-like identifier a placeholder value.
    # Sorted so the generated source does not depend on set iteration order.
    raw = '\n'.join('#define ' + name + ' 100' for name in sorted(macros)) + '\n' + raw
    raw = "\n#define INT_MAX 2147483647\n #define INT_MIN -2147483648\n#define MAX 100\n" + raw
    return raw


def _with_prelude(raw):
    """Prepend standard headers to *raw*; return ``(source, is_cxx)``.

    The source is treated as C++ when it contains the substring ``cin`` or
    ``cout``; otherwise it is treated as C.
    """
    is_cxx = 'cin' in raw or 'cout' in raw
    if is_cxx:
        prelude = ('#include <iostream>\n#include <string.h>\n#include <math.h>\n'
                   '#include <algorithm>\n#include <iomanip>\n'
                   'using namespace std;\n')
    else:
        prelude = ('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stdbool.h>\n'
                   '#include <math.h>\n#include <stddef.h>\n')
    return prelude + raw, is_cxx


def _write_text(path, text):
    """Write *text* to *path* as UTF-8, closing the file even on error."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)


def _remove_quietly(path):
    """Best-effort delete of an intermediate file; failures are ignored."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup only: a file that cannot be removed must not fail the extraction.
        pass


def _denoise_x86(listing):
    """Filter an ``objdump`` listing down to the blocks of interest.

    The listing is split on blank lines.  The first chunk, empty chunks, chunks
    whose first line is empty, and chunks whose first line matches any pattern
    in the module-level ``ignore_x86_asm_block`` are dropped.  For every kept
    chunk the first line is discarded and the rest is collected.

    Returns ``(kept_lines, counted)`` where ``counted`` is the total number of
    lines (first lines included) of the kept chunks.
    """
    kept_lines = []
    counted = 0
    for index, chunk in enumerate(listing.split("\n\n")):
        lines = chunk.splitlines()
        # ``not lines`` guards against empty chunks, for which lines[0] would
        # raise IndexError.
        if index == 0 or not lines or lines[0] == "":
            continue
        if any(re.search(pattern, lines[0]) is not None for pattern in ignore_x86_asm_block):
            continue
        kept_lines += lines[1:]
        counted += len(lines)
    return kept_lines, counted


def _build_x86(raw, c_path, obj_path, asm_path):
    """Compile with gcc/g++, disassemble ``.text`` with objdump, write the filtered listing.

    Returns 1 when the kept chunks total more than 1000 lines, else 0.
    Raises ``subprocess.CalledProcessError`` when the compiler or objdump fails;
    in that case the intermediate files are left on disk.
    """
    source, is_cxx = _with_prelude(raw)
    if is_cxx:
        compile_cmd = ['g++', '-w', '-o', obj_path, '-O0', c_path, '-std=c++11', '-lm']
    else:
        compile_cmd = ['gcc', '-w', '-o', obj_path, '-O0', c_path, '-lm']
    _write_text(c_path, source)
    subprocess.check_output(compile_cmd)
    listing = subprocess.check_output(
        ["objdump", '-dS', obj_path, '--section', '.text']).decode("utf-8")
    kept_lines, counted = _denoise_x86(listing)
    _write_text(asm_path, "\n".join(kept_lines))
    _remove_quietly(obj_path)
    _remove_quietly(c_path)
    return 1 if counted > 1000 else 0


def _build_mips(raw, c_path, bc_path, asm_path):
    """Compile to LLVM bitcode with clang/clang++, then lower to mipsel assembly with llc.

    Raises ``subprocess.CalledProcessError`` when either tool fails; in that
    case the intermediate files are left on disk.
    """
    source, is_cxx = _with_prelude(raw)
    if is_cxx:
        clang_cmd = ['clang++', '-emit-llvm', c_path, '-c', '-o', bc_path, '-std=c++11', '-lm']
    else:
        clang_cmd = ['clang', '-emit-llvm', c_path, '-c', '-o', bc_path, '-lm']
    llc_cmd = ['llc', bc_path, '-march=mipsel', '-relocation-model=static', '-o', asm_path]
    _write_text(c_path, source)
    # Tool diagnostics are discarded; failure is signalled through the exit status.
    subprocess.check_output(clang_cmd, stderr=subprocess.DEVNULL)
    subprocess.check_output(llc_cmd, stderr=subprocess.DEVNULL)
    _remove_quietly(bc_path)
    _remove_quietly(c_path)


def extract_from_solution(arg):
    """Turn one C/C++ solution file into an assembly listing.

    The source is read, patched so that it is more likely to compile (placeholder
    ``#define``s, ``NULL`` spelling, standard headers), compiled for the requested
    architecture, and the resulting assembly is written next to the requested
    output path with an ``s`` suffix.

    Args:
        arg: A ``(solution_path, output_asm_path, asm_type)`` tuple.  ``asm_type``
            is ``'x86'`` or ``'mips'``; any other value performs no compilation.
            The last three characters of ``output_asm_path`` are replaced to form
            the ``c`` / ``bc`` / ``o`` / ``s`` paths.
            NOTE(review): this presumably expects a three-character extension
            such as ``.asm`` -- confirm against the caller.

    Returns:
        ``(lcs, errs, total_large_asm)``: ``lcs`` is always an empty list;
        ``errs`` contains ``solution_path`` when an external tool exited with a
        non-zero status; ``total_large_asm`` is 1 when the kept x86 chunks total
        more than 1000 lines, otherwise 0.

    Raises:
        Any exception other than ``subprocess.CalledProcessError`` (for example a
        missing input file or a missing compiler binary) propagates to the caller.
    """
    solution_path, output_asm_path, asm_type = arg
    out_dir = os.path.dirname(output_asm_path)
    if out_dir:
        # os.makedirs('') raises, so skip it for paths without a directory part.
        os.makedirs(out_dir, exist_ok=True)

    stem = output_asm_path[:-3]
    output_c_path = stem + 'c'
    output_bc_path = stem + 'bc'
    output_obj_path = stem + 'o'
    output_asm_path = stem + 's'

    lcs = []
    errs = []
    total_large_asm = 0
    try:
        raw = _patch_source(_read_solution(solution_path))
        # Fix-3: choose the toolchain from the target and the detected language.
        if asm_type == 'x86':
            total_large_asm += _build_x86(raw, output_c_path, output_obj_path, output_asm_path)
        elif asm_type == 'mips':
            _build_mips(raw, output_c_path, output_bc_path, output_asm_path)
    except subprocess.CalledProcessError:
        errs.append(solution_path)
    return lcs, errs, total_large_asm