def extract_from_solution()

in preprocess/extract_obj.py [0:0]


def _is_single_lowercase(name):
    """Return True when *name* is exactly one lowercase character."""
    return name.islower() and len(name) == 1


def _prepend_headers(raw):
    """Prepend the standard include prelude to *raw*.

    The source is treated as C++ when it contains ``cin`` or ``cout``
    anywhere in its text, otherwise as C.

    Returns:
        A ``(source_with_headers, is_cpp)`` tuple.
    """
    if 'cin' in raw or 'cout' in raw:
        prelude = ('#include <iostream>\n#include <string.h>\n#include <math.h>\n'
                   '#include <algorithm>\n#include <iomanip>\n'
                   'using namespace std;\n')
        return prelude + raw, True
    prelude = ('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stdbool.h>\n'
               '#include <math.h>\n#include <stddef.h>\n')
    return prelude + raw, False


def _denoise_x86_objdump(dump):
    """Filter the blank-line separated chunks of an ``objdump`` listing.

    A chunk is dropped when it is the first one, when it is empty or starts
    with an empty line, or when its first line matches any pattern in the
    module-level ``ignore_x86_asm_block``.

    Returns:
        A ``(kept_lines, counted_len)`` tuple. ``kept_lines`` holds every
        line of the kept chunks except each chunk's first (header) line;
        ``counted_len`` is the total line count of the kept chunks including
        those header lines.
    """
    kept_lines = []
    counted_len = 0
    for idx, chunk in enumerate(dump.split("\n\n")):
        chunk_lines = chunk.splitlines()
        # An empty chunk (e.g. from a run of blank lines or a trailing
        # separator) yields [] and would otherwise raise IndexError below.
        if idx == 0 or not chunk_lines or chunk_lines[0] == "":
            continue
        header = chunk_lines[0]
        if any(re.search(pattern, header) is not None for pattern in ignore_x86_asm_block):
            continue
        kept_lines += chunk_lines[1:]
        counted_len += len(chunk_lines)
    return kept_lines, counted_len


def _remove_quietly(path):
    """Delete *path*, ignoring any OS-level failure (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def extract_from_solution(arg):
    """Patch a C/C++ solution file, compile it and write its assembly.

    Args:
        arg: A ``(solution_path, output_asm_path, asm_type)`` tuple.
            ``solution_path`` is the source file to read. The last three
            characters of ``output_asm_path`` are replaced to derive the
            intermediate ``c``/``bc``/``o`` paths and the final ``s`` path.
            ``asm_type`` selects the tool chain: ``'x86'`` (gcc/g++ followed
            by objdump) or ``'mips'`` (clang/clang++ followed by llc). Any
            other value performs the source patching but writes no files.

    Returns:
        A ``(lcs, errs, total_large_asm)`` tuple. ``lcs`` is always an empty
        list. ``errs`` contains ``solution_path`` when a subprocess raised
        ``CalledProcessError``. ``total_large_asm`` is 1 when the x86
        de-noised listing counted more than 1000 lines, otherwise 0.

    Note:
        Only ``subprocess.CalledProcessError`` is caught; every other
        exception propagates. Intermediate files are removed only after a
        successful run.
    """
    solution_path, output_asm_path, asm_type = arg
    # os.makedirs('') raises, so skip it when the path has no directory part.
    out_dir = os.path.dirname(output_asm_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    stem = output_asm_path[:-3]
    output_c_path = stem + 'c'
    output_bc_path = stem + 'bc'
    output_obj_path = stem + 'o'
    output_asm_path = stem + 's'

    lcs = []
    errs = []
    pre_df = {'LEN'}
    pre_var = []
    total_large_asm = 0
    try:
        with open(solution_path, 'rb') as f:
            content = f.read()
        try:
            raw = content.decode('utf-8')
        except UnicodeDecodeError:
            # iso-8859-1 maps every byte, so this fallback cannot fail.
            raw = content.decode('iso-8859-1')

        # Names reported by check_var (defined elsewhere in this module);
        # presumably identifiers the solution already declares.
        for elem in check_var(raw):
            pre_var.extend(filter(None, re.split('[, = [;]', elem[1])))

        # Candidate names to #define: everything reported by the malloc and
        # array checks, except single lowercase characters.
        for elem in check_malloc_def(raw):
            if not _is_single_lowercase(elem[0]):
                pre_df.add(elem[0])
        for elem in check_arr_def(raw):
            if not _is_single_lowercase(elem[2]):
                pre_df.add(elem[2])
            dims = check_arr_def_dim(elem[3])
            if dims and not _is_single_lowercase(dims[0][1]):
                pre_df.add(dims[0][1])

        if 'PI' not in pre_var:
            raw = "\n#define PI 3.14\n" + raw
        pre_df = pre_df.difference(pre_var)
        raw = check_sense(raw)

        # Fix-1 : error null
        # NOTE(review): plain substring substitutions, so identifiers that
        # merely contain 'null'/'Null' (e.g. nullptr) are rewritten as well.
        raw = re.sub('null', 'NULL', raw)
        raw = re.sub('Null', 'NULL', raw)

        # Fix-2 : missing definition
        # Sorted so the generated source does not depend on set ordering.
        raw = '\n'.join('#define ' + name + ' 100' for name in sorted(pre_df)) + '\n' + raw
        raw = "\n#define INT_MAX 2147483647\n #define INT_MIN -2147483648\n#define MAX 100\n" + raw

        # Fix-3 : check compilers
        if asm_type == 'x86':
            raw, is_cpp = _prepend_headers(raw)
            if is_cpp:
                compile_list = ['g++', '-w', '-o', output_obj_path, '-O0', output_c_path, '-std=c++11', '-lm']
            else:
                compile_list = ['gcc', '-w', '-o', output_obj_path, '-O0', output_c_path, '-lm']

            with open(output_c_path, "w") as file_c:
                file_c.write(raw)
            subprocess.check_output(compile_list)
            result = subprocess.check_output(
                ["objdump", '-dS', output_obj_path, '--section', '.text']).decode("utf-8")

            # De-noise asm
            result_denoise, result_denoise_len = _denoise_x86_objdump(result)
            if result_denoise_len > 1000:
                total_large_asm += 1

            with open(output_asm_path, 'w') as file_asm:
                file_asm.write("\n".join(result_denoise))

            _remove_quietly(output_obj_path)
            _remove_quietly(output_c_path)

        elif asm_type == 'mips':
            raw, is_cpp = _prepend_headers(raw)
            if is_cpp:
                compile_list_clang = ['clang++', '-emit-llvm', output_c_path, '-c', '-o', output_bc_path,
                                      '-std=c++11', '-lm']
            else:
                compile_list_clang = ['clang', '-emit-llvm', output_c_path, '-c', '-o', output_bc_path, '-lm']
            compile_list_llc = ['llc', output_bc_path, '-march=mipsel', '-relocation-model=static',
                                '-o', output_asm_path]

            with open(output_c_path, "w") as file_c:
                file_c.write(raw)

            # Tool diagnostics are discarded; failures surface through the
            # CalledProcessError handler below.
            with open(os.devnull, "w") as f:
                subprocess.check_output(compile_list_clang, stderr=f)
                subprocess.check_output(compile_list_llc, stderr=f)
            _remove_quietly(output_bc_path)
            _remove_quietly(output_c_path)

    except subprocess.CalledProcessError:
        errs.append(solution_path)
    return lcs, errs, total_large_asm