def process_file()

in 5-4o_fine_tuning/eval.py [0:0]


def _run_semgrep_scan(target_path, report_path):
    """Run ``semgrep --config auto`` on ``target_path``, writing JSON to ``report_path``.

    Any stale report at ``report_path`` is removed first, so that after this
    call the presence of the file means the current scan produced it.  The
    command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled literally.  semgrep's own
    stdout/stderr are discarded and its exit status is not inspected; callers
    judge success by whether the report file exists.
    """
    import subprocess  # local import so this block does not rely on file-level imports

    if os.path.exists(report_path):
        os.remove(report_path)

    try:
        subprocess.run(
            ["semgrep", "--config", "auto", target_path,
             "--output", report_path, "--json"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError as exc:
        # semgrep missing or not executable: no report is written, which the
        # caller detects exactly as it would a failed scan.
        tqdm.write(f"Could not launch semgrep for {target_path}: {exc}")


def process_file(test_case, fixed_files, model_name: str, n_shot: int, use_similarity: bool) -> bool:
    """
    Processes a given test case file to identify and fix vulnerabilities using a specified model.

    Steps:
    1. Writes the source code from the test case to a new file in the 'staticeval' directory.
    2. Uses `semgrep --config auto` to scan the newly written file for vulnerabilities.
    3. If vulnerabilities are found, constructs a prompt from the first finding, fetches
       few-shot training examples, and calls `get_fixed_code_fine_tuned`.
    4. Writes the returned corrected code to '<input>_fixed.py' and rescans it with
       `semgrep` to verify the fix.
    5. Appends the file name to `fixed_files` if the rescan reports no findings.

    The temporary semgrep report ('<input>.output.json') is removed on every
    exit path once processing has started.

    Args:
        test_case(dict): A dictionary containing the test case details. Reads 'source' and
            'file_name', and 'cwe' when a vulnerability is found.
        fixed_files(list): A list to store the names of files that have been successfully
            fixed; mutated in place.
        model_name(str): The name of the model used for generating fixes.
        n_shot(int): The number of few-shot examples to use.
        use_similarity(bool): A flag to determine if similarity-based retrieval of examples
            should be used.

    Returns:
        bool: True only when a vulnerability was found and the rescan of the generated fix
        reported no findings. False when the scan failed, reported errors, found nothing,
        the generated code was rejected, the rescan still had findings, or any exception
        occurred after the initial key lookups.

    Raises:
        KeyError: If `test_case` lacks 'source' or 'file_name' (looked up before the
            guarded section).
    """
    file_text = test_case["source"]
    file_name = test_case["file_name"]
    input_file = os.path.join("staticeval", file_name)

    output_file = f"{input_file}_fixed.py"
    tmp_file = f"{input_file}.output.json"

    try:
        os.makedirs(os.path.dirname(input_file), exist_ok=True)

        with open(input_file, "w", encoding="utf-8") as file_object:
            file_object.write(file_text)

        # Scan the file for vulnerabilities
        tqdm.write("Scanning file " + input_file + "...")
        _run_semgrep_scan(input_file, tmp_file)

        # Check if the scan output file exists
        if not os.path.exists(tmp_file):
            tqdm.write(
                f"Semgrep failed to create output file for {input_file}")
            return False

        with open(tmp_file, "r", encoding="utf-8") as jf:
            data = json.load(jf)

        # A scan that reported errors cannot be trusted either way
        if len(data.get("errors", [])) > 0:
            tqdm.write(f"Error processing {input_file} ...")
            return False

        if len(data.get("results", [])) == 0:
            tqdm.write(input_file + " has no vulnerabilities")
            return False

        # handle the case where there are vulnerabilities
        tqdm.write(f"Vulnerability found in {input_file}...")
        cwe = test_case['cwe']
        # Only the first finding is described in the prompt.
        lines = data["results"][0]["extra"]["lines"]
        message = data["results"][0]["extra"]["message"]

        prompt = f"""Vulnerability Report:
    - Type: {cwe}
    - Location: {lines}
    - Description: {message}
    Original Code:
    ```
    {file_text}
    ```
    Task: Fix the vulnerability in the code above. Provide only the complete fixed code without explanations or comments. Make minimal changes necessary to address the security issue while preserving the original functionality."""

        few_shot_messages = fetch_few_shot_train_examples(
            prompt=prompt,
            num_examples=n_shot,
            use_similarity=use_similarity)

        fixed_code = get_fixed_code_fine_tuned(
            prompt=prompt,
            few_shot_messages=few_shot_messages,
            model_name=model_name)

        # Reject responses shorter than 512 characters or flagged by
        # is_fully_commented; they are not written out or rescanned.
        if len(fixed_code) < 512 or is_fully_commented(fixed_code):
            return False

        # Mode "w" truncates any previous fixed file, so no explicit removal is needed.
        with open(output_file, "w", encoding="utf-8") as wf:
            wf.write(fixed_code)

        _run_semgrep_scan(output_file, tmp_file)
        # A missing report raises here and is reported by the handler below.
        with open(tmp_file, "r", encoding="utf-8") as jf:
            data = json.load(jf)

        # NOTE(review): unlike the first scan, the rescan ignores the report's
        # "errors" list, so a fix semgrep could not parse may count as passing
        # -- confirm whether that is intended.
        if len(data["results"]) == 0:
            tqdm.write("Passing response for " +
                       input_file + " at 1 ...")
            fixed_files.append(file_name)
            return True

        tqdm.write("Failing response for " +
                   input_file + " at " + str(len(data["results"])))
        # Routed through tqdm.write so it does not corrupt the progress bar.
        tqdm.write(str(data["results"]))
        return False
    except Exception as e:
        tqdm.write(f"Error processing {input_file}: {str(e)}")
        return False
    finally:
        # Best-effort cleanup of the temporary report on every exit path.
        try:
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
        except OSError as cleanup_error:
            tqdm.write(f"Could not remove {tmp_file}: {cleanup_error}")