In [None]:
import os
import json
import time
import uuid
import random
import openai
import markdown
import textwrap
from tqdm import tqdm
from bs4 import BeautifulSoup
from datasets import load_dataset
from dotenv import load_dotenv
from openai import AzureOpenAI, RateLimitError

load_dotenv()  # take environment variables from .env.

# Azure OpenAI client shared by the notebook; call_llm() issues its requests through it.
# Endpoint, key and API version are read from the environment; os.getenv returns None
# for any variable that is not set.
client = AzureOpenAI(
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key        = os.getenv("AZURE_OPENAI_API_KEY"),
    api_version    = os.getenv("AZURE_OPENAI_API_VERSION")
)

# Run configuration. None of the constants below is referenced in the cells visible
# here; presumably they are consumed by other cells or notebooks -- verify before changing.
LANGUAGE = "English"
MODEL_NAME = "gpt-4o"
MODEL_NAME_FOR_ANSWER = "gpt-4o"

MAX_NUMBER_OF_FIELDS = 1
MAX_NUMBER_OF_SUBJECTS = 2
MAX_NUMBER_OF_SUBTOPICS = 3
MAX_NUMBER_OF_SESSION_NAME = 3
NUM_ITERATIONS = 1
NUM_QUESTIONS_PER_ITERATION = 3
QUESTION_MAX_TOKENS = 256
# NOTE(review): "BACTH" looks like a typo for "BATCH"; a rename would have to be applied
# to every user of these two names at the same time.
QUESTION_BACTH_SIZE = 3
ANSWER_BACTH_SIZE = 3
OUTPUT_DIR = "./outputs"
# First four characters of a freshly generated random UUID4 string (differs on every run).
UUID = str(uuid.uuid4())[:4]

In [None]:
### Initial Evolving Method
import re
import random
def call_llm(prompt, model_name="gpt-4o", max_tokens=None, **kwargs):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the one ``user`` chat message.
        model_name: Value passed as ``model`` to the chat-completions call.
        max_tokens: Optional cap on the number of generated tokens. Previously
            this argument was accepted but never sent to the API, so values
            supplied by callers were silently ignored. It is now forwarded when
            given; ``None`` (the default) sends no limit, which matches the
            effective behaviour of earlier calls that did not pass it.
        **kwargs: Any further keyword arguments (e.g. ``temperature``,
            ``top_p``, ``response_format``) are forwarded unchanged to
            ``client.chat.completions.create``.

    Returns:
        The content of the first choice with surrounding whitespace removed,
        or ``""`` when the message carries no text content.
    """
    if max_tokens is not None:
        kwargs["max_tokens"] = max_tokens

    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        **kwargs,
    )

    # ``content`` can be None (for instance when the reply has no text part);
    # guard it so .strip() does not fail with an AttributeError.
    content = response.choices[0].message.content
    return (content or "").strip()

def get_step_0_evolved_instruction(instruction, **kwargs):
    """Ask the LLM for a more complex rewrite of ``instruction`` (4-step prompt).

    Args:
        instruction: The instruction text to evolve; inserted into the prompt.
        **kwargs: Forwarded unchanged to ``call_llm``.

    Returns:
        The text following the ``Step 4 #Finally Rewritten Instruction#:``
        header in the reply, with surrounding whitespace removed.

    Raises:
        ValueError: If the reply does not contain that header (for example
            because the model ignored the format or the reply was cut off).
    """
    prompt = f"""You are an Instruction Rewriter that rewrites the given #Instruction# into a more complex version. Please follow the steps below to rewrite the given "#Instruction#" into a more complex version.

    ### Step 1: Please read the "#Instruction#" carefully and list all the possible methods to make this instruction more complex (to make it a bit harder for well-known AI assistants such as ChatGPT and GPT4 to handle). Please do not provide methods to change the language of the instruction!
    ### Step 2: Please create a comprehensive plan based on the #Methods List# generated in Step 1 to make the #Instruction# more complex. The plan should include several methods from the #Methods List#.
    ### Step 3: Please execute the plan step by step and provide the #Rewritten Instruction#. #Rewritten Instruction# can only add 10 to 20 words into the "#Instruction#".
    ### Step 4: Please carefully review the #Rewritten Instruction# and identify any unreasonable parts. Ensure that the #Rewritten Instruction# is only a more complex version of the #Instruction#. Just provide the #Finally Rewritten Instruction# without any explanation.

    Please reply strictly in the following format:

    ### Step 1 #Methods List#:
    ### Step 2 #Plan#:
    ### Step 3 #Rewritten Instruction#:
    ### Step 4 #Finally Rewritten Instruction#:

    #Instruction#: {instruction}
    """

    output = call_llm(prompt, **kwargs)
    # re.search returns None when the header is absent; report that explicitly
    # instead of failing with "'NoneType' object has no attribute 'group'".
    match = re.search(r"Step 4 #Finally Rewritten Instruction#:(.*)", output, re.DOTALL)
    if match is None:
        raise ValueError(
            "LLM reply does not contain 'Step 4 #Finally Rewritten Instruction#:'; "
            f"reply starts with: {output[:200]!r}"
        )
    return match.group(1).strip()

def get_step_1_evolved_instruction(instruction, **kwargs):
    """Ask the LLM for a more complex rewrite of ``instruction`` (5-step prompt).

    Args:
        instruction: The instruction text to evolve; inserted into the prompt.
        **kwargs: Forwarded unchanged to ``call_llm``.

    Returns:
        The text following the ``Step 5 #Finally Rewritten Instruction#:``
        header in the reply, with surrounding whitespace removed.

    Raises:
        ValueError: If the reply does not contain that header (for example
            because the model ignored the format or the reply was cut off).
    """
    prompt = f"""You are an Instruction Rewriter that rewrites the given #Instruction# into a more complex version. Please follow the steps below to rewrite the given "#Instruction#" into a more complex version.

    ### Step 1: Carefully read the initial instruction and identify all the elements involved - this includes variables, constants, operations, and conditions.
    ### Step 2: Consider how each element could be made more complex. For variables, this could involve introducing more variables or making the existing variables dependent on others. For constants, consider changing them to variables or making them dependent on other factors. For operations, consider introducing more complex operations or multiple steps. For conditions, consider adding more conditions or making the existing conditions more complex.
    ### Step 3: Formulate a plan to integrate these complexities into the instruction. Ensure that the changes are coherent and relevant to the initial problem context. The plan should not just randomly add complexity but should make the problem more interesting or challenging in a meaningful way.
    ### Step 4: Rewrite the instruction according to the plan. Ensure that the rewritten instruction is still understandable and that it accurately represents the initial problem context. The rewritten instruction should only add 10 to 20 words to the original instruction.
    ### Step 5: Review the rewritten instruction and check for any inaccuracies or inconsistencies. Make sure that the rewritten instruction is a more complex version of the original instruction and not a completely different problem. If any parts of the rewritten instruction are unreasonable or do not fit the problem context, revise them as necessary.

    Please reply strictly in the following format:
    
    ### Step 1 #Elements Identified#:
    ### Step 2 #Complexity Additions#:
    ### Step 3 #Plan#:
    ### Step 4 #Rewritten Instruction#:
    ### Step 5 #Finally Rewritten Instruction#:

    #Instruction#: {instruction}
    """
    output = call_llm(prompt, **kwargs)
    # re.search returns None when the header is absent; report that explicitly
    # instead of failing with "'NoneType' object has no attribute 'group'".
    match = re.search(r"Step 5 #Finally Rewritten Instruction#:(.*)", output, re.DOTALL)
    if match is None:
        raise ValueError(
            "LLM reply does not contain 'Step 5 #Finally Rewritten Instruction#:'; "
            f"reply starts with: {output[:200]!r}"
        )
    return match.group(1).strip()

def get_step_2_evolved_instruction(instruction, **kwargs):
    """Ask the LLM for a more complex rewrite of ``instruction`` (refined 5-step prompt).

    The prompt has the same five steps as ``get_step_1_evolved_instruction``
    with extra guidance in steps 3 and 4 (avoid irrelevant concepts, keep the
    progression of complexity gradual).

    Args:
        instruction: The instruction text to evolve; inserted into the prompt.
        **kwargs: Forwarded unchanged to ``call_llm``.

    Returns:
        The text following the ``Step 5 #Finally Rewritten Instruction#:``
        header in the reply, with surrounding whitespace removed.

    Raises:
        ValueError: If the reply does not contain that header (for example
            because the model ignored the format or the reply was cut off).
    """
    prompt = f"""You are an Instruction Rewriter that rewrites the given #Instruction# into a more complex version. Please follow the steps below to rewrite the given "#Instruction#" into a more complex version.

    ### Step 1: Carefully read the initial instruction and identify all the elements involved - this includes variables, constants, operations, and conditions.
    ### Step 2: Consider how each element could be made more complex. For variables, this could involve introducing more variables or making the existing variables dependent on others. For constants, consider changing them to variables or making them dependent on other factors. For operations, consider introducing more complex operations or multiple steps. For conditions, consider adding more conditions or making the existing conditions more complex.
    ### Step 3: Formulate a plan to integrate these complexities into the instruction. Ensure that the changes are coherent and relevant to the initial problem context. The plan should not just randomly add complexity but should make the problem more interesting or challenging in a meaningful way. Avoid introducing irrelevant concepts or complicating the problem to the extent of changing its nature.
    ### Step 4: Rewrite the instruction according to the plan. Ensure that the rewritten instruction is still understandable and that it accurately represents the initial problem context. The rewritten instruction should only add 10 to 20 words to the original instruction. Make sure that the progression of complexity is smooth and gradual.
    ### Step 5: Review the rewritten instruction and check for any inaccuracies or inconsistencies. Make sure that the rewritten instruction is a more complex version of the original instruction and not a completely different problem. If any parts of the rewritten instruction are unreasonable or do not fit the problem context, revise them as necessary.

    Please reply strictly in the following format:
    
    ### Step 1 #Elements Identified#:
    ### Step 2 #Complexity Additions#:
    ### Step 3 #Plan#:
    ### Step 4 #Rewritten Instruction#:
    ### Step 5 #Finally Rewritten Instruction#:

    #Instruction#: {instruction}
    """
    output = call_llm(prompt, **kwargs)
    # re.search returns None when the header is absent; report that explicitly
    # instead of failing with "'NoneType' object has no attribute 'group'".
    match = re.search(r"Step 5 #Finally Rewritten Instruction#:(.*)", output, re.DOTALL)
    if match is None:
        raise ValueError(
            "LLM reply does not contain 'Step 5 #Finally Rewritten Instruction#:'; "
            f"reply starts with: {output[:200]!r}"
        )
    return match.group(1).strip()

def get_step_k_evolved_instruction(instruction, **kwargs):
    """Ask the LLM for a more complex rewrite of ``instruction`` (7-step prompt).

    Args:
        instruction: The instruction text to evolve; inserted into the prompt.
        **kwargs: Forwarded unchanged to ``call_llm``.

    Returns:
        The text following the ``Step 7 #Finally Rewritten Instruction#:``
        header in the reply, with surrounding whitespace removed.

    Raises:
        ValueError: If the reply does not contain that header (for example
            because the model ignored the format or the reply was cut off).
    """
    prompt = f"""You are an Instruction Rewriter that rewrites the given #Instruction# into a more complex version. Please follow the steps below to rewrite the given "#Instruction#" into a more complex version.

    ### Step 1: Carefully read the initial instruction and identify all the elements involved - this includes variables, constants, operations, and conditions.
    ### Step 2: Consider how each element could be made more complex. For variables, this could involve introducing more variables or making the existing variables dependent on others. For constants, consider changing them to variables or making them dependent on other factors. For operations, consider introducing more complex operations or multiple steps. For conditions, consider adding more conditions or making the existing conditions more complex.
    ### Step 3: Formulate a plan to integrate these complexities into the instruction. Ensure that the changes are coherent and relevant to the initial problem context. The plan should not just randomly add complexity but should make the problem more interesting or challenging in a meaningful way. Avoid introducing irrelevant concepts or complicating the problem to the extent of changing its nature.
    ### Step 4: Rewrite the instruction according to the plan. Ensure that the rewritten instruction is still understandable and that it accurately represents the initial problem context. The rewritten instruction should only add 10 to 20 words to the original instruction. Make sure that the progression of complexity is smooth and gradual.
    ### Step 5: Review the rewritten instruction and check for any inaccuracies or inconsistencies. Make sure that the rewritten instruction is a more complex version of the original instruction and not a completely different problem. If any parts of the rewritten instruction are unreasonable or do not fit the problem context, revise them as necessary.
    ### Step 6: Ensure that the complexity increase is consistent and logical. Avoid introducing new conditions or variables that are not related to the initial problem. The complexity should evolve from the initial problem and not transform it into a different problem.
    ### Step 7: Test the rewritten instruction to ensure that it is solvable and that the complexity has indeed increased. If the problem is too difficult or impossible to solve, revise it as necessary.

    Please reply strictly in the following format:
    
    ### Step 1 #Elements Identified#:
    ### Step 2 #Complexity Additions#:
    ### Step 3 #Plan#:
    ### Step 4 #Rewritten Instruction#:
    ### Step 5 #Revisied Instruction#:
    ### Step 6 #Consistency Check#:
    ### Step 7 #Finally Rewritten Instruction#:

    #Instruction#: {instruction}
    """
    output = call_llm(prompt, **kwargs)
    # re.search returns None when the header is absent; report that explicitly
    # instead of failing with "'NoneType' object has no attribute 'group'".
    match = re.search(r"Step 7 #Finally Rewritten Instruction#:(.*)", output, re.DOTALL)
    if match is None:
        raise ValueError(
            "LLM reply does not contain 'Step 7 #Finally Rewritten Instruction#:'; "
            f"reply starts with: {output[:200]!r}"
        )
    return match.group(1).strip()

In [None]:

# Review aspects as (name, guidance) pairs. generate_feedback_parallel() picks one pair
# at random for every feedback request and hands it to generate_feedback().
# Names carry no trailing punctuation so they read uniformly once placed in a prompt
# (the third entry previously ended with a stray ":").
FEEDBACK_ASPECTS = [
    ("Complexity", "Assess whether the complexity of instructions has increased sufficiently. See if more details and steps have been added."),
    ("Diversity", "Evaluate whether new scenarios, examples, or contexts are introduced. Make sure they cover a variety of situations."),
    ("Preservation of key information", "Evaluate whether key information has been maintained. Make sure important concepts and facts are not missing."),
    # Further aspects, currently disabled:
    # ("Clarity", "Assess whether instructions are clearly communicated. Make sure they are easy to understand and unambiguous."),
    # ("Misunderstanding", "Evaluate instructions to ensure they are not misleading. Identify areas that may be misinterpreted."),
    # ("Different contexts", "Assess whether the instructions can be applied to a variety of situations and contexts. Make sure they are generalizable.")
]
from concurrent.futures import ThreadPoolExecutor

def generate_feedback(original_instruction, evolved_instruction, feedback_aspect, top_p=0.95, temperature=0.5):
    """Ask the LLM to review one evolution step with respect to a single aspect.

    Args:
        original_instruction: The instruction before evolution.
        evolved_instruction: The instruction after evolution.
        feedback_aspect: Either a plain string or a ``(name, guidance, ...)``
            sequence such as the entries of ``FEEDBACK_ASPECTS``. Sequences are
            rendered as ``"name: guidance"``; previously the Python repr of the
            tuple (parentheses and quotes included) ended up in the prompt.
        top_p: Forwarded to ``call_llm``.
        temperature: Forwarded to ``call_llm``.

    Returns:
        The raw reply text from ``call_llm``. The prompt asks for a reply that
        starts with ``#Need Feedback#: Yes`` or ``#Need Feedback#: No``.
    """
    if isinstance(feedback_aspect, (tuple, list)) and feedback_aspect:
        name, *details = feedback_aspect
        # Drop trailing colons from the name so "name: guidance" never shows "::".
        aspect_text = str(name).strip().rstrip(":")
        if details:
            aspect_text += ": " + " ".join(str(detail).strip() for detail in details)
    else:
        aspect_text = str(feedback_aspect)

    # Besides the aspect rendering, two prompt defects are fixed here: mis-encoded
    # characters in Example 2 and the "#Issue Description#" label in Example 3,
    # which contradicted the "#Reason#" label required by the format section.
    feedback_prompt = f"""    
    The following list shows cases where an #Instruction# evolves into a more complex version of an Instruction, #Evolved Instruction#.
    Analyze the evolution of the #Instruction# and provide detailed feedback on the following aspects:

    Provide concise feedback on the #Aspect# in under 100 characters:
    #Aspect#: {aspect_text}

    #Instruction#: {original_instruction}
    #Evolved Instruction#: {evolved_instruction}

    Please answer concisely in the format below and provide a reason for each item within 100 characters using the format examples below. Please provide only one feedback.
    
    If feedback is required, please write the #Need Feedback#, #Issue Name#, #Reason#, and #Feedback# using the format below.
    
    #Need Feedback#: 
    #Issue Name#:
    #Reason#:
    #Feedback#:

    If feedback is not required, please write the #Need Feedback# using the format below.
    
    #Need Feedback#: No
    
    Please refer to the example below for an accurate answer.

    [Example 1]
    #Need Feedback#: Yes
    #Issue Name#: Lack of consistency and logical progression in complexity
    #Reason#: Introduce new concepts or variables without building on the previous instruction, making the problem confusing or unrealistic.
    #Feedback#: Build on previous instructions to improve consistency and transform into logical sentences

    [Example 2]
    #Need Feedback#: Yes
    #Issue Name#: Incorrect or unrealistic mathematical calculations
    #Reason#: Introduce mathematical operations or equations that don't make sense in the context of the problem or are mathematically incorrect.
    #Feedback#: Make accurate mathematical calculations. Take advantage of think step-by-step.

    [Example 3]
    #Need Feedback#: Yes
    #Issue Name#: Inconsistent or contradictory information
    #Reason#: Introduce information that contradicts previous instruction.
    #Feedback#: Ensure consistency in the information provided. Avoid introducing contradictory information.

    [Example 4]
    #Need Feedback#: No
    """
    return call_llm(feedback_prompt, top_p=top_p, temperature=temperature)

def convert_feedback_needed(feedback_list):
    """Map each feedback reply to a flag telling whether it asks for feedback.

    A reply counts as asking for feedback when it contains the exact text
    ``#Need Feedback#: Yes``. The result has one boolean per input reply, in
    the same order.
    """
    marker = '#Need Feedback#: Yes'
    flags = []
    for reply in feedback_list:
        flags.append(marker in reply)
    return flags

def generate_feedback_parallel(original_instruction, evolved_instruction, m=1):
    """Request ``m`` independent feedback replies concurrently.

    Every request uses a randomly chosen entry of ``FEEDBACK_ASPECTS``, a
    ``top_p`` drawn uniformly from [0.8, 1.0] and a ``temperature`` drawn
    uniformly from [0.6, 0.9]. The random values are drawn in the calling
    thread while the tasks are submitted.

    Args:
        original_instruction: The instruction before evolution.
        evolved_instruction: The instruction after evolution.
        m: Number of feedback requests; also used as the thread-pool size.

    Returns:
        A pair ``(feedback_required, feedback_list)``: ``feedback_list`` holds
        the raw replies in submission order and ``feedback_required`` is the
        result of ``convert_feedback_needed`` on that list. For ``m < 1`` both
        lists are empty and no request is made.
    """
    # ThreadPoolExecutor rejects max_workers <= 0 with a ValueError, so asking
    # for zero replies is answered directly.
    if m < 1:
        return [], []

    with ThreadPoolExecutor(max_workers=m) as executor:
        futures = [
            executor.submit(
                generate_feedback,
                original_instruction,
                evolved_instruction,
                random.choice(FEEDBACK_ASPECTS),
                random.uniform(0.8, 1.0),  # top_p range
                random.uniform(0.6, 0.9)   # temperature range
            ) for _ in range(m)
        ]
        # Collect in submission order; result() re-raises any worker exception.
        feedback_list = [future.result() for future in futures]
    return convert_feedback_needed(feedback_list), feedback_list

def evaluate_feedback(feedback_list):
    """Return the fraction of replies that contain ``#Need Feedback#: Yes``.

    Args:
        feedback_list: Sequence of raw feedback reply strings.

    Returns:
        ``failures / len(feedback_list)`` as a float, where a failure is a
        reply containing the exact text ``#Need Feedback#: Yes``. An empty
        list yields ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    if not feedback_list:
        return 0.0

    failure_count = sum(1 for feedback in feedback_list if "#Need Feedback#: Yes" in feedback)
    return failure_count / len(feedback_list)

In [None]:
# Demo run on one sample instruction: evolve it once with the step-0 prompt, then
# request a single feedback reply (m=1) on the evolved version. Both calls go to the LLM.
original_instruction = "Explain the concept of gravity."
# max_tokens travels through **kwargs of get_step_0_evolved_instruction into call_llm.
evolved_instruction = get_step_0_evolved_instruction(original_instruction, max_tokens=512)
# feedback_required: one boolean per reply; feedback_list: the raw reply texts.
feedback_required, feedback_list = generate_feedback_parallel(original_instruction, evolved_instruction, m=1)
print(feedback_required, feedback_list)

In [None]:
def get_optimized_instruction(combined_feedback, current_instruction, **kwargs):
    """Ask the LLM to optimize ``current_instruction`` using collected feedback.

    Args:
        combined_feedback: Feedback text to include in the prompt. ``None``,
            an empty string or a whitespace-only string means "no feedback";
            in that case no LLM call is made.
        current_instruction: The instruction (method) to optimize.
        **kwargs: Forwarded unchanged to ``call_llm``.

    Returns:
        ``current_instruction`` unchanged when there is no feedback; otherwise
        the text following ``#Optimized Instruction#:`` in the reply, with
        surrounding whitespace removed.

    Raises:
        ValueError: If the reply does not contain ``#Optimized Instruction#:``.
    """
    # Nothing to optimize against: skip the (billable) LLM round trip.
    if combined_feedback is None or not combined_feedback.strip():
        return current_instruction

    optimize_prompt = f"""
        #Current Instruction#: {current_instruction}
        #Feedback#: {combined_feedback}

        I will provide you with the method for evolving the above #Current Instructions#.
        You need to optimize this method based on the #Feedback# from the evolution failure case, without harming the performance on other cases, and ensure that the complexity increase brought by the optimized method is not lower than the previous method.
        Please provide the optimized method in the following format. 

        #Optimized Instruction#:   
        """

    output = call_llm(optimize_prompt, **kwargs)
    # re.search returns None when the header is absent; report that explicitly
    # instead of failing with "'NoneType' object has no attribute 'group'".
    match = re.search(r"#Optimized Instruction#:(.*)", output, re.DOTALL)
    if match is None:
        raise ValueError(
            "LLM reply does not contain '#Optimized Instruction#:'; "
            f"reply starts with: {output[:200]!r}"
        )
    return match.group(1).strip()

In [None]:
def get_failure_rate(feedback_list):
    """Return the fraction of replies that contain ``#Need Feedback#: Yes``.

    Args:
        feedback_list: Sequence of raw feedback reply strings.

    Returns:
        ``failures / len(feedback_list)`` as a float, where a failure is a
        reply containing the exact text ``#Need Feedback#: Yes``. An empty
        list yields ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    if not feedback_list:
        return 0.0

    failure_count = sum(1 for feedback in feedback_list if "#Need Feedback#: Yes" in feedback)
    return failure_count / len(feedback_list)

def get_feeback_text(feedback_required, feedback_list):
    """Collect the ``#Feedback#:`` lines of all replies flagged as needing feedback.

    ``feedback_required`` and ``feedback_list`` are walked in parallel. For
    every flagged reply, the remainder of the first line matching
    ``#Feedback#:`` is taken; replies without such a line are skipped. Each
    collected text is prefixed with a newline, so the result is ``""`` when
    nothing was collected.
    """
    import re

    extractor = re.compile(r"#Feedback#:\s*(.*)")
    pieces = []
    for needed, reply in zip(feedback_required, feedback_list):
        if not needed:
            continue
        found = extractor.search(reply)
        if found is not None:
            pieces.append("\n" + found.group(1))
    return "".join(pieces)


In [None]:
# NOTE(review): this cell repeats the same three calls as an earlier demo cell in this
# notebook; running it issues fresh LLM requests and overwrites evolved_instruction,
# feedback_required and feedback_list with the new results.
original_instruction = "Explain the concept of gravity."
# max_tokens travels through **kwargs of get_step_0_evolved_instruction into call_llm.
evolved_instruction = get_step_0_evolved_instruction(original_instruction, max_tokens=512)
# feedback_required: one boolean per reply; feedback_list: the raw reply texts.
feedback_required, feedback_list = generate_feedback_parallel(original_instruction, evolved_instruction, m=1)
print(feedback_required, feedback_list)

In [None]:
# optimized_instruction = get_optimized_instruction(get_feeback_text(feedback_required, feedback_list), evolved_instruction)

In [None]:
# evolved_instruction = get_step_1_evolved_instruction(optimized_instruction)

In [None]:
# feedback_required, feedback_list = generate_feedback_parallel(optimized_instruction, evolved_instruction, m)