def generate_subjects()

in glan-instruct/glan.py [0:0]


def generate_subjects(discipline, max_number_of_subjects=2, max_number_of_subtopics=5, model_name="gpt-4o", *, max_attempts=3, **kwargs):
    """
    Generate a list of subjects for a given discipline. Please refer to section 2.2 of the paper.

    The model is asked (in JSON mode) for an object with a top-level
    ``"subjects"`` list whose entries carry ``subject``, ``level`` and
    ``subtopics``. The reply is parsed with ``json.loads`` and checked with
    ``validate_subjects_json_structure``; if either step fails, the request is
    re-sent, up to ``max_attempts`` times in total.

    Args:
        discipline: Name of the discipline, interpolated into the prompt.
        max_number_of_subjects: Upper bound on subjects requested in the prompt.
        max_number_of_subtopics: Upper bound on subtopics per subject requested
            in the prompt.
        model_name: Model identifier forwarded to the chat completion call.
        max_attempts: Total number of requests to make before giving up
            (keyword-only, must be >= 1).
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.chat.completions.create``.

    Returns:
        The parsed JSON reply that passed ``validate_subjects_json_structure``.

    Raises:
        ValueError: If ``max_attempts`` is less than 1, or if no attempt
            produced parseable JSON that passes structure validation. In the
            latter case the last parse error (if any) is chained as the cause.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1.")

    prompt = f"""
    You are an expert in {discipline}. Create a comprehensive list of subjects a student should learn under this discipline. 
    For each subject, provide the level (e.g., 100, 200, 300, 400, 500, 600, 700, 800, 900) and include key subtopics in JSON format.
    {{    
        "subjects": [
            {{
                'subject': 'Introduction to Computer Science',
                'level': 100,
                'subtopics': [
                    'Basic Programming',
                    'Software Development Fundamentals',
                    'Computer Organization'
                ]
            }}, 
            ...
        ]
    }}
    Limit the number of `subjects` to a maximum of {max_number_of_subjects}.    
    Limit the number of `subtopics` to a maximum of {max_number_of_subtopics} for each `subject`.    
    """

    # perf_counter is monotonic, so the elapsed time cannot go negative if the
    # wall clock is adjusted while we wait on the API.
    t0 = time.perf_counter()
    last_error = None

    # A bounded loop replaces the original unbounded self-recursion, which
    # could keep issuing API calls until RecursionError.
    for attempt in range(1, max_attempts + 1):
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "json_object"},
            **kwargs
        )
        subjects = response.choices[0].message.content

        try:
            subjects_json = json.loads(subjects)
        except (json.JSONDecodeError, TypeError) as err:
            # TypeError covers a reply whose content is None rather than text.
            last_error = err
            logger.warning(
                "Failed to parse JSON (attempt %d of %d): %s",
                attempt, max_attempts, err,
            )
            continue

        if validate_subjects_json_structure(subjects_json):
            logger.info("Generating subjects took %.4f seconds.", time.perf_counter() - t0)
            return subjects_json

        # Parsed fine but the shape is wrong; there is no exception to chain.
        last_error = None
        logger.warning(
            "Subjects JSON failed structure validation (attempt %d of %d).",
            attempt, max_attempts,
        )

    raise ValueError(
        f"Could not generate valid subjects for {discipline!r} "
        f"after {max_attempts} attempt(s)."
    ) from last_error