def generate_taxonomy()

in glan-instruct/glan.py [0:0]


def _extract_disciplines(taxonomy_json):
    """Flatten every discipline found in a parsed taxonomy into one list.

    Entries that do not have the expected shape (non-dict fields, missing or
    non-list ``sub_fields`` / ``disciplines``) are skipped instead of raising.
    """
    if not isinstance(taxonomy_json, dict) or not taxonomy_json:
        return []

    # Prefer the key the prompt asks for; otherwise fall back to the first
    # top-level key in case the response uses a different wrapper name.
    key = "fields" if "fields" in taxonomy_json else next(iter(taxonomy_json))
    fields = taxonomy_json[key]
    if not isinstance(fields, list):
        return []

    disciplines = []
    for field in fields:
        if not isinstance(field, dict):
            continue
        sub_fields = field.get("sub_fields")
        if not isinstance(sub_fields, list):
            continue
        for sub_field in sub_fields:
            if not isinstance(sub_field, dict):
                continue
            names = sub_field.get("disciplines")
            if isinstance(names, list):
                disciplines.extend(names)
    return disciplines


def generate_taxonomy(max_number_of_fields: int = 10, model_name: str = "gpt-4o", **kwargs) -> tuple:
    """
    Generate a taxonomy of human knowledge and capabilities.

    Sends a single user prompt through the module-level ``client`` and asks
    for a JSON object describing fields, sub-fields and disciplines.

    Args:
        max_number_of_fields: Upper bound on the number of fields, embedded
            in the prompt text.
        model_name: Model identifier passed as ``model`` to
            ``client.chat.completions.create``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.chat.completions.create``.

    Returns:
        A ``(taxonomy_json, disciplines)`` tuple. ``taxonomy_json`` is the
        parsed response and ``disciplines`` is a flat list of every
        discipline found in it. If the response cannot be parsed as JSON,
        ``taxonomy_json`` is ``{"error": "Failed to parse JSON"}`` and
        ``disciplines`` is an empty list.
    """

    prompt = f"""
    Create a taxonomy of human knowledge and capabilities. Break it down into fields, sub-fields, and disciplines.
    Limit the number of fields to a maximum of {max_number_of_fields}.

    Provide the result in JSON format with the following structure:
    {{
        "fields": [
            {{
                "field_name": "Field Name",
                "sub_fields": [
                    {{
                        "sub_field_name": "Sub-field Name",
                        "disciplines": ["Discipline 1", "Discipline 2", ...]
                    }},
                    ...
                ]
            }},
            ...
        ]
    }}

    Examples of `field_name` are Natural Sciences, Humanities or Service.
    Examples of `sub_field_name` are Chemistry, Sociology or Retailing.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"},
        **kwargs,
    )
    taxonomy = response.choices[0].message.content
    try:
        taxonomy_json = json.loads(taxonomy)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a missing (None) message content. Return early:
        # the error placeholder has no taxonomy structure to walk, and
        # walking it used to raise instead of reporting the parse failure.
        return {"error": "Failed to parse JSON"}, []

    return taxonomy_json, _extract_disciplines(taxonomy_json)