# projects/Aligned-Platform-EnergizeAI/taxonomybuilder/tbm.py
import json
import openai
import requests
import random
with open('keys.txt', 'r') as f:
    # readline() keeps the trailing newline; strip it so the stored key is
    # clean (a '\n' in the key corrupts the Authorization header downstream).
    openai.api_key = f.readline().strip()
def extract_nodes(data, hierarchical=False):
    '''
    Recursively flatten a taxonomy JSON tree into training structures.

    Params:
        data: list of taxonomy node dicts, each with 'title' and 'description'
              and optionally 'prompts', 'examplePrompt', and 'children'.
        hierarchical: if True, a parent's definition also includes its
                      descendants' definitions (joined with ' OR ').
    Returns:
        labels: list of (prompt, category) pairs for every prompt in the tree
        ref_labels: dict mapping category -> one held-out example prompt
        definitions: dict mapping category -> description string
        taxonomy: dict mapping category -> list of DIRECT child categories,
                  plus a synthetic 'root' entry listing the top-level titles
    '''
    if data is None:
        # Keep the return shape consistent so callers can always unpack 4 values
        # (previously returned a bare [] here, breaking tuple unpacking).
        return [], {}, {}, {'root': []}
    labels = []
    ref_labels = {}
    definitions = {}
    taxonomy = {}
    for elem in data:
        taxonomy[elem['title']] = []
        defs = [elem['description']]
        if 'children' in elem:
            # Propagate the hierarchical flag (it was previously dropped, so
            # only the top level honored it).
            children_prompts, children_refs, children_definitions, children_tax = \
                extract_nodes(elem['children'], hierarchical)
            labels.extend(children_prompts)
            for category, definition in children_definitions.items():
                definitions[category] = definition
                if category in children_refs:
                    ref_labels[category] = children_refs[category]
                if hierarchical:
                    defs.append(definition)
            # The subtree's 'root' lists the DIRECT children only; capture it
            # before merging the rest, so grandchildren are not mistakenly
            # recorded as this node's children (the old code used all keys of
            # children_tax, i.e. every descendant).
            direct_children = children_tax.pop('root')
            for node, node_children in children_tax.items():
                taxonomy[node] = node_children
            taxonomy[elem['title']] = direct_children
        prompt_list = elem.get('prompts', [])
        if 'examplePrompt' in elem:
            ref_labels[elem['title']] = elem['examplePrompt']
        elif prompt_list:
            # No explicit example: hold one random prompt out as the reference
            # (guard against an empty prompt list, which used to crash).
            idx = random.randrange(len(prompt_list))
            ref_labels[elem['title']] = prompt_list[idx]
            prompt_list = prompt_list[:idx] + prompt_list[idx + 1:]
        for prompt in prompt_list:
            labels.append((prompt, elem['title']))
        definitions[elem['title']] = ' OR '.join(defs)
    taxonomy['root'] = [elem['title'] for elem in data]
    return labels, ref_labels, definitions, taxonomy
def generate_classification_prompt(categories, descriptions, ref_labels, prompt, few_shot = False):
    '''
    Build the full classification prompt string sent to the GPT model.

    Params:
        categories: list of candidate class labels
        descriptions: dictionary mapping each label to its description
        ref_labels: dictionary mapping each label to one example prompt
        prompt: the new text to classify
        few_shot: when True, prepend one labelled example per class taken
                  from ref_labels; when False, emit a zero-shot prompt.
    Returns:
        classif_prompt: the complete prompt string for the GPT model.
    '''
    defns_formatted = '\n'.join(
        f'{title}: {descriptions[title]}' for title in categories
    )
    example_lines = []
    if few_shot:
        for title in categories:
            example_lines.append(f'Text: {ref_labels[title]}\n')
            example_lines.append(f'Class: {title}\n')
    few_shot_examples = ''.join(example_lines)
    categories = json.dumps(categories)
    classif_prompt = f"""Classify the text into one of the classes. If none of the classes apply, reply 'General'.
Classes: {categories}
Class descriptions:
{defns_formatted}
{few_shot_examples}
Text: {prompt}
Class: """
    return classif_prompt
def execute_llm_request(prompt, model = 'gpt-4', verbose = False):
    '''
    Send a single prompt to the OpenAI chat API and return the cleaned reply.

    Params:
        prompt: the prompt string to evaluate
        model: chat model identifier (default 'gpt-4')
        verbose: when True, echo the outgoing prompt and the raw reply.
    Returns:
        res: the model's reply with all quote characters removed.
    '''
    if verbose:
        print('EXECUTING LLM REQUEST')
        print(prompt)
    # Deterministic, short completion: temperature 0 and a tight token cap,
    # since we only expect a single class label back.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=20,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    reply = response["choices"][0]["message"]["content"]
    if verbose:
        print(reply)
    # Strip quotes so the label matches taxonomy titles verbatim.
    res = reply.replace('\'', '').replace('\"', '')
    return res
def get_results_leaves(prompts, defns, verbose = False):
    '''
    Given a list of prompt-category labelled pairs and definitions, runs the zero-shot loop.
    Ignores structure of the taxonomy, treats each category as a separate class.
    Deprecated.
    Params:
        prompts: list of (prompt, category) labelled pairs
        defns: dictionary mapping categories to descriptions
        verbose: if True, print ChatGPT outputs; if False, don't print.
    Returns:
        results: list of dicts with 'prompt', 'expected', and 'llm' keys.
    '''
    results = []
    # The candidate classes are all distinct labels seen in the dataset.
    categories = list(set(x[1] for x in prompts))
    for prompt, classification in prompts:
        # Zero-shot only: pass an empty ref_labels mapping. (The original
        # call omitted this argument, which put `prompt` in the ref_labels
        # slot and raised a TypeError for the missing `prompt` parameter.)
        content = generate_classification_prompt(
            categories,
            defns,
            {},
            prompt,
        )
        resp = execute_llm_request(content, verbose = verbose)
        results.append(
            {
                'prompt': prompt,
                'expected': classification,
                'llm': resp,
            }
        )
    return results
def get_results_hierarchical(prompts, ref_labels, defns, taxonomy, verbose = False, few_shot = False):
    '''
    Given labelled pairs, definitions, and a taxonomy, runs the classification loop.
    Each classification task traverses one level deeper into the tree.
    Params:
        prompts: list of (prompt, category) labelled pairs
        ref_labels: dictionary mapping categories to held-out example prompts
        defns: dictionary mapping categories to descriptions
        taxonomy: structure of the taxonomy as a graph (category -> children).
        verbose: if True, print ChatGPT outputs; if False, don't print.
        few_shot: if True, use held-out reference prompts. If False, don't.
    Returns:
        results: list of dicts with 'prompt', 'expected', and 'llm' keys.
    '''
    results = []
    for prompt, classification in prompts:
        # Reset per prompt. Previously `parent` was initialized once outside
        # the loop, so a prompt answered 'General' at every level silently
        # inherited the previous prompt's classification.
        parent = 'General'
        # Start at the top level of the taxonomy.
        categories = [x for x in taxonomy['root']]
        content = generate_classification_prompt(
            categories,
            {x: defns[x] for x in defns if x in categories},
            ref_labels,
            prompt,
            few_shot = few_shot,
        )
        resp = execute_llm_request(content, verbose = verbose)
        if resp != 'General':
            parent = resp
        # Descend while the answer names an internal node with children.
        while resp in taxonomy and len(taxonomy[resp]) > 0:
            categories = taxonomy[resp]
            content = generate_classification_prompt(
                categories,
                {x: defns[x] for x in defns if x in categories},
                ref_labels,
                prompt,
                few_shot = few_shot,
            )
            resp = execute_llm_request(content, verbose = verbose)
            if resp != 'General':
                parent = resp
        results.append(
            {
                'prompt': prompt,
                'expected': classification,
                # Fall back to the deepest non-'General' answer seen.
                'llm': resp if resp != 'General' else parent,
            }
        )
    return results
# train_data is a list of dictionaries
def evaluate_taxonomy(train_data, hierarchical = True, verbose = False, few_shot = False):
'''
Runs the classification loop.
Params:
train_data: json containing all prompts and taxonomy.
hierarchical: If True, runs using taxonomy. If False, treats all categories equally. For now, always set to True
verbose: If True, prints out all ChatGPT outpus. If False, only prints incorrect prompts.
few_shot: If True, uses held-out examples for few-shot prompting. If False, runs zero-shot.
Returns:
acc: Accuracy on the given dataset.
results: List of incorrect predictions.
'''
# generate training dataset
prompts, ref_labels, defns, taxonomy = extract_nodes(train_data, hierarchical)
results = get_results_hierarchical(prompts, ref_labels, defns, taxonomy, verbose = verbose, few_shot = few_shot) if hierarchical \
else get_results_leaves(prompts, defns, verbose = verbose)
print('INCORRECT OUTPUTS:')
num_wrong = 0
for res in results:
if res['expected'] != res['llm']:
num_wrong += 1
print('Prompt:', res['prompt'])
print('Expected:', res['expected'])
print('GPT output:', res['llm'])
acc = 1 - num_wrong/len(results)
print('Accuracy:', acc)
return acc, results
def load_data():
    '''
    Gets data from the Energize taxonomy API.

    Reads the bearer token from 'apikey.txt' and returns the decoded JSON
    response body.
    '''
    with open('apikey.txt', 'r') as f:
        # strip the trailing newline readline() keeps — an embedded '\n'
        # would corrupt the Authorization header below
        key = f.readline().strip()
    params = {
        "prompts": True,
    }
    # Define the API endpoint and headers
    url = "https://dev.api.energize.ai/api/openapi/taxonomy"
    headers = {
        "authorization": "Bearer " + key
    }
    # Make the request
    response = requests.get(url, headers=headers, params=params)
    # Fail loudly on auth/HTTP errors instead of failing obscurely in .json()
    response.raise_for_status()
    data = response.json()
    return data
def zero_shot_loop():
    '''
    Runs the entire TBM zero-shot finetuning loop: fetch the taxonomy from
    the Energize API, then classify every prompt against it with no
    few-shot examples.
    Returns:
        acc: accuracy on the dataset
        results: list of per-prompt result dicts
    '''
    taxonomy_data = load_data()
    return evaluate_taxonomy(taxonomy_data, verbose = False, few_shot = False)
if __name__ == "__main__":
    # Entry point: run the zero-shot evaluation with no example prompts.
    acc, results = zero_shot_loop()