in projects/Aligned-Platform-EnergizeAI/taxonomybuilder/tbm.py [0:0]
def extract_nodes(data, hierarchical=False):
    '''
    Flatten a nested taxonomy (as loaded from JSON) into lookup structures.

    Args:
        data: a list of node dicts, or None. Every node is read for the
            keys 'title', 'description' and 'prompts'. The keys
            'examplePrompt' and 'children' (a nested list of node dicts of
            the same shape) are optional; a null 'children' or 'prompts'
            value is treated as empty.
        hierarchical: when True, the definition of each node directly in
            `data` is its own description joined by ' OR ' with the
            definitions of every node nested below it. The flag is not
            forwarded to the recursive call, so nested nodes always keep
            their plain description.

    Returns:
        [] when `data` is None (kept for backward compatibility).
        Otherwise a 4-tuple (labels, ref_labels, definitions, taxonomy):
            labels: list of (prompt, title) pairs for this level and all
                nested levels. A prompt that was randomly picked as a
                node's reference prompt is left out.
            ref_labels: dict mapping a title to its reference prompt: the
                node's 'examplePrompt' if present, else one prompt drawn
                at random. Nodes with neither have no entry.
            definitions: dict mapping a title to its definition string.
            taxonomy: dict mapping a title to the titles found below it,
                plus a 'root' key listing the titles directly in `data`.
    '''
    if data is None:
        return []

    labels = []
    ref_labels = {}
    definitions = {}
    taxonomy = {}

    for elem in data:
        title = elem['title']
        taxonomy[title] = []
        defs = [elem['description']]

        # A null 'children' value must not be recursed into: the None case
        # above returns a bare [] that cannot be unpacked into four values.
        children = elem.get('children')
        if children is not None:
            child_labels, child_refs, child_defs, child_tax = extract_nodes(children)
            labels.extend(child_labels)
            definitions.update(child_defs)
            ref_labels.update(child_refs)
            if hierarchical:
                defs.extend(child_defs.values())

            # The nested call's 'root' entry only describes that call's own
            # top level; drop it before merging into this level's graph.
            del child_tax['root']
            taxonomy.update(child_tax)
            # NOTE(review): child_tax holds a key for every node below this
            # one, so this records all descendants, not only the direct
            # children. Confirm that is the intended graph shape.
            taxonomy[title] = list(child_tax)

        prompt_list = elem['prompts'] or []
        if 'examplePrompt' in elem:
            ref_labels[title] = elem['examplePrompt']
        elif prompt_list:
            # Hold one random prompt out as the reference example and keep
            # it out of the labelled set.
            idx = random.randrange(len(prompt_list))
            ref_labels[title] = prompt_list[idx]
            prompt_list = prompt_list[:idx] + prompt_list[idx + 1:]
        # With no example prompt and no prompts there is nothing to draw
        # from, so the node simply gets no reference label.

        labels.extend((prompt, title) for prompt in prompt_list)
        definitions[title] = ' OR '.join(defs)

    taxonomy['root'] = [elem['title'] for elem in data]
    return labels, ref_labels, definitions, taxonomy