def extract_nodes()

in projects/Aligned-Platform-EnergizeAI/taxonomybuilder/tbm.py [0:0]
44 lines of code
14 McCabe index (conditional complexity)

def extract_nodes(data, hierarchical=False):
    """Flatten a nested taxonomy (as loaded from JSON) into lookup tables.

    Each element of ``data`` is a dict with a ``title``, a ``description``,
    a list of ``prompts`` and, optionally, an ``examplePrompt`` and a list
    of ``children`` elements of the same shape.

    Args:
        data: list of taxonomy node dicts, or ``None``.
        hierarchical: when true, a node's definition is its own description
            joined by ``' OR '`` with the descriptions of all of its
            descendants; otherwise it is the node's description alone.

    Returns:
        ``[]`` when ``data`` is ``None`` (kept for backward compatibility).
        Otherwise a 4-tuple ``(labels, ref_labels, definitions, taxonomy)``:

        labels: list of ``(prompt, title)`` pairs for every prompt in the
            tree, excluding prompts that were sampled as reference prompts.
        ref_labels: dict mapping a title to its reference prompt
            (``examplePrompt`` if present, else a randomly chosen prompt).
            Nodes with neither get no entry.
        definitions: dict mapping a title to its definition string.
        taxonomy: dict mapping each title to the titles of its direct
            children, plus ``'root'`` mapping to the top-level titles.
    """
    if data is None:
        return []
    labels = []
    ref_labels = {}
    definitions = {}
    taxonomy = {}
    for elem in data:
        title = elem['title']
        taxonomy[title] = []
        defs = [elem['description']]

        # A missing, null or empty ``children`` entry all mean "leaf node".
        children = elem.get('children')
        if children:
            # Descendants are always extracted non-hierarchically so that
            # each descendant description is appended below exactly once.
            child_labels, child_refs, child_defs, child_tax = extract_nodes(children)

            labels.extend(child_labels)
            definitions.update(child_defs)
            ref_labels.update(child_refs)
            if hierarchical:
                defs.extend(child_defs.values())

            # The subtree's 'root' entry lists exactly this node's direct
            # children; the remaining entries are the deeper nodes.
            direct_children = child_tax.pop('root')
            taxonomy.update(child_tax)
            taxonomy[title] = direct_children

        prompt_list = elem.get('prompts') or []
        if 'examplePrompt' in elem:
            ref_labels[title] = elem['examplePrompt']
        elif prompt_list:
            # Hold one random prompt out as the reference sample so it is
            # not also emitted as a labeled prompt.
            idx = random.randrange(len(prompt_list))
            ref_labels[title] = prompt_list[idx]
            prompt_list = prompt_list[:idx] + prompt_list[idx + 1:]

        labels.extend((prompt, title) for prompt in prompt_list)
        definitions[title] = ' OR '.join(defs)

    taxonomy['root'] = [elem['title'] for elem in data]
    return labels, ref_labels, definitions, taxonomy