neuron-explainer/neuron_explainer/explanations/token_space_few_shot_examples.py (195 lines of code) (raw):

from dataclasses import dataclass
from enum import Enum
from typing import List

from neuron_explainer.fast_dataclasses import FastDataclass


@dataclass
class Example(FastDataclass):
    """
    One few-shot example: a list of token strings (top token-space inputs of a
    neuron) paired with a string explanation of the neuron's behavior on them.
    """

    tokens: List[str]
    explanation: str


class TokenSpaceFewShotExampleSet(Enum):
    """Selects which set of few-shot examples is used when sampling explanations."""

    ORIGINAL = "original"
    TEST = "test"

    def get_examples(self) -> list[Example]:
        """
        Return the regular few-shot examples associated with this member.

        The module-level list itself is returned (not a copy).

        Raises:
            ValueError: if this member has no example list wired up below.
        """
        # The example lists are module-level names defined further down; they
        # are resolved at call time, after the module has finished loading.
        if self is TokenSpaceFewShotExampleSet.ORIGINAL:
            return ORIGINAL_EXAMPLES
        if self is TokenSpaceFewShotExampleSet.TEST:
            return TEST_EXAMPLES
        raise ValueError(f"Unhandled example set: {self}")


# Examples returned for TokenSpaceFewShotExampleSet.ORIGINAL.
# Leading spaces inside token and explanation strings are significant.
ORIGINAL_EXAMPLES = [
    Example(
        tokens=[
            "actual", " literal", " actual", " hyper", " real",
            " EX", " Real", "^", "Full", " full",
            " optical", " style", "any", "ALL", "extreme",
            " miniature", " Optical", " faint", "~", " Physical",
            " REAL", "*", "virtual", "TYPE", " technical",
            "otally", " physic", "Type", "<", "images",
            "atic", " sheer", " Style", " partial", " natural",
            "Hyper", " Any", " theoretical", "|", " ultimate",
            "oing", " constant", "ANY", "antically", "ishly",
            " ex", " visual", "special", "omorphic", "visual",
        ],
        explanation=" adjectives related to being real, or to physical properties and evidence",
    ),
    Example(
        tokens=[
            "cephal", "aeus", " coma", "bered", "abetes",
            "inflamm", "rugged", "alysed", "azine", "hered",
            "cells", "aneously", "fml", "igm", "culosis",
            "iani", "CTV", "disabled", "heric", "ulo",
            "geoning", "awi", "translation", "iral", "govtrack",
            "mson", "cloth", "nesota", " Dise", " Lyme",
            " dementia", "agn", " reversible", " susceptibility", "esthesia",
            "orf", " inflamm", " Obesity", " tox", " Disorders",
            "uberty", "blind", "ALTH", "avier", " Immunity",
            " Hurt", "ulet", "ueless", " sluggish", "rosis",
        ],
        explanation=" words related to physical medical conditions",
    ),
    Example(
        tokens=[
            " January", "terday", "cember", " April", " July",
            "September", "December", "Thursday", "quished", "November",
            "Tuesday", "uesday", " Sept", "ruary", " March",
            ";;;;;;;;;;;;", " Monday", "Wednesday", " Saturday", " Wednesday",
            "Reloaded", "aturday", " August", "Feb", "Sunday",
            "Reviewed", "uggest", " Dhabi", "ACTED", "tten",
            "Year", "August", "alogue", "MX", " Janeiro",
            "yss", " Leilan", " Fiscal", " referen", "semb",
            "eele", "wcs", "detail", "ertation", " Reborn",
            " Sunday", "itially", "aturdays", " Dise", "essage",
        ],
        explanation=" nouns related to time and dates",
    ),
]

# Examples returned for TokenSpaceFewShotExampleSet.TEST.
TEST_EXAMPLES = [
    Example(
        tokens=["these", " are", " tokens"],
        explanation=" this is a test explanation",
    ),
]