in neuron-explainer/neuron_explainer/explanations/explainer.py
def make_explanation_prompt(self, **kwargs: Any) -> Union[str, list[HarmonyMessage]]:
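    """
    Build a prompt that asks the model to explain what a neuron is looking for.

    The prompt consists of a system message describing the token<tab>activation format,
    one section per few-shot example (a neuron's activation records plus a reference
    explanation), and finally a section for the subject neuron's activation records with
    the explanation left for the model to complete. Depending on self.prompt_format, the
    result is either a single prompt string or a list of HarmonyMessage chat messages.
    """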
    original_kwargs = kwargs.copy()
    all_activation_records: Sequence[ActivationRecord] = kwargs.pop("all_activation_records")
    max_activation: float = kwargs.pop("max_activation")
    kwargs.setdefault("numbered_list_of_n_explanations", None)
    numbered_list_of_n_explanations: Optional[int] = kwargs.pop(
        "numbered_list_of_n_explanations"
    )
    if numbered_list_of_n_explanations is not None:
        assert numbered_list_of_n_explanations > 0, numbered_list_of_n_explanations
    # This parameter lets us dynamically shrink the prompt if our initial attempt to create it
    # results in something that's too long. It's only implemented for the 4k context size.
    kwargs.setdefault("omit_n_activation_records", 0)
    omit_n_activation_records: int = kwargs.pop("omit_n_activation_records")
    max_tokens_for_completion: int = kwargs.pop("max_tokens_for_completion")
    assert not kwargs, f"Unexpected kwargs: {kwargs}"
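    # Assemble the prompt: a system message describing the task, then one section per
    # few-shot example, and finally a section for the neuron we want explained.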
    prompt_builder = PromptBuilder()
    prompt_builder.add_message(
        Role.SYSTEM,
        "We're studying neurons in a neural network. Each neuron looks for some particular "
        "thing in a short document. Look at the parts of the document the neuron activates for "
        "and summarize in a single sentence what the neuron is looking for. Don't list "
        "examples of words.\n\nThe activation format is token<tab>activation. Activation "
        "values range from 0 to 10. A neuron finding what it's looking for is represented by a "
        "non-zero activation value. The higher the activation value, the stronger the match.",
    )
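    # Illustration only (tokens and values are made up) of the token<tab>activation lines
    # the system message describes:
    #     cat     9
    #     sat     1
    #     on      0
    # A non-zero value means the neuron fired on that token; values range from 0 to 10.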
    few_shot_examples = self.few_shot_example_set.get_examples()
    num_omitted_activation_records = 0
    for i, few_shot_example in enumerate(few_shot_examples):
        few_shot_activation_records = few_shot_example.activation_records
        if self.context_size == ContextSize.TWO_K:
            # If we're using a 2k context window, we only have room for one activation record
            # per few-shot example. (Two few-shot examples with one activation record each seems
            # to work better than one few-shot example with two activation records, in local
            # testing.)
            few_shot_activation_records = few_shot_activation_records[:1]
        elif (
            self.context_size == ContextSize.FOUR_K
            and num_omitted_activation_records < omit_n_activation_records
        ):
            # Drop the last activation record for this few-shot example to save tokens, assuming
            # there are at least two activation records.
            if len(few_shot_activation_records) > 1:
                print(f"Warning: omitting activation record from few-shot example {i}")
                few_shot_activation_records = few_shot_activation_records[:-1]
                num_omitted_activation_records += 1
        self._add_per_neuron_explanation_prompt(
            prompt_builder,
            few_shot_activation_records,
            i,
            calculate_max_activation(few_shot_example.activation_records),
            numbered_list_of_n_explanations=numbered_list_of_n_explanations,
            explanation=few_shot_example.explanation,
        )
    self._add_per_neuron_explanation_prompt(
        prompt_builder,
        # If we're using a 2k context window, we only have room for two of the activation
        # records.
        all_activation_records[:2]
        if self.context_size == ContextSize.TWO_K
        else all_activation_records,
        len(few_shot_examples),
        max_activation,
        numbered_list_of_n_explanations=numbered_list_of_n_explanations,
        explanation=None,
    )
    # If the prompt is too long *and* we omitted the specified number of activation records, try
    # again, omitting one more. (If we didn't make the specified number of omissions, we're out
    # of opportunities to omit records, so we just return the prompt as-is.)
    if (
        self._prompt_is_too_long(prompt_builder, max_tokens_for_completion)
        and num_omitted_activation_records == omit_n_activation_records
    ):
        original_kwargs["omit_n_activation_records"] = omit_n_activation_records + 1
        return self.make_explanation_prompt(**original_kwargs)
    return prompt_builder.build(self.prompt_format)
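
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original file). A minimal illustration of how
# make_explanation_prompt might be called, based on the kwargs the method pops
# above. The class name TokenActivationPairExplainer, its constructor
# arguments, and the ActivationRecord import path are assumptions about the
# surrounding repo; the activation values are made up. In normal use the
# explainer's higher-level explanation method builds this prompt internally.
# ---------------------------------------------------------------------------
from neuron_explainer.activations.activations import ActivationRecord
from neuron_explainer.explanations.explainer import TokenActivationPairExplainer

activation_records = [
    # Made-up data: tokens with their raw activation values for one text snippet.
    ActivationRecord(tokens=["the", "cat", "sat"], activations=[0.0, 8.7, 1.2]),
]

explainer = TokenActivationPairExplainer(model_name="gpt-4")  # other constructor args left at defaults (assumption)
prompt = explainer.make_explanation_prompt(
    all_activation_records=activation_records,
    max_activation=max(max(r.activations) for r in activation_records),
    max_tokens_for_completion=60,  # tokens reserved for the model's answer (arbitrary value)
)
# `prompt` is either a single string or a list of HarmonyMessage chat messages,
# depending on the prompt_format the explainer was constructed with.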