neuron_explainer/activations/derived_scalars/postprocessing.py (7 lines):
- line 58: # TODO: this should really match derived scalar types based on the compatibility of their indexing prefixes, rather
- line 311: # TODO: rationalize the setup for choosing the raw activations device by getting it from DerivedScalarTypeConfig,
- line 313: # TODO: Derived scalar tensors sometimes haven't been detached yet! We work around that
- line 479: # TODO: support attention heads; this will require specifying q, k or v in the make_reconstituted_gradient_fn
- line 510: # TODO: finish supporting attention heads
- line 549: # TODO: rationalize the setup for choosing the raw activations device by getting it from DerivedScalarTypeConfig,
- line 551: # TODO: Derived scalar tensors sometimes haven't been detached yet! We work around that

neuron_explainer/models/model_context.py (3 lines):
- line 173: # TODO: Figure out why test_interactive_model.py crashes on the "mps" backend, then remove
- line 338: # TODO: maybe make a unified interface for the Config objects of ModelContext objects, and
- line 374: # TODO: make this robust to whether the transformer is 'simplified' in our terminology

neuron_explainer/activations/derived_scalars/indexing.py (3 lines):
- line 33: # TODO: if all goes well, have this be hard-coded to True, and remove the plumbing
- line 127: # copied from DerivedScalarIndex; TODO: ActivationIndex and DerivedScalarIndex inherit from a shared base class,
- line 387: # TODO: consider subsuming this and the above into NodeIndex/ActivationIndex respectively

neuron_viewer/src/TransformerDebugger/utils/explanations.ts (2 lines):
- line 1: // TODO: Make this explanation clearer. Does this only cover the direct effect as opposed to indirect effects?
- line 8: // TODO: Make this explanation clearer. Is this a magnitude? Does this only cover the direct effect as opposed to indirect effects?

neuron_viewer/src/requests/readRequests.ts (2 lines):
- line 118: // TODO: this is really used for any NodeType that is a scalar per token (e.g.
- line 128: // TODO: this is really used for any NodeType that is a scalar per token pair. Should

neuron_explainer/models/model_component_registry.py (2 lines):
- line 134: # TODO: remove this hack, and make NodeType depend on the token dimensions
- line 313: # TODO: remove this hack, and make NodeType depend on the token dimensions

neuron_explainer/activations/derived_scalars/multi_pass_scalar_deriver.py (2 lines):
- line 28: Probable TODO: make an ABC, from which both ScalarDeriver and MultiPassScalarDeriver inherit
- line 338: # TODO: Run PromptCombo.derive_from_raw(scalar_source, raw_activation_store) as a part of

neuron_explainer/activations/derived_scalars/activations_and_metadata.py (2 lines):
- line 159: # TODO: this function should take transform_fn_with_layer_index, a Callable[[torch.Tensor, LayerIndex], torch.Tensor]
- line 430: # TODO: clarify comment to indicate "Dummy" tensors can be either "truly 0" tensors, as in the case of backward passes, or "invalid"

neuron_explainer/activations/derived_scalars/reconstituted.py (1 line):
- line 439: # TODO: consider deleting in favor of universal non-gradient-keeping at the outside of ScalarDeriver base functions

neuron_explainer/activations/derived_scalars/autoencoder.py (1 line):
- line 197: # TODO: Consider removing this workaround and using RESID_DELTA_MLP directly.

neuron_explainer/models/inference_engine_type_registry.py (1 line):
- line 9: # TODO: Consider using a stronger type here.

neuron_explainer/activations/derived_scalars/edge_attribution.py (1 line):
- line 592: ) # TODO: figure out how to thread

neuron_explainer/explanations/simulator.py (1 line):
- line 266: # values. See the TODO elsewhere in this file about coming up with a better

neuron_explainer/activations/derived_scalars/direct_effects.py (1 line):
- line 134: ) # TODO: consider splitting into two cases, once we have separate node_types

neuron_explainer/activations/derived_scalars/raw_activations.py (1 line):
- line 357: # TODO: this entire function should be simplified or deleted?