sae-viewer/src/welcome.tsx (131 lines of code) (raw):

import React from "react" import { useState, FormEvent } from "react" import { useNavigate } from "react-router-dom" import { Feature } from "./types" import FeatureSelect from "./components/featureSelect" import { pathForFeature, DEFAULT_AUTOENCODER, AUTOENCODER_FAMILIES } from "./autoencoder_registry" export default function Welcome() { const navigate = useNavigate() const GPT4_ATOMS_PER_SHARD = 1024; const displayFeatures = [ /************** /* well explained + interesting ***************/ {heading: 'GPT-4', heading_type: 'h4', feature: null, label: ''}, {feature: {atom: 62 * GPT4_ATOMS_PER_SHARD + 53, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "humans have flaws", description: "descriptions of how humans are flawed"}, {feature: {atom: 25 * GPT4_ATOMS_PER_SHARD + 8, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "police reports, especially child safety", description: "safety incidents especially related to children"}, {feature: {atom: 9 * GPT4_ATOMS_PER_SHARD + 44, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "price changes", description: "ends of phrases describing commodity/equity price changes"}, {feature: {atom: 17 * GPT4_ATOMS_PER_SHARD + 33, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "ratification (multilingual)", description: "ratification (multilingual)"}, {feature: {atom: 3 * GPT4_ATOMS_PER_SHARD + 421, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "would [...]", description: "conditionals (things that would be true)"}, {feature: {atom: 63 * GPT4_ATOMS_PER_SHARD + 8, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "identification documents (multilingual)", description: "identification documents (multilingual)"}, {feature: {atom: 0 * GPT4_ATOMS_PER_SHARD + 14, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "lightly incremented timestamps", description: "timestamps being lightly incremented with recurring formats"}, {heading: 'Technical knowledge', heading_type: 'h3', feature: null, label: ''}, {feature: {atom: 40 * GPT4_ATOMS_PER_SHARD + 42, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "machine learning training logs", description: "machine learning training logs"}, {feature: {atom: 12 * GPT4_ATOMS_PER_SHARD + 47, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "onclick/onchange = function(this)", description: "onclick/onchange = function(this)"}, {feature: {atom: 54 * GPT4_ATOMS_PER_SHARD + 23, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "edges (graph theory) and related concepts", description: "edges (graph theory) and related concepts"}, {feature: {atom: 56 * GPT4_ATOMS_PER_SHARD + 12, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "algebraic rings", description: "algebraic rings"}, {feature: {atom: 28 * GPT4_ATOMS_PER_SHARD + 47, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "adenosine/dopamine receptors", description: "adenosine/dopamine receptors"}, {feature: {atom: 2 * GPT4_ATOMS_PER_SHARD + 601, autoencoder: AUTOENCODER_FAMILIES['v5_latelayer_postmlp'].get_ae({})}, label: "blockchain vibes", description: "blockchain vibes"}, {heading: 'GPT-2 small', heading_type: 'h4', feature: null, label: ''}, {feature: {atom: 488432, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '2097152', num_active_features: '8' })}, label: "rhetorical questions", description: "rhetorical questions"}, {feature: {atom: 2088200, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '2097152', num_active_features: '8' })}, label: "counting human casualties", description: "counting human casualties"}, {feature: {atom: 1621560, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '2097152', num_active_features: '8' })}, label: "X and Y phrases", description: "X and -> Y"}, {feature: {atom: 733, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '32768', num_active_features: '8' })}, label: "Patrick/Patty surname predictor", description: "Predicts surnames after Patrick"}, {feature: {atom: 64464, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "things that are unknown", description: "things that are unknown"}, {feature: {atom: 56907, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ // similar to 33248 num_features: '131072', num_active_features: '32' })}, label: "words in quotes", description: "predicts words in quotes"}, {feature: {atom: 1605835, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '2097152', num_active_features: '8' })}, label: "these/those responsible things", description: "these/those, in a phrase where something is responsible for something"}, {feature: {atom: 8040, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '8192', num_active_features: '32' })}, label: "2018 natural disasters", description: "2018 natural disasters"}, {feature: {atom: 21464, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "addition in code", description: "addition in code"}, {feature: {atom: 66232, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "function application", description: "function application"}, {feature: {atom: 64464, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "unclear/hidden things", description: "unclear/hidden things (top only)"}, {feature: {atom: 10423, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "what the ...", description: "[who/what/when/where/why/how] the"}, {heading: 'Safety relevant features (found via attribution methods)', heading_type: 'h3', feature: null, label: ''}, {feature: {atom: 64840, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "profanity (1)", description: "activates in order to output profanity"}, {feature: {atom: 104813, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "profanity (2)", description: "activates on profanity"}, {feature: {atom: 101090, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "profanity (3)", description: "activates on 'fucking' (profane, not sexual contexts)"}, {feature: {atom: 72185, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "erotic content", description: "erotic content"}, {feature: {atom: 69134, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ num_features: '131072', num_active_features: '32' })}, label: "[content warning] sexual abuse", description: "sexual abuse"}, // {feature: {atom: 2, autoencoder: AUTOENCODER_FAMILIES['v5_l8_postmlp'].get_ae({ // num_features: '2097152', num_active_features: '8' // })}, label: "things being brought", description: "bring * -> together/back"}, ] let [feature, setFeature] = useState({ atom: 0, autoencoder: DEFAULT_AUTOENCODER }) const handleClick = (click_feature: Feature) => { navigate(pathForFeature(click_feature)) } return ( <div className="flex flex-col" style={{'padding': '100px'}}> <h1 className="text-2xl font-bold mb-4">Welcome! This is a viewer for sparse autoencoders features trained in <a href="https://cdn.openai.com/papers/sparse-autoencoders.pdf">this paper</a> </h1> <h1>Pick a feature:</h1> <FeatureSelect init_feature={feature} onFeatureChange={(f: Feature) => setFeature(f)} onFeatureSubmit={(f: Feature) => navigate(pathForFeature(f))} show_go={true} /> <div className="mt-4"> <h2 className="text-xl font-bold mb-2">Interesting features:</h2> <div className="mb-10 flex-row"> <div className="flex flex-flow flex-wrap" > {displayFeatures.map(({ heading, heading_type, feature, label, description }, j) => ( heading ? <div style={{width: '100%'}} key={j}> {React.createElement(heading_type, {}, heading)} </div> : <button key={j} onClick={() => handleClick(feature)} style={{ width: 200 }} className="text-blue-500 hover:text-blue-700" title={description} > {label} </button> ))} </div> </div> </div> </div> ) }