scripts/phi3.py:

import torch
from transformers import Phi3ForCausalLM, AutoTokenizer


def ground():
    # Load Phi-3-mini on CPU in float32 and generate a completion end to end.
    model = Phi3ForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)
    print("Model: ", model)
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
    # Phi-3 chat format: <|user|> ... <|end|> followed by <|assistant|>
    inputs = tokenizer("""<|user|>
How to explain Internet for a medieval knight?<|end|>
<|assistant|>""", return_tensors="pt", return_attention_mask=False)
    # output_logits=True also returns the per-step logits in outputs.logits,
    # although they are not used below.
    outputs = model.generate(**inputs, max_length=500, return_dict_in_generate=True, output_logits=True)
    tokens = outputs.sequences  # equivalent to outputs[0]
    print("Tokens: ", tokens)
    print("Text: ", tokenizer.decode(tokens[0], skip_special_tokens=True))


def hooked():
    # Capture the output of the first decoder layer with a forward hook.
    model = Phi3ForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)
    model.eval()

    first_layer_output = None

    def hook(module, input, output):
        nonlocal first_layer_output
        first_layer_output = output

    # Register the forward hook on the first decoder layer
    model.model.layers[0].register_forward_hook(hook)

    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
    inputs = tokenizer('{}', return_tensors="pt", return_attention_mask=False)
    print("PROMPT TOKENS:", inputs["input_ids"])
    # The forward pass triggers the hook; the logits themselves are unused.
    logits = model(**inputs).logits
    print("FIRST LAYER OUTPUT: ", first_layer_output)
    # The hook receives a tuple; element 0 is the hidden-state tensor.
    return [first_layer_output[0].detach().numpy()]


ground()
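
A minimal sanity check for the hook in hooked(), not part of the original script: under the standard transformers convention, a forward pass with output_hidden_states=True returns the embedding output at hidden_states[0] and the output of decoder layer i at hidden_states[i + 1], so the tensor captured by the hook on layers[0] should match hidden_states[1]. The helper name check_hook is hypothetical.

import torch
from transformers import Phi3ForCausalLM, AutoTokenizer


def check_hook():
    # Sketch only; assumes the hidden_states layout described above.
    model = Phi3ForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)
    model.eval()

    captured = {}

    def hook(module, input, output):
        # Keep only the hidden-state tensor from the layer's output tuple.
        captured["first"] = output[0]

    model.model.layers[0].register_forward_hook(hook)

    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
    inputs = tokenizer('{}', return_tensors="pt", return_attention_mask=False)
    with torch.no_grad():
        out = model(**inputs, output_hidden_states=True)

    # hidden_states[1] should be the first decoder layer's output.
    assert torch.allclose(captured["first"], out.hidden_states[1])
    print("hook output matches hidden_states[1]")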