def error_analysis()

in review_sentiment_flow.py [0:0]
32 lines of code
1 McCabe index (conditional complexity)

    def error_analysis(self):
        """
        Predict the sentiment of a few sample movie reviews and print,
        for each one, the predicted class and its probability.

        Reads ``self.device``, ``self.tokenizer_as_dict``, ``self.output_dim``
        and ``self.model_state_dict_bytes``. As a side effect, writes
        ``tokenizer.json`` to the current working directory. Finishes by
        transitioning to ``self.upload_model_to_gcs``.
        """
        import torch
        from tokenizers import Tokenizer
        from review_sentiment_model import ReviewSentimentModel

        from io import BytesIO
        import json

        device = self.device

        # The tokenizer is stored on the flow as a plain dict; dump it to disk
        # so it can be rebuilt with Tokenizer.from_file.
        with open('tokenizer.json', 'w', encoding='utf-8') as fp:
            json.dump(self.tokenizer_as_dict, fp)

        tokenizer = Tokenizer.from_file("tokenizer.json")

        model = ReviewSentimentModel(tokenizer, self.output_dim, False)
        buffer = BytesIO(self.model_state_dict_bytes)
        model.load_state_dict(torch.load(buffer, map_location=device, weights_only=True))
        # load_state_dict copies weights into the existing parameters, so the
        # model must be moved explicitly to match the input tensors' device.
        model = model.to(device)
        # Inference only: switch off training-time behaviour (e.g. dropout).
        model.eval()

        def predict_sentiment(text, model, tokenizer, device):
            """Return ``(predicted_class, predicted_probability)`` for ``text``."""
            # tokenizers.Tokenizer is not callable; encode() returns an
            # Encoding whose .ids attribute holds the token ids.
            ids = tokenizer.encode(text).ids
            # Add a leading batch dimension of size 1.
            tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)
            with torch.no_grad():
                # Assumes the model returns one row of class scores per
                # input sequence -- TODO confirm against ReviewSentimentModel.
                prediction = model(tensor).squeeze(dim=0)
            probability = torch.softmax(prediction, dim=-1)
            predicted_class = prediction.argmax(dim=-1).item()
            predicted_probability = probability[predicted_class].item()
            return predicted_class, predicted_probability

        print("(Clearly these are toy examples; one could load a batch of examples here for more rigorous error analysis)")

        sample_texts = (
            "This film is terrible!",
            "This film is not terrible, it's great!",
            # Previously a verbatim repeat of the line above; use the inverse
            # negation case so each sample probes something different.
            "This film is not great, it's terrible!",
        )
        for text in sample_texts:
            print(f"Analysis of text: {text}")
            print(predict_sentiment(text, model, tokenizer, device))

        self.next(self.upload_model_to_gcs)