def generate_response()

in agora/cerebral_api/src/app.py [0:0]


    def generate_response(self, question: str, industry: str = None, role: str = None, sid: str = None) -> str:
        """Generate response using local model and retrieved context."""
        try:
            if VERBOSE:
                logger.debug(f"Generating response for question: {question}")

            # Get context from vector store
            context = self._get_context(question)
            
            if VERBOSE:
                logger.debug("Retrieved context:")
                logger.debug(context)
            
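            # If a Socket.IO session id was provided, send the retrieved context
            # to that client before generation starts.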
            if sid:
                socketio.emit('context', {'context': context}, room=sid)

            # Format prompt with explicit instructions about context usage
            prompt = (
                "You are an AI assistant that provides accurate answers based on the given context. "
                "Always reference and use the provided context in your response. "
                "If the context doesn't contain enough information, acknowledge this explicitly.\n\n"
                f"Context:\n{context}\n\n"
                f"Question: {question}\n\n"
                "Answer (using the above context): "
            )

            # Generate response using local model
            input_tokens = self.tokenizer.encode(prompt)
            params = og.GeneratorParams(self.model)
            params.set_search_options(**self.search_options)
            params.input_ids = input_tokens
            
            generator = og.Generator(self.model, params)
            generated_text = ""

            try:
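                # Decode the answer one token at a time: compute_logits() runs the
                # model forward pass, generate_next_token() selects the next token
                # according to the configured search options, and the streaming
                # tokenizer decodes incrementally so multi-byte characters are
                # handled correctly across token boundaries.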
                while not generator.is_done():
                    generator.compute_logits()
                    generator.generate_next_token()
                    
                    new_token = generator.get_next_tokens()[0]
                    token_text = self.tokenizer_stream.decode(new_token)
                    generated_text += token_text
                    
                    if sid:
                        socketio.emit('token', {'token': token_text}, room=sid)
                
                if sid:
                    socketio.emit('complete', room=sid)
                
                if VERBOSE:
                    logger.debug(f"Generated response length: {len(generated_text)}")
                    logger.debug(f"Response preview: {generated_text[:200]}...")
                
                return generated_text

            finally:
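                # Explicitly drop the generator so its native resources are released
                # right away rather than waiting for garbage collection.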
                del generator

        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            logger.error(error_msg)
            if sid:
                socketio.emit('error', {'error': error_msg}, room=sid)
            return error_msg
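
For orientation, here is a minimal sketch of how this method might be wired to a
Flask-SocketIO handler so the 'context', 'token', and 'complete'/'error' events
above reach the asking client. The 'ask' event name, the handler, the payload
shape, and the `rag` instance are illustrative assumptions, not taken from app.py.

from flask import request

@socketio.on('ask')  # hypothetical event name
def handle_ask(data):
    # request.sid identifies the asking client; passing it as `sid` enables the
    # per-client 'context', 'token', and 'complete'/'error' emits above.
    answer = rag.generate_response(  # `rag`: assumed instance of this class
        question=data.get('question', ''),
        industry=data.get('industry'),
        role=data.get('role'),
        sid=request.sid,
    )
    return answer  # the return value is delivered to the client as the Socket.IO ack

Client-side code would then listen for 'token' events to render the answer incrementally.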