in agora/cerebral_api/src/app.py [0:0]
    def generate_response(self, question: str, industry: str = None, role: str = None, sid: str = None) -> str:
        """Generate a response using the local model and retrieved context."""
        # Note: industry and role are accepted but not currently used in the prompt.
        try:
            if VERBOSE:
                logger.debug(f"Generating response for question: {question}")

            # Get context from the vector store
            context = self._get_context(question)
            if VERBOSE:
                logger.debug("Retrieved context:")
                logger.debug(context)
            if sid:
                socketio.emit('context', {'context': context}, room=sid)

            # Format the prompt with explicit instructions about context usage
            prompt = (
                "You are an AI assistant that provides accurate answers based on the given context. "
                "Always reference and use the provided context in your response. "
                "If the context doesn't contain enough information, acknowledge this explicitly.\n\n"
                f"Context:\n{context}\n\n"
                f"Question: {question}\n\n"
                "Answer (using the above context): "
            )

            # Generate the response using the local model
            input_tokens = self.tokenizer.encode(prompt)
            params = og.GeneratorParams(self.model)
            params.set_search_options(**self.search_options)
            params.input_ids = input_tokens
            generator = og.Generator(self.model, params)

            generated_text = ""
            try:
                # Stream tokens one at a time, emitting each to the client
                # when a Socket.IO session id was provided.
                while not generator.is_done():
                    generator.compute_logits()
                    generator.generate_next_token()
                    new_token = generator.get_next_tokens()[0]
                    token_text = self.tokenizer_stream.decode(new_token)
                    generated_text += token_text
                    if sid:
                        socketio.emit('token', {'token': token_text}, room=sid)

                if sid:
                    socketio.emit('complete', room=sid)

                if VERBOSE:
                    logger.debug(f"Generated response length: {len(generated_text)}")
                    logger.debug(f"Response preview: {generated_text[:200]}...")

                return generated_text
            finally:
                # Release generator resources explicitly
                del generator

        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            logger.error(error_msg)
            if sid:
                socketio.emit('error', {'error': error_msg}, room=sid)
            return error_msg
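
The method relies on self.model, self.tokenizer, self.tokenizer_stream, and self.search_options being set up elsewhere. A minimal sketch of what that initialization might look like with onnxruntime-genai is below; model_path and the specific search option values are assumptions for illustration, while og.Model, og.Tokenizer, and create_stream are the library's standard loading calls.

import onnxruntime_genai as og

# Hypothetical initialization (e.g., in the class __init__); model_path and
# the option values are illustrative, not taken from the repo.
self.model = og.Model(model_path)
self.tokenizer = og.Tokenizer(self.model)
self.tokenizer_stream = self.tokenizer.create_stream()  # incremental decoder used in the token loop
self.search_options = {
    "max_length": 2048,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
}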
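
For reference, a minimal sketch of how a Flask-SocketIO handler might invoke this method so clients receive the streamed events. The 'ask' event name, the handler, and the llm instance are assumptions for illustration; the emitted 'context', 'token', 'complete', and 'error' events match what generate_response itself sends.

from flask import request

@socketio.on('ask')  # hypothetical event name
def handle_ask(data):
    sid = request.sid  # Flask-SocketIO exposes the client's session id here
    question = data.get('question', '')
    # Run generation in a background task so token emits stream incrementally.
    socketio.start_background_task(
        llm.generate_response,  # `llm` assumed to be an instance of this class
        question,
        data.get('industry'),
        data.get('role'),
        sid,
    )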