in agora/cerebral_api/src/app.py [0:0]
def generate_response_slm(self, question: str, industry: str = None, role: str = None, sid: str = None) -> str:
    """Generate an answer with the local model and stream progress to a socket room.

    The method retrieves context for the question via ``self._get_context``,
    fills ``self.prompt_template`` with that context and the question, and
    then decodes the model output token by token.

    When ``sid`` is given, the following events are emitted to that room:
    ``status`` (progress messages), ``context`` (the retrieved context),
    ``token`` (one per decoded token), ``complete`` (after the last token)
    and ``error`` (on context-retrieval or generation failure).

    Args:
        question: The user question to answer.
        industry: Accepted for interface compatibility; not used by this method.
        role: Accepted for interface compatibility; not used by this method.
        sid: Socket room to emit progress events to. When falsy, nothing is
            emitted and only the return value carries the result.

    Returns:
        The full generated text. If anything outside context retrieval fails,
        the error message string ("Error generating response: ...") is
        returned instead of raising.
    """

    def notify(event, *payload):
        # Emit to the caller's room only; a no-op when no sid was supplied.
        if sid:
            socketio.emit(event, *payload, room=sid)

    try:
        if VERBOSE:
            logger.debug(f"Generating response for question: {question}")

        # Send an initial status so the client sees activity right away.
        notify('status', {'message': 'Searching relevant documents...'})

        # Context retrieval is best-effort: on failure, generation continues
        # with a placeholder context instead of aborting the request.
        try:
            context = self._get_context(question)
            # Forward the context as soon as it is available.
            notify('context', {'context': context})
            notify('status', {'message': 'Generating response...'})
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception(f"Error retrieving context: {e}")
            context = "Error retrieving context. Proceeding with general response."
            notify('error', {'error': 'Context retrieval error, proceeding with general response'})

        prompt = self.prompt_template.format(
            context=context,
            question=question,
        )

        # Keep-alive bookkeeping. A monotonic clock is used because only the
        # elapsed interval matters and wall-clock adjustments must not affect it.
        keepalive_interval = 2.0  # seconds between 'Still generating...' updates
        last_keepalive = time.monotonic()

        # Prepare the generator for this prompt.
        input_tokens = self.tokenizer.encode(prompt)
        params = og.GeneratorParams(self.model)
        params.set_search_options(**self.search_options)
        params.input_ids = input_tokens
        generator = og.Generator(self.model, params)

        try:
            pieces = []
            while not generator.is_done():
                now = time.monotonic()
                if sid and (now - last_keepalive) > keepalive_interval:
                    notify('status', {'message': 'Still generating...'})
                    last_keepalive = now

                generator.compute_logits()
                generator.generate_next_token()
                new_token = generator.get_next_tokens()[0]
                token_text = self.tokenizer_stream.decode(new_token)
                pieces.append(token_text)

                notify('token', {'token': token_text})

                # Small pause so the socket is not overwhelmed with token events.
                # NOTE(review): the pause runs even when no sid is given --
                # confirm whether it should be limited to socket sessions.
                time.sleep(0.01)

            # Join once at the end instead of growing a string per token.
            generated_text = "".join(pieces)

            notify('complete')

            if VERBOSE:
                logger.debug(f"Generated response length: {len(generated_text)}")
                logger.debug(f"Response preview: {generated_text[:200]}...")

            return generated_text
        finally:
            # Drop the local reference to the generator on every exit path.
            del generator

    except Exception as e:
        error_msg = f"Error generating response: {e}"
        # Log with traceback; the caller still receives the message as a string.
        logger.exception(error_msg)
        notify('error', {'error': error_msg})
        return error_msg