in debug.py [0:0]
def _ttft_processor(self) -> processor.Processor:
    """Build the processor chain that measures TTFT around ``self._p``.

    The returned chain is ``log_on_close + self._p + log_on_first``:

    * ``log_on_close`` forwards every input part unchanged. Once the input
      stream is exhausted it stores ``time.perf_counter()`` in
      ``self._start``, sets ``self._model_call_event`` and logs the start
      time.
    * ``log_on_first`` forwards every part it receives. Just before the
      first part seen while ``self._start`` is set, it stores the elapsed
      time in ``self._ttft`` and yields one status part whose text is
      ``self._message`` followed by ``' TTFT=<seconds> seconds'``.

    ``self._message`` itself is never modified, so reusing the chain does
    not pile up TTFT suffixes from earlier runs.

    Returns:
        The composed processor.
    """

    @processor.processor_function
    async def log_on_close(
        content: AsyncIterable[ProcessorPart],
    ) -> AsyncIterable[ProcessorPart]:
        # Re-arm the event so each run signals its own start.
        self._model_call_event.clear()
        async for part in content:
            yield part
        # The clock starts only after the whole input has been forwarded.
        self._start = time.perf_counter()
        self._model_call_event.set()
        logging.info('ttft single stream start time: %s', self._start)

    @processor.processor_function
    async def log_on_first(
        content: AsyncIterable[ProcessorPart],
    ) -> AsyncIterable[ProcessorPart]:
        first_part = True
        async for part in content:
            if first_part and self._start is not None:
                duration = time.perf_counter() - self._start
                self._ttft = duration
                # Build the text locally: appending to self._message would
                # accumulate one TTFT suffix per run on the shared instance.
                status_text = f'{self._message} TTFT={duration:.2f} seconds'
                yield processor.status(ProcessorPart(status_text))
                first_part = False
            yield part

    return log_on_close + self._p + log_on_first