def _ttft_processor()

in debug.py [0:0]


  def _ttft_processor(self) -> processor.Processor:
    """Builds the processor that measures TTFT around the wrapped processor.

    The result combines three stages with `+`, in this order:
    `log_on_close`, the wrapped processor `self._p`, and `log_on_first`.

    * `log_on_close` forwards every input part unchanged. Once the input
      stream is exhausted it records the start time in `self._start` and sets
      `self._model_call_event`.
    * `log_on_first` forwards every output part unchanged. Before the first
      output part, and only if a start time has been recorded, it stores the
      elapsed time in `self._ttft` and emits one status part carrying the
      message followed by the measured TTFT.

    Returns:
      The combined processor.
    """

    @processor.processor_function
    async def log_on_close(
        content: AsyncIterable[ProcessorPart],
    ) -> AsyncIterable[ProcessorPart]:
      # The event stays cleared for as long as input is still being consumed.
      self._model_call_event.clear()
      async for part in content:
        yield part
      # Input is exhausted: this is the reference point for the measurement.
      self._start = time.perf_counter()
      self._model_call_event.set()
      logging.info('ttft single stream start time: %s', self._start)

    @processor.processor_function
    async def log_on_first(
        content: AsyncIterable[ProcessorPart],
    ) -> AsyncIterable[ProcessorPart]:
      first_part = True
      async for part in content:
        # Without a recorded start time there is nothing to measure against,
        # so the status part is skipped entirely.
        if first_part and self._start is not None:
          duration = time.perf_counter() - self._start
          self._ttft = duration
          # Build the status text locally rather than appending to
          # self._message: mutating the base message would make every later
          # run of this processor accumulate one more " TTFT=..." suffix.
          message = f'{self._message} TTFT={duration:.2f} seconds'
          yield processor.status(ProcessorPart(message))
        first_part = False
        yield part

    return log_on_close + self._p + log_on_first