def run_text_generation_energy_tracking()

in optimum_benchmark/scenarios/inference/scenario.py [0:0]


    def run_text_generation_energy_tracking(self):
        """Track the energy of the prefill and generate phases and fill the report.

        Each phase is invoked repeatedly inside an energy-tracker context. The
        loop keeps going while either the configured duration has not yet
        elapsed or the configured iteration count has not yet been reached.
        The tracker's energy reading is then divided by the number of calls.
        Decode energy is not tracked on its own: it is the per-call generate
        energy minus the per-call prefill energy.

        Results are written to ``self.report.prefill``, ``self.report.generate``
        and ``self.report.decode`` (``energy`` and ``efficiency`` on each).

        Note: when ``self.config.duration`` and ``self.config.iterations`` are
        both non-positive, the loop body never executes and the reading is
        divided by a call count of zero.
        """
        self.logger.info("\t+ Running Text Generation energy tracking")

        config = self.config
        # The prefill overrides take precedence over the user's generate kwargs.
        prefill_only_kwargs = {
            **config.generate_kwargs,
            **TEXT_GENERATION_PREFILL_OVERRIDES,
        }

        # ---- prefill phase ----
        runs = 0
        spent = 0
        began = time.perf_counter()

        with self.energy_tracker.track(task_name="prefill"):
            while spent < config.duration or runs < config.iterations:
                self.backend.prefill(self.inputs, prefill_only_kwargs)
                spent = time.perf_counter() - began
                runs += 1

        energy_per_prefill = self.energy_tracker.get_energy() / runs

        prefill_section = self.report.prefill
        prefill_section.energy = energy_per_prefill
        prefill_section.efficiency = Efficiency.from_energy(
            energy_per_prefill,
            self.atomic_prefill_volume,
            unit=PREFILL_EFFICIENCY_UNIT,
        )

        # ---- generate phase ----
        runs = 0
        spent = 0
        began = time.perf_counter()

        with self.energy_tracker.track(task_name="generate"):
            while spent < config.duration or runs < config.iterations:
                self.backend.generate(self.inputs, config.generate_kwargs)
                spent = time.perf_counter() - began
                runs += 1

        energy_per_generate = self.energy_tracker.get_energy() / runs

        generate_section = self.report.generate
        generate_section.energy = energy_per_generate
        generate_section.efficiency = Efficiency.from_energy(
            energy_per_generate,
            self.atomic_generate_volume,
            unit=GENERATE_EFFICIENCY_UNIT,
        )

        # ---- decode phase (derived, not tracked separately) ----
        energy_per_decode = energy_per_generate - energy_per_prefill

        decode_section = self.report.decode
        decode_section.energy = energy_per_decode
        decode_section.efficiency = Efficiency.from_energy(
            energy_per_decode,
            self.atomic_decode_volume,
            unit=DECODE_EFFICIENCY_UNIT,
        )