in pytext/metric_reporters/language_model_metric_reporter.py
# Relies on this module's imports: math, json, torch, Stage, and cuda.
def report_realtime_metric(self, stage):
    if stage != Stage.TRAIN:
        return

    if cuda.DISTRIBUTED_WORLD_SIZE > 1:
        # Pack every per-rank counter into one tensor so they can be
        # aggregated across workers with a single all_reduce call.
        all_reduce_stats = cuda.tensor(
            [
                self.last_batch_tps,
                self.last_batch_loss,
                self.aggregate_loss,
                self.total_masked_tokens,
                self.realtime_meters["tps"].n,
            ],
            dtype=torch.float32,
        )
        total_elapsed_time = self.realtime_meters["tps"].elapsed_time
        torch.distributed.all_reduce(all_reduce_stats)
        # average last_batch_loss by distributed_world_size
        all_reduce_stats[1:2].div_(cuda.DISTRIBUTED_WORLD_SIZE)
        [
            last_batch_tps,
            last_batch_loss,
            aggregate_loss,
            total_masked_tokens,
            total_tokens,
        ] = all_reduce_stats.tolist()
        # Aggregate throughput: tokens summed over all ranks, divided by
        # this rank's elapsed time.
        tps = total_tokens / total_elapsed_time
    else:
        last_batch_tps = self.last_batch_tps
        last_batch_loss = self.last_batch_loss
        aggregate_loss = self.aggregate_loss
        total_masked_tokens = self.total_masked_tokens
        tps = self.realtime_meters["tps"].avg
    print(
        f"Tokens/s: {last_batch_tps:.0f}, "
        f"batch ppl: {math.exp(last_batch_loss):.2f}, "
        f"agg ppl: {math.exp(self._calculate_loss(aggregate_loss, total_masked_tokens)):.2f}, "
        f"number of batches: {self.total_batches:.0f}, "
        f"accumulated tokens/s: {tps:.0f}",
        flush=True,
    )
    # TODO: remove GPU0 report
    print(
        f"GPU-0 tokens/s: {self.last_batch_tps:.0f}, "
        f"batch ppl: {math.exp(self.last_batch_loss):.2f}, "
        f"agg ppl: {math.exp(self.calculate_loss()):.2f}, "
        f"number of batches: {self.total_batches}, "
        f"accumulated tokens/s: {self.realtime_meters['tps'].avg:.0f}",
        flush=True,
    )
    if self.pep_format:
        # used for pep regression benchmark
        print(
            "PyTorchObserver "
            + json.dumps(
                {
                    "type": "MLM",
                    "metric": "tps",
                    "unit": "token/sec",
                    "value": f"{tps:.0f}",
                }
            ),
            flush=True,
        )
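
The distributed branch boils down to a single-collective aggregation pattern: sum every counter across ranks in one all_reduce, then divide only the per-batch loss entry by the world size so it ends up averaged. Below is a minimal sketch of that pattern outside of PyText, using torch.distributed directly instead of the pytext cuda helpers; it assumes a process group is already initialized, and the aggregate_stats helper name is hypothetical (only the stat ordering mirrors the code above).

import torch
import torch.distributed as dist


def aggregate_stats(last_batch_tps, last_batch_loss, aggregate_loss,
                    total_masked_tokens, total_tokens):
    # Pack all counters into one tensor so a single collective suffices.
    stats = torch.tensor(
        [last_batch_tps, last_batch_loss, aggregate_loss,
         total_masked_tokens, total_tokens],
        dtype=torch.float32,
    )
    # Default reduce op is SUM, so every entry is summed across ranks.
    dist.all_reduce(stats)
    # Only the per-batch loss is averaged; the other counters stay as sums.
    stats[1:2].div_(dist.get_world_size())
    return stats.tolist()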