# compare_models.py
import logging
from collections import defaultdict

import numpy as np

# DummyModel and the `self.*` attributes used below (models, baselines,
# other_dl, args, ...) are defined elsewhere in this repository.
def _run_inference(self, msg, n_games=0):
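    """Run every model in self.models over self.other_dl in eval mode,
    accumulate per-model losses and metrics, log them, and return a dict
    with the last batch's outputs and inputs plus the averaged losses and
    per-game metrics. If n_games > 0, stop after that many games.
    """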
    other_metrics = self.__init_metrics()
    tot_loss = [0 for _ in self.models]
    n_g = 0  # games processed
    n_f = 0  # frames processed
    # Placeholders; overwritten with the last batch inside the loop below.
    scmap, race, inp, targ = 0, 0, 0, 0
    for model in self.models:
        model.eval()
    for data in self.other_dl:
        if n_games > 0 and n_g >= n_games:
            break
        n_g += 1
        scmap, race, inp, targ, _, vis = data
        outputs, losses = self._do_model_step(scmap, race, inp, targ, None, optimize=False)
        n_f += targ.size(0)
        for mi, loss in enumerate(losses):
            # Undo per-batch averaging so the division by n_f below yields a per-frame mean.
            tot_loss[mi] += loss * (targ.size(0) if self.args.loss_averaging else 1)
        self.__accumulate_metrics(inp, targ, vis, outputs, other_metrics)
    logging.log(42, msg)
    ret = defaultdict(list)
    # outputs/inputs reflect only the last batch seen above.
    ret['outputs'] = [[y.cpu() for y in x] for x in outputs]
    ret['inputs'] = (scmap.cpu(), race.cpu(), inp.cpu(), targ.cpu())
    ret['loss'] = [tl / n_f for tl in tot_loss]
    ret['metrics'] = defaultdict(list)
    for i, model in enumerate(self.models + [DummyModel(bname) for bname in self.baselines]):
        for metric_key, values in other_metrics[i].items():
            # Metrics were accumulated per game, so divide by n_g
            # (dividing by n_f would normalize per frame instead).
            v = values / n_g
            mk = metric_key
            if not np.isscalar(v):
                # Non-scalar metrics hold (precision, recall); append the F1 score.
                mk = mk + '_prf'
                v = [v[0], v[1], 2 * v[0] * v[1] / (v[0] + v[1] + 1E-9)]  # TODO F1 score after all averaging ok?
                fmt = "{:<10.4f}"
            else:
                v = [v]
                fmt = "{:<10.4E}"
            # Baseline DummyModels are logged but not stored in the returned metrics.
            if not isinstance(model, DummyModel):
                ret['metrics'][mk].append(v)
            logging.log(42, "{} {} {}".format(
                model.model_name.ljust(20),
                metric_key.ljust(10),
                " ".join(fmt.format(x) for x in v)))
    # Restore training mode on the models that were switched to eval() above.
    for model in self.models:
        model.train()
    return dict(ret)
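
# Minimal usage sketch (assumptions: a comparer class exposing this method,
# with `models`, `baselines`, `other_dl`, and `args` wired up in its
# constructor; the class name and the "valid" message are hypothetical):
#
#   comparer = ModelComparer(args)
#   results = comparer._run_inference("valid", n_games=50)
#   for model, loss in zip(comparer.models, results['loss']):
#       print(model.model_name, float(loss))
#   print(dict(results['metrics']))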