in src/datatrove/utils/stats.py [0:0]
def to_dict(self):
    """Serialize this metric into the smallest form that still carries information.

    Returns:
        ``0`` when ``self.total`` is zero. Otherwise, the bare ``self.total``
        when no other field is worth reporting, or a dict that always holds
        ``"total"`` and may additionally hold ``"n"``, ``"mean"``,
        ``"variance"``, ``"std_dev"``, ``"min"``, ``"max"`` and ``"unit"``.
    """
    # Nothing was accumulated: collapse to a plain zero.
    if self.total == 0:
        return 0

    summary = {"total": self.total}

    # NOTE(review): nesting of the mean/spread checks under the n-check is
    # inferred from the accompanying comments -- confirm against intended output.
    # Per-sample details only matter with more than one sample, and only when
    # the samples were not all "+1" increments (in which case n == total).
    if self.n > 1 and self.n != self.total:
        summary["n"] = self.n

        # The samples actually vary unless mean, min and max all coincide.
        all_identical = self.mean == self.max and self.mean == self.min
        if not all_identical:
            summary.update(
                {
                    "mean": self.mean,
                    "variance": self.variance,
                    "std_dev": self.standard_deviation,
                    "min": self.min,
                    "max": self.max,
                }
            )
        elif self.mean != 1:
            # Constant samples: the mean alone is enough, unless it is 1.
            summary["mean"] = self.mean

    # "doc" is treated as the implicit unit and therefore omitted.
    if self.unit != "doc":
        summary["unit"] = self.unit

    # A dict holding nothing but the total is flattened to the number itself.
    if len(summary) > 1:
        return summary
    return self.total