def __add__()

in src/datatrove/utils/stats.py [0:0]


    def __add__(self, other):
        """Return a new object holding the combined statistics of ``self`` and ``other``.

        Args:
            other: Either an instance of ``type(self)`` or a value that
                ``type(self).from_dict`` can convert into one; the conversion
                is applied whenever the ``isinstance`` check fails.

        Returns:
            A new ``type(self)`` instance built from the merged ``total``,
            ``n``, ``mean``, ``min``, ``max``, ``_running_variance`` and
            ``unit`` values.
        """
        cls = type(self)
        if not isinstance(other, cls):
            other = cls.from_dict(other)

        count = self.n + other.n
        if count > 0:
            # Parallel merge of mean and variance accumulator:
            # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
            combined_mean = (self.n * self.mean + other.n * other.mean) / count
            mean_gap = self.mean - other.mean
            cross_term = (mean_gap * mean_gap * self.n * other.n) / count
            combined_variance = self._running_variance + other._running_variance + cross_term
        else:
            # Combined count is not positive: skip the divisions and fall back to 0.0.
            combined_mean = 0.0
            combined_variance = 0.0

        grand_total = self.total + other.total
        upper = max(self.max, other.max)
        lower = min(self.min, other.min)

        # The other operand's unit is used only when this one's unit is "doc".
        if self.unit != "doc":
            merged_unit = self.unit
        else:
            merged_unit = other.unit

        return cls(
            total=grand_total,
            n=count,
            mean=combined_mean,
            min=lower,
            max=upper,
            _running_variance=combined_variance,
            unit=merged_unit,
        )