lib/maths/common/unittest/CBasicStatisticsTest.cc (1,102 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <core/CJsonStatePersistInserter.h>
#include <core/CJsonStateRestoreTraverser.h>
#include <core/CLogger.h>
#include <core/CPersistUtils.h>
#include <core/CSmallVector.h>
#include <maths/common/CBasicStatistics.h>
#include <maths/common/CBasicStatisticsCovariances.h>
#include <maths/common/CBasicStatisticsPersist.h>
#include <maths/common/CChecksum.h>
#include <maths/common/CLinearAlgebra.h>
#include <maths/common/CLinearAlgebraTools.h>
#include <maths/common/CSampling.h>
#include <test/BoostTestCloseAbsolute.h>
#include <test/CRandomNumbers.h>
#include <boost/test/unit_test.hpp>
#include <algorithm>
#include <functional>
#include <numeric>
#include <string>
#include <vector>
BOOST_AUTO_TEST_SUITE(CBasicStatisticsTest)
using namespace ml;
namespace {
// Plain vector of doubles used for raw sample values.
using TDoubleVec = std::vector<double>;
// Scalar accumulators over doubles. In this file they are read back with
// CBasicStatistics::count/mean, additionally ::variance and additionally
// ::skewness, respectively.
using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean<double>::TAccumulator;
using TMeanVarAccumulator = maths::common::CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
using TMeanVarSkewAccumulator =
maths::common::CBasicStatistics::SSampleMeanVarSkew<double>::TAccumulator;
// Collections of the accumulators above held in a core::CSmallVector
// parameterised with 2.
using TMeanAccumulator2Vec = core::CSmallVector<TMeanAccumulator, 2>;
using TMeanVarAccumulator2Vec = core::CSmallVector<TMeanVarAccumulator, 2>;
using TMeanVarSkewAccumulator2Vec = core::CSmallVector<TMeanVarSkewAccumulator, 2>;
// Collections of the accumulators above held in a std::vector.
using TMeanAccumulatorVec = std::vector<TMeanAccumulator>;
using TMeanVarAccumulatorVec = std::vector<TMeanVarAccumulator>;
using TMeanVarSkewAccumulatorVec = std::vector<TMeanVarSkewAccumulator>;
// Tag under which testPersistCollection persists, and SRestore restores,
// a collection of accumulators.
const std::string TAG("a");
//! Callable used with a state restore traverser to rebuild a persisted value.
//!
//! A std::vector target is restored with core::CPersistUtils::restore under
//! TAG; any other target is restored by passing the traverser's current value
//! to the target's own fromDelimited.
struct SRestore {
    //! Restore a std::vector of elements; returns the result of the restore call.
    template<class ELEMENT>
    auto operator()(std::vector<ELEMENT>& restored,
                    core::CStateRestoreTraverser& traverser) const -> bool {
        return core::CPersistUtils::restore(TAG, restored, traverser);
    }

    //! Restore a single object from its delimited representation; returns
    //! the result of fromDelimited.
    template<class OBJECT>
    auto operator()(OBJECT& restored, core::CStateRestoreTraverser& traverser) const
        -> bool {
        return restored.fromDelimited(traverser.value());
    }
};
template<typename T>
void testPersistCollection(const T& moments) {
std::ostringstream json;
core::CJsonStatePersistInserter inserter(json);
core::CPersistUtils::persist(TAG, moments, inserter);
LOG_DEBUG(<< "Moments JSON representation:\n" << json.str());
std::istringstream jsonStrm{"{\"topLevel\" : " + json.str() + "}"};
core::CJsonStateRestoreTraverser traverser(jsonStrm);
T restored;
BOOST_TEST_REQUIRE(traverser.traverseSubLevel(
std::bind(SRestore(), std::ref(restored), std::placeholders::_1)));
LOG_DEBUG(<< "restored = " << restored);
BOOST_REQUIRE_EQUAL(moments.size(), restored.size());
for (std::size_t i = 0; i < restored.size(); ++i) {
BOOST_REQUIRE_EQUAL(moments[i].checksum(), restored[i].checksum());
}
}
}
BOOST_AUTO_TEST_CASE(testMean) {
    // The mean of a vector of values must be exactly equal to the hand
    // calculated value.
    const double values[] = {0.9, 10.0, 5.6, 1.23, -12.3, 445.2, 0.0, 1.2};
    maths::common::CBasicStatistics::TDoubleVec valuesVec(std::begin(values),
                                                          std::end(values));

    double actual = maths::common::CBasicStatistics::mean(valuesVec);

    BOOST_REQUIRE_EQUAL(56.47875, actual);
}
BOOST_AUTO_TEST_CASE(testVarianceAtPercentile) {
    // Check the calibration of varianceAtPercentile: for each percentile and
    // sample size, the percentage of trials whose sample variance is below
    // the value it returns should be close to the percentile.

    test::CRandomNumbers rng;
    TDoubleVec samples;
    TMeanAccumulator bias;

    for (auto percentile : {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0}) {
        for (std::size_t n : {5, 20, 50}) {
            double threshold{maths::common::CBasicStatistics::varianceAtPercentile(
                percentile, 1.0, static_cast<double>(n - 1))};

            // 1000 trials, each contributing 0.1 percentage points.
            double percentageBelow{0.0};
            for (std::size_t trial = 0; trial < 1000; ++trial) {
                rng.generateNormalSamples(0.0, 1.0, n, samples);
                TMeanVarAccumulator moments;
                for (double value : samples) {
                    moments.add(value);
                }
                if (maths::common::CBasicStatistics::variance(moments) < threshold) {
                    percentageBelow += 0.1;
                }
            }

            LOG_DEBUG(<< "variance(" << percentile << ") = " << threshold
                      << ", % less than = " << percentageBelow);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(percentile, percentageBelow, 4.0);
            bias.add(percentile - percentageBelow);
        }
    }

    // NOTE(review): only an upper bound on the signed mean bias is checked, so
    // a large negative bias would pass - confirm this is intended.
    LOG_DEBUG(<< "bias = " << maths::common::CBasicStatistics::mean(bias));
    BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(bias) < 0.1);
}
BOOST_AUTO_TEST_CASE(testCentralMoments) {
    // Exercises the central moment accumulators (mean; mean and variance;
    // mean, variance and skewness): basic accumulation, scaling of the count,
    // weighted updates, addition, subtraction, vector valued samples and
    // persistence of collections of accumulators.

    LOG_DEBUG(<< "Test mean double");
    {
        double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2};
        TMeanAccumulator acc;
        const std::size_t count = std::size(samples);
        acc = std::for_each(samples, samples + count, acc);
        BOOST_REQUIRE_EQUAL(
            count, static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc)));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.72875, maths::common::CBasicStatistics::mean(acc), 0.000005);

        // Scaling by 0.5 must exactly halve the count.
        double n0 = maths::common::CBasicStatistics::count(acc);
        maths::common::CBasicStatistics::scale(0.5, acc);
        double n1 = maths::common::CBasicStatistics::count(acc);
        BOOST_REQUIRE_EQUAL(n1, 0.5 * n0);
    }

    LOG_DEBUG(<< "Test mean float");
    {
        using TFloatMeanAccumulator =
            maths::common::CBasicStatistics::SSampleMean<float>::TAccumulator;
        float samples[] = {0.9f, 10.0f, 5.6f, 1.23f, -12.3f, 7.2f, 0.0f, 1.2f};
        TFloatMeanAccumulator acc;
        const std::size_t count = std::size(samples);
        acc = std::for_each(samples, samples + count, acc);
        BOOST_REQUIRE_EQUAL(
            count, static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc)));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.72875f, maths::common::CBasicStatistics::mean(acc), 0.000005f);
    }

    LOG_DEBUG(<< "Test mean and variance");
    {
        double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2};
        TMeanVarAccumulator acc;
        const std::size_t count = std::size(samples);
        acc = std::for_each(samples, samples + count, acc);
        BOOST_REQUIRE_EQUAL(
            count, static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc)));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.72875, maths::common::CBasicStatistics::mean(acc), 0.000005);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            44.90633, maths::common::CBasicStatistics::variance(acc), 0.000005);

        // Scaling by 0.5 must exactly halve the count.
        double n0 = maths::common::CBasicStatistics::count(acc);
        maths::common::CBasicStatistics::scale(0.5, acc);
        double n1 = maths::common::CBasicStatistics::count(acc);
        BOOST_REQUIRE_EQUAL(n1, 0.5 * n0);
    }

    LOG_DEBUG(<< "Test mean, variance and skew");
    {
        double samples[] = {0.9, 10.0, 5.6, 1.23, -12.3, 7.2, 0.0, 1.2};
        TMeanVarSkewAccumulator acc;
        const std::size_t count = std::size(samples);
        acc = std::for_each(samples, samples + count, acc);
        BOOST_REQUIRE_EQUAL(
            count, static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc)));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            1.72875, maths::common::CBasicStatistics::mean(acc), 0.000005);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            44.90633, maths::common::CBasicStatistics::variance(acc), 0.000005);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(
            -0.82216, maths::common::CBasicStatistics::skewness(acc), 0.000005);

        // Scaling by 0.5 must exactly halve the count.
        double n0 = maths::common::CBasicStatistics::count(acc);
        maths::common::CBasicStatistics::scale(0.5, acc);
        double n1 = maths::common::CBasicStatistics::count(acc);
        BOOST_REQUIRE_EQUAL(n1, 0.5 * n0);
    }

    LOG_DEBUG(<< "Test weighted update");
    {
        // Adding a sample once with an integer weight must agree with adding
        // it that many times with the default weight.
        double samples[] = {0.9, 1.0, 2.3, 1.5};
        std::size_t weights[] = {1, 4, 2, 3};

        {
            TMeanAccumulator acc1;
            TMeanAccumulator acc2;
            for (std::size_t i = 0; i < std::size(samples); ++i) {
                acc1.add(samples[i], static_cast<double>(weights[i]));
                for (std::size_t j = 0; j < weights[i]; ++j) {
                    acc2.add(samples[i]);
                }
            }
            BOOST_REQUIRE_CLOSE_ABSOLUTE(maths::common::CBasicStatistics::mean(acc1),
                                         maths::common::CBasicStatistics::mean(acc2), 1e-10);
        }

        {
            TMeanVarAccumulator acc1;
            TMeanVarAccumulator acc2;
            for (std::size_t i = 0; i < std::size(samples); ++i) {
                acc1.add(samples[i], static_cast<double>(weights[i]));
                for (std::size_t j = 0; j < weights[i]; ++j) {
                    acc2.add(samples[i]);
                }
            }
            BOOST_REQUIRE_CLOSE_ABSOLUTE(maths::common::CBasicStatistics::mean(acc1),
                                         maths::common::CBasicStatistics::mean(acc2), 1e-10);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                maths::common::CBasicStatistics::variance(acc1),
                maths::common::CBasicStatistics::variance(acc2), 1e-10);
        }

        {
            TMeanVarSkewAccumulator acc1;
            TMeanVarSkewAccumulator acc2;
            for (std::size_t i = 0; i < std::size(samples); ++i) {
                acc1.add(samples[i], static_cast<double>(weights[i]));
                for (std::size_t j = 0; j < weights[i]; ++j) {
                    acc2.add(samples[i]);
                }
            }
            BOOST_REQUIRE_CLOSE_ABSOLUTE(maths::common::CBasicStatistics::mean(acc1),
                                         maths::common::CBasicStatistics::mean(acc2), 1e-10);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                maths::common::CBasicStatistics::variance(acc1),
                maths::common::CBasicStatistics::variance(acc2), 1e-10);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                maths::common::CBasicStatistics::skewness(acc1),
                maths::common::CBasicStatistics::skewness(acc2), 1e-10);
        }
    }

    LOG_DEBUG(<< "Test addition");
    {
        // Test addition: the two halves of the sample set used above are
        // accumulated separately and their sum must reproduce the moments of
        // the full set.
        double samples1[] = {0.9, 10.0, 5.6, 1.23};
        double samples2[] = {-12.3, 7.2, 0.0, 1.2};
        const std::size_t count1 = std::size(samples1);
        const std::size_t count2 = std::size(samples2);

        {
            TMeanAccumulator acc1;
            TMeanAccumulator acc2;
            acc1 = std::for_each(samples1, samples1 + count1, acc1);
            acc2 = std::for_each(samples2, samples2 + count2, acc2);
            BOOST_REQUIRE_EQUAL(
                count1 + count2,
                static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc1 + acc2)));
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                1.72875, maths::common::CBasicStatistics::mean(acc1 + acc2), 0.000005);
        }

        {
            TMeanVarAccumulator acc1;
            TMeanVarAccumulator acc2;
            acc1 = std::for_each(samples1, samples1 + count1, acc1);
            acc2 = std::for_each(samples2, samples2 + count2, acc2);
            BOOST_REQUIRE_EQUAL(
                count1 + count2,
                static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc1 + acc2)));
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                1.72875, maths::common::CBasicStatistics::mean(acc1 + acc2), 0.000005);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                44.90633, maths::common::CBasicStatistics::variance(acc1 + acc2), 0.000005);
        }

        {
            TMeanVarSkewAccumulator acc1;
            TMeanVarSkewAccumulator acc2;
            acc1 = std::for_each(samples1, samples1 + count1, acc1);
            acc2 = std::for_each(samples2, samples2 + count2, acc2);
            BOOST_REQUIRE_EQUAL(
                count1 + count2,
                static_cast<std::size_t>(maths::common::CBasicStatistics::count(acc1 + acc2)));
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                1.72875, maths::common::CBasicStatistics::mean(acc1 + acc2), 0.000005);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                44.90633, maths::common::CBasicStatistics::variance(acc1 + acc2), 0.000005);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                -0.82216, maths::common::CBasicStatistics::skewness(acc1 + acc2), 0.000005);
        }
    }

    LOG_DEBUG(<< "Test subtraction");
    {
        // Subtracting one summand from a sum must recover the other summand.
        //
        // NOTE(review): in each section below acc1 and acc2 are declared
        // outside the split loop, so they keep accumulating across splits
        // rather than holding only the samples of the logged split. The
        // identities asserted hold either way - confirm whether resetting
        // them per split was intended.
        test::CRandomNumbers rng;

        LOG_DEBUG(<< "Test mean");
        {
            TMeanAccumulator acc1;
            TMeanAccumulator acc2;
            TDoubleVec samples;
            rng.generateNormalSamples(2.0, 3.0, 40u, samples);
            for (std::size_t j = 1; j < samples.size(); ++j) {
                LOG_DEBUG(<< "split = " << j << "/" << samples.size() - j);
                for (std::size_t i = 0; i < j; ++i) {
                    acc1.add(samples[i]);
                }
                for (std::size_t i = j; i < samples.size(); ++i) {
                    acc2.add(samples[i]);
                }
                TMeanAccumulator sum = acc1 + acc2;
                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(acc1),
                                    maths::common::CBasicStatistics::count(sum - acc2));
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::mean(acc1),
                    maths::common::CBasicStatistics::mean(sum - acc2), 1e-10);
                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(acc2),
                                    maths::common::CBasicStatistics::count(sum - acc1));
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::mean(acc2),
                    maths::common::CBasicStatistics::mean(sum - acc1), 1e-10);
            }
        }

        LOG_DEBUG(<< "Test mean and variance");
        {
            TMeanVarAccumulator acc1;
            TMeanVarAccumulator acc2;
            TDoubleVec samples;
            rng.generateGammaSamples(3.0, 3.0, 40u, samples);
            for (std::size_t j = 1; j < samples.size(); ++j) {
                LOG_DEBUG(<< "split = " << j << "/" << samples.size() - j);
                for (std::size_t i = 0; i < j; ++i) {
                    acc1.add(samples[i]);
                }
                for (std::size_t i = j; i < samples.size(); ++i) {
                    acc2.add(samples[i]);
                }
                TMeanVarAccumulator sum = acc1 + acc2;
                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(acc1),
                                    maths::common::CBasicStatistics::count(sum - acc2));
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::mean(acc1),
                    maths::common::CBasicStatistics::mean(sum - acc2), 1e-10);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::variance(acc1),
                    maths::common::CBasicStatistics::variance(sum - acc2), 1e-10);
                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(acc2),
                                    maths::common::CBasicStatistics::count(sum - acc1));
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::mean(acc2),
                    maths::common::CBasicStatistics::mean(sum - acc1), 1e-10);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::variance(acc2),
                    maths::common::CBasicStatistics::variance(sum - acc1), 1e-10);
            }
        }

        LOG_DEBUG(<< "Test mean, variance and skew");
        {
            TMeanVarSkewAccumulator acc1;
            TMeanVarSkewAccumulator acc2;
            TDoubleVec samples;
            rng.generateLogNormalSamples(1.1, 1.0, 40u, samples);
            for (std::size_t j = 1; j < samples.size(); ++j) {
                LOG_DEBUG(<< "split = " << j << "/" << samples.size() - j);
                for (std::size_t i = 0; i < j; ++i) {
                    acc1.add(samples[i]);
                }
                for (std::size_t i = j; i < samples.size(); ++i) {
                    acc2.add(samples[i]);
                }
                TMeanVarSkewAccumulator sum = acc1 + acc2;
                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(acc1),
                                    maths::common::CBasicStatistics::count(sum - acc2));
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::mean(acc1),
                    maths::common::CBasicStatistics::mean(sum - acc2), 1e-10);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::variance(acc1),
                    maths::common::CBasicStatistics::variance(sum - acc2), 1e-10);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::skewness(acc1),
                    maths::common::CBasicStatistics::skewness(sum - acc2), 1e-10);
                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(acc2),
                                    maths::common::CBasicStatistics::count(sum - acc1));
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::mean(acc2),
                    maths::common::CBasicStatistics::mean(sum - acc1), 1e-10);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::variance(acc2),
                    maths::common::CBasicStatistics::variance(sum - acc1), 1e-10);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    maths::common::CBasicStatistics::skewness(acc2),
                    maths::common::CBasicStatistics::skewness(sum - acc1), 1e-10);
            }
        }
    }

    LOG_DEBUG(<< "test vector");
    {
        // An accumulator of 4-vectors must agree, component by component, with
        // four scalar accumulators fed the same values.
        using TVectorMeanAccumulator =
            maths::common::CBasicStatistics::SSampleMean<maths::common::CVectorNx1<double, 4>>::TAccumulator;
        using TVectorMeanVarAccumulator =
            maths::common::CBasicStatistics::SSampleMeanVar<maths::common::CVectorNx1<double, 4>>::TAccumulator;
        using TVectorMeanVarSkewAccumulator =
            maths::common::CBasicStatistics::SSampleMeanVarSkew<maths::common::CVectorNx1<double, 4>>::TAccumulator;

        test::CRandomNumbers rng;

        {
            LOG_DEBUG(<< "Test mean");

            TDoubleVec samples;
            rng.generateNormalSamples(5.0, 1.0, 120, samples);

            TMeanAccumulator means[4];
            TVectorMeanAccumulator vectorMean;

            // Consume the samples in consecutive, non-overlapping groups of
            // four: each group forms one vector sample and its j'th value is
            // also added to the j'th scalar accumulator.
            for (std::size_t i = 0; i + 4 <= samples.size(); i += 4) {
                maths::common::CVectorNx1<double, 4> v;
                for (std::size_t j = 0; j < 4; ++j) {
                    means[j].add(samples[i + j]);
                    v(j) = samples[i + j];
                }
                LOG_DEBUG(<< "v = " << v);
                vectorMean.add(v);

                BOOST_REQUIRE_EQUAL(maths::common::CBasicStatistics::count(means[0]),
                                    maths::common::CBasicStatistics::count(vectorMean));
                for (std::size_t j = 0; j < 4; ++j) {
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(
                        maths::common::CBasicStatistics::mean(means[j]),
                        (maths::common::CBasicStatistics::mean(vectorMean))(j), 1e-14);
                }
            }
        }

        {
            LOG_DEBUG(<< "Test mean and variance");

            TDoubleVec samples;
            rng.generateNormalSamples(5.0, 1.0, 120, samples);

            TMeanVarAccumulator meansAndVariances[4];
            TVectorMeanVarAccumulator vectorMeanAndVariances;

            // Consume the samples in consecutive, non-overlapping groups of four.
            for (std::size_t i = 0; i + 4 <= samples.size(); i += 4) {
                maths::common::CVectorNx1<double, 4> v;
                for (std::size_t j = 0; j < 4; ++j) {
                    meansAndVariances[j].add(samples[i + j]);
                    v(j) = samples[i + j];
                }
                LOG_DEBUG(<< "v = " << v);
                vectorMeanAndVariances.add(v);

                BOOST_REQUIRE_EQUAL(
                    maths::common::CBasicStatistics::count(meansAndVariances[0]),
                    maths::common::CBasicStatistics::count(vectorMeanAndVariances));
                for (std::size_t j = 0; j < 4; ++j) {
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(
                        maths::common::CBasicStatistics::mean(meansAndVariances[j]),
                        (maths::common::CBasicStatistics::mean(vectorMeanAndVariances))(j),
                        1e-14);
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(
                        maths::common::CBasicStatistics::variance(meansAndVariances[j]),
                        (maths::common::CBasicStatistics::variance(vectorMeanAndVariances))(j),
                        1e-14);
                }
            }
        }

        {
            LOG_DEBUG(<< "Test mean, variance and skew");

            TDoubleVec samples;
            rng.generateNormalSamples(5.0, 1.0, 120, samples);

            TMeanVarSkewAccumulator meansVariancesAndSkews[4];
            TVectorMeanVarSkewAccumulator vectorMeanVarianceAndSkew;

            // Consume the samples in consecutive, non-overlapping groups of four.
            for (std::size_t i = 0; i + 4 <= samples.size(); i += 4) {
                maths::common::CVectorNx1<double, 4> v;
                for (std::size_t j = 0; j < 4; ++j) {
                    meansVariancesAndSkews[j].add(samples[i + j]);
                    v(j) = samples[i + j];
                }
                LOG_DEBUG(<< "v = " << v);
                vectorMeanVarianceAndSkew.add(v);

                BOOST_REQUIRE_EQUAL(
                    maths::common::CBasicStatistics::count(meansVariancesAndSkews[0]),
                    maths::common::CBasicStatistics::count(vectorMeanVarianceAndSkew));
                for (std::size_t j = 0; j < 4; ++j) {
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(
                        maths::common::CBasicStatistics::mean(meansVariancesAndSkews[j]),
                        (maths::common::CBasicStatistics::mean(vectorMeanVarianceAndSkew))(j),
                        1e-14);
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(
                        maths::common::CBasicStatistics::variance(meansVariancesAndSkews[j]),
                        (maths::common::CBasicStatistics::variance(vectorMeanVarianceAndSkew))(j),
                        1e-14);
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(
                        maths::common::CBasicStatistics::skewness(meansVariancesAndSkews[j]),
                        (maths::common::CBasicStatistics::skewness(vectorMeanVarianceAndSkew))(j),
                        1e-14);
                }
            }
        }
    }

    LOG_DEBUG(<< "Test persistence of collections");
    {
        // Each collection is round tripped through persistence twice: first
        // with a single accumulator and then with three.
        LOG_DEBUG(<< "Test means");
        {
            TMeanAccumulatorVec moments(1);
            moments[0].add(2.0);
            moments[0].add(3.0);
            testPersistCollection(moments);

            moments.push_back(TMeanAccumulator());
            moments.push_back(TMeanAccumulator());
            moments[1].add(3.0);
            moments[1].add(6.0);
            moments[2].add(10.0);
            moments[2].add(11.0);
            moments[2].add(12.0);
            testPersistCollection(moments);
        }

        LOG_DEBUG(<< "Test means and variances");
        {
            TMeanVarAccumulatorVec moments(1);
            moments[0].add(2.0);
            moments[0].add(3.0);
            moments[0].add(3.5);
            testPersistCollection(moments);

            moments.push_back(TMeanVarAccumulator());
            moments.push_back(TMeanVarAccumulator());
            moments[1].add(3.0);
            moments[1].add(6.0);
            moments[1].add(6.0);
            moments[2].add(10.0);
            moments[2].add(11.0);
            moments[2].add(12.0);
            moments[2].add(12.0);
            testPersistCollection(moments);
        }

        LOG_DEBUG(<< "Test means, variances and skews");
        {
            TMeanVarSkewAccumulatorVec moments(1);
            moments[0].add(2.0);
            moments[0].add(3.0);
            moments[0].add(3.5);
            testPersistCollection(moments);

            moments.push_back(TMeanVarSkewAccumulator());
            moments.push_back(TMeanVarSkewAccumulator());
            moments[1].add(3.0);
            moments[1].add(6.0);
            moments[1].add(6.0);
            moments[2].add(10.0);
            moments[2].add(11.0);
            moments[2].add(12.0);
            moments[2].add(12.0);
            testPersistCollection(moments);
        }
    }

    // The scalar accumulators must report that their dynamic size is always zero.
    BOOST_REQUIRE_EQUAL(
        true, core::memory_detail::SDynamicSizeAlwaysZero<TMeanAccumulator>::value());
    BOOST_REQUIRE_EQUAL(
        true, core::memory_detail::SDynamicSizeAlwaysZero<TMeanVarAccumulator>::value());
    BOOST_REQUIRE_EQUAL(
        true, core::memory_detail::SDynamicSizeAlwaysZero<TMeanVarSkewAccumulator>::value());
}
BOOST_AUTO_TEST_CASE(testVectorCentralMoments) {
    // The statistics extracted from a collection of accumulators must print
    // to the expected values, both for a core::CSmallVector and for a
    // std::vector of accumulators fed identical samples.

    using TDouble2Vec = core::CSmallVector<double, 2>;

    {
        TMeanAccumulator2Vec smallMoments(2);
        TMeanAccumulatorVec stdMoments(2);
        for (double x : {2.0, 5.0, 2.9}) {
            smallMoments[0].add(x);
            stdMoments[0].add(x);
        }
        for (double x : {4.0, 3.0}) {
            smallMoments[1].add(x);
            stdMoments[1].add(x);
        }

        TDouble2Vec smallCounts = maths::common::CBasicStatistics::count(smallMoments);
        TDouble2Vec smallMeans = maths::common::CBasicStatistics::mean(smallMoments);
        TDoubleVec stdCounts = maths::common::CBasicStatistics::count(stdMoments);
        TDoubleVec stdMeans = maths::common::CBasicStatistics::mean(stdMoments);

        const std::string expectedCounts("[3, 2]");
        const std::string expectedMeans("[3.3, 3.5]");
        BOOST_REQUIRE_EQUAL(expectedCounts, core::CContainerPrinter::print(smallCounts));
        BOOST_REQUIRE_EQUAL(expectedMeans, core::CContainerPrinter::print(smallMeans));
        BOOST_REQUIRE_EQUAL(expectedCounts, core::CContainerPrinter::print(stdCounts));
        BOOST_REQUIRE_EQUAL(expectedMeans, core::CContainerPrinter::print(stdMeans));
    }

    {
        TMeanVarAccumulator2Vec smallMoments(2);
        TMeanVarAccumulatorVec stdMoments(2);
        for (double x : {2.0, 4.0}) {
            smallMoments[0].add(x);
            stdMoments[0].add(x);
        }
        for (double x : {3.0, 4.0, 5.0}) {
            smallMoments[1].add(x);
            stdMoments[1].add(x);
        }

        TDouble2Vec smallCounts = maths::common::CBasicStatistics::count(smallMoments);
        TDouble2Vec smallMeans = maths::common::CBasicStatistics::mean(smallMoments);
        TDouble2Vec smallVars = maths::common::CBasicStatistics::variance(smallMoments);
        TDouble2Vec smallMlVars =
            maths::common::CBasicStatistics::maximumLikelihoodVariance(smallMoments);
        TDoubleVec stdCounts = maths::common::CBasicStatistics::count(stdMoments);
        TDoubleVec stdMeans = maths::common::CBasicStatistics::mean(stdMoments);
        TDoubleVec stdVars = maths::common::CBasicStatistics::variance(stdMoments);
        TDouble2Vec stdMlVars =
            maths::common::CBasicStatistics::maximumLikelihoodVariance(stdMoments);

        const std::string expectedCounts("[2, 3]");
        const std::string expectedMeans("[3, 4]");
        const std::string expectedVars("[2, 1]");
        const std::string expectedMlVars("[1, 0.6666667]");
        BOOST_REQUIRE_EQUAL(expectedCounts, core::CContainerPrinter::print(smallCounts));
        BOOST_REQUIRE_EQUAL(expectedMeans, core::CContainerPrinter::print(smallMeans));
        BOOST_REQUIRE_EQUAL(expectedVars, core::CContainerPrinter::print(smallVars));
        BOOST_REQUIRE_EQUAL(expectedMlVars, core::CContainerPrinter::print(smallMlVars));
        BOOST_REQUIRE_EQUAL(expectedCounts, core::CContainerPrinter::print(stdCounts));
        BOOST_REQUIRE_EQUAL(expectedMeans, core::CContainerPrinter::print(stdMeans));
        BOOST_REQUIRE_EQUAL(expectedVars, core::CContainerPrinter::print(stdVars));
        BOOST_REQUIRE_EQUAL(expectedMlVars, core::CContainerPrinter::print(stdMlVars));
    }

    {
        TMeanVarSkewAccumulator2Vec smallMoments(2);
        TMeanVarSkewAccumulatorVec stdMoments(2);
        for (double x : {2.0, 4.0}) {
            smallMoments[0].add(x);
            stdMoments[0].add(x);
        }
        for (double x : {2.0, 5.0, 5.0}) {
            smallMoments[1].add(x);
            stdMoments[1].add(x);
        }

        TDouble2Vec smallCounts = maths::common::CBasicStatistics::count(smallMoments);
        TDouble2Vec smallMeans = maths::common::CBasicStatistics::mean(smallMoments);
        TDouble2Vec smallVars = maths::common::CBasicStatistics::variance(smallMoments);
        TDouble2Vec smallMlVars =
            maths::common::CBasicStatistics::maximumLikelihoodVariance(smallMoments);
        TDouble2Vec smallSkews = maths::common::CBasicStatistics::skewness(smallMoments);
        TDoubleVec stdCounts = maths::common::CBasicStatistics::count(stdMoments);
        TDoubleVec stdMeans = maths::common::CBasicStatistics::mean(stdMoments);
        TDoubleVec stdVars = maths::common::CBasicStatistics::variance(stdMoments);
        TDouble2Vec stdMlVars =
            maths::common::CBasicStatistics::maximumLikelihoodVariance(stdMoments);
        TDouble2Vec stdSkews = maths::common::CBasicStatistics::skewness(stdMoments);

        const std::string expectedCounts("[2, 3]");
        const std::string expectedMeans("[3, 4]");
        const std::string expectedVars("[2, 3]");
        const std::string expectedMlVars("[1, 2]");
        const std::string expectedSkews("[0, -0.3849002]");
        BOOST_REQUIRE_EQUAL(expectedCounts, core::CContainerPrinter::print(smallCounts));
        BOOST_REQUIRE_EQUAL(expectedMeans, core::CContainerPrinter::print(smallMeans));
        BOOST_REQUIRE_EQUAL(expectedVars, core::CContainerPrinter::print(smallVars));
        BOOST_REQUIRE_EQUAL(expectedMlVars, core::CContainerPrinter::print(smallMlVars));
        BOOST_REQUIRE_EQUAL(expectedSkews, core::CContainerPrinter::print(smallSkews));
        BOOST_REQUIRE_EQUAL(expectedCounts, core::CContainerPrinter::print(stdCounts));
        BOOST_REQUIRE_EQUAL(expectedMeans, core::CContainerPrinter::print(stdMeans));
        BOOST_REQUIRE_EQUAL(expectedVars, core::CContainerPrinter::print(stdVars));
        BOOST_REQUIRE_EQUAL(expectedMlVars, core::CContainerPrinter::print(stdMlVars));
        BOOST_REQUIRE_EQUAL(expectedSkews, core::CContainerPrinter::print(stdSkews));
    }
}
BOOST_AUTO_TEST_CASE(testCovariances) {
    // Covers the sample covariances accumulator in three ways:
    //   1) Fixed data with known mean and covariances, for three vector types.
    //   2) Samples drawn with a specified mean and covariance matrix.
    //   3) Round trip through the delimited string representation.

    using TVector3 = maths::common::CVectorNx1<double, 3>;
    using TVector4 = maths::common::CVectorNx1<double, 4>;
    using TCovariances3 = maths::common::CBasicStatistics::SSampleCovariances<TVector3>;
    using TCovariances4 = maths::common::CBasicStatistics::SSampleCovariances<TVector4>;

    LOG_DEBUG(<< "N(3,I)");
    {
        using TVector = maths::common::CVector<double>;
        using TDenseVector = maths::common::CDenseVector<double>;

        const double raw[][3] = {
            {2.58894, 2.87211, 1.62609}, {3.88246, 2.98577, 2.70981},
            {2.03317, 3.33715, 2.93560}, {3.30100, 4.38844, 1.65705},
            {2.12426, 2.21127, 2.57000}, {4.21041, 4.20745, 1.90752},
            {3.56139, 3.14454, 0.89316}, {4.29444, 1.58715, 3.58402},
            {3.06731, 3.91581, 2.85951}, {3.62798, 2.28786, 2.89994},
            {2.05834, 2.96137, 3.57654}, {2.72185, 3.36003, 3.09708},
            {0.94924, 2.19797, 3.30941}, {2.11159, 2.49182, 3.56793},
            {3.10364, 0.32747, 3.62487}, {2.28235, 3.83542, 3.35942},
            {3.30549, 2.95951, 2.97006}, {3.05787, 2.94188, 2.64095},
            {3.98245, 2.02892, 3.07909}, {3.81189, 2.89389, 3.81389},
            {3.32811, 3.88484, 4.17866}, {2.06964, 3.80683, 2.46835},
            {4.58989, 2.00321, 1.93029}, {2.51484, 4.46106, 3.71248},
            {3.30729, 2.44768, 3.43241}, {3.52222, 2.91724, 1.49631},
            {1.71826, 4.79752, 4.38398}, {3.14173, 3.16237, 2.49654},
            {3.26538, 2.21858, 5.05477}, {2.88352, 1.94396, 3.08744}};

        const double expectedMean[] = {3.013898, 2.952637, 2.964104};
        const double expectedCovariances[][3] = {{0.711903, -0.174535, -0.199460},
                                                 {-0.174535, 0.935285, -0.091192},
                                                 {-0.199460, -0.091192, 0.833710}};

        // The same data is accumulated using three different vector types.
        TCovariances3 covariances1(3);
        maths::common::CBasicStatistics::SSampleCovariances<TVector> covariances2(3);
        maths::common::CBasicStatistics::SSampleCovariances<TDenseVector> covariances3(3);

        for (const auto& row : raw) {
            LOG_DEBUG(<< "v = " << row);
            covariances1.add(TVector3(row));
            covariances2.add(TVector(std::begin(row), std::end(row)));
            TDenseVector v(3);
            v << row[0], row[1], row[2];
            covariances3.add(v);
        }

        LOG_DEBUG(<< "count1 = " << maths::common::CBasicStatistics::count(covariances1));
        LOG_DEBUG(<< "mean1 = " << maths::common::CBasicStatistics::mean(covariances1));
        LOG_DEBUG(<< "covariances1 = "
                  << maths::common::CBasicStatistics::covariances(covariances1));

        LOG_DEBUG(<< "count2 = " << maths::common::CBasicStatistics::count(covariances2));
        LOG_DEBUG(<< "mean2 = " << maths::common::CBasicStatistics::mean(covariances2));
        LOG_DEBUG(<< "covariances2 = "
                  << maths::common::CBasicStatistics::covariances(covariances2));

        LOG_DEBUG(<< "count3 = " << maths::common::CBasicStatistics::count(covariances3));
        LOG_DEBUG(<< "mean3 = "
                  << maths::common::CBasicStatistics::mean(covariances3).transpose());
        LOG_DEBUG(<< "covariances3 =\n"
                  << maths::common::CBasicStatistics::covariances(covariances3));

        const double expectedCount{static_cast<double>(std::size(raw))};
        BOOST_REQUIRE_EQUAL(expectedCount,
                            maths::common::CBasicStatistics::count(covariances1));
        BOOST_REQUIRE_EQUAL(expectedCount,
                            maths::common::CBasicStatistics::count(covariances2));
        BOOST_REQUIRE_EQUAL(expectedCount,
                            maths::common::CBasicStatistics::count(covariances3));

        for (std::size_t row = 0; row < 3; ++row) {
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                expectedMean[row],
                (maths::common::CBasicStatistics::mean(covariances1))(row), 2e-6);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                expectedMean[row],
                (maths::common::CBasicStatistics::mean(covariances2))(row), 2e-6);
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                expectedMean[row],
                (maths::common::CBasicStatistics::mean(covariances3))(row), 2e-6);
            for (std::size_t col = 0; col < 3; ++col) {
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    expectedCovariances[row][col],
                    (maths::common::CBasicStatistics::covariances(covariances1))(row, col),
                    2e-6);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    expectedCovariances[row][col],
                    (maths::common::CBasicStatistics::covariances(covariances2))(row, col),
                    2e-6);
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    expectedCovariances[row][col],
                    (maths::common::CBasicStatistics::covariances(covariances3))(row, col),
                    2e-6);
            }
        }

        // The fixed size vector variant must report an always zero dynamic size.
        bool dynamicSizeAlwaysZero =
            core::memory_detail::SDynamicSizeAlwaysZero<TCovariances3>::value();
        BOOST_REQUIRE_EQUAL(true, dynamicSizeAlwaysZero);
    }

    {
        using TMatrix4 = maths::common::CSymmetricMatrixNxN<double, 4>;
        using TVectorVec = std::vector<TVector4>;

        double mean_[] = {1.0, 3.0, 2.0, 7.0};
        TVector4 mean(mean_);

        // The target covariance matrix is a weighted sum of outer products of
        // the normalised vectors below.
        double covariances1_[] = {1.0, 1.0, 1.0, 1.0};
        double covariances2_[] = {-1.0, 1.0, 0.0, 0.0};
        double covariances3_[] = {-1.0, -1.0, 2.0, 0.0};
        double covariances4_[] = {-1.0, -1.0, -1.0, 3.0};
        TVector4 covariances1(covariances1_);
        TVector4 covariances2(covariances2_);
        TVector4 covariances3(covariances3_);
        TVector4 covariances4(covariances4_);
        TMatrix4 covariance(
            10.0 * TMatrix4(maths::common::E_OuterProduct,
                            covariances1 / covariances1.euclidean()) +
            5.0 * TMatrix4(maths::common::E_OuterProduct,
                           covariances2 / covariances2.euclidean()) +
            5.0 * TMatrix4(maths::common::E_OuterProduct,
                           covariances3 / covariances3.euclidean()) +
            2.0 * TMatrix4(maths::common::E_OuterProduct,
                           covariances4 / covariances4.euclidean()));

        std::size_t n = 10000;
        TVectorVec samples;
        maths::common::CSampling::multivariateNormalSample(mean, covariance, n, samples);

        TCovariances4 sampleCovariance(4);
        sampleCovariance.add(samples);

        LOG_DEBUG(<< "expected mean = " << mean);
        LOG_DEBUG(<< "expected covariances = " << covariance);
        LOG_DEBUG(<< "mean = " << maths::common::CBasicStatistics::mean(sampleCovariance));
        LOG_DEBUG(<< "covariances = "
                  << maths::common::CBasicStatistics::covariances(sampleCovariance));

        for (std::size_t row = 0; row < 4; ++row) {
            BOOST_REQUIRE_CLOSE_ABSOLUTE(
                mean(row),
                (maths::common::CBasicStatistics::mean(sampleCovariance))(row), 0.05);
            for (std::size_t col = 0; col < 4; ++col) {
                BOOST_REQUIRE_CLOSE_ABSOLUTE(
                    covariance(row, col),
                    (maths::common::CBasicStatistics::covariances(sampleCovariance))(row, col),
                    0.16);
            }
        }
    }

    {
        test::CRandomNumbers rng;

        std::vector<double> coordinates;
        rng.generateUniformSamples(5.0, 10.0, 400, coordinates);

        // Pack consecutive runs of four coordinates into points.
        std::vector<TVector4> points;
        for (std::size_t offset = 0; offset < coordinates.size(); offset += 4) {
            double c[] = {coordinates[offset + 0], coordinates[offset + 1],
                          coordinates[offset + 2], coordinates[offset + 3]};
            points.push_back(TVector4(c));
        }

        TCovariances4 expectedSampleCovariances(4);
        for (const auto& point : points) {
            expectedSampleCovariances.add(point);
        }

        std::string expectedDelimited = expectedSampleCovariances.toDelimited();
        LOG_DEBUG(<< "delimited = " << expectedDelimited);

        // Restoring from the delimited form must reproduce both the checksum
        // and the delimited form itself.
        TCovariances4 sampleCovariances(4);
        BOOST_TEST_REQUIRE(sampleCovariances.fromDelimited(expectedDelimited));

        BOOST_REQUIRE_EQUAL(expectedSampleCovariances.checksum(),
                            sampleCovariances.checksum());

        std::string delimited = sampleCovariances.toDelimited();
        BOOST_REQUIRE_EQUAL(expectedDelimited, delimited);
    }
}
BOOST_AUTO_TEST_CASE(testCovariancesLedoitWolf) {
    // Compares the maximum likelihood covariance matrix obtained by plain
    // accumulation with the one obtained via covariancesLedoitWolf. Both are
    // measured against the matrix used to generate the samples, using the
    // Frobenius norm of the difference. The fixed size and dense vector
    // flavours of covariancesLedoitWolf are also required to agree.

    using TStats = maths::common::CBasicStatistics;
    using TDoubleVecVec = std::vector<TDoubleVec>;
    using TVector2 = maths::common::CVectorNx1<double, 2>;
    using TVector2Vec = std::vector<TVector2>;
    using TMatrix2 = maths::common::CSymmetricMatrixNxN<double, 2>;
    using TDenseVector = maths::common::CDenseVector<double>;
    using TDenseVectorVec = std::vector<TDenseVector>;
    using TDenseMatrix = maths::common::CDenseMatrix<double>;
    using TCovariances2 = TStats::SSampleCovariances<TVector2>;
    using TDenseCovariances = TStats::SSampleCovariances<TDenseVector>;

    test::CRandomNumbers rng;

    // One (mean, covariance) pair per generated data set.
    double means[][2] = {
        {10.0, 10.0}, {20.0, 150.0}, {-10.0, -20.0}, {-20.0, 40.0}, {40.0, 90.0}};
    double covariances[][2][2] = {{{40.0, 0.0}, {0.0, 40.0}},
                                  {{20.0, 5.0}, {5.0, 10.0}},
                                  {{300.0, -70.0}, {-70.0, 60.0}},
                                  {{100.0, 20.0}, {20.0, 60.0}},
                                  {{50.0, -10.0}, {-10.0, 60.0}}};

    // Mean relative errors over every data set and every prefix length.
    TMeanAccumulator meanErrorML;
    TMeanAccumulator meanErrorLWML;

    for (std::size_t t = 0; t < std::size(means); ++t) {
        LOG_DEBUG(<< "*** test " << t << " ***");

        TDoubleVec mean(std::begin(means[t]), std::end(means[t]));
        TDoubleVecVec covariance;
        for (const auto& row : covariances[t]) {
            covariance.emplace_back(std::begin(row), std::end(row));
        }
        TMatrix2 covExpected(covariance);
        LOG_DEBUG(<< "cov expected = " << covExpected);

        // Used to turn absolute errors into relative errors.
        double expectedNorm = covExpected.frobenius();

        TDoubleVecVec samples;
        rng.generateMultivariateNormalSamples(mean, covariance, 50, samples);

        // Estimate the covariances from the first n samples, for every prefix
        // length n from 3 up to (but excluding) the number of samples.
        for (std::size_t n = 3; n < samples.size(); ++n) {
            TVector2Vec prefix;
            TDenseVectorVec densePrefix;
            for (std::size_t k = 0; k < n; ++k) {
                const TDoubleVec& sample = samples[k];
                prefix.emplace_back(sample);
                TDenseVector point(2);
                point << sample[0], sample[1];
                densePrefix.push_back(point);
            }

            TCovariances2 cov(2);
            cov.add(prefix);
            TCovariances2 covLW(2);
            TStats::covariancesLedoitWolf(prefix, covLW);
            TDenseCovariances covLW2(2);
            TStats::covariancesLedoitWolf(densePrefix, covLW2);

            const TMatrix2& covML = TStats::maximumLikelihoodCovariances(cov);
            const TMatrix2& covLWML = TStats::maximumLikelihoodCovariances(covLW);
            const TDenseMatrix& covLWML2 = TStats::maximumLikelihoodCovariances(covLW2);

            // The two vector representations must produce the same matrix.
            for (std::size_t row = 0; row < 2; ++row) {
                for (std::size_t col = 0; col < 2; ++col) {
                    BOOST_REQUIRE_CLOSE_ABSOLUTE(covLWML(row, col),
                                                 covLWML2(row, col), 1e-10);
                }
            }

            double errorML = (covML - covExpected).frobenius();
            double errorLWML = (covLWML - covExpected).frobenius();

            if (n % 5 == 0) {
                LOG_DEBUG(<< "cov ML = " << covML);
                LOG_DEBUG(<< "cov LWML = " << covLWML);
                LOG_DEBUG(<< "error ML = " << errorML << ", error LWML = " << errorLWML);
            }

            // Per prefix bound on how much worse the Ledoit-Wolf error may be.
            BOOST_TEST_REQUIRE(errorLWML < 6.0 * errorML);

            meanErrorML.add(errorML / expectedNorm);
            meanErrorLWML.add(errorLWML / expectedNorm);
        }
    }

    LOG_DEBUG(<< "error = " << meanErrorML);
    LOG_DEBUG(<< "error LW = " << meanErrorLWML);

    // On average the Ledoit-Wolf error is required to be below 90% of the
    // plain maximum likelihood error.
    BOOST_TEST_REQUIRE(TStats::mean(meanErrorLWML) < 0.9 * TStats::mean(meanErrorML));
}
BOOST_AUTO_TEST_CASE(testMedian) {
    // Checks CBasicStatistics::median on small hand computed inputs.
    using TStats = maths::common::CBasicStatistics;

    // No values: the expected result is zero.
    BOOST_REQUIRE_EQUAL(0.0, TStats::median({}));

    // A single value is its own median.
    BOOST_REQUIRE_EQUAL(1.0, TStats::median({1.0}));

    // Two values: the expected result is their midpoint.
    BOOST_REQUIRE_EQUAL(1.5, TStats::median({2.0, 1.0}));

    // Unsorted input with an odd number of values.
    BOOST_REQUIRE_EQUAL(2.0, TStats::median({3.0, 1.0, 2.0}));
    BOOST_REQUIRE_EQUAL(
        5.0, TStats::median({3.0, 5.0, 9.0, 1.0, 2.0, 6.0, 7.0, 4.0, 8.0}));

    // Unsorted input with an even number of values.
    BOOST_REQUIRE_EQUAL(
        5.5, TStats::median({3.0, 5.0, 10.0, 2.0, 6.0, 7.0, 1.0, 9.0, 4.0, 8.0}));
}
BOOST_AUTO_TEST_CASE(testMad) {
    // Checks CBasicStatistics::mad against hand computed values and, for
    // random data, against the median of the absolute deviations from the
    // median computed directly.
    using TStats = maths::common::CBasicStatistics;
    using TSizeVec = std::vector<std::size_t>;

    TDoubleVec values;

    // Edge cases: one element, two elements and more than half of the
    // values being equal.
    values = {5.0};
    BOOST_REQUIRE_EQUAL(0.0, TStats::mad(values));
    values = {5.0, 6.0};
    BOOST_REQUIRE_EQUAL(0.5, TStats::mad(values));
    values = {6.0, 6.0, 6.0, 2.0, -100.0};
    BOOST_REQUIRE_EQUAL(0.0, TStats::mad(values));
    values = {6.0, 6.0, 6.0, 6.0, -100.0, 1.0};
    BOOST_REQUIRE_EQUAL(0.0, TStats::mad(values));

    // An even number of values followed by an odd number of values.
    values = {12.2, 11.8, 1.0, 30.2, 5.9, 209.0, -390.3, 37.0};
    BOOST_REQUIRE_CLOSE_ABSOLUTE(14.6, TStats::mad(values), 1e-15);
    values = {12.2, 11.8, 1.0, 30.2, 5.9, 209.0, -390.3, 37.0, 51.2};
    BOOST_REQUIRE_CLOSE_ABSOLUTE(18.0, TStats::mad(values), 1e-15);

    // Random data: the number of values is itself drawn at random for each
    // trial.
    test::CRandomNumbers rng;
    TSizeVec numberValues;
    for (std::size_t trial = 0; trial < 100; ++trial) {
        rng.generateUniformSamples(1, 40, 1, numberValues);
        rng.generateUniformSamples(0.0, 100.0, numberValues[0], values);

        const double actualMad{TStats::mad(values)};
        const double centre{TStats::median(values)};

        // Overwrite each value with its absolute deviation from the median.
        std::transform(values.begin(), values.end(), values.begin(),
                       [centre](double value) { return std::fabs(value - centre); });

        BOOST_REQUIRE_EQUAL(TStats::median(values), actualMad);
    }
}
BOOST_AUTO_TEST_CASE(testOrderStatistics) {
    // Test that the order statistics accumulators work for finding min and max
    // elements of a collection. Both the fixed capacity (stack) and the run
    // time capacity (heap) variants are exercised, together with combining,
    // persist and restore, ageing, biggest(), the memory traits and custom
    // value (de)serialisation callbacks.

    using TMinStatsStack =
        maths::common::CBasicStatistics::COrderStatisticsStack<double, 2u>;
    using TMaxStatsStack =
        maths::common::CBasicStatistics::COrderStatisticsStack<double, 3u, std::greater<double>>;
    using TMinStatsHeap = maths::common::CBasicStatistics::COrderStatisticsHeap<double>;
    using TMaxStatsHeap =
        maths::common::CBasicStatistics::COrderStatisticsHeap<double, std::greater<double>>;

    {
        // Test on the stack min, max, combine and persist and restore.

        double data[] = {1.0, 2.3, 1.1, 1.0, 5.0, 3.0, 11.0, 0.2, 15.8, 12.3};

        TMinStatsStack minValues;
        TMaxStatsStack maxValues;
        TMinStatsStack minFirstHalf;
        TMinStatsStack minSecondHalf;

        for (std::size_t i = 0; i < std::size(data); ++i) {
            minValues.add(data[i]);
            maxValues.add(data[i]);
            // Split the data in two so the combined accumulator can be
            // compared with the one which saw all the values.
            (2 * i < std::size(data) ? minFirstHalf : minSecondHalf).add(data[i]);
        }

        // The sorted accumulator must match the leading values of the data
        // sorted in ascending order.
        std::sort(std::begin(data), std::end(data));
        minValues.sort();
        LOG_DEBUG(<< "x_1 = " << minValues[0] << ", x_2 = " << minValues[1]);
        BOOST_TEST_REQUIRE(std::equal(minValues.begin(), minValues.end(), data));

        // Likewise for the maximum accumulator and descending order.
        std::sort(std::begin(data), std::end(data), std::greater<double>());
        maxValues.sort();
        LOG_DEBUG(<< "x_n = " << maxValues[0] << ", x_(n-1) = " << maxValues[1]
                  << ", x_(n-2) = " << maxValues[2]);
        BOOST_TEST_REQUIRE(std::equal(maxValues.begin(), maxValues.end(), data));

        BOOST_REQUIRE_EQUAL(std::size_t{2}, minValues.count());
        BOOST_REQUIRE_EQUAL(std::size_t{3}, maxValues.count());

        // Combining the two halves must agree with seeing all the values.
        TMinStatsStack minFirstPlusSecondHalf = (minFirstHalf + minSecondHalf);
        minFirstPlusSecondHalf.sort();
        BOOST_TEST_REQUIRE(std::equal(minValues.begin(), minValues.end(),
                                      minFirstPlusSecondHalf.begin()));

        // Test persist is idempotent.

        std::ostringstream origJson;
        {
            core::CJsonStatePersistInserter inserter(origJson);
            inserter.insertValue(TAG, minValues.toDelimited());
        }

        LOG_DEBUG(<< "Stats JSON representation:\n" << origJson.str());

        // Restore the JSON into stats object.
        TMinStatsStack restoredMinValues;
        {
            std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"};
            core::CJsonStateRestoreTraverser traverser(origJsonStrm);
            BOOST_TEST_REQUIRE(traverser.traverseSubLevel(std::bind(
                SRestore(), std::ref(restoredMinValues), std::placeholders::_1)));
        }

        // The JSON representation of the new stats object should be unchanged.
        std::ostringstream newJson;
        {
            core::CJsonStatePersistInserter inserter(newJson);
            inserter.insertValue(TAG, restoredMinValues.toDelimited());
        }
        BOOST_REQUIRE_EQUAL(origJson.str(), newJson.str());
    }
    {
        // Test on the heap min, max, combine and persist and restore.

        double data[] = {1.0, 2.3, 1.1, 1.0, 5.0, 3.0, 11.0, 0.2, 15.8, 12.3};

        TMinStatsHeap min2Values(2);
        TMaxStatsHeap max3Values(3);
        // Capacity exceeds the number of values, so every value is retained.
        TMaxStatsHeap max20Values(20);

        for (std::size_t i = 0; i < std::size(data); ++i) {
            min2Values.add(data[i]);
            max3Values.add(data[i]);
            max20Values.add(data[i]);
        }

        std::sort(std::begin(data), std::end(data));
        min2Values.sort();
        LOG_DEBUG(<< "x_1 = " << min2Values[0] << ", x_2 = " << min2Values[1]);
        BOOST_TEST_REQUIRE(std::equal(min2Values.begin(), min2Values.end(), data));

        std::sort(std::begin(data), std::end(data), std::greater<double>());
        max3Values.sort();
        LOG_DEBUG(<< "x_n = " << max3Values[0] << ", x_(n-1) = " << max3Values[1]
                  << ", x_(n-2) = " << max3Values[2]);
        BOOST_TEST_REQUIRE(std::equal(max3Values.begin(), max3Values.end(), data));

        max20Values.sort();
        BOOST_REQUIRE_EQUAL(std::size(data), max20Values.count());
        BOOST_TEST_REQUIRE(std::equal(max20Values.begin(), max20Values.end(), data));

        // Test persist is idempotent.

        std::ostringstream origJson;
        {
            core::CJsonStatePersistInserter inserter(origJson);
            inserter.insertValue(TAG, max20Values.toDelimited());
        }

        LOG_DEBUG(<< "Stats JSON representation:\n" << origJson.str());

        // Restore the JSON into stats object. This must be the same type as
        // the accumulator which was persisted, i.e. the maximum heap, so the
        // round trip exercises the type under test.
        TMaxStatsHeap restoredMaxValues(20);
        {
            std::istringstream origJsonStrm{"{\"topLevel\" : " + origJson.str() + "}"};
            core::CJsonStateRestoreTraverser traverser(origJsonStrm);
            BOOST_TEST_REQUIRE(traverser.traverseSubLevel(std::bind(
                SRestore(), std::ref(restoredMaxValues), std::placeholders::_1)));
        }

        // The JSON representation of the new stats object should be unchanged.
        std::ostringstream newJson;
        {
            core::CJsonStatePersistInserter inserter(newJson);
            inserter.insertValue(TAG, restoredMaxValues.toDelimited());
        }
        BOOST_REQUIRE_EQUAL(origJson.str(), newJson.str());
    }
    {
        // Test we correctly age the minimum value accumulator: ageing by a
        // factor of 0.5 is expected to move the minimum from 15 to 30.
        TMinStatsStack test;
        test.add(15.0);
        test.age(0.5);
        BOOST_REQUIRE_EQUAL(30.0, test[0]);
    }
    {
        // Test we correctly age the maximum value accumulator: ageing by a
        // factor of 0.5 is expected to move the maximum from 15 to 7.5.
        TMaxStatsStack test;
        test.add(15.0);
        test.age(0.5);
        BOOST_REQUIRE_EQUAL(7.5, test[0]);
    }
    {
        // Test biggest. For the minimum accumulator this is expected to be
        // the largest retained value and for the maximum accumulator the
        // smallest retained value.
        TMinStatsHeap min(5);
        TMaxStatsHeap max(5);
        min.add(1.0);
        max.add(1.0);
        BOOST_REQUIRE_EQUAL(1.0, min.biggest());
        BOOST_REQUIRE_EQUAL(1.0, max.biggest());
        std::size_t i{0};
        for (auto value : {3.6, -6.1, 1.0, 3.4}) {
            min.add(value);
            max.add(value);
            if (i++ == 0) {
                // Only 1.0 and 3.6 have been added so far.
                BOOST_REQUIRE_EQUAL(3.6, min.biggest());
                BOOST_REQUIRE_EQUAL(1.0, max.biggest());
            } else {
                BOOST_REQUIRE_EQUAL(3.6, min.biggest());
                BOOST_REQUIRE_EQUAL(-6.1, max.biggest());
            }
        }
        // Both accumulators now hold five values, so the sixth value
        // displaces one from each.
        min.add(0.9);
        max.add(0.9);
        BOOST_REQUIRE_EQUAL(3.4, min.biggest());
        BOOST_REQUIRE_EQUAL(0.9, max.biggest());
    }
    {
        // Test memory: the stack accumulators are expected to report that
        // their dynamic size is always zero and the heap accumulators that
        // it is not.
        BOOST_REQUIRE_EQUAL(
            true, core::memory_detail::SDynamicSizeAlwaysZero<TMinStatsStack>::value());
        BOOST_REQUIRE_EQUAL(
            true, core::memory_detail::SDynamicSizeAlwaysZero<TMaxStatsStack>::value());
        BOOST_REQUIRE_EQUAL(
            false, core::memory_detail::SDynamicSizeAlwaysZero<TMinStatsHeap>::value());
        BOOST_REQUIRE_EQUAL(
            false, core::memory_detail::SDynamicSizeAlwaysZero<TMaxStatsHeap>::value());
    }
    {
        // Test to from delimited with callback to persist values.

        using TDoubleDoublePr = std::pair<double, double>;
        using TDoubleDoublePrMinAccumulator =
            ml::maths::common::CBasicStatistics::COrderStatisticsStack<TDoubleDoublePr, 2u>;

        TDoubleDoublePrMinAccumulator orig;
        orig.add({1.0, 3.2});
        orig.add({3.1, 1.2});

        // Writes a pair as its two elements joined by EXTERNAL_DELIMITER.
        auto toDelimited = [](const TDoubleDoublePr& value) {
            return ml::core::CStringUtils::typeToStringPrecise(
                       value.first, ml::core::CIEEE754::E_DoublePrecision) +
                   ml::maths::common::CBasicStatistics::EXTERNAL_DELIMITER +
                   ml::core::CStringUtils::typeToStringPrecise(
                       value.second, ml::core::CIEEE754::E_DoublePrecision);
        };

        std::string delimited{orig.toDelimited(toDelimited)};
        LOG_DEBUG(<< "delimited = " << delimited);

        // Reads a pair back by splitting at the first EXTERNAL_DELIMITER.
        TDoubleDoublePrMinAccumulator restored;
        restored.fromDelimited(delimited, [](const std::string& value, TDoubleDoublePr& result) {
            std::size_t pos{value.find(ml::maths::common::CBasicStatistics::EXTERNAL_DELIMITER)};
            return ml::core::CStringUtils::stringToType(value.substr(0, pos),
                                                        result.first) &&
                   ml::core::CStringUtils::stringToType(value.substr(pos + 1),
                                                        result.second);
        });

        BOOST_REQUIRE_EQUAL(delimited, restored.toDelimited(toDelimited));
        BOOST_REQUIRE_CLOSE_ABSOLUTE(orig[0].first, restored[0].first, 1e-15);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(orig[0].second, restored[0].second, 1e-15);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(orig[1].first, restored[1].first, 1e-15);
        BOOST_REQUIRE_CLOSE_ABSOLUTE(orig[1].second, restored[1].second, 1e-15);
    }
}
BOOST_AUTO_TEST_CASE(testMinMax) {
    // Checks CMinMax on all positive, all negative and mixed sign values and
    // that adding two accumulators agrees with accumulating everything in one.
    using TMinMax = maths::common::CBasicStatistics::CMinMax<double>;

    TDoubleVec positive{1.0, 2.7, 4.0, 0.3, 11.7};
    TDoubleVec negative{-3.7, -0.8, -18.2, -0.8};
    TDoubleVec mixed{1.3, -8.0, 2.1};

    // Feeds the values to a fresh accumulator, checking it only reports being
    // initialized afterwards, then checks the min, max and sign margin.
    auto requireMinMaxAndMargin = [](TDoubleVec values, double expectedMin,
                                     double expectedMax, double expectedMargin) {
        TMinMax minmax;
        BOOST_TEST_REQUIRE(!minmax.initialized());
        minmax.add(values);
        BOOST_TEST_REQUIRE(minmax.initialized());
        BOOST_REQUIRE_EQUAL(expectedMin, minmax.min());
        BOOST_REQUIRE_EQUAL(expectedMax, minmax.max());
        BOOST_REQUIRE_EQUAL(expectedMargin, minmax.signMargin());
    };

    // All positive: the expected sign margin equals the minimum.
    requireMinMaxAndMargin(positive, 0.3, 11.7, 0.3);

    // All negative: the expected sign margin equals the maximum.
    requireMinMaxAndMargin(negative, -18.2, -0.8, -0.8);

    // Mixed signs: the expected sign margin is zero.
    requireMinMaxAndMargin(mixed, -8.0, 2.1, 0.0);

    {
        // The sum of two accumulators must have the same checksum as a single
        // accumulator which was given both sets of values.
        TMinMax fromPositive;
        TMinMax fromNegative;
        TMinMax fromBoth;
        fromPositive.add(positive);
        fromNegative.add(negative);
        fromBoth.add(positive);
        fromBoth.add(negative);
        BOOST_REQUIRE_EQUAL((fromPositive + fromNegative).checksum(),
                            fromBoth.checksum());
    }
}
BOOST_AUTO_TEST_SUITE_END()