lib/model/unittest/CEventRateDataGathererTest.cc (1,510 lines of code) (raw):
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <core/CLogger.h>
#include <core/CRegex.h>
#include <maths/common/COrderings.h>
#include <model/CDataGatherer.h>
#include <model/CEventData.h>
#include <model/CEventRateBucketGatherer.h>
#include <model/CResourceMonitor.h>
#include <model/CSearchKey.h>
#include <model/ModelTypes.h>
#include <model/SModelParams.h>
#include <boost/test/unit_test.hpp>
#include <fstream>
#include <utility>
#include <vector>
#include "ModelTestHelpers.h"
BOOST_AUTO_TEST_SUITE(CEventRateDataGathererTest)
using namespace ml;
using namespace model;
using TSizeVec = std::vector<std::size_t>;
using TFeatureVec = std::vector<model_t::EFeature>;
using TSizeUInt64Pr = std::pair<std::size_t, std::uint64_t>;
using TSizeUInt64PrVec = std::vector<TSizeUInt64Pr>;
using TStrVec = std::vector<std::string>;
using TStrVecCItr = TStrVec::const_iterator;
using TStrVecVec = std::vector<TStrVec>;
using TFeatureData = SEventRateFeatureData;
using TSizeFeatureDataPr = std::pair<std::size_t, TFeatureData>;
using TSizeFeatureDataPrVec = std::vector<TSizeFeatureDataPr>;
using TFeatureSizeFeatureDataPrVecPr = std::pair<model_t::EFeature, TSizeFeatureDataPrVec>;
using TFeatureSizeFeatureDataPrVecPrVec = std::vector<TFeatureSizeFeatureDataPrVecPr>;
using TSizeSizePr = std::pair<std::size_t, std::size_t>;
using TSizeSizePrFeatureDataPr = std::pair<TSizeSizePr, TFeatureData>;
using TSizeSizePrFeatureDataPrVec = std::vector<TSizeSizePrFeatureDataPr>;
using TFeatureSizeSizePrFeatureDataPrVecPr =
std::pair<model_t::EFeature, TSizeSizePrFeatureDataPrVec>;
using TFeatureSizeSizePrFeatureDataPrVecPrVec = std::vector<TFeatureSizeSizePrFeatureDataPrVecPr>;
using TSizeSizePrOptionalStrPr = CBucketGatherer::TSizeSizePrOptionalStrPr;
using TSizeSizePrOptionalStrPrUInt64UMapVec = CBucketGatherer::TSizeSizePrOptionalStrPrUInt64UMapVec;
using TTimeVec = std::vector<core_t::TTime>;
using TStrCPtrVec = CBucketGatherer::TStrCPtrVec;
namespace {
const CSearchKey key;
const std::string EMPTY_STRING;
// Register a person with the gatherer and return the person identifier that
// processFields stored in the event data.
//
// The field vector passed to processFields holds, in order: the person name
// p, numInfluencers pointers to a dummy influencer value "i" and, only when
// it is not empty, the value v.
std::size_t addPerson(CDataGatherer& gatherer,
                      CResourceMonitor& resourceMonitor,
                      const std::string& p,
                      const std::string& v = EMPTY_STRING,
                      const std::size_t numInfluencers = 0) {
    // Must outlive the processFields call because only its address is stored.
    std::string const dummyInfluencer("i");

    CDataGatherer::TStrCPtrVec fieldValues{&p};
    fieldValues.insert(fieldValues.end(), numInfluencers, &dummyInfluencer);
    if (v.empty() == false) {
        fieldValues.push_back(&v);
    }

    CEventData eventData;
    gatherer.processFields(fieldValues, eventData, resourceMonitor);
    return *eventData.personId();
}
// Add a single arrival at the given time whose only field value is the
// person name.
void addArrival(CDataGatherer& gatherer,
                CResourceMonitor& resourceMonitor,
                const core_t::TTime time,
                const std::string& person) {
    CEventData arrival;
    arrival.time(time);

    CDataGatherer::TStrCPtrVec fields{&person};
    gatherer.addArrival(fields, arrival, resourceMonitor);
}
// Add a single arrival at the given time whose field values are the person
// name followed by the attribute name.
void addArrival(CDataGatherer& gatherer,
                CResourceMonitor& resourceMonitor,
                const core_t::TTime time,
                const std::string& person,
                const std::string& attribute) {
    CEventData arrival;
    arrival.time(time);

    CDataGatherer::TStrCPtrVec fields{&person, &attribute};
    gatherer.addArrival(fields, arrival, resourceMonitor);
}
// Add a single arrival at the given time for a person with one influencer
// and a value.
//
// Note the field order handed to the gatherer differs from the parameter
// order: person, then influencer, then value.
void addArrival(CDataGatherer& gatherer,
                CResourceMonitor& resourceMonitor,
                const core_t::TTime time,
                const std::string& person,
                const std::string& value,
                const std::string& influencer) {
    CEventData arrival;
    arrival.time(time);

    CDataGatherer::TStrCPtrVec fields{&person, &influencer, &value};
    gatherer.addArrival(fields, arrival, resourceMonitor);
}
// Add a single arrival at the given time for a person with any number of
// influencer values and an optional value.
//
// The field values are the person, every entry of influencers in order and
// finally the value, which is left out altogether when it is empty.
void addArrival(CDataGatherer& gatherer,
                CResourceMonitor& resourceMonitor,
                const core_t::TTime time,
                const std::string& person,
                const TStrVec& influencers,
                const std::string& value) {
    CDataGatherer::TStrCPtrVec fields{&person};
    for (std::size_t i = 0; i < influencers.size(); ++i) {
        fields.push_back(&influencers[i]);
    }
    if (value.empty() == false) {
        fields.push_back(&value);
    }

    CEventData arrival;
    arrival.time(time);
    gatherer.addArrival(fields, arrival, resourceMonitor);
}
// Drive an individual event rate gatherer configured with the single supplied
// feature and one influencer field ("IF1") through up to five buckets of
// length 600 starting at time 0.
//
// data[i] is the time of the i'th arrival for person "p" and influencers[i]
// holds the influencer values attached to it. Before an arrival is added,
// every bucket which ends at or before that arrival is checked: the printed
// feature data must equal expected[j] for the j'th bucket and the gatherer is
// passed to testPersistence. When valueField is not empty each arrival also
// carries the value "value".
void testInfluencerPerFeature(const model_t::EFeature feature,
                              const TTimeVec& data,
                              const TStrVecVec& influencers,
                              const TStrVec& expected,
                              const std::string& valueField,
                              CResourceMonitor& resourceMonitor) {
    LOG_DEBUG(<< " *** testing " << model_t::print(feature) << " ***");

    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    SModelParams const params(bucketLength);

    TFeatureVec features{feature};
    TStrVec influencerFieldNames{std::string("IF1")};

    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .influenceFieldNames(influencerFieldNames)
                                 .valueFieldName(valueField)
                                 .build();
    BOOST_TEST_REQUIRE(!gatherer.isPopulation());
    BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, resourceMonitor, "p", valueField, 1));

    BOOST_REQUIRE_EQUAL(1, gatherer.numberFeatures());
    for (std::size_t f = 0; f < features.size(); ++f) {
        BOOST_REQUIRE_EQUAL(features[f], gatherer.feature(f));
    }

    testGathererAttributes(gatherer, startTime, bucketLength);

    core_t::TTime time = startTime;
    std::size_t bucket = 0;
    for (std::size_t i = 0; i < data.size(); ++i) {
        // Check every bucket that is complete before the next arrival.
        while (bucket < 5 && data[i] >= time + bucketLength) {
            LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")");

            TFeatureSizeFeatureDataPrVecPrVec featureData;
            gatherer.featureData(time, bucketLength, featureData);
            LOG_DEBUG(<< "featureData = " << featureData);

            BOOST_REQUIRE_EQUAL(1, featureData.size());
            BOOST_REQUIRE_EQUAL(feature, featureData[0].first);
            BOOST_REQUIRE_EQUAL(expected[bucket],
                                core::CContainerPrinter::print(featureData[0].second));

            testPersistence(params, gatherer, model_t::E_EventRate);

            time += bucketLength;
            ++bucket;
            gatherer.timeNow(time);
        }

        // Once five buckets have been checked the remaining data are ignored.
        if (bucket < 5) {
            addArrival(gatherer, resourceMonitor, data[i], "p", influencers[i],
                       valueField.empty() ? EMPTY_STRING : "value");
        }
    }
}
// Feed every data row of a comma separated file into the gatherer.
//
// The first line of the file is treated as a header and discarded. For each
// following line column 0 is parsed as the event time and the columns whose
// zero-based indices are listed in fields are passed, in that order, as the
// field values of the arrival.
//
// The test fails (rather than reading out of bounds) if the file cannot be
// opened, has no header line, or contains a row with an unparseable time or
// with fewer columns than fields requires.
void importCsvData(CDataGatherer& gatherer,
                   CResourceMonitor& resourceMonitor,
                   const std::string& filename,
                   const TSizeVec& fields) {
    // The stream closes itself when it goes out of scope.
    std::ifstream ifs(filename.c_str());
    BOOST_TEST_REQUIRE(ifs.is_open());

    core::CRegex regex;
    BOOST_TEST_REQUIRE(regex.init(","));

    std::string line;
    // read the header
    BOOST_TEST_REQUIRE(std::getline(ifs, line).good());

    while (std::getline(ifs, line)) {
        LOG_TRACE(<< "Got string: " << line);
        core::CRegex::TStrVec tokens;
        regex.split(line, tokens);

        // Guard the tokens[0] access below.
        BOOST_TEST_REQUIRE(!tokens.empty());
        core_t::TTime time{0};
        BOOST_TEST_REQUIRE(core::CStringUtils::stringToType(tokens[0], time));

        CDataGatherer::TStrCPtrVec fieldValues;
        for (const auto field : fields) {
            // A short row must fail the test, not index past the end.
            BOOST_TEST_REQUIRE(field < tokens.size());
            fieldValues.push_back(&tokens[field]);
        }

        CEventData data;
        data.time(time);
        gatherer.addArrival(fieldValues, data, resourceMonitor);
    }
}
// Timing configuration passed to the STestTimes/STestData overload of
// testGathererMultipleSeries.
struct STestTimes {
// Start of the first bucket that testGathererMultipleSeries checks.
core_t::TTime s_StartTime;
// Length of each bucket; also the step between successive bucket checks.
core_t::TTime s_BucketLength;
};
struct STestData {
std::vector<core_t::TTime> data1;
std::vector<core_t::TTime> data2;
};
void testGathererMultipleSeries(const STestTimes& testTimes,
const STestData& testData,
const std::vector<std::string>& expectedPersonCounts,
const SModelParams& params,
core_t::TTime upperLimit,
CDataGatherer& gatherer,
CResourceMonitor& resourceMonitor) {
const core_t::TTime startTime = testTimes.s_StartTime;
const core_t::TTime bucketLength = testTimes.s_BucketLength;
BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, resourceMonitor, "p1"));
BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, resourceMonitor, "p2"));
core_t::TTime time = startTime;
std::size_t i1 = 0U;
std::size_t i2 = 0U;
std::size_t j = 0U;
for (;;) {
for (/**/; j < 5 && std::min(testData.data1[i1], testData.data2[i2]) >= time + upperLimit;
time += bucketLength, ++j) {
LOG_DEBUG(<< "Processing bucket [" << time << ", " << time + bucketLength << ")");
TFeatureSizeFeatureDataPrVecPrVec featureData;
gatherer.featureData(time, bucketLength, featureData);
LOG_DEBUG(<< "featureData = " << featureData);
BOOST_REQUIRE_EQUAL(1, featureData.size());
BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
featureData[0].first);
BOOST_REQUIRE_EQUAL(expectedPersonCounts[j],
core::CContainerPrinter::print(featureData[0].second));
testPersistence(params, gatherer, model_t::E_EventRate);
}
if (j >= 5) {
break;
}
if (testData.data1[i1] < testData.data2[i2]) {
LOG_DEBUG(<< "Adding arrival for p1 at " << testData.data1[i1]);
addArrival(gatherer, resourceMonitor, testData.data1[i1], "p1");
++i1;
} else {
LOG_DEBUG(<< "Adding arrival for p2 at " << testData.data2[i2]);
addArrival(gatherer, resourceMonitor, testData.data2[i2], "p2");
++i2;
}
}
TSizeVec peopleToRemove;
peopleToRemove.push_back(1);
gatherer.recyclePeople(peopleToRemove);
BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople());
BOOST_REQUIRE_EQUAL(std::string("p1"), gatherer.personName(0));
BOOST_REQUIRE_EQUAL(std::string("-"), gatherer.personName(1));
std::size_t pid;
BOOST_TEST_REQUIRE(gatherer.personId("p1", pid));
BOOST_REQUIRE_EQUAL(0, pid);
BOOST_TEST_REQUIRE(!gatherer.personId("p2", pid));
TFeatureSizeFeatureDataPrVecPrVec featureData;
gatherer.featureData(startTime + (4 * bucketLength), bucketLength, featureData);
LOG_DEBUG(<< "featureData = " << featureData);
BOOST_REQUIRE_EQUAL(1, featureData.size());
BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
featureData[0].first);
BOOST_REQUIRE_EQUAL(std::string("[(0, 3)]"),
core::CContainerPrinter::print(featureData[0].second));
}
// Check recycling of several people within a single bucket.
//
// Five people "p1" to "p5" are added and given 1, 1, 2, 1 and 3 arrivals
// respectively in the bucket starting at startTime. After verifying those
// counts, people 0, 1 and 3 are recycled and the test checks that only "p3"
// (id 2) and "p5" (id 4) remain, that the recycled ids report the name "-",
// and that the bucket's feature data only contains the two survivors.
//
// The gatherer is expected to gather E_IndividualCountByBucketAndPerson only.
void testGathererMultipleSeries(const core_t::TTime startTime,
                                const core_t::TTime bucketLength,
                                CDataGatherer& gatherer,
                                CResourceMonitor& resourceMonitor) {
    BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, resourceMonitor, "p1"));
    BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, resourceMonitor, "p2"));
    BOOST_REQUIRE_EQUAL(2, addPerson(gatherer, resourceMonitor, "p3"));
    BOOST_REQUIRE_EQUAL(3, addPerson(gatherer, resourceMonitor, "p4"));
    BOOST_REQUIRE_EQUAL(4, addPerson(gatherer, resourceMonitor, "p5"));

    // One arrival for everyone, then one extra for p3 and two extra for p5.
    for (std::size_t i = 0; i < 5; ++i) {
        addArrival(gatherer, resourceMonitor, startTime, gatherer.personName(i));
    }
    addArrival(gatherer, resourceMonitor, startTime + 1, gatherer.personName(2));
    addArrival(gatherer, resourceMonitor, startTime + 2, gatherer.personName(4));
    addArrival(gatherer, resourceMonitor, startTime + 3, gatherer.personName(4));

    TFeatureSizeFeatureDataPrVecPrVec featureData;
    gatherer.featureData(startTime, bucketLength, featureData);
    LOG_DEBUG(<< "featureData = " << featureData);
    BOOST_REQUIRE_EQUAL(1, featureData.size());
    BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
                        featureData[0].first);
    BOOST_REQUIRE_EQUAL(std::string("[(0, 1), (1, 1), (2, 2), (3, 1), (4, 3)]"),
                        core::CContainerPrinter::print(featureData[0].second));

    TSizeVec peopleToRemove;
    peopleToRemove.push_back(0);
    peopleToRemove.push_back(1);
    peopleToRemove.push_back(3);
    gatherer.recyclePeople(peopleToRemove);

    BOOST_REQUIRE_EQUAL(2, gatherer.numberActivePeople());
    BOOST_REQUIRE_EQUAL(std::string("p3"), gatherer.personName(2));
    BOOST_REQUIRE_EQUAL(std::string("p5"), gatherer.personName(4));
    BOOST_REQUIRE_EQUAL(std::string("-"), gatherer.personName(0));
    BOOST_REQUIRE_EQUAL(std::string("-"), gatherer.personName(1));
    BOOST_REQUIRE_EQUAL(std::string("-"), gatherer.personName(3));

    std::size_t pid;
    BOOST_TEST_REQUIRE(gatherer.personId("p3", pid));
    BOOST_REQUIRE_EQUAL(2, pid);
    BOOST_TEST_REQUIRE(gatherer.personId("p5", pid));
    BOOST_REQUIRE_EQUAL(4, pid);

    // The same bucket must now only report the people that were kept.
    gatherer.featureData(startTime, bucketLength, featureData);
    LOG_DEBUG(<< "featureData = " << featureData);
    BOOST_REQUIRE_EQUAL(1, featureData.size());
    BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
                        featureData[0].first);
    BOOST_REQUIRE_EQUAL(std::string("[(2, 2), (4, 3)]"),
                        core::CContainerPrinter::print(featureData[0].second));
}
} // namespace
// Fixture named by the BOOST_FIXTURE_TEST_CASE test cases in this file; it
// supplies each of them with a CResourceMonitor.
class CTestFixture {
protected:
// Resource monitor the test cases pass to the gatherer helper functions.
CResourceMonitor m_ResourceMonitor;
};
// Import the same CSV file into gatherers configured with a latency of five
// buckets and hand each of them to testPersistence.
//
// Four configurations are covered: unique count and non-zero count, each
// without and with the "user" influencer field. The column indices passed to
// importCsvData follow the gatherer field order: person ("program"), then
// any influencer ("user"), then any value ("file").
BOOST_FIXTURE_TEST_CASE(testLatencyPersist, CTestFixture) {
    constexpr core_t::TTime bucketLength = 3600;
    constexpr core_t::TTime latency = 5 * bucketLength;
    constexpr core_t::TTime startTime = 1420192800;
    SModelParams params(bucketLength);
    params.configureLatency(latency, bucketLength);

    {
        // Create a gatherer, no influences
        TFeatureVec features;
        features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .personFieldName("program")
                                     .valueFieldName("file")
                                     .build();
        TSizeVec fields;
        fields.push_back(2);
        fields.push_back(1);
        importCsvData(gatherer, m_ResourceMonitor,
                      "testfiles/files_users_programs.csv", fields);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
    {
        // Create a gatherer, with influences
        TFeatureVec features;
        TStrVec influencers;
        influencers.emplace_back("user");
        features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson);
        // The influencer field must be configured here: three columns
        // (person, influencer, value) are imported below.
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .personFieldName("program")
                                     .influenceFieldNames(influencers)
                                     .valueFieldName("file")
                                     .build();
        TSizeVec fields;
        fields.push_back(2);
        fields.push_back(3);
        fields.push_back(1);
        importCsvData(gatherer, m_ResourceMonitor,
                      "testfiles/files_users_programs.csv", fields);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
    {
        // Create a gatherer, no influences
        TFeatureVec features;
        features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .personFieldName("program")
                                     .build();
        TSizeVec fields;
        fields.push_back(2);
        importCsvData(gatherer, m_ResourceMonitor,
                      "testfiles/files_users_programs.csv", fields);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
    {
        // Create a gatherer, with influences
        TFeatureVec features;
        TStrVec influencers;
        influencers.emplace_back("user");
        features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .personFieldName("program")
                                     .influenceFieldNames(influencers)
                                     .build();
        TSizeVec fields;
        fields.push_back(2);
        fields.push_back(3);
        importCsvData(gatherer, m_ResourceMonitor,
                      "testfiles/files_users_programs.csv", fields);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
}
// Check the per-bucket feature data gathered for a single person "p" against
// hand-computed values for the first five buckets of length 600.
//
// Each section replays the same in-order arrival times. Before an arrival is
// added, every bucket that ends at or before it is checked and the gatherer
// is passed to testPersistence.
BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) {
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    SModelParams const params(bucketLength);

    constexpr std::array<core_t::TTime, 15> data = {
        1,    15,   180, 190, 400, 550, // bucket 1
        600,  799,  1199,               // bucket 2
        1200, 1250,                     // bucket 3
                                        // bucket 4
        2420, 2480, 2490,               // bucket 5
        10000                           // sentinel
    };

    std::array const expectedPersonCounts{
        std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"),
        std::string("[(0, 0)]"), std::string("[(0, 3)]")};
    std::array const expectedPersonNonZeroCounts{
        std::string("[(0, 6)]"), std::string("[(0, 3)]"),
        std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")};
    std::array const expectedPersonIndicator{
        std::string("[(0, 1)]"), std::string("[(0, 1)]"),
        std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")};

    // Count by bucket and person together with a feature which is not valid
    // for this gatherer: the gatherer must report only the count feature.
    {
        TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson,
                             model_t::E_IndividualMinByPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();
        BOOST_TEST_REQUIRE(!gatherer.isPopulation());
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p"));

        BOOST_REQUIRE_EQUAL(1, gatherer.numberFeatures());
        const std::size_t onlyFeature = 0;
        BOOST_REQUIRE_EQUAL(features[onlyFeature], gatherer.feature(onlyFeature));
        BOOST_TEST_REQUIRE(gatherer.hasFeature(model_t::E_IndividualCountByBucketAndPerson));
        BOOST_TEST_REQUIRE(!gatherer.hasFeature(model_t::E_IndividualMinByPerson));

        testGathererAttributes(gatherer, startTime, bucketLength);

        core_t::TTime time = startTime;
        std::size_t bucket = 0;
        for (std::size_t i = 0; i < std::size(data); ++i) {
            while (bucket < 5 && data[i] >= time + bucketLength) {
                LOG_DEBUG(<< "Processing bucket [" << time << ", "
                          << time + bucketLength << ")");

                TFeatureSizeFeatureDataPrVecPrVec featureData;
                gatherer.featureData(time, bucketLength, featureData);
                LOG_DEBUG(<< "featureData = " << featureData);

                BOOST_REQUIRE_EQUAL(1, featureData.size());
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
                                    featureData[0].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonCounts[bucket],
                    core::CContainerPrinter::print(featureData[0].second));

                testPersistence(params, gatherer, model_t::E_EventRate);

                time += bucketLength;
                ++bucket;
                gatherer.timeNow(time);
            }

            if (bucket < 5) {
                addArrival(gatherer, m_ResourceMonitor, data[i], "p");
            }
        }
    }

    // Non-zero count and total bucket count by person; both are compared
    // with the same expected non-zero counts.
    {
        TFeatureVec features{model_t::E_IndividualNonZeroCountByBucketAndPerson,
                             model_t::E_IndividualTotalBucketCountByPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p"));

        core_t::TTime time = startTime;
        std::size_t bucket = 0;
        for (std::size_t i = 0; i < std::size(data); ++i) {
            while (bucket < 5 && data[i] >= time + bucketLength) {
                LOG_DEBUG(<< "Processing bucket [" << time << ", "
                          << time + bucketLength << ")");

                TFeatureSizeFeatureDataPrVecPrVec featureData;
                gatherer.featureData(time, bucketLength, featureData);
                LOG_DEBUG(<< "featureData = " << featureData);

                BOOST_REQUIRE_EQUAL(2, featureData.size());
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson,
                                    featureData[0].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonNonZeroCounts[bucket],
                    core::CContainerPrinter::print(featureData[0].second));
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualTotalBucketCountByPerson,
                                    featureData[1].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonNonZeroCounts[bucket],
                    core::CContainerPrinter::print(featureData[1].second));

                testPersistence(params, gatherer, model_t::E_EventRate);

                time += bucketLength;
                ++bucket;
                gatherer.timeNow(time);
            }

            if (bucket < 5) {
                addArrival(gatherer, m_ResourceMonitor, data[i], "p");
            }
        }
    }

    // Person indicator by bucket.
    {
        TFeatureVec features{model_t::E_IndividualIndicatorOfBucketPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p"));

        core_t::TTime time = startTime;
        std::size_t bucket = 0;
        for (std::size_t i = 0; i < std::size(data); ++i) {
            while (bucket < 5 && data[i] >= time + bucketLength) {
                LOG_DEBUG(<< "Processing bucket [" << time << ", "
                          << time + bucketLength << ")");

                TFeatureSizeFeatureDataPrVecPrVec featureData;
                gatherer.featureData(time, bucketLength, featureData);
                LOG_DEBUG(<< "featureData = " << featureData);

                BOOST_REQUIRE_EQUAL(1, featureData.size());
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson,
                                    featureData[0].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonIndicator[bucket],
                    core::CContainerPrinter::print(featureData[0].second));

                testPersistence(params, gatherer, model_t::E_EventRate);

                time += bucketLength;
                ++bucket;
                gatherer.timeNow(time);
            }

            if (bucket < 5) {
                addArrival(gatherer, m_ResourceMonitor, data[i], "p");
            }
        }
    }
}
// Check bucket counts and recycling when more than one person is present.
//
// The first section interleaves the arrivals of two people over five buckets
// via the STestTimes/STestData overload of testGathererMultipleSeries. The
// second section uses the five-person, single-bucket overload. Each section
// finishes by checking numberByFieldValues() on its gatherer.
BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) {
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;

    const TTimeVec data1{
        1,    15,   180, 190, 400, 550, // bucket 1
        600,  799,  1199,               // bucket 2
        1200, 1250,                     // bucket 3
        1900,                           // bucket 4
        2420, 2480, 2490,               // bucket 5
        10000                           // sentinel
    };
    const TTimeVec data2{
        1,    5,    15,   25,   180,  190,  400, 550, // bucket 1
        600,  605,  609,  799,  1199,                 // bucket 2
        1200, 1250, 1255, 1256, 1300, 1400,           // bucket 3
        1900, 1950,                                   // bucket 4
        2420, 2480, 2490, 2500, 2550, 2600,           // bucket 5
        10000                                         // sentinel
    };

    const TStrVec expectedPersonCounts{
        std::string("[(0, 6), (1, 8)]"), std::string("[(0, 3), (1, 5)]"),
        std::string("[(0, 2), (1, 6)]"), std::string("[(0, 1), (1, 2)]"),
        std::string("[(0, 3), (1, 6)]")};

    const SModelParams params(bucketLength);

    // Two people, five buckets.
    {
        TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();

        const STestTimes times{.s_StartTime = startTime, .s_BucketLength = bucketLength};
        const STestData series{.data1 = data1, .data2 = data2};
        testGathererMultipleSeries(times, series, expectedPersonCounts, params,
                                   bucketLength, gatherer, m_ResourceMonitor);

        BOOST_REQUIRE_EQUAL(1, gatherer.numberByFieldValues());
    }

    // Five people, one bucket.
    {
        TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();

        testGathererMultipleSeries(startTime, bucketLength, gatherer, m_ResourceMonitor);

        BOOST_REQUIRE_EQUAL(2, gatherer.numberByFieldValues());
    }
}
// Check that recycling people leaves the gatherer with the same checksum as
// a gatherer which only ever saw the remaining people.
//
// Eight people "p1" to "p8" are added with differing numbers of arrivals and
// then removed in three rounds. After each round the checksum is compared
// with that of a freshly built gatherer populated with just the survivors.
// Finally two people are added again and the ids they receive are compared
// with the gatherer's list of recycled person ids.
BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) {
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;

    TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson,
                         model_t::E_IndividualNonZeroCountByBucketAndPerson,
                         model_t::E_IndividualTotalBucketCountByPerson,
                         model_t::E_IndividualIndicatorOfBucketPerson,
                         model_t::E_IndividualLowCountsByBucketAndPerson,
                         model_t::E_IndividualHighCountsByBucketAndPerson};
    const SModelParams params(bucketLength);
    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .build();

    const TStrVec allPeople{std::string("p1"), std::string("p2"),
                            std::string("p3"), std::string("p4"),
                            std::string("p5"), std::string("p6"),
                            std::string("p7"), std::string("p8")};
    for (std::size_t pid = 0; pid < allPeople.size(); ++pid) {
        BOOST_REQUIRE_EQUAL(pid, addPerson(gatherer, m_ResourceMonitor, allPeople[pid]));
    }

    // counts[i] arrivals for person i, at times startTime, startTime + 1, ...
    constexpr std::array<core_t::TTime, 8> counts = {0, 3, 5, 2, 0, 5, 7, 10};
    for (std::size_t pid = 0; pid < std::size(counts); ++pid) {
        for (core_t::TTime offset = 0; offset < counts[pid]; ++offset) {
            addArrival(gatherer, m_ResourceMonitor, startTime + offset,
                       gatherer.personName(pid));
        }
    }

    // Round 1: remove p1 and p2.
    {
        TSizeVec peopleToRemove{0, 1};
        gatherer.recyclePeople(peopleToRemove);

        CDataGatherer expectedGatherer =
            CDataGathererBuilder(model_t::E_EventRate, features, params, key, startTime)
                .build();
        const TStrVec survivors{std::string("p3"), std::string("p4"),
                                std::string("p5"), std::string("p6"),
                                std::string("p7"), std::string("p8")};
        for (std::size_t pid = 0; pid < survivors.size(); ++pid) {
            BOOST_REQUIRE_EQUAL(
                pid, addPerson(expectedGatherer, m_ResourceMonitor, survivors[pid]));
        }

        constexpr std::array<core_t::TTime, 6> expectedCounts = {5, 2, 0,
                                                                 5, 7, 10};
        for (std::size_t pid = 0; pid < std::size(expectedCounts); ++pid) {
            for (core_t::TTime offset = 0; offset < expectedCounts[pid]; ++offset) {
                addArrival(expectedGatherer, m_ResourceMonitor,
                           startTime + offset, expectedGatherer.personName(pid));
            }
        }

        LOG_DEBUG(<< "checksum          = " << gatherer.checksum());
        LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum());
        BOOST_REQUIRE_EQUAL(gatherer.checksum(), expectedGatherer.checksum());
    }

    // Round 2: remove p4, p5 and p8.
    {
        TSizeVec peopleToRemove{3, 4, 7};
        gatherer.recyclePeople(peopleToRemove);

        CDataGatherer expectedGatherer =
            CDataGathererBuilder(model_t::E_EventRate, features, params, key, startTime)
                .build();
        const TStrVec survivors{std::string("p3"), std::string("p6"),
                                std::string("p7")};
        for (std::size_t pid = 0; pid < survivors.size(); ++pid) {
            BOOST_REQUIRE_EQUAL(
                pid, addPerson(expectedGatherer, m_ResourceMonitor, survivors[pid]));
        }

        constexpr std::array<core_t::TTime, 3> expectedCounts = {5, 5, 7};
        for (std::size_t pid = 0; pid < std::size(expectedCounts); ++pid) {
            for (core_t::TTime offset = 0; offset < expectedCounts[pid]; ++offset) {
                addArrival(expectedGatherer, m_ResourceMonitor,
                           startTime + offset, expectedGatherer.personName(pid));
            }
        }

        LOG_DEBUG(<< "checksum          = " << gatherer.checksum());
        LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum());
        BOOST_REQUIRE_EQUAL(gatherer.checksum(), expectedGatherer.checksum());
    }

    // Round 3: remove everyone that is left; expect an empty gatherer.
    {
        TSizeVec peopleToRemove{2, 5, 6};
        gatherer.recyclePeople(peopleToRemove);

        const CDataGatherer expectedGatherer =
            CDataGathererBuilder(model_t::E_EventRate, features, params, key, startTime)
                .build();

        LOG_DEBUG(<< "checksum          = " << gatherer.checksum());
        LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum());
        BOOST_REQUIRE_EQUAL(gatherer.checksum(), expectedGatherer.checksum());
    }

    // Add two people back; the ids handed out are the expected recycled ids.
    const std::size_t firstReusedId = addPerson(gatherer, m_ResourceMonitor, "p1");
    const std::size_t secondReusedId = addPerson(gatherer, m_ResourceMonitor, "p7");
    const TSizeVec expectedRecycled{firstReusedId, secondReusedId};

    LOG_DEBUG(<< "recycled          = " << gatherer.recycledPersonIds());
    LOG_DEBUG(<< "expected recycled = " << expectedRecycled);
    BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedRecycled),
                        core::CContainerPrinter::print(gatherer.recycledPersonIds()));
}
// Check the final per-bucket feature data for a single person "p" when the
// arrivals are supplied out of time order and the gatherer is configured
// with three latency buckets.
//
// A bucket starting at time t is only checked once an arrival at or after
// t + latencyTime (three bucket lengths) is about to be added, so that late
// arrivals for that bucket have already been seen. The expected values match
// those of the in-order single series test.
BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) {
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(3);
    constexpr core_t::TTime latencyTime =
        bucketLength * static_cast<core_t::TTime>(latencyBuckets);
    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    const std::array<core_t::TTime, 15> data = {
        1,    180,  1200, 190, 400, 600, // bucket 1, 2 & 3
        550,  799,  1199, 15,            // bucket 1 & 2
        2490,                            // bucket 5
                                         // bucket 4 is empty
        2420, 2480, 1250,                // bucket 3 & 5
        10000                            // sentinel
    };

    const std::array expectedPersonCounts = {
        std::string("[(0, 6)]"), std::string("[(0, 3)]"), std::string("[(0, 2)]"),
        std::string("[(0, 0)]"), std::string("[(0, 3)]")};
    const std::array expectedPersonNonZeroCounts = {
        std::string("[(0, 6)]"), std::string("[(0, 3)]"),
        std::string("[(0, 2)]"), std::string("[]"), std::string("[(0, 3)]")};
    const std::array expectedPersonIndicator = {
        std::string("[(0, 1)]"), std::string("[(0, 1)]"),
        std::string("[(0, 1)]"), std::string("[]"), std::string("[(0, 1)]")};

    // Test the count by bucket and person.
    {
        TFeatureVec features;
        features.push_back(model_t::E_IndividualCountByBucketAndPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();
        // The expected strings refer to person id 0, so verify it here as
        // the other sections do.
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p"));

        core_t::TTime time = startTime;
        for (std::size_t i = 0, j = 0; i < std::size(data); ++i) {
            for (/**/; j < 5 && data[i] >= time + latencyTime;
                 time += bucketLength, ++j, gatherer.timeNow(time)) {
                LOG_DEBUG(<< "Processing bucket [" << time << ", "
                          << time + bucketLength << ")");

                TFeatureSizeFeatureDataPrVecPrVec featureData;
                gatherer.featureData(time, bucketLength, featureData);
                LOG_DEBUG(<< "featureData = " << featureData);
                BOOST_REQUIRE_EQUAL(1, featureData.size());
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson,
                                    featureData[0].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonCounts[j],
                    core::CContainerPrinter::print(featureData[0].second));

                testPersistence(params, gatherer, model_t::E_EventRate);
            }

            // Stop adding data once all five buckets have been checked.
            if (j < 5) {
                LOG_DEBUG(<< "Arriving = " << data[i]);
                addArrival(gatherer, m_ResourceMonitor, data[i], "p");
            }
        }
    }

    // Test non-zero count and person bucket count.
    {
        TFeatureVec features;
        features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson);
        features.push_back(model_t::E_IndividualTotalBucketCountByPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p"));

        core_t::TTime time = startTime;
        for (std::size_t i = 0, j = 0; i < std::size(data); ++i) {
            for (/**/; j < 5 && data[i] >= time + latencyTime;
                 time += bucketLength, ++j, gatherer.timeNow(time)) {
                LOG_DEBUG(<< "Processing bucket [" << time << ", "
                          << time + bucketLength << ")");

                TFeatureSizeFeatureDataPrVecPrVec featureData;
                gatherer.featureData(time, bucketLength, featureData);
                LOG_DEBUG(<< "featureData = " << featureData);
                BOOST_REQUIRE_EQUAL(2, featureData.size());
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson,
                                    featureData[0].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonNonZeroCounts[j],
                    core::CContainerPrinter::print(featureData[0].second));
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualTotalBucketCountByPerson,
                                    featureData[1].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonNonZeroCounts[j],
                    core::CContainerPrinter::print(featureData[1].second));

                testPersistence(params, gatherer, model_t::E_EventRate);
            }

            if (j < 5) {
                addArrival(gatherer, m_ResourceMonitor, data[i], "p");
            }
        }
    }

    // Test person indicator by bucket.
    {
        TFeatureVec features;
        features.push_back(model_t::E_IndividualIndicatorOfBucketPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p"));

        core_t::TTime time = startTime;
        for (std::size_t i = 0, j = 0; i < std::size(data); ++i) {
            for (/**/; j < 5 && data[i] >= time + latencyTime;
                 time += bucketLength, ++j, gatherer.timeNow(time)) {
                LOG_DEBUG(<< "Processing bucket [" << time << ", "
                          << time + bucketLength << ")");

                TFeatureSizeFeatureDataPrVecPrVec featureData;
                gatherer.featureData(time, bucketLength, featureData);
                LOG_DEBUG(<< "featureData = " << featureData);
                BOOST_REQUIRE_EQUAL(1, featureData.size());
                BOOST_REQUIRE_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson,
                                    featureData[0].first);
                BOOST_REQUIRE_EQUAL(
                    expectedPersonIndicator[j],
                    core::CContainerPrinter::print(featureData[0].second));

                testPersistence(params, gatherer, model_t::E_EventRate);
            }

            if (j < 5) {
                addArrival(gatherer, m_ResourceMonitor, data[i], "p");
            }
        }
    }
}
BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderInterimResult, CTestFixture) {
    // Feed the arrivals of a single person out of time order and, after every
    // arrival, check the person count reported for each bucket we query.
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(3);

    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    constexpr std::array<core_t::TTime, 8> data = {
        1,    // bucket 1
        1200, // bucket 3
        600,  // bucket 2
        1199, // bucket 2
        15,   // bucket 1
        2490, // bucket 5 (bucket 4 stays empty)
        2420, // bucket 5
        1250  // bucket 3
    };

    TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .build();
    addPerson(gatherer, m_ResourceMonitor, "p");

    // The same result vector is handed to every featureData call.
    TFeatureSizeFeatureDataPrVecPrVec featureData;

    // (bucket start time, expected printed person counts for that bucket).
    using TBucketExpectation = std::pair<core_t::TTime, std::string>;

    // Adds one arrival for "p" and then verifies the listed buckets in order.
    const auto arriveThenExpect = [&](core_t::TTime arrival,
                                      std::initializer_list<TBucketExpectation> expectations) {
        addArrival(gatherer, m_ResourceMonitor, arrival, "p");
        for (const auto& [bucketStart, expectedCounts] : expectations) {
            gatherer.featureData(bucketStart, bucketLength, featureData);
            BOOST_REQUIRE_EQUAL(expectedCounts,
                                core::CContainerPrinter::print(featureData[0].second));
        }
    };

    // Bucket 1 only
    arriveThenExpect(data[0], {{0, "[(0, 1)]"}});

    // Buckets 1, 2 & 3
    arriveThenExpect(data[1], {{0, "[(0, 1)]"}, {600, "[(0, 0)]"}, {1200, "[(0, 1)]"}});
    arriveThenExpect(data[2], {{0, "[(0, 1)]"}, {600, "[(0, 1)]"}, {1200, "[(0, 1)]"}});
    arriveThenExpect(data[3], {{0, "[(0, 1)]"}, {600, "[(0, 2)]"}, {1200, "[(0, 1)]"}});
    arriveThenExpect(data[4], {{0, "[(0, 2)]"}, {600, "[(0, 2)]"}, {1200, "[(0, 1)]"}});

    // Buckets 3, 4 & 5
    arriveThenExpect(data[5], {{1200, "[(0, 1)]"}, {1800, "[(0, 0)]"}, {2400, "[(0, 1)]"}});
    arriveThenExpect(data[6], {{1200, "[(0, 1)]"}, {1800, "[(0, 0)]"}, {2400, "[(0, 2)]"}});
    arriveThenExpect(data[7], {{1200, "[(0, 2)]"}, {1800, "[(0, 0)]"}, {2400, "[(0, 2)]"}});
}
BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) {
    // Two people send out-of-order arrivals; the shared helpers verify the
    // per bucket person counts once each bucket is final.
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(3);
    constexpr core_t::TTime latencyTime =
        bucketLength * static_cast<core_t::TTime>(latencyBuckets);

    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    // Arrival times for the first person.
    const std::vector<core_t::TTime> data1{
        1,    15,   1200, 190, 400, 550, // bucket 1, 2 & 3
        600,  1250, 1199,                // bucket 2 & 3
        180,  799,                       // bucket 1 & 2
        2480,                            // bucket 5
        2420, 1900, 2490,                // bucket 4 & 5
        10000                            // sentinel
    };
    // Arrival times for the second person.
    const std::vector<core_t::TTime> data2{
        1250, 5,    15,   600,  180,  190, 400, 550, // bucket 1, 2 & 3
        25,   605,  609,  799,  1199,                // bucket 1 & 2
        1200, 1,    1255, 1950, 1400,                // bucket 1, 3 & 4
        2550, 1300, 2500,                            // bucket 3 & 5
        2420, 2480, 2490, 1256, 1900, 2600,          // bucket 3, 4 & 5
        10000                                        // sentinel
    };

    // Person counts checked against explicit per bucket expectations.
    {
        const std::vector<std::string> expectedPersonCounts{
            "[(0, 6), (1, 8)]", "[(0, 3), (1, 5)]", "[(0, 2), (1, 6)]",
            "[(0, 1), (1, 2)]", "[(0, 3), (1, 6)]"};

        TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();

        testGathererMultipleSeries(
            STestTimes{.s_StartTime = startTime, .s_BucketLength = bucketLength},
            STestData{.data1 = data1, .data2 = data2}, expectedPersonCounts,
            params, latencyTime, gatherer, m_ResourceMonitor);
    }

    // A fresh gatherer driven through the helper overload which takes only
    // the start time and bucket length.
    {
        TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .build();

        testGathererMultipleSeries(startTime, bucketLength, gatherer, m_ResourceMonitor);
    }
}
BOOST_FIXTURE_TEST_CASE(testArrivalBeforeLatencyWindowIsIgnored, CTestFixture) {
    // An arrival in bucket 4 is followed by one in bucket 1. No feature data
    // must be returned for bucket 1 and only the bucket 4 arrival is counted.
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(2);

    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    constexpr std::array<core_t::TTime, 2> data = {
        1800, // Bucket 4, thus bucket 1's values are already out of latency window
        1     // Bucket 1
    };

    TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .build();

    addPerson(gatherer, m_ResourceMonitor, "p");
    for (const core_t::TTime arrival : data) {
        addArrival(gatherer, m_ResourceMonitor, arrival, "p");
    }

    TFeatureSizeFeatureDataPrVecPrVec featureData;

    // Nothing at all is reported for bucket 1.
    gatherer.featureData(0, bucketLength, featureData);
    BOOST_REQUIRE_EQUAL(0, featureData.size());

    // The remaining buckets: (bucket start time, expected printed counts).
    const std::pair<core_t::TTime, std::string> expectedByBucket[] = {
        {600, "[(0, 0)]"}, {1200, "[(0, 0)]"}, {1800, "[(0, 1)]"}};
    for (const auto& [bucketStart, expectedCounts] : expectedByBucket) {
        gatherer.featureData(bucketStart, bucketLength, featureData);
        BOOST_REQUIRE_EQUAL(expectedCounts,
                            core::CContainerPrinter::print(featureData[0].second));
    }
}
BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) {
    // Resetting the second bucket must zero its count for the single person
    // and leave the counts of the neighbouring buckets unchanged.
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(2);

    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    constexpr std::array<core_t::TTime, 6> data = {
        100, 300,      // Bucket 1
        600, 800, 850, // Bucket 2
        1200           // Bucket 3
    };

    TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .build();

    addPerson(gatherer, m_ResourceMonitor, "p");
    for (const core_t::TTime arrival : data) {
        addArrival(gatherer, m_ResourceMonitor, arrival, "p");
    }

    TFeatureSizeFeatureDataPrVecPrVec featureData;

    // Checks consecutive buckets, beginning with the one at startTime, against
    // the expected printed counts supplied in bucket order.
    const auto requireCounts = [&](const TStrVec& expectedInBucketOrder) {
        core_t::TTime bucketStart = startTime;
        for (const auto& expectedCounts : expectedInBucketOrder) {
            gatherer.featureData(bucketStart, bucketLength, featureData);
            BOOST_REQUIRE_EQUAL(expectedCounts,
                                core::CContainerPrinter::print(featureData[0].second));
            bucketStart += bucketLength;
        }
    };

    // Before the reset.
    requireCounts({"[(0, 2)]", "[(0, 3)]", "[(0, 1)]"});

    gatherer.resetBucket(600);

    // After the reset only the second bucket has changed.
    requireCounts({"[(0, 2)]", "[(0, 0)]", "[(0, 1)]"});
}
BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) {
    // Resetting the second bucket must zero its count for every one of the
    // three people and leave the neighbouring buckets unchanged.
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(2);

    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    constexpr std::array<core_t::TTime, 6> data = {
        100, 300,      // Bucket 1
        600, 800, 850, // Bucket 2
        1200           // Bucket 3
    };
    constexpr std::array<const char*, 3> people = {"p1", "p2", "p3"};

    TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .build();

    for (const char* person : people) {
        addPerson(gatherer, m_ResourceMonitor, person);
    }
    // Every person sees every arrival time.
    for (const core_t::TTime arrival : data) {
        for (const char* person : people) {
            addArrival(gatherer, m_ResourceMonitor, arrival, person);
        }
    }

    TFeatureSizeFeatureDataPrVecPrVec featureData;

    // Checks consecutive buckets, beginning with the one at startTime, against
    // the expected printed counts supplied in bucket order.
    const auto requireCounts = [&](const TStrVec& expectedInBucketOrder) {
        core_t::TTime bucketStart = startTime;
        for (const auto& expectedCounts : expectedInBucketOrder) {
            gatherer.featureData(bucketStart, bucketLength, featureData);
            BOOST_REQUIRE_EQUAL(expectedCounts,
                                core::CContainerPrinter::print(featureData[0].second));
            bucketStart += bucketLength;
        }
    };

    // Before the reset.
    requireCounts({"[(0, 2), (1, 2), (2, 2)]", "[(0, 3), (1, 3), (2, 3)]",
                   "[(0, 1), (1, 1), (2, 1)]"});

    gatherer.resetBucket(600);

    // After the reset only the second bucket has changed.
    requireCounts({"[(0, 2), (1, 2), (2, 2)]", "[(0, 0), (1, 0), (2, 0)]",
                   "[(0, 1), (1, 1), (2, 1)]"});
}
BOOST_FIXTURE_TEST_CASE(testResetBucketGivenBucketNotAvailable, CTestFixture) {
    // With one latency bucket and a single arrival at 1200, resetBucket is
    // expected to succeed for the buckets starting at 600 and 1200 and to
    // fail for the ones starting at 0 and 1800.
    constexpr core_t::TTime startTime = 0;
    constexpr core_t::TTime bucketLength = 600;
    constexpr std::size_t latencyBuckets(1);

    SModelParams params(bucketLength);
    params.s_LatencyBuckets = latencyBuckets;

    TFeatureVec features{model_t::E_IndividualCountByBucketAndPerson};
    CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                  params, key, startTime)
                                 .build();

    addPerson(gatherer, m_ResourceMonitor, "p");
    addArrival(gatherer, m_ResourceMonitor, 1200, "p");

    // (bucket start time, whether the reset should report success).
    const std::pair<core_t::TTime, bool> expectedOutcomes[] = {
        {0, false}, {600, true}, {1200, true}, {1800, false}};
    for (const auto& [bucketStart, resettable] : expectedOutcomes) {
        BOOST_TEST_REQUIRE(gatherer.resetBucket(bucketStart) == resettable);
    }
}
BOOST_FIXTURE_TEST_CASE(testInfluencerBucketStatistics, CTestFixture) {
    // Run the same arrivals, each tagged with the influencer "i", through
    // testInfluencerPerFeature for a range of individual features and compare
    // against the expected printed feature data of each bucket.
    constexpr std::array<core_t::TTime, 15> data = {
        1,    15,   180, 190, 400, 550, // bucket 1
        600,  799,  1199,               // bucket 2
        1200, 1250,                     // bucket 3
                                        // bucket 4
        2420, 2480, 2490,               // bucket 5
        10000                           // sentinel
    };
    const TTimeVec dataVec(data.begin(), data.end());
    const TStrVecVec influencers(14, TStrVec{"i"});

    // Expected values, one entry per bucket.
    const TStrVec expectedPersonCountsVec{
        "[(0, 6, [[(i, ([6], 1))]])]", "[(0, 3, [[(i, ([3], 1))]])]",
        "[(0, 2, [[(i, ([2], 1))]])]", "[(0, 0, [[]])]",
        "[(0, 3, [[(i, ([3], 1))]])]"};
    const TStrVec expectedPersonNonZeroCountsVec{
        "[(0, 6, [[(i, ([6], 1))]])]", "[(0, 3, [[(i, ([3], 1))]])]",
        "[(0, 2, [[(i, ([2], 1))]])]", "[]", "[(0, 3, [[(i, ([3], 1))]])]"};
    const TStrVec expectedPersonIndicatorVec{
        "[(0, 1, [[(i, ([1], 1))]])]", "[(0, 1, [[(i, ([1], 1))]])]",
        "[(0, 1, [[(i, ([1], 1))]])]", "[]", "[(0, 1, [[(i, ([1], 1))]])]"};
    const TStrVec expectedArrivalTimeVec(6, "[]");
    const TStrVec expectedInfoContentVec{
        "[(0, 13, [[(i, ([13], 1))]])]", "[(0, 13, [[(i, ([13], 1))]])]",
        "[(0, 13, [[(i, ([13], 1))]])]", "[]", "[(0, 13, [[(i, ([13], 1))]])]"};

    // Features exercised with an empty fifth argument.
    testInfluencerPerFeature(model_t::E_IndividualCountByBucketAndPerson, dataVec,
                             influencers, expectedPersonCountsVec, "", m_ResourceMonitor);
    testInfluencerPerFeature(model_t::E_IndividualNonZeroCountByBucketAndPerson,
                             dataVec, influencers, expectedPersonNonZeroCountsVec,
                             "", m_ResourceMonitor);
    testInfluencerPerFeature(model_t::E_IndividualLowCountsByBucketAndPerson, dataVec,
                             influencers, expectedPersonCountsVec, "", m_ResourceMonitor);
    testInfluencerPerFeature(model_t::E_IndividualArrivalTimesByPerson, dataVec,
                             influencers, expectedArrivalTimeVec, "", m_ResourceMonitor);
    testInfluencerPerFeature(model_t::E_IndividualLowNonZeroCountByBucketAndPerson,
                             dataVec, influencers, expectedPersonNonZeroCountsVec,
                             "", m_ResourceMonitor);

    // Features exercised with "value" as the fifth argument.
    testInfluencerPerFeature(model_t::E_IndividualUniqueCountByBucketAndPerson,
                             dataVec, influencers, expectedPersonIndicatorVec,
                             "value", m_ResourceMonitor);
    testInfluencerPerFeature(model_t::E_IndividualInfoContentByBucketAndPerson,
                             dataVec, influencers, expectedInfoContentVec,
                             "value", m_ResourceMonitor);
}
// Fixture bundling the checks on CUniqueStringFeatureData (distinct count and
// info content feature data) which are run by the testDistinctStrings case.
class CDistinctStringsTestFixture : public CTestFixture {
protected:
    // Type aliases for convenience.
    using TOptionalStr = std::optional<std::string>;
    using TOptionalStrVec = std::vector<TOptionalStr>;

    // Populates a fresh SEventRateFeatureData from data using the distinct
    // count method and requires its print() output to equal expected.
    static void verifyDistinctCountFeature(const CUniqueStringFeatureData& data,
                                           const std::string& expected) {
        SEventRateFeatureData featureData(0);
        data.populateDistinctCountFeatureData(featureData);
        BOOST_REQUIRE_EQUAL(expected, featureData.print());
    }

    // Populates a fresh SEventRateFeatureData from data using the info
    // content method and requires its print() output to equal expected.
    static void verifyInfoContentFeature(const CUniqueStringFeatureData& data,
                                         const std::string& expected) {
        SEventRateFeatureData featureData(0);
        data.populateInfoContentFeatureData(featureData);
        BOOST_REQUIRE_EQUAL(expected, featureData.print());
    }

    // Sorts every group of influence values with COrderings::SFirstLess so
    // that the printed feature data can be compared with a fixed string.
    static void sortInfluenceValues(SEventRateFeatureData& featureData) {
        for (auto& influenceGroup : featureData.s_InfluenceValues) {
            std::sort(influenceGroup.begin(), influenceGroup.end(),
                      maths::common::COrderings::SFirstLess());
        }
    }

    // ----- Block 1: Distinct Count with NO influences -----
    static void testDistinctCountNoInfluence() {
        // No influencer fields at all.
        const TOptionalStrVec influencers{};
        CUniqueStringFeatureData data;

        // Initially, no strings have been inserted.
        verifyDistinctCountFeature(data, "0");

        // Inserting "str1" repeatedly keeps the distinct count at "1".
        for (std::size_t i = 0; i < 100; ++i) {
            data.insert("str1", influencers);
            verifyDistinctCountFeature(data, "1");
        }

        // Adding "str2" and "str3" takes the distinct count to "3", where it
        // stays however often they are repeated.
        for (std::size_t i = 0; i < 100; ++i) {
            data.insert("str2", influencers);
            data.insert("str3", influencers);
            verifyDistinctCountFeature(data, "3");
        }

        // "str1".."str3" are already present, so after inserting "str<i>" the
        // count must equal max(3, i).
        for (std::size_t i = 1; i < 100; ++i) {
            data.insert("str" + std::to_string(i), influencers);
            SEventRateFeatureData featureData(0);
            data.populateDistinctCountFeatureData(featureData);
            BOOST_REQUIRE_EQUAL(std::max(static_cast<std::uint64_t>(3),
                                         static_cast<std::uint64_t>(i)),
                                featureData.s_Count);
        }
    }

    // ----- Block 2: Distinct Count with a SINGLE influencer -----
    static void testDistinctCountSingleInfluence() {
        TOptionalStrVec influencers;
        influencers.emplace_back(); // initially, the optional is not set
        CUniqueStringFeatureData data;

        data.insert("str1", influencers);
        verifyDistinctCountFeature(data, "1, [[]]");

        // Now set the influencer value.
        influencers.back() = "inf1";
        data.insert("str1", influencers);
        verifyDistinctCountFeature(data, "1, [[(inf1, ([1], 1))]]");

        // Insert additional values under three different influencer values.
        data.insert("str2", influencers);
        data.insert("str2", influencers);
        data.insert("str2", influencers);
        data.insert("str2", influencers);
        influencers.back() = "inf2";
        data.insert("str1", influencers);
        data.insert("str3", influencers);
        influencers.back() = "inf3";
        data.insert("str3", influencers);

        SEventRateFeatureData featureData(0);
        data.populateDistinctCountFeatureData(featureData);
        // Sort so the comparison does not depend on the order of the values.
        sortInfluenceValues(featureData);
        BOOST_REQUIRE_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1)), (inf3, ([1], 1))]]"),
                            featureData.print());
    }

    // ----- Block 3: Distinct Count with MULTIPLE influencers -----
    static void testDistinctCountMultipleInfluence() {
        // Two influencer fields, both initially unset.
        TOptionalStrVec influencers;
        influencers.emplace_back();
        influencers.emplace_back();
        CUniqueStringFeatureData data;

        data.insert("str1", influencers);
        data.insert("str2", influencers);
        data.insert("str1", influencers);
        verifyDistinctCountFeature(data, "2, [[], []]");

        influencers[0] = "inf1";
        data.insert("str1", influencers);
        data.insert("str2", influencers);
        verifyDistinctCountFeature(data, "2, [[(inf1, ([2], 1))], []]");

        influencers[1] = "inf_v2";
        data.insert("str2", influencers);
        influencers[0] = "inf2";
        influencers[1] = "inf_v3";
        data.insert("str3", influencers);
        data.insert("str1", influencers);
        data.insert("str3", influencers);

        SEventRateFeatureData featureData(0);
        data.populateDistinctCountFeatureData(featureData);
        // Sort so the comparison does not depend on the order of the values.
        sortInfluenceValues(featureData);
        BOOST_REQUIRE_EQUAL(std::string("3, [[(inf1, ([2], 1)), (inf2, ([2], 1))], [(inf_v2, ([1], 1)), (inf_v3, ([2], 1))]]"),
                            featureData.print());
    }

    // ----- Block 4: Info Content with NO influences -----
    static void testInfoContentNoInfluence() {
        const TOptionalStrVec influencers{}; // empty
        CUniqueStringFeatureData data;

        verifyInfoContentFeature(data, "0");

        data.insert("str1", influencers);
        verifyInfoContentFeature(data, "12");

        data.insert("str2", influencers);
        data.insert("str3", influencers);
        verifyInfoContentFeature(data, "18");

        // For further inserts the info content, less the 12 reported above for
        // a single string, must lie between max(3, i) and 3 * max(3, i).
        for (std::size_t i = 1; i < 100; ++i) {
            data.insert("str" + std::to_string(i), influencers);
            SEventRateFeatureData featureData(0);
            data.populateInfoContentFeatureData(featureData);
            BOOST_TEST_REQUIRE((featureData.s_Count - 12) >=
                               std::max(static_cast<std::uint64_t>(3),
                                        static_cast<std::uint64_t>(i)));
            BOOST_TEST_REQUIRE(
                (featureData.s_Count - 12) <=
                std::max(static_cast<std::uint64_t>(3), static_cast<std::uint64_t>(i)) * 3);
        }
    }

    // ----- Block 5: Info Content with a SINGLE influencer -----
    static void testInfoContentSingleInfluence() {
        TOptionalStrVec influencers;
        influencers.emplace_back(); // initially, the optional is not set
        CUniqueStringFeatureData data;

        data.insert("str1", influencers);
        verifyInfoContentFeature(data, "12, [[]]");

        influencers.back() = "inf1";
        data.insert("str1", influencers);
        verifyInfoContentFeature(data, "12, [[(inf1, ([12], 1))]]");

        data.insert("str2", influencers);
        data.insert("str2", influencers);
        data.insert("str2", influencers);
        data.insert("str2", influencers);
        influencers.back() = "inf2";
        data.insert("str1", influencers);
        data.insert("str3", influencers);
        influencers.back() = "inf3";
        data.insert("str3", influencers);

        SEventRateFeatureData featureData(0);
        data.populateInfoContentFeatureData(featureData);
        // Sort so the comparison does not depend on the order of the values.
        sortInfluenceValues(featureData);
        BOOST_REQUIRE_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1)), (inf3, ([12], 1))]]"),
                            featureData.print());
    }

    // ----- Block 6: Info Content with MULTIPLE influencers -----
    static void testInfoContentMultipleInfluence() {
        // Two influencer fields, both initially unset.
        TOptionalStrVec influencers;
        influencers.emplace_back();
        influencers.emplace_back();
        CUniqueStringFeatureData data;

        data.insert("str1", influencers);
        data.insert("str2", influencers);
        data.insert("str1", influencers);
        verifyInfoContentFeature(data, "16, [[], []]");

        influencers[0] = "inf1";
        data.insert("str1", influencers);
        data.insert("str2", influencers);
        verifyInfoContentFeature(data, "16, [[(inf1, ([16], 1))], []]");

        influencers[1] = "inf_v2";
        data.insert("str2", influencers);
        influencers[0] = "inf2";
        influencers[1] = "inf_v3";
        data.insert("str3", influencers);
        data.insert("str1", influencers);
        data.insert("str3", influencers);

        SEventRateFeatureData featureData(0);
        data.populateInfoContentFeatureData(featureData);
        // Sort so the comparison does not depend on the order of the values.
        sortInfluenceValues(featureData);
        BOOST_REQUIRE_EQUAL(std::string("18, [[(inf1, ([16], 1)), (inf2, ([16], 1))], [(inf_v2, ([12], 1)), (inf_v3, ([16], 1))]]"),
                            featureData.print());
    }

    // ----- Block 7: Distinct Strings in Latency Buckets -----
    // Builds a unique count gatherer with three latency buckets, checks its
    // basic properties and verifies persistence before and after arrivals
    // carrying distinct string values have been added.
    void testLatencyBucketsDistinctStrings() {
        constexpr core_t::TTime bucketLength = 1800;
        constexpr core_t::TTime startTime = 1432733400;
        constexpr std::size_t latencyBuckets = 3;

        SModelParams params(bucketLength);
        params.s_LatencyBuckets = latencyBuckets;

        TFeatureVec features;
        features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson);
        CDataGatherer gatherer = CDataGathererBuilder(model_t::E_EventRate, features,
                                                      params, key, startTime)
                                     .personFieldName("P")
                                     .valueFieldName("V")
                                     .influenceFieldNames({"INF"})
                                     .build();
        BOOST_TEST_REQUIRE(!gatherer.isPopulation());
        BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p", "v", 1));

        BOOST_REQUIRE_EQUAL(1, gatherer.numberFeatures());
        for (std::size_t i = 0; i < features.size(); ++i) {
            BOOST_REQUIRE_EQUAL(features[i], gatherer.feature(i));
        }
        BOOST_TEST_REQUIRE(gatherer.hasFeature(model_t::E_IndividualUniqueCountByBucketAndPerson));

        BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople());
        BOOST_REQUIRE_EQUAL(1, gatherer.numberByFieldValues());
        BOOST_REQUIRE_EQUAL(std::string("p"), gatherer.personName(0));

        constexpr core_t::TTime time = startTime;
        BOOST_REQUIRE_EQUAL(bucketLength, gatherer.bucketLength());

        testPersistence(params, gatherer, model_t::E_EventRate);

        // Two arrivals, each with its own string value, at each of: two
        // bucket lengths before, one bucket length before and at startTime.
        addArrival(gatherer, m_ResourceMonitor, time - (2 * bucketLength), "p",
                   "stringOne", "inf1");
        addArrival(gatherer, m_ResourceMonitor, time - (2 * bucketLength), "p",
                   "stringTwo", "inf2");
        addArrival(gatherer, m_ResourceMonitor, time - (1 * bucketLength), "p",
                   "stringThree", "inf3");
        addArrival(gatherer, m_ResourceMonitor, time - (1 * bucketLength), "p",
                   "stringFour", "inf1");
        addArrival(gatherer, m_ResourceMonitor, time, "p", "stringFive", "inf2");
        addArrival(gatherer, m_ResourceMonitor, time, "p", "stringSix", "inf3");

        testPersistence(params, gatherer, model_t::E_EventRate);
    }
};
BOOST_FIXTURE_TEST_CASE(testDistinctStrings, CDistinctStringsTestFixture) {
    // Distinct count feature data: no, one and two influencer fields.
    CDistinctStringsTestFixture::testDistinctCountNoInfluence();
    CDistinctStringsTestFixture::testDistinctCountSingleInfluence();
    CDistinctStringsTestFixture::testDistinctCountMultipleInfluence();

    // Info content feature data: no, one and two influencer fields.
    CDistinctStringsTestFixture::testInfoContentNoInfluence();
    CDistinctStringsTestFixture::testInfoContentSingleInfluence();
    CDistinctStringsTestFixture::testInfoContentMultipleInfluence();

    // Unique count gatherer configured with latency buckets.
    this->testLatencyBucketsDistinctStrings();
}
class CDiurnalTestFixture : public CTestFixture {
protected:
// Common constants.
static constexpr core_t::TTime BUCKET_LENGTH{3600};
static constexpr core_t::TTime START_TIME{1432731600};
static constexpr std::size_t LATENCY_BUCKETS{3};
const std::string PERSON{"p"};
const std::string ATTRIBUTE{"a"};
// Create and initialize the model parameters.
static SModelParams createParams() {
SModelParams params(BUCKET_LENGTH);
params.s_LatencyBuckets = LATENCY_BUCKETS;
return params;
}
// Compute the expected count.
// If isDay is true, the modulo is 86400 (day), otherwise 604800 (week).
static std::uint64_t computeExpected(const core_t::TTime time,
const std::uint64_t addition,
const bool isDay) {
return static_cast<std::uint64_t>(time % (isDay ? 86400 : 604800)) + addition;
}
// Build a gatherer based on the features, parameters, start time, and type.
// When useAttribute is true the attribute field is set (for population tests).
static CDataGatherer createGatherer(const TFeatureVec& features,
const SModelParams& params,
core_t::TTime startTime,
bool isPopulation,
bool useAttribute = false) {
auto builder = CDataGathererBuilder(model_t::E_EventRate, features,
params, key, startTime);
if (useAttribute) {
return builder.gathererType(model_t::E_PopulationEventRate)
.attributeFieldName("att")
.build();
}
if (!isPopulation) {
return builder.personFieldName("person").build();
}
return builder.build();
}
// Verify that the gatherer contains the proper features.
static void verifyGathererFeatures(const CDataGatherer& gatherer,
const TFeatureVec& features,
model_t::EFeature expectedFeature,
bool isPopulation) {
BOOST_REQUIRE_EQUAL(1, gatherer.numberFeatures());
for (std::size_t i = 0; i < features.size(); ++i) {
BOOST_REQUIRE_EQUAL(features[i], gatherer.feature(i));
}
BOOST_TEST_REQUIRE(gatherer.hasFeature(expectedFeature));
if (isPopulation) {
BOOST_TEST_REQUIRE(gatherer.isPopulation());
} else {
BOOST_TEST_REQUIRE(!gatherer.isPopulation());
}
}
// Helper to add an arrival with or without an attribute.
void addArrivalHelper(CDataGatherer& gatherer, core_t::TTime t, bool useAttribute) {
if (useAttribute) {
addArrival(gatherer, m_ResourceMonitor, t, PERSON, ATTRIBUTE);
} else {
addArrival(gatherer, m_ResourceMonitor, t, PERSON);
}
}
// Template to verify the feature data.
// FeatureDataT is one of:
// - TFeatureSizeFeatureDataPrVecPrVec for tests by person,
// - TFeatureSizeSizePrFeatureDataPrVecPrVec for tests over person.
template<typename FeatureDataT>
void verifyFeatureData(const CDataGatherer& gatherer, core_t::TTime time, std::uint64_t expectedCount) {
FeatureDataT featureData;
gatherer.featureData(time, BUCKET_LENGTH, featureData);
BOOST_REQUIRE_EQUAL(1, featureData.size());
BOOST_REQUIRE_EQUAL(1, featureData[0].second.size());
BOOST_REQUIRE_EQUAL(expectedCount, featureData[0].second[0].second.s_Count);
}
// Run a sequence of arrivals and verifications.
// The isDay flag selects the modulo (day or week), and useAttribute toggles between
// person-only and attribute-including arrivals.
template<typename FeatureDataT>
void runTestSequence(CDataGatherer& gatherer, bool isDay, bool useAttribute) {
core_t::TTime time = START_TIME;
// Check bucket length and persistence.
BOOST_REQUIRE_EQUAL(BUCKET_LENGTH, gatherer.bucketLength());
testPersistence(createParams(), gatherer, model_t::E_EventRate);
// Arrival 1: time + 0
addArrivalHelper(gatherer, time + 0, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 0, isDay));
// Arrival 2: time + 100, expected additional count of 50.
addArrivalHelper(gatherer, time + 100, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 50, isDay));
time += BUCKET_LENGTH;
// Arrival 3: new bucket, time + 0.
addArrivalHelper(gatherer, time + 0, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 0, isDay));
// Arrival 4: time + 200, expected additional count of 100.
addArrivalHelper(gatherer, time + 200, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 100, isDay));
time += BUCKET_LENGTH;
// Arrival 5: time + 0.
addArrivalHelper(gatherer, time + 0, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 0, isDay));
// Arrival 6: time + 300, expected additional count of 150.
addArrivalHelper(gatherer, time + 300, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 150, isDay));
// Check latency: go back two buckets.
time -= BUCKET_LENGTH * 2;
addArrivalHelper(gatherer, time + 200, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 100, isDay));
time += BUCKET_LENGTH;
addArrivalHelper(gatherer, time + 400, useAttribute);
verifyFeatureData<FeatureDataT>(gatherer, time, computeExpected(time, 200, isDay));
}
// Verify summary information for the gatherer.
static void verifyGathererSummary(const CDataGatherer& gatherer,
bool isPopulation,
bool useAttribute) {
if (isPopulation) {
if (useAttribute) {
BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople());
BOOST_REQUIRE_EQUAL(1, gatherer.numberActiveAttributes());
BOOST_REQUIRE_EQUAL(std::string("a"), gatherer.attributeName(0));
}
} else {
BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople());
BOOST_REQUIRE_EQUAL(std::string("p"), gatherer.personName(0));
}
BOOST_REQUIRE_EQUAL(1, gatherer.numberByFieldValues());
}
};
BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CDiurnalTestFixture) {
    // Each scope below builds a gatherer for one diurnal feature, checks its
    // features, runs the common arrival sequence, checks the summary and
    // finally verifies persistence.

    // Named flags for the boolean arguments of the fixture helpers.
    constexpr bool byPerson = false;  // isPopulation == false
    constexpr bool overPerson = true; // isPopulation == true
    constexpr bool dayModulo = true;  // isDay == true
    constexpr bool weekModulo = false;
    constexpr bool withAttribute = true;
    constexpr bool withoutAttribute = false;

    {
        // Test: time_of_day by person
        LOG_DEBUG(<< "Testing time_of_day by person");
        const SModelParams params = createParams();
        const TFeatureVec features{model_t::E_IndividualTimeOfDayByBucketAndPerson};
        CDataGatherer gatherer = createGatherer(features, params, START_TIME, byPerson);

        verifyGathererFeatures(gatherer, features,
                               model_t::E_IndividualTimeOfDayByBucketAndPerson, byPerson);
        runTestSequence<TFeatureSizeFeatureDataPrVecPrVec>(gatherer, dayModulo,
                                                           withoutAttribute);
        verifyGathererSummary(gatherer, byPerson, withoutAttribute);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
    {
        // Test: time_of_week by person
        LOG_DEBUG(<< "Testing time_of_week by person");
        const SModelParams params = createParams();
        const TFeatureVec features{model_t::E_IndividualTimeOfWeekByBucketAndPerson};
        CDataGatherer gatherer = createGatherer(features, params, START_TIME, byPerson);

        verifyGathererFeatures(gatherer, features,
                               model_t::E_IndividualTimeOfWeekByBucketAndPerson, byPerson);
        runTestSequence<TFeatureSizeFeatureDataPrVecPrVec>(gatherer, weekModulo,
                                                           withoutAttribute);
        verifyGathererSummary(gatherer, byPerson, withoutAttribute);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
    {
        // Test: time_of_week over person (with attribute)
        LOG_DEBUG(<< "Testing time_of_week over person");
        const SModelParams params = createParams();
        const TFeatureVec features{model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute};
        CDataGatherer gatherer = createGatherer(features, params, START_TIME,
                                                overPerson, withAttribute);

        verifyGathererFeatures(gatherer, features,
                               model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute,
                               overPerson);
        runTestSequence<TFeatureSizeSizePrFeatureDataPrVecPrVec>(gatherer, weekModulo,
                                                                 withAttribute);
        verifyGathererSummary(gatherer, overPerson, withAttribute);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
    {
        // Test: time_of_day over person (with attribute)
        LOG_DEBUG(<< "Testing time_of_day over person");
        const SModelParams params = createParams();
        const TFeatureVec features{model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute};
        CDataGatherer gatherer = createGatherer(features, params, START_TIME,
                                                overPerson, withAttribute);

        verifyGathererFeatures(gatherer, features,
                               model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute,
                               overPerson);
        runTestSequence<TFeatureSizeSizePrFeatureDataPrVecPrVec>(gatherer, dayModulo,
                                                                 withAttribute);
        verifyGathererSummary(gatherer, overPerson, withAttribute);
        testPersistence(params, gatherer, model_t::E_EventRate);
    }
}
BOOST_AUTO_TEST_SUITE_END()