in lib/maths/analytics/unittest/CBoostedTreeLeafNodeStatisticsTest.cc [202:324]
// Exercises TSplitsDerivatives for a loss with numberParameters parameters.
//
// For each of 100 random trials this:
//   1. draws a feature, a split index per feature, a gradient and a packed
//      curvature for each of 20 samples,
//   2. adds them to a TSplitsDerivatives object while independently
//      accumulating the expected counts, gradients and curvatures,
//   3. checks a copy has the same checksum as the original,
//   4. checks the accumulated values match the expected ones, and
//   5. checks that adding a second, independently filled object to the copy
//      yields the expected totals over both batches.
//
// \param numberParameters The dimension of the gradient vector. The packed
// curvature holds numberParameters * (numberParameters + 1) / 2 values.
void testPerSplitDerivativesFor(std::size_t numberParameters) {

    LOG_DEBUG(<< "Testing " << numberParameters << " parameters");

    // Two features with three and four candidate splits respectively.
    TFloatVecVec featureSplits;
    featureSplits.push_back(TFloatVec{1.0, 2.0, 3.0});
    featureSplits.push_back(TFloatVec{0.1, 0.7, 1.1, 1.4});

    test::CRandomNumbers rng;

    std::size_t numberSamples{20};
    std::size_t numberGradients{numberParameters};
    std::size_t numberCurvatures{numberParameters * (numberParameters + 1) / 2};

    for (std::size_t t = 0; t < 100; ++t) {
        TSizeVec features;
        TSizeVec splits[2];
        TDoubleVec uniform01;
        TDoubleVec gradients;
        TDoubleVec curvatures;
        rng.generateUniformSamples(0, 2, numberSamples, features);
        rng.generateUniformSamples(0, featureSplits[0].size() + 1,
                                   numberSamples, splits[0]);
        rng.generateUniformSamples(0, featureSplits[1].size() + 1,
                                   numberSamples, splits[1]);
        rng.generateUniformSamples(0.0, 1.0, numberSamples, uniform01);
        rng.generateUniformSamples(-1.5, 1.0, numberSamples * numberGradients, gradients);
        rng.generateUniformSamples(0.1, 0.5, numberSamples * numberCurvatures, curvatures);

        // Expected values are indexed [feature][split]; the "missing" ones
        // are indexed [feature] only. Each feature has one more bucket than
        // it has candidate splits.
        TSizeVecVec expectedCounts(2);
        TVectorVecVec expectedGradients(2);
        TMatrixVecVec expectedCurvatures(2);
        TSizeVec expectedMissingCounts(2, 0);
        TVectorVec expectedMissingGradients(2, TVector::Zero(numberParameters));
        TMatrixVec expectedMissingCurvatures(
            2, TMatrix::Zero(numberParameters, numberParameters));
        for (std::size_t i = 0; i < 2; ++i) {
            expectedCounts[i].resize(featureSplits[i].size() + 1, 0);
            expectedGradients[i].resize(featureSplits[i].size() + 1,
                                        TVector::Zero(numberParameters));
            expectedCurvatures[i].resize(
                featureSplits[i].size() + 1,
                TMatrix::Zero(numberParameters, numberParameters));
        }

        // Feeds every sample into derivatives and, in lock step, into the
        // expected accumulators. The expected accumulators are captured by
        // reference and are not reset between calls, so a second call adds
        // on top of the first.
        auto addDerivatives = [&](TSplitsDerivatives& derivatives) {
            // j and k are the offsets of sample i's gradient and packed
            // curvature in the flat gradients and curvatures arrays.
            for (std::size_t i = 0, j = 0, k = 0; i < numberSamples;
                 ++i, j += numberGradients, k += numberCurvatures) {

                // Lay out this sample as [gradient | packed curvature]. Use
                // pointer arithmetic on data() rather than taking the address
                // of operator[] at the end offset: for the last sample that
                // offset equals size(), which operator[] must not be given.
                const auto* gradientBegin = gradients.data() + j;
                const auto* curvatureBegin = curvatures.data() + k;
                TAlignedFloatVec storage;
                storage.insert(storage.end(), gradientBegin,
                               gradientBegin + numberGradients);
                storage.insert(storage.end(), curvatureBegin,
                               curvatureBegin + numberCurvatures);

                auto derivatives_ = makeAlignedVector<Eigen::Aligned16>(
                    storage.data(), numberGradients + numberCurvatures);
                auto gradient = makeVector(storage.data(), numberGradients);
                auto curvature = makeVector(storage.data() + numberGradients,
                                            numberCurvatures);

                if (uniform01[i] < 0.1) {
                    // Treat this sample's feature value as missing.
                    derivatives.addMissingDerivatives(features[i], derivatives_);
                    ++expectedMissingCounts[features[i]];
                    expectedMissingGradients[features[i]] += gradient;
                    expectedMissingCurvatures[features[i]] +=
                        columnMajorHessian(numberParameters, curvature);
                } else {
                    derivatives.addDerivatives(features[i], splits[features[i]][i],
                                               derivatives_);
                    ++expectedCounts[features[i]][splits[features[i]][i]];
                    expectedGradients[features[i]][splits[features[i]][i]] += gradient;
                    expectedCurvatures[features[i]][splits[features[i]][i]] +=
                        columnMajorHessian(numberParameters, curvature);
                }
            }
        };

        // Compares everything held by derivatives with the expected
        // accumulators. The stored curvature is expanded from its lower
        // triangle to a full symmetric matrix before comparing.
        auto validate = [&](const TSplitsDerivatives& derivatives) {
            for (std::size_t i = 0; i < expectedCounts.size(); ++i) {
                for (std::size_t j = 0; j < expectedGradients[i].size(); ++j) {
                    TMatrix curvature{
                        derivatives.curvature(i, j).selfadjointView<Eigen::Lower>()};
                    BOOST_REQUIRE_EQUAL(expectedCounts[i][j], derivatives.count(i, j));
                    BOOST_REQUIRE_EQUAL(expectedGradients[i][j],
                                        derivatives.gradient(i, j));
                    BOOST_REQUIRE_EQUAL(expectedCurvatures[i][j], curvature);
                }
            }
            for (std::size_t i = 0; i < expectedMissingCounts.size(); ++i) {
                TMatrix curvature{
                    derivatives.missingCurvature(i).selfadjointView<Eigen::Lower>()};
                BOOST_REQUIRE_EQUAL(expectedMissingCounts[i], derivatives.missingCount(i));
                BOOST_REQUIRE_EQUAL(expectedMissingGradients[i],
                                    derivatives.missingGradient(i));
                BOOST_REQUIRE_EQUAL(expectedMissingCurvatures[i], curvature);
            }
        };

        TSplitsDerivatives derivatives1{featureSplits, numberParameters};

        addDerivatives(derivatives1);

        LOG_TRACE(<< "Test copy");

        // Copy before remapping so the copy can later be merged with a
        // second batch.
        TSplitsDerivatives derivatives1Plus2{derivatives1};
        BOOST_REQUIRE_EQUAL(derivatives1.checksum(), derivatives1Plus2.checksum());

        LOG_TRACE(<< "Test accumulation");

        // NOTE(review): the arguments look like a thread count and a feature
        // mask covering both features - confirm against TSplitsDerivatives.
        derivatives1.remapCurvature(1, {0, 1});
        validate(derivatives1);

        LOG_TRACE(<< "Test merge");

        // Draw a fresh batch. The features and splits are reused; only the
        // missing indicators and the derivative values change.
        rng.generateUniformSamples(0.0, 1.0, numberSamples, uniform01);
        rng.generateUniformSamples(-1.5, 1.0, numberSamples * numberGradients, gradients);
        rng.generateUniformSamples(0.1, 0.5, numberSamples * numberCurvatures, curvatures);

        TSplitsDerivatives derivatives2{featureSplits, numberParameters};
        addDerivatives(derivatives2);

        // The expected accumulators now hold the totals of both batches, so
        // the merged object must match them.
        derivatives1Plus2.add(1, derivatives2, {0, 1});
        derivatives1Plus2.remapCurvature(1, {0, 1});
        validate(derivatives1Plus2);
    }
}