void testLimitedRecordsWriteHelper()

in lib/api/unittest/CJsonOutputWriterTest.cc [508:859]


// Exercises CJsonOutputWriter::limitNumberRecords(size_t).
//
// Feeds three buckets' worth of results into a writer limited to 2 records,
// ends the output batch (interim or final according to isInterim), then parses
// the emitted JSON and verifies that:
//  - the top-level array holds 6 documents: a "records" document followed by
//    its "bucket" document, for each of the three buckets;
//  - every bucket has an "anomaly_score";
//  - every "records" document contains exactly 2 records carrying the
//    expected fields;
//  - "is_interim" is present (and true) on buckets and records only when
//    isInterim is true.
//
// isInterim: forwarded to CJsonOutputWriter::endOutputBatch() and used to
//            decide which "is_interim" expectations apply.
void testLimitedRecordsWriteHelper(bool isInterim) {
    // Tests CJsonOutputWriter::limitNumberRecords(size_t)
    // set the record limit for each detector to 2

    std::ostringstream sstream;

    // The output writer won't close the JSON structures until it is destroyed
    {
        ml::core::CJsonOutputStreamWrapper outputStream(sstream);
        ml::api::CJsonOutputWriter writer("job", outputStream);
        writer.limitNumberRecords(2);

        std::string partitionFieldName("tfn");
        std::string partitionFieldValue("tfv");
        std::string overFieldName("pfn");
        std::string overFieldValue("pfv");
        std::string byFieldName("airline");
        std::string byFieldValue("GAL");
        std::string fieldName("responsetime");
        std::string function("mean");
        std::string functionDescription("mean(responsetime)");
        std::string emptyString;
        ml::api::CHierarchicalResultsWriter::TOptionalStrOptionalStrPrDoublePrVec influences;

        {
            // 1st bucket
            ml::api::CHierarchicalResultsWriter::SResults result111(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 1,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.1, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            BOOST_TEST_REQUIRE(writer.acceptResult(result111));

            ml::api::CHierarchicalResultsWriter::SResults result112(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 1,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 0.0, 0.1, 0.2, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            BOOST_TEST_REQUIRE(writer.acceptResult(result112));

            ml::api::CHierarchicalResultsWriter::SResults result113(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 1,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 2.0, 0.0, 0.4, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            BOOST_TEST_REQUIRE(writer.acceptResult(result113));

            ml::api::CHierarchicalResultsWriter::SResults result114(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 1,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 12.0, 0.0, 0.4, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            // The same result is deliberately submitted twice
            BOOST_TEST_REQUIRE(writer.acceptResult(result114));
            BOOST_TEST_REQUIRE(writer.acceptResult(result114));

            overFieldName = "ofn";
            overFieldValue = "ofv";

            ml::api::CHierarchicalResultsWriter::SResults result121(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, emptyString, emptyString, emptyString, 1, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                12.0, 0.0, 0.01, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result121));

            ml::api::CHierarchicalResultsWriter::SResults result122(
                false, true, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, byFieldName, byFieldValue, emptyString, 1, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                12.0, 0.0, 0.01, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result122));

            ml::api::CHierarchicalResultsWriter::SResults result123(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, byFieldName, byFieldValue, emptyString, 1, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                0.5, 0.0, 0.5, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result123));

            ml::api::CHierarchicalResultsWriter::SResults result124(
                false, true, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, emptyString, emptyString, emptyString, 1, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                0.5, 0.0, 0.5, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result124));

            ml::api::CHierarchicalResultsWriter::SResults result125(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, byFieldName, byFieldValue, emptyString, 1, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                6.0, 0.0, 0.5, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result125));

            ml::api::CHierarchicalResultsWriter::SResults result126(
                false, true, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, emptyString, emptyString, emptyString, 1, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                6.0, 0.0, 0.05, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result126));
        }

        {
            // 2nd bucket
            overFieldName.clear();
            overFieldValue.clear();

            ml::api::CHierarchicalResultsWriter::SResults result211(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 2,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 1.0, 0.0, 0.05, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            BOOST_TEST_REQUIRE(writer.acceptResult(result211));

            ml::api::CHierarchicalResultsWriter::SResults result212(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 2,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 7.0, 0.0, 0.001, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            BOOST_TEST_REQUIRE(writer.acceptResult(result212));

            ml::api::CHierarchicalResultsWriter::SResults result213(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 2,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 0.6, 0.0, 0.1, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            // The same result is deliberately submitted twice
            BOOST_TEST_REQUIRE(writer.acceptResult(result213));
            BOOST_TEST_REQUIRE(writer.acceptResult(result213));

            overFieldName = "ofn";
            overFieldValue = "ofv";

            ml::api::CHierarchicalResultsWriter::SResults result221(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, byFieldName, byFieldValue, emptyString, 2, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                0.6, 0.0, 0.1, 79, fieldName, influences, false, true, 2, 100);
            // The same result is deliberately submitted twice
            BOOST_TEST_REQUIRE(writer.acceptResult(result221));
            BOOST_TEST_REQUIRE(writer.acceptResult(result221));

            ml::api::CHierarchicalResultsWriter::SResults result222(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, emptyString, emptyString, emptyString, 2, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                0.6, 0.0, 0.1, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result222));

            ml::api::CHierarchicalResultsWriter::SResults result223(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, byFieldName, byFieldValue, emptyString, 2, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                3.0, 0.0, 0.02, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result223));

            ml::api::CHierarchicalResultsWriter::SResults result224(
                false, true, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, emptyString, emptyString, emptyString, 2, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                20.0, 0.0, 0.02, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result224));
        }

        {
            // 3rd bucket
            overFieldName.clear();
            overFieldValue.clear();

            ml::api::CHierarchicalResultsWriter::SResults result311(
                ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName,
                partitionFieldValue, byFieldName, byFieldValue, emptyString, 3,
                function, functionDescription, 42.0, 79, TDouble1Vec(1, 6953.0),
                TDouble1Vec(1, 10090.0), 30.0, 0.0, 0.02, -5.0, fieldName,
                influences, false, true, 1, 100, EMPTY_STRING_LIST, {});
            BOOST_TEST_REQUIRE(writer.acceptResult(result311));

            overFieldName = "ofn";
            overFieldValue = "ofv";

            ml::api::CHierarchicalResultsWriter::SResults result321(
                false, false, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, byFieldName, byFieldValue, emptyString, 3, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                31.0, 0.0, 0.0002, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result321));

            ml::api::CHierarchicalResultsWriter::SResults result322(
                false, true, partitionFieldName, partitionFieldValue, overFieldName,
                overFieldValue, emptyString, emptyString, emptyString, 3, function,
                functionDescription, TDouble1Vec(1, 10090.0), TDouble1Vec(1, 6953.0),
                31.0, 0.0, 0.0002, 79, fieldName, influences, false, true, 2, 100);
            BOOST_TEST_REQUIRE(writer.acceptResult(result322));
        }

        // Finished adding results
        BOOST_TEST_REQUIRE(writer.endOutputBatch(isInterim, 10U));
    }

    json::error_code ec;
    json::value arrayDoc_ = json::parse(sstream.str(), ec);
    BOOST_TEST_REQUIRE(ec.failed() == false);
    LOG_DEBUG(<< "Results:\n" << arrayDoc_);

    BOOST_TEST_REQUIRE(arrayDoc_.is_array());
    const json::array& arrayDoc = arrayDoc_.as_array();
    BOOST_REQUIRE_EQUAL(std::size_t(6), arrayDoc.size());

    // buckets and records are the top level objects
    // records corresponding to a bucket appear first. The bucket follows.
    // each bucket has max 2 records from either both or
    // one or the other of the 2 detectors used.
    // records are sorted by probability.
    // bucket total anomaly score is the sum of all anomalies not just those printed.

    // Verifies one records/bucket pair of top-level documents: the "records"
    // document at recordsIndex and the "bucket" document immediately after it.
    // If expectedProbabilities is non-null, each record must also contain a
    // "probability" equal to the entry at the same position in that array;
    // if it is null the "probability" field is not inspected at all.
    auto verifyBucketAndRecords = [&](std::size_t recordsIndex,
                                      const double* expectedProbabilities) {
        const json::value& bucketWrapper_ = arrayDoc.at(recordsIndex + 1);
        BOOST_TEST_REQUIRE(bucketWrapper_.is_object());
        const json::object& bucketWrapper = bucketWrapper_.as_object();
        BOOST_TEST_REQUIRE(bucketWrapper.contains("bucket"));

        const json::value& bucket_ = bucketWrapper.at("bucket");
        BOOST_TEST_REQUIRE(bucket_.is_object());
        const json::object& bucket = bucket_.as_object();
        // It's hard to predict what these will be, so just assert their presence
        BOOST_TEST_REQUIRE(bucket.contains("anomaly_score"));
        if (isInterim) {
            BOOST_TEST_REQUIRE(bucket.contains("is_interim"));
            BOOST_REQUIRE_EQUAL(isInterim, bucket.at("is_interim").as_bool());
        } else {
            BOOST_TEST_REQUIRE(!bucket.contains("is_interim"));
        }

        const json::value& recordsWrapper_ = arrayDoc.at(recordsIndex);
        BOOST_TEST_REQUIRE(recordsWrapper_.is_object());
        const json::object& recordsWrapper = recordsWrapper_.as_object();
        BOOST_TEST_REQUIRE(recordsWrapper.contains("records"));
        const json::value& records_ = recordsWrapper.at("records");
        BOOST_TEST_REQUIRE(records_.is_array());
        const json::array& records = records_.as_array();

        // Assert the record count BEFORE walking the records, so that a
        // regression in the record limit fails cleanly here rather than
        // indexing past the end of the expected probabilities.
        BOOST_REQUIRE_EQUAL(std::size_t(2), records.size());

        for (std::size_t i = 0; i < records.size(); ++i) {
            const json::object& record = records.at(i).as_object();
            BOOST_TEST_REQUIRE(record.contains("detector_index"));
            BOOST_TEST_REQUIRE(record.contains("initial_record_score"));
            BOOST_TEST_REQUIRE(record.contains("record_score"));
            if (expectedProbabilities != nullptr) {
                BOOST_TEST_REQUIRE(record.contains("probability"));
                BOOST_REQUIRE_EQUAL(expectedProbabilities[i],
                                    record.at("probability").to_number<double>());
            }

            if (isInterim) {
                BOOST_TEST_REQUIRE(record.contains("is_interim"));
                BOOST_REQUIRE_EQUAL(isInterim, record.at("is_interim").as_bool());
            } else {
                BOOST_TEST_REQUIRE(!record.contains("is_interim"));
            }
        }
    };

    // Only the first two entries are compared, against the first bucket's two
    // records; the remaining values are not asserted anywhere in this helper.
    // NOTE(review): they look like intended expectations for the later
    // buckets - confirm before enabling probability checks for those.
    const double EXPECTED_PROBABILITIES[] = {0.01, 0.05, 0.001, 0.02, 0.0002};

    // 1st bucket: documents 0 (records) and 1 (bucket), probabilities checked
    verifyBucketAndRecords(std::size_t(0), EXPECTED_PROBABILITIES);
    // 2nd bucket: documents 2 (records) and 3 (bucket)
    verifyBucketAndRecords(std::size_t(2), nullptr);
    // 3rd bucket: documents 4 (records) and 5 (bucket)
    verifyBucketAndRecords(std::size_t(4), nullptr);
}