lib/swoc/unit_tests/ex_UnitParser.cc (120 lines of code) (raw):
// SPDX-License-Identifier: Apache-2.0
// Copyright Verizon Media 2020
/** @file
Example parser for parsing strings that are counts with attached unit tokens.
*/
#include <ctype.h>
#include <chrono>
#include "swoc/Lexicon.h"
#include "swoc/Errata.h"
#include "catch.hpp"
using swoc::TextView;
using swoc::Lexicon;
using swoc::Errata;
using swoc::Rv;
/** Parse a string that consists of counts and units.
*
* Given a set of units, each of which is a list of names and a multiplier, parse a string. The
* string contents must consist of alternating counts and units, optionally separated by
* whitespace, starting with a count. Each count is multiplied by the value of the subsequent
* unit. Optionally the parser can be set to allow counts without units, which are not multiplied.
*
* For example, if the units were [ "X", 10 ] , [ "L", 50 ] , [ "C", 100 ] , [ "M", 1000 ]
* then the following strings would be parsed as
*
* - "1X" : 10
* - "1L3X" : 80
* - "2C" : 200
* - "1M 4C 4X" : 1,440
* - "3M 5 C3 X" : 3,530
*/
class UnitParser {
using self_type = UnitParser; ///< Self reference type.
public:
using value_type = uintmax_t; ///< Integral type returned.
using Units = swoc::Lexicon<value_type>; ///< Unit definition type.
/// Symbolic name for setting whether units are required.
static constexpr bool UNITS_REQUIRED = true;
/// Symbolic name for setting whether units are required.
static constexpr bool UNITS_NOT_REQUIRED = false;
/** Constructor.
*
* @param units A @c Lexicon of unit definitions.
* @param unit_required_p Whether valid input requires units on all values.
*/
UnitParser(Units &&units, bool unit_required_p = true) noexcept;
/** Set whether a unit is required.
*
* @param flag @c true if a unit is required, @c false if not.
* @return @a this.
*/
self_type &unit_required(bool flag);
/** Parse a string.
*
* @param src Input string.
* @return The computed value if the input it valid, or an error report.
*/
Rv<value_type> operator()(swoc::TextView const &src) const noexcept;
protected:
bool _unit_required_p = true; ///< Whether unitless values are allowed.
Units _units; ///< Unit definitions.
};
UnitParser::UnitParser(UnitParser::Units &&units, bool unit_required_p) noexcept
: _unit_required_p(unit_required_p), _units(std::move(units)) {
_units.set_default(value_type{0}); // Used to check for bad unit names.
}
/** Set whether every count must be followed by a unit.
 *
 * @param flag @c true to require a unit, @c false to accept counts without a unit.
 * @return @a this, so calls can be chained.
 */
UnitParser::self_type &
UnitParser::unit_required(bool flag) {
  _unit_required_p = flag; // Store the caller's choice; previously this was always set to false.
  return *this;
}
/** Parse a string of counts and units into a single value.
 *
 * The input is consumed as a sequence of (count, optional unit) pairs separated by optional
 * whitespace. Each count is multiplied by the multiplier of its unit, if any, and the products are
 * summed.
 *
 * @param src Input string.
 * @return The sum on success, otherwise an @c Errata describing the first problem found:
 *   a missing count, a count or result too large for @c value_type, a missing unit when units
 *   are required, or a unit name that is not defined. Offsets in messages are relative to the
 *   start of @a src.
 */
auto
UnitParser::operator()(swoc::TextView const &src) const noexcept -> Rv<value_type> {
  // The <ctype.h> classifiers have undefined behavior for negative arguments, which a plain
  // @c char can be for non-ASCII input, so always convert through @c unsigned @c char.
  auto is_space = [](char c) -> bool { return isspace(static_cast<unsigned char>(c)) != 0; };
  auto is_unit_char = [](char c) -> bool {
    auto const uc = static_cast<unsigned char>(c);
    return !(isspace(uc) || isdigit(uc));
  };
  constexpr value_type max_value = std::numeric_limits<value_type>::max();

  value_type zret = 0;
  TextView text   = src; // Keep @a src around to report error offsets.

  // Each pass handles one count and its optional unit; stop when only whitespace remained.
  while (text.ltrim_if(is_space)) {
    auto const count_offset = text.data() - src.data(); // Where this count starts in @a src.
    TextView parsed;                                    // Receives the text consumed as the count.
    auto n = swoc::svtou(text, &parsed);
    if (parsed.empty()) {
      return Errata("Required count not found at offset {}", count_offset);
    } else if (n == std::numeric_limits<decltype(n)>::max()) {
      // The maximum value is treated as the marker for a count that did not fit.
      return Errata("Count at offset {} was out of bounds", count_offset);
    }
    text.remove_prefix(parsed.size());

    auto ptr = text.ltrim_if(is_space).data(); // save for error reporting.
    // Everything up to the next digit or whitespace.
    auto unit = text.clip_prefix_of(is_unit_char);
    if (unit.empty()) {
      if (_unit_required_p) {
        return Errata("Required unit not found at offset {}", ptr - src.data());
      }
    } else {
      auto mult = _units[unit]; // What's the multiplier?
      if (mult == 0) {          // Zero is the lookup default, i.e. the name matched no unit.
        return Errata("Unknown unit \"{}\" at offset {}", unit, ptr - src.data());
      }
      // Unsigned multiplication wraps silently, so reject a product that cannot fit.
      if (n > max_value / mult) {
        return Errata("Value at offset {} was out of bounds", count_offset);
      }
      n *= mult;
    }

    // Likewise guard the running total against wrapping around.
    if (n > max_value - zret) {
      return Errata("Value at offset {} was out of bounds", count_offset);
    }
    zret += n;
  }
  return zret;
}
// --- Tests ---
// Byte sizes: units are optional, so bare counts are accepted and added as-is.
TEST_CASE("UnitParser Bytes", "[Lexicon][UnitParser]") {
  // Multipliers used to spell out the expected values below.
  constexpr int KILO = 1 << 10;
  constexpr int MEGA = 1 << 20;

  UnitParser::Units byte_units{{{1, {"B", "bytes"}},
                                {1024, {"K", "KB", "kilo", "kilobyte", "kilobytes"}},
                                {1048576, {"M", "MB", "mega", "megabyte", "megabytes"}},
                                {1 << 30, {"G", "GB", "giga", "gigabyte", "gigabytes"}}}};
  UnitParser bytes{std::move(byte_units), UnitParser::UNITS_NOT_REQUIRED};

  // Single and mixed units, with and without separating whitespace.
  REQUIRE(bytes("56 bytes") == 56);
  REQUIRE(bytes("3 kb") == 3 * KILO);
  REQUIRE(bytes("6k128bytes") == 6 * KILO + 128);
  REQUIRE(bytes("6 k128bytes") == 6 * KILO + 128);
  REQUIRE(bytes("6 K128 bytes") == 6 * KILO + 128);

  // Hexadecimal counts.
  REQUIRE(bytes("6 kilo 0x80 bytes") == 6 * KILO + 128);
  REQUIRE(bytes("6kilo 0x8b bytes") == 6 * KILO + 0x8b);

  // Bare counts and larger units.
  REQUIRE(bytes("111") == 111);
  REQUIRE(bytes("4MB") == 4 * (uintmax_t(1) << 20));
  REQUIRE(bytes("4 giga") == 4 * (uintmax_t(1) << 30));
  REQUIRE(bytes("10M 256K 512") == 10 * MEGA + 256 * KILO + 512);
  REQUIRE(bytes("512 256 kilobytes 10 megabytes") == 10 * MEGA + 256 * KILO + 512);
  REQUIRE(bytes("0x100000000") == 0x100000000);

  // An undefined unit name is reported together with its offset.
  auto rv = bytes("56delain");
  REQUIRE_FALSE(rv.is_ok());
  REQUIRE(rv.errata().front().text() == "Unknown unit \"delain\" at offset 2");

  // Text where a count is expected is reported as a missing count.
  rv = bytes("12K delain");
  REQUIRE_FALSE(rv.is_ok());
  REQUIRE(rv.errata().front().text() == "Required count not found at offset 4");

  // A count too large for the value type is rejected.
  rv = bytes("99999999999999999999");
  REQUIRE_FALSE(rv.is_ok());
  REQUIRE(rv.errata().front().text() == "Count at offset 0 was out of bounds");
}
// Durations: every multiplier is expressed in nanoseconds and units are required (the default).
TEST_CASE("UnitParser Time", "[Lexicon][UnitParser]") {
  using namespace std::chrono;

  UnitParser::Units time_units{{{nanoseconds{1}.count(), {"ns", "nanosec", "nanoseconds"}},
                                {nanoseconds{microseconds{1}}.count(), {"us", "microsec", "microseconds"}},
                                {nanoseconds{milliseconds{1}}.count(), {"ms", "millisec", "milliseconds"}},
                                {nanoseconds{seconds{1}}.count(), {"s", "sec", "seconds"}},
                                {nanoseconds{minutes{1}}.count(), {"m", "min", "minutes"}},
                                {nanoseconds{hours{1}}.count(), {"h", "hour", "hours"}},
                                {nanoseconds{hours{24}}.count(), {"d", "day", "days"}},
                                {nanoseconds{hours{168}}.count(), {"w", "week", "weeks"}}}};
  UnitParser parse_time{std::move(time_units)};

  // Valid inputs convert to the matching chrono durations.
  REQUIRE(nanoseconds{parse_time("2s")} == seconds{2});
  REQUIRE(nanoseconds{parse_time("1w 2days 12 hours")} == hours{168} + hours{2 * 24} + hours{12});
  REQUIRE(nanoseconds{parse_time("300ms")} == milliseconds{300});
  REQUIRE(nanoseconds{parse_time("1h30m")} == hours{1} + minutes{30});

  // A trailing count with no unit is an error because units are required here.
  auto rv = parse_time("1h30m10");
  REQUIRE_FALSE(rv.is_ok());
  REQUIRE(rv.errata().front().text() == "Required unit not found at offset 7");

  // Units do not have to appear in descending order.
  auto duration = nanoseconds(parse_time("30 minutes 12h"));
  REQUIRE(minutes(750) == duration);
}
// Egg counts: exercises a const parser built from the @c with_multi form of unit definitions.
TEST_CASE("UnitParser Eggs", "[Lexicon][UnitParser]") {
  // Multipliers used to spell out the expected values below.
  constexpr int DOZEN = 12;
  constexpr int GROSS = 12 * 12;

  UnitParser::Units egg_units{UnitParser::Units::with_multi{{1, {"egg", "eggs"}}, {12, {"dozen"}}, {12 * 12, {"gross"}}}};
  const UnitParser eggs{std::move(egg_units), UnitParser::UNITS_NOT_REQUIRED};

  // Bare counts are taken as-is.
  REQUIRE(eggs("1") == 1);
  REQUIRE(eggs("6") == 6);

  // Counts with units are scaled and summed.
  REQUIRE(eggs("1 dozen") == DOZEN);
  REQUIRE(eggs("2 gross 6 dozen 10 eggs") == 2 * GROSS + 6 * DOZEN + 10);
}