unittests/PlatformIntl/BCP47ParserTest.cpp (114 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "hermes/Platform/Intl/BCP47Parser.h"
#include "gtest/gtest.h"
namespace {
using namespace hermes::platform_intl;
// Checks how the language-identifier part of a locale string (language,
// script, region and variant subtags) is split into fields, and what
// canonicalize() produces for it. The expectations show that the parsed
// subtags are stored lower-cased while canonicalize() restores the
// conventional casing (e.g. "cmn-Arab-DE").
TEST(BCP47Parser, LanguageIdTest) {
  // language + country code
  {
    auto parsed = ParsedLocaleIdentifier::parse(u"en-US");
    // Abort with a regular test failure instead of dereferencing an empty
    // result if parsing unexpectedly fails.
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"en", res.languageIdentifier.languageSubtag);
    EXPECT_EQ(u"us", res.languageIdentifier.regionSubtag);
    EXPECT_EQ(u"en-US", res.canonicalize());
  }

  // language + script + country code (mixed-case input)
  {
    auto parsed = ParsedLocaleIdentifier::parse(u"CMn-aRab-dE");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"cmn", res.languageIdentifier.languageSubtag);
    EXPECT_EQ(u"arab", res.languageIdentifier.scriptSubtag);
    EXPECT_EQ(u"de", res.languageIdentifier.regionSubtag);
    EXPECT_EQ(u"cmn-Arab-DE", res.canonicalize());
  }

  // language + numeric region code
  {
    auto parsed = ParsedLocaleIdentifier::parse(u"zh-319");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"zh", res.languageIdentifier.languageSubtag);
    EXPECT_EQ(u"319", res.languageIdentifier.regionSubtag);
    EXPECT_EQ(u"zh-319", res.canonicalize());
  }

  // language + region code + extension
  {
    auto parsed = ParsedLocaleIdentifier::parse(u"zh-319-u-abc-test");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"zh", res.languageIdentifier.languageSubtag);
    EXPECT_EQ(u"319", res.languageIdentifier.regionSubtag);
    EXPECT_EQ(u"zh-319-u-abc-test", res.canonicalize());
  }

  // language + variant list: the variants are given out of order, and both
  // the list iteration and the canonical form are expected to be sorted.
  {
    auto parsed =
        ParsedLocaleIdentifier::parse(u"und-variant-alphabet-subtag");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"und", res.languageIdentifier.languageSubtag);
    EXPECT_EQ(u"variant", *res.languageIdentifier.variantSubtagList.rbegin());
    EXPECT_EQ(u"alphabet", *res.languageIdentifier.variantSubtagList.begin());
    EXPECT_EQ(u"und-alphabet-subtag-variant", res.canonicalize());
  }
}
// Checks parsing of the extension sections of a locale identifier: the
// Unicode ("u"), transformed ("t"), other singleton, and private-use ("x")
// extensions, plus the canonical string produced for each.
TEST(BCP47Parser, ExtensionText) {
  // Unicode extension test. The input repeats the attribute "attr" and the
  // key "xx"; the expectations show that the duplicate attribute collapses
  // and that only the first "xx" keyword value is kept.
  {
    auto parsed = ParsedLocaleIdentifier::parse(
        u"und-u-att-attr-attr-nu-xx-latn-bob-xx-bob");
    // Abort with a regular test failure instead of dereferencing an empty
    // result if parsing unexpectedly fails.
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"und", res.languageIdentifier.languageSubtag);
    // Unsigned literals avoid signed/unsigned comparisons against the
    // container size and count values.
    EXPECT_EQ(2u, res.unicodeExtensionAttributes.size());
    EXPECT_EQ(1u, res.unicodeExtensionAttributes.count(u"att"));
    EXPECT_EQ(1u, res.unicodeExtensionAttributes.count(u"attr"));
    const auto &extKeys = res.unicodeExtensionKeywords;
    EXPECT_EQ(2u, extKeys.size());
    // "nu" is immediately followed by another key, so it has no value.
    EXPECT_TRUE(extKeys.at(u"nu").empty());
    EXPECT_EQ(u"latn-bob", extKeys.at(u"xx"));
    EXPECT_EQ(u"und-u-att-attr-nu-xx-latn-bob", res.canonicalize());
  }

  // Transformed extension test: a language identifier ("en-test-US")
  // followed by a single field "a9" with a two-subtag value.
  {
    auto parsed =
        ParsedLocaleIdentifier::parse(u"und-t-en-test-US-a9-ecma402-262test");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"en", res.transformedLanguageIdentifier.languageSubtag);
    EXPECT_EQ(u"test", res.transformedLanguageIdentifier.scriptSubtag);
    EXPECT_EQ(u"us", res.transformedLanguageIdentifier.regionSubtag);
    const auto &tef = res.transformedExtensionFields;
    EXPECT_EQ(1u, tef.size());
    EXPECT_EQ(u"ecma402-262test", tef.at(u"a9"));
    // The transformed language identifier stays lower-case in canonical form.
    EXPECT_EQ(u"und-t-en-test-us-a9-ecma402-262test", res.canonicalize());
  }

  // Other extension test: values are looked up by their singleton character.
  {
    auto parsed = ParsedLocaleIdentifier::parse(u"und-o-first-second-q-1o1");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    const auto &oem = res.otherExtensionMap;
    EXPECT_EQ(u"first-second", oem.at(u'o'));
    EXPECT_EQ(u"1o1", oem.at(u'q'));
    EXPECT_EQ(u"und-o-first-second-q-1o1", res.canonicalize());
  }

  // PU extension test: everything after "x" is kept verbatim, including a
  // "u" subtag that would otherwise introduce a Unicode extension.
  {
    auto parsed = ParsedLocaleIdentifier::parse(u"und-x-u-123");
    ASSERT_TRUE(parsed);
    auto &res = *parsed;
    EXPECT_EQ(u"u-123", res.puExtensions);
    EXPECT_EQ(u"und-x-u-123", res.canonicalize());
  }
}
// Checks that parse() reports failure (a result that converts to false) for
// malformed locale identifiers.
TEST(BCP47Parser, ErrorTest) {
  // Empty input and stray, leading, trailing or doubled separators.
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-US-"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-US--u-att"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"-en-US"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"-"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u""));

  // Private-use singleton with nothing after it.
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-US-x-"));

  // The same extension singleton ("a", "u", "t") appearing more than once.
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-a-foo-b-bar-a-baz"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-u-xx-u-yy"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-t-xx-t-yy"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-t-xx-u-xx-s-yy-t-ww"));

  // The same variant subtag ("scouse") appearing twice.
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-scouse-fonipa-scouse"));

  // Transformed extension: a script-like subtag ("arab") after the region,
  // and a repeated field key ("m0").
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-t-en-us-arab"));
  EXPECT_FALSE(ParsedLocaleIdentifier::parse(u"en-t-m0-foo-m0-bar"));
}
// Checks the canonical string produced for a range of well-formed inputs:
// casing, ordering and de-duplication of subtags and extensions.
TEST(BCP47Parser, Canonicalization) {
  // Parses `str` and returns its canonical form. If parsing fails, a test
  // failure is recorded and an empty string is returned, instead of
  // dereferencing an empty parse result.
  auto cano = [](const std::u16string &str) -> std::u16string {
    auto parsed = ParsedLocaleIdentifier::parse(str);
    if (!parsed) {
      ADD_FAILURE() << "locale identifier unexpectedly failed to parse";
      return {};
    }
    return parsed->canonicalize();
  };

  // Casing of language, script and region subtags.
  EXPECT_EQ(u"en-US", cano(u"En-uS"));
  EXPECT_EQ(u"en-Scrt-US", cano(u"En-scrt-us"));

  // Unicode extension attributes are sorted and de-duplicated.
  EXPECT_EQ(u"en-US-u-attr1-attr2", cano(u"en-us-u-attr2-attr1-attr1"));

  // Subtags inside a keyword value keep their order; keywords are ordered by
  // key and a repeated key keeps only its first value.
  EXPECT_EQ(u"en-US-u-attr-bb-bbb-aaa", cano(u"en-US-u-attr-bb-bbb-aaa"));
  EXPECT_EQ(u"en-US-u-aa-ccc-bb-bbb", cano(u"en-US-u-bb-bbb-bb-aaa-aa-ccc"));

  // Variant subtags are sorted.
  EXPECT_EQ(u"en-US-fonipa-scouse", cano(u"en-US-scouse-fonipa"));

  // Extensions are emitted in singleton order.
  EXPECT_EQ(
      u"en-US-a-xx-s-ff-t-en-us-u-attr-z-aa-bb",
      cano(u"en-us-z-aa-bb-u-attr-t-en-US-a-xx-s-ff"));

  // A keyword value that is exactly "true" is dropped; longer values that
  // merely contain "true" are kept.
  EXPECT_EQ(
      u"en-u-bar-foo-ca-buddhist-kk-nu-thai",
      cano(u"en-u-foo-bar-nu-thai-ca-buddhist-kk-true"));
  EXPECT_EQ(u"en-u-1a-5a-true-true1", cano(u"en-u-5a-true-true1-1a-true"));

  // The private-use section stays last.
  EXPECT_EQ(
      u"en-US-a-xx-t-a1-foo-u-attr-z-aa-bb-x-abcd-efgh5",
      cano(u"en-us-z-aa-bb-u-attr-t-a1-foo-a-xx-x-abcd-efgh5"));
}
} // namespace