in src/java.base/share/classes/sun/util/locale/LanguageTag.java [45:714]
public record LanguageTag(String language,
String script,
String region,
String privateuse,
List<String> extlangs,
List<String> variants,
List<String> extensions) {
/** Separator written between the subtags of a language tag. */
public static final String SEP = "-";
/** Single-character subtag that introduces a private use sequence. */
public static final String PRIVATEUSE = "x";
/** Language subtag substituted by {@code parseLocale} when no language is available. */
public static final String UNDETERMINED = "und";
/** Private use subtag that {@code parseLocale} places in front of ill-formed variant subtags. */
public static final String PRIVUSE_VARIANT_PREFIX = "lvariant";
// Shared value representing an absent single subtag
private static final String EMPTY_SUBTAG = "";
// Shared immutable value representing an absent list of subtags
private static final List<String> EMPTY_SUBTAGS = List.of();
/*
* BNF in RFC5646
*
* Language-Tag = langtag ; normal language tags
* / privateuse ; private use tag
* / grandfathered ; grandfathered tags
*
*
* langtag = language
* ["-" script]
* ["-" region]
* *("-" variant)
* *("-" extension)
* ["-" privateuse]
*
* language = 2*3ALPHA ; shortest ISO 639 code
* ["-" extlang] ; sometimes followed by
* ; extended language subtags
* / 4ALPHA ; or reserved for future use
* / 5*8ALPHA ; or registered language subtag
*
* extlang = 3ALPHA ; selected ISO 639 codes
* *2("-" 3ALPHA) ; permanently reserved
*
* script = 4ALPHA ; ISO 15924 code
*
* region = 2ALPHA ; ISO 3166-1 code
* / 3DIGIT ; UN M.49 code
*
* variant = 5*8alphanum ; registered variants
* / (DIGIT 3alphanum)
*
* extension = singleton 1*("-" (2*8alphanum))
*
* ; Single alphanumerics
* ; "x" reserved for private use
* singleton = DIGIT ; 0 - 9
* / %x41-57 ; A - W
* / %x59-5A ; Y - Z
* / %x61-77 ; a - w
* / %x79-7A ; y - z
*
* privateuse = "x" 1*("-" (1*8alphanum))
*
*/
/**
 * Parses a language tag string into its subtags.
 *
 * <p>The lower-cased input is first looked up as a legacy tag; when a
 * preferred mapping exists, that mapping is parsed instead of the input.
 * Subtags are then consumed in the order language, extlangs, script,
 * region, variants, extensions and private use. Everything between the
 * language and private use is only attempted when a language was found.
 *
 * @param languageTag the tag to parse
 * @param pp receives the end index of the last consumed subtag and, when
 *           parsing stops early, the error index of the offending subtag
 * @param lenient when {@code true}, errors are only reported through
 *                {@code pp} and the subtags parsed so far are returned
 * @return the parsed tag
 * @throws IllformedLocaleException if {@code lenient} is {@code false} and
 *         an error index was recorded in {@code pp}
 */
public static LanguageTag parse(String languageTag, ParsePosition pp,
                                boolean lenient) {
    var errorMsg = new StringBuilder();

    // A legacy tag is swapped for its preferred mapping before tokenizing
    String preferred = legacyToPreferred(LocaleUtils.toLowerString(languageTag));
    var itr = new StringTokenIterator(
            Objects.requireNonNullElse(preferred, languageTag), SEP);

    // Defaults apply when the tag does not start with a language subtag
    List<String> extlangs = EMPTY_SUBTAGS;
    String script = EMPTY_SUBTAG;
    String region = EMPTY_SUBTAG;
    List<String> variants = EMPTY_SUBTAGS;
    List<String> extensions = EMPTY_SUBTAGS;

    String language = parseLanguage(itr, pp);
    if (!language.isEmpty()) {
        extlangs = parseExtlangs(itr, pp);
        script = parseScript(itr, pp);
        region = parseRegion(itr, pp);
        variants = parseVariants(itr, pp);
        extensions = parseExtensions(itr, pp, errorMsg);
    }
    String privateuse = parsePrivateuse(itr, pp, errorMsg);

    // Anything left over that no sub-parser complained about is an error here
    if (!itr.isDone() && pp.getErrorIndex() == -1) {
        String leftover = itr.current();
        pp.setErrorIndex(itr.currentStart());
        errorMsg.append(leftover.isEmpty()
                ? "Empty subtag"
                : "Invalid subtag: " + leftover);
    }

    int errorIndex = pp.getErrorIndex();
    if (!lenient && errorIndex != -1) {
        throw new IllformedLocaleException(errorMsg.toString(), errorIndex);
    }
    return new LanguageTag(language, script, region, privateuse, extlangs, variants, extensions);
}
//
// Language subtag parsers
//
/**
 * Consumes the current token as the primary language subtag when it is
 * well-formed according to {@link #isLanguage(String)}.
 *
 * @param itr token iterator positioned at the candidate subtag
 * @param pp parse position; its index is moved to the end of the subtag on success
 * @return the language subtag, or the empty string when the iterator is
 *         exhausted, an error is already recorded, or the token is not a language
 */
private static String parseLanguage(StringTokenIterator itr, ParsePosition pp) {
    boolean canParse = !itr.isDone() && pp.getErrorIndex() == -1;
    if (canParse) {
        String candidate = itr.current();
        if (isLanguage(candidate)) {
            pp.setIndex(itr.currentEnd());
            itr.next();
            return candidate;
        }
    }
    return EMPTY_SUBTAG;
}
/**
 * Consumes up to three consecutive extlang subtags.
 *
 * @param itr token iterator positioned after the language subtag
 * @param pp parse position; its index is moved past each consumed subtag
 * @return an unmodifiable list of the extlang subtags found, possibly empty
 */
private static List<String> parseExtlangs(StringTokenIterator itr, ParsePosition pp) {
    if (itr.isDone() || pp.getErrorIndex() != -1) {
        return EMPTY_SUBTAGS;
    }

    List<String> found = null;
    // Stop after the third extlang; further tokens belong to later subtag kinds
    while (!itr.isDone() && (found == null || found.size() < 3)) {
        String token = itr.current();
        if (!isExtlang(token)) {
            break;
        }
        if (found == null) {
            found = new ArrayList<>(3);
        }
        found.add(token);
        pp.setIndex(itr.currentEnd());
        itr.next();
    }

    if (found == null) {
        return EMPTY_SUBTAGS;
    }
    return Collections.unmodifiableList(found);
}
/**
 * Consumes the current token as the script subtag when it is well-formed
 * according to {@link #isScript(String)}.
 *
 * @param itr token iterator positioned at the candidate subtag
 * @param pp parse position; its index is moved to the end of the subtag on success
 * @return the script subtag, or the empty string when nothing was consumed
 */
private static String parseScript(StringTokenIterator itr, ParsePosition pp) {
    if (itr.isDone() || pp.getErrorIndex() != -1) {
        return EMPTY_SUBTAG;
    }
    String token = itr.current();
    if (!isScript(token)) {
        return EMPTY_SUBTAG;
    }
    pp.setIndex(itr.currentEnd());
    itr.next();
    return token;
}
/**
 * Consumes the current token as the region subtag when it is well-formed
 * according to {@link #isRegion(String)}.
 *
 * @param itr token iterator positioned at the candidate subtag
 * @param pp parse position; its index is moved to the end of the subtag on success
 * @return the region subtag, or the empty string when nothing was consumed
 */
private static String parseRegion(StringTokenIterator itr, ParsePosition pp) {
    boolean exhaustedOrFailed = itr.isDone() || pp.getErrorIndex() != -1;
    if (!exhaustedOrFailed) {
        String token = itr.current();
        if (isRegion(token)) {
            pp.setIndex(itr.currentEnd());
            itr.next();
            return token;
        }
    }
    return EMPTY_SUBTAG;
}
/**
 * Consumes every consecutive token that is a well-formed variant subtag.
 *
 * @param itr token iterator positioned at the first candidate subtag
 * @param pp parse position; its index is moved past each consumed subtag
 * @return an unmodifiable list of the variant subtags found, possibly empty
 */
private static List<String> parseVariants(StringTokenIterator itr, ParsePosition pp) {
    if (itr.isDone() || pp.getErrorIndex() != -1) {
        return EMPTY_SUBTAGS;
    }

    List<String> collected = null;
    // The iterator only advances after a token has been accepted
    for (; !itr.isDone(); itr.next()) {
        String token = itr.current();
        if (!isVariant(token)) {
            break;
        }
        if (collected == null) {
            collected = new ArrayList<>(3);
        }
        collected.add(token);
        pp.setIndex(itr.currentEnd());
    }

    if (collected == null) {
        return EMPTY_SUBTAGS;
    }
    return Collections.unmodifiableList(collected);
}
/**
 * Consumes consecutive extensions, each made of a singleton followed by
 * one or more extension subtags, and returns them joined with {@link #SEP}.
 *
 * <p>When a singleton is not followed by any extension subtag, the error
 * index of {@code pp} is set to the start of that singleton, a message is
 * appended to {@code err}, and parsing of extensions stops.
 *
 * @param itr token iterator positioned at the first candidate singleton
 * @param pp parse position; its index is moved past each consumed extension subtag
 * @param err buffer that receives the error message, if any
 * @return an unmodifiable list of the complete extensions found, possibly empty
 */
private static List<String> parseExtensions(StringTokenIterator itr, ParsePosition pp,
                                            StringBuilder err) {
    if (itr.isDone() || pp.getErrorIndex() != -1) {
        return EMPTY_SUBTAGS;
    }

    List<String> result = null;
    while (!itr.isDone()) {
        String singleton = itr.current();
        if (!isExtensionSingleton(singleton)) {
            break;
        }
        int singletonStart = itr.currentStart();
        var extension = new StringBuilder(singleton);

        // Gather the subtags that belong to this singleton
        for (itr.next(); !itr.isDone(); itr.next()) {
            String subtag = itr.current();
            if (!isExtensionSubtag(subtag)) {
                break;
            }
            extension.append(SEP).append(subtag);
            pp.setIndex(itr.currentEnd());
        }

        // The index only moves when a subtag was accepted above
        if (pp.getIndex() <= singletonStart) {
            pp.setErrorIndex(singletonStart);
            err.append("Incomplete extension '").append(singleton).append("'");
            break;
        }

        if (result == null) {
            result = new ArrayList<>(4);
        }
        result.add(extension.toString());
    }

    if (result == null) {
        return EMPTY_SUBTAGS;
    }
    return Collections.unmodifiableList(result);
}
/**
 * Consumes a private use sequence: the private use prefix followed by one
 * or more private use subtags.
 *
 * <p>When the prefix is not followed by any private use subtag, the error
 * index of {@code pp} is set to the start of the prefix and a message is
 * appended to {@code err}.
 *
 * @param itr token iterator positioned at the candidate prefix
 * @param pp parse position; its index is moved past each consumed subtag
 * @param err buffer that receives the error message, if any
 * @return the prefix and its subtags joined with {@link #SEP}, or the
 *         empty string when no complete private use sequence was consumed
 */
private static String parsePrivateuse(StringTokenIterator itr, ParsePosition pp,
                                      StringBuilder err) {
    if (itr.isDone() || pp.getErrorIndex() != -1) {
        return EMPTY_SUBTAG;
    }
    String prefix = itr.current();
    if (!isPrivateusePrefix(prefix)) {
        return EMPTY_SUBTAG;
    }

    int prefixStart = itr.currentStart();
    var sequence = new StringBuilder(prefix);
    for (itr.next(); !itr.isDone(); itr.next()) {
        String subtag = itr.current();
        if (!isPrivateuseSubtag(subtag)) {
            break;
        }
        sequence.append(SEP).append(subtag);
        pp.setIndex(itr.currentEnd());
    }

    if (pp.getIndex() > prefixStart) {
        return sequence.toString();
    }
    // need at least 1 private subtag
    pp.setErrorIndex(prefixStart);
    err.append("Incomplete privateuse");
    return EMPTY_SUBTAG;
}
/**
 * Returns the given language tag with the case of each subtag adjusted.
 *
 * <p>The tag is first parsed strictly, so an ill-formed tag is rejected.
 * For a legacy tag, two-character subtags after the first are upper-cased
 * and all other subtags are lower-cased. For any other tag, subtags that
 * appear after the first one and before any extension singleton or private
 * use prefix are treated by kind: variants are kept as written, regions go
 * through {@link #canonicalizeRegion(String)}, and scripts go through
 * {@link #canonicalizeScript(String)}. Every remaining subtag is
 * lower-cased, except that subtags following {@link #PRIVUSE_VARIANT_PREFIX}
 * are kept as written.
 *
 * @param tag the language tag to fold
 * @return the case-folded tag
 * @throws IllformedLocaleException if {@code tag} is ill-formed
 */
public static String caseFoldTag(String tag) {
    // Strict parse is used purely for validation; its result is discarded
    parse(tag, new ParsePosition(0), false);
    String[] subtags = tag.split(SEP);
    var folded = new StringJoiner(SEP);

    // Legacy tags
    if (legacyToPreferred(tag.toLowerCase(Locale.ROOT)) != null) {
        for (int i = 0; i < subtags.length; i++) {
            String subtag = subtags[i];
            // 2 ALPHA Region subtag(s) are upper, all other subtags are lower
            boolean upper = i > 0 && subtag.length() == 2;
            folded.add(upper
                    ? LocaleUtils.toUpperString(subtag)
                    : LocaleUtils.toLowerString(subtag));
        }
        return folded.toString();
    }

    // Non-legacy tags
    boolean inPrivateuse = false;
    boolean inExtension = false;
    boolean afterPrivuseVariantPrefix = false;
    for (int i = 0; i < subtags.length; i++) {
        String subtag = subtags[i];
        // Script/region/variant folding only applies before any singleton or "x"
        boolean inLangtagBody = i > 0 && !inExtension && !inPrivateuse;

        if (afterPrivuseVariantPrefix) {
            folded.add(subtag);
        } else if (inLangtagBody && isVariant(subtag)) {
            folded.add(subtag);
        } else if (inLangtagBody && isRegion(subtag)) {
            folded.add(canonicalizeRegion(subtag));
        } else if (inLangtagBody && isScript(subtag)) {
            folded.add(canonicalizeScript(subtag));
        } else {
            // Everything else is lower-cased; remember section boundaries
            if (isPrivateusePrefix(subtag)) {
                inPrivateuse = true;
            } else if (isExtensionSingleton(subtag)) {
                inExtension = true;
            } else if (subtag.equals(PRIVUSE_VARIANT_PREFIX)) {
                afterPrivuseVariantPrefix = true;
            }
            folded.add(subtag.toLowerCase(Locale.ROOT));
        }
    }
    return folded.toString();
}
/**
 * Builds a language tag from the fields of a base locale and its extensions.
 *
 * <p>Language, script and region are taken over only when they are
 * well-formed subtags. The deprecated language codes {@code iw}, {@code ji}
 * and {@code in} are replaced by {@code he}, {@code yi} and {@code id}, and
 * the combination {@code no}/{@code NO}/{@code NY} becomes language
 * {@code nn} without a variant. The variant field is split on
 * {@code BaseLocale.SEP}: leading well-formed variants are kept as variants,
 * and the following tokens that qualify as private use subtags are appended
 * to the private use value after {@link #PRIVUSE_VARIANT_PREFIX}. The
 * language falls back to {@link #UNDETERMINED} when it is empty and either
 * another subtag is present or there is no private use value.
 *
 * @param baseLocale source of language, script, region and variant
 * @param localeExtensions source of extensions and private use; may be {@code null}
 * @return the resulting tag; its extlangs are always empty
 */
public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) {
    String baseLanguage = baseLocale.getLanguage();
    String baseScript = baseLocale.getScript();
    String baseRegion = baseLocale.getRegion();
    String baseVariant = baseLocale.getVariant();

    String language = EMPTY_SUBTAG;
    if (isLanguage(baseLanguage)) {
        // Convert a deprecated language code to its new code
        language = switch (baseLanguage) {
            case "iw" -> "he";
            case "ji" -> "yi";
            case "in" -> "id";
            default -> baseLanguage;
        };
    }

    boolean hasScript = isScript(baseScript);
    String script = hasScript ? canonicalizeScript(baseScript) : EMPTY_SUBTAG;
    boolean hasRegion = isRegion(baseRegion);
    String region = hasRegion ? canonicalizeRegion(baseRegion) : EMPTY_SUBTAG;
    // Tracks whether anything other than language/private use is present
    boolean hasSubtag = hasScript || hasRegion;

    // Special handling for no_NO_NY - use nn_NO for language tag
    if (language.equals("no") && region.equals("NO") && baseVariant.equals("NY")) {
        language = "nn";
        baseVariant = EMPTY_SUBTAG;
    }

    List<String> variants = null;
    String privuseVar = null; // ill-formed variant subtags, joined with SEP
    if (!baseVariant.isEmpty()) {
        var variantItr = new StringTokenIterator(baseVariant, BaseLocale.SEP);

        // Leading well-formed variants
        for (; !variantItr.isDone(); variantItr.next()) {
            String candidate = variantItr.current();
            if (!isVariant(candidate)) {
                break;
            }
            if (variants == null) {
                variants = new ArrayList<>();
            }
            variants.add(candidate); // Do not canonicalize!
        }
        hasSubtag = hasSubtag || variants != null;

        // Remaining tokens: keep those usable as private use subtags,
        // truncating at the first one that is not
        var illFormed = new StringJoiner(SEP);
        for (; !variantItr.isDone(); variantItr.next()) {
            String candidate = variantItr.current();
            if (!isPrivateuseSubtag(candidate)) {
                break;
            }
            illFormed.add(candidate);
        }
        if (illFormed.length() > 0) {
            privuseVar = illFormed.toString();
        }
    }

    List<String> extensions = null;
    String privateuse = null;
    if (localeExtensions != null) {
        for (Character key : localeExtensions.getKeys()) {
            Extension ext = localeExtensions.getExtension(key);
            if (isPrivateusePrefixChar(key)) {
                privateuse = ext.getValue();
                continue;
            }
            if (extensions == null) {
                extensions = new ArrayList<>();
            }
            extensions.add(key.toString() + SEP + ext.getValue());
        }
    }
    hasSubtag = hasSubtag || extensions != null;

    // append ill-formed variant subtags to private use
    if (privuseVar != null) {
        privateuse = (privateuse == null)
                ? PRIVUSE_VARIANT_PREFIX + SEP + privuseVar
                : privateuse + SEP + PRIVUSE_VARIANT_PREFIX
                        + SEP + privuseVar.replace(BaseLocale.SEP, SEP);
    }

    // use lang "und" when 1) no language is available AND
    // 2) any of other subtags other than private use are available or
    //    no private use tag is available
    if (language.isEmpty() && (hasSubtag || privateuse == null)) {
        language = UNDETERMINED;
    }

    // extlangs always empty for locale parse
    return new LanguageTag(
            language,
            script,
            region,
            privateuse == null ? EMPTY_SUBTAG : privateuse,
            EMPTY_SUBTAGS,
            variants == null ? EMPTY_SUBTAGS : Collections.unmodifiableList(variants),
            extensions == null ? EMPTY_SUBTAGS : Collections.unmodifiableList(extensions));
}
/*
 * Looks up the preferred replacement for a legacy tag.
 * The argument is matched against lower-case keys, so callers must pass a
 * lower-cased tag. Returns null when the tag is not a legacy tag.
 * Entries marked "fallback" map to a private use form.
 * See http://www.ietf.org/rfc/rfc5646.txt Section 2.1 and 2.2.8 for the
 * full syntax and case accurate legacy tags.
 */
private static String legacyToPreferred(String tag) {
    // Every key below has at least five characters, so shorter input cannot match
    return tag.length() < 5 ? null : switch (tag) {
        case "art-lojban" -> "jbo";
        case "cel-gaulish" -> "xtg-x-cel-gaulish";   // fallback
        case "en-gb-oed" -> "en-GB-x-oed";           // fallback
        case "i-ami" -> "ami";
        case "i-bnn" -> "bnn";
        case "i-default" -> "en-x-i-default";        // fallback
        case "i-enochian" -> "und-x-i-enochian";     // fallback
        case "i-hak", "zh-hakka" -> "hak";
        case "i-klingon" -> "tlh";
        case "i-lux" -> "lb";
        case "i-mingo" -> "see-x-i-mingo";           // fallback
        case "i-navajo" -> "nv";
        case "i-pwn" -> "pwn";
        case "i-tao" -> "tao";
        case "i-tay" -> "tay";
        case "i-tsu" -> "tsu";
        case "no-bok" -> "nb";
        case "no-nyn" -> "nn";
        case "sgn-be-fr" -> "sfb";
        case "sgn-be-nl" -> "vgt";
        case "sgn-ch-de" -> "sgg";
        case "zh-guoyu" -> "cmn";
        case "zh-min" -> "nan-x-zh-min";             // fallback
        case "zh-min-nan" -> "nan";
        case "zh-xiang" -> "hsn";
        default -> null;
    };
}
//
// Language subtag syntax checking methods
//
/**
 * Tests whether a string is a well-formed primary language subtag.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} has 2 to 8 characters and
 *         {@code LocaleUtils.isAlphaString} accepts it
 */
public static boolean isLanguage(String s) {
    // language      = 2*3ALPHA            ; shortest ISO 639 code
    //                 ["-" extlang]       ; sometimes followed by
    //                                     ; extended language subtags
    //               / 4ALPHA              ; or reserved for future use
    //               / 5*8ALPHA            ; or registered language subtag
    int length = s.length();
    if (length < 2 || length > 8) {
        return false;
    }
    return LocaleUtils.isAlphaString(s);
}
/**
 * Tests whether a string is a well-formed extlang subtag.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} has exactly 3 characters and
 *         {@code LocaleUtils.isAlphaString} accepts it
 */
public static boolean isExtlang(String s) {
    // extlang       = 3ALPHA              ; selected ISO 639 codes
    //                 *2("-" 3ALPHA)      ; permanently reserved
    if (s.length() != 3) {
        return false;
    }
    return LocaleUtils.isAlphaString(s);
}
/**
 * Tests whether a string is a well-formed script subtag.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} has exactly 4 characters and
 *         {@code LocaleUtils.isAlphaString} accepts it
 */
public static boolean isScript(String s) {
    // script        = 4ALPHA              ; ISO 15924 code
    if (s.length() != 4) {
        return false;
    }
    return LocaleUtils.isAlphaString(s);
}
/**
 * Tests whether a string is a well-formed region subtag.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} is 2 characters accepted by
 *         {@code LocaleUtils.isAlphaString}, or 3 characters accepted by
 *         {@code LocaleUtils.isNumericString}
 */
public static boolean isRegion(String s) {
    // region        = 2ALPHA              ; ISO 3166-1 code
    //               / 3DIGIT              ; UN M.49 code
    return switch (s.length()) {
        case 2 -> LocaleUtils.isAlphaString(s);
        case 3 -> LocaleUtils.isNumericString(s);
        default -> false;
    };
}
/**
 * Tests whether a string is a well-formed variant subtag.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} is 5 to 8 alphanumeric characters, or
 *         4 characters of which the first is numeric and the rest alphanumeric
 */
public static boolean isVariant(String s) {
    // variant       = 5*8alphanum         ; registered variants
    //               / (DIGIT 3alphanum)
    int length = s.length();
    if (length == 4) {
        // Short form: leading digit followed by three alphanumerics
        if (!LocaleUtils.isNumeric(s.charAt(0))) {
            return false;
        }
        for (int i = 1; i < 4; i++) {
            if (!LocaleUtils.isAlphaNumeric(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }
    return length >= 5 && length <= 8 && LocaleUtils.isAlphaNumericString(s);
}
/**
 * Tests whether a string is a well-formed extension singleton.
 *
 * <p>Per the RFC 5646 grammar quoted below, a singleton is any single
 * alphanumeric character except the private use prefix {@code x}/{@code X}.
 * Digits are therefore accepted as well as letters.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} is exactly one character, is accepted
 *         by {@code LocaleUtils.isAlphaNumericString}, and does not match
 *         {@link #PRIVATEUSE} ignoring case
 */
public static boolean isExtensionSingleton(String s) {
    // singleton     = DIGIT               ; 0 - 9
    //               / %x41-57             ; A - W
    //               / %x59-5A             ; Y - Z
    //               / %x61-77             ; a - w
    //               / %x79-7A             ; y - z
    return (s.length() == 1)
            // alphanumeric, not alpha-only: the grammar includes DIGIT
            && LocaleUtils.isAlphaNumericString(s)
            && !LocaleUtils.caseIgnoreMatch(PRIVATEUSE, s);
}
/**
 * Character variant of {@link #isExtensionSingleton(String)}.
 *
 * @param c the candidate singleton character
 * @return the result of {@code isExtensionSingleton} applied to the
 *         one-character string made from {@code c}
 */
public static boolean isExtensionSingletonChar(char c) {
return isExtensionSingleton(String.valueOf(c));
}
/**
 * Tests whether a string is a well-formed subtag of an extension.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} has 2 to 8 characters and
 *         {@code LocaleUtils.isAlphaNumericString} accepts it
 */
public static boolean isExtensionSubtag(String s) {
    // extension     = singleton 1*("-" (2*8alphanum))
    int length = s.length();
    if (length < 2 || length > 8) {
        return false;
    }
    return LocaleUtils.isAlphaNumericString(s);
}
/**
 * Tests whether a string is the private use prefix.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} is exactly one character and matches
 *         {@link #PRIVATEUSE} ignoring case
 */
public static boolean isPrivateusePrefix(String s) {
    // privateuse    = "x" 1*("-" (1*8alphanum))
    if (s.length() != 1) {
        return false;
    }
    return LocaleUtils.caseIgnoreMatch(PRIVATEUSE, s);
}
/**
 * Tests whether a character is the private use prefix.
 *
 * @param c the candidate character
 * @return {@code true} if the one-character string made from {@code c}
 *         matches {@link #PRIVATEUSE} ignoring case
 */
public static boolean isPrivateusePrefixChar(char c) {
return (LocaleUtils.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c)));
}
/**
 * Tests whether a string is a well-formed subtag of a private use sequence.
 *
 * @param s the candidate subtag
 * @return {@code true} if {@code s} has 1 to 8 characters and
 *         {@code LocaleUtils.isAlphaNumericString} accepts it
 */
public static boolean isPrivateuseSubtag(String s) {
    // privateuse    = "x" 1*("-" (1*8alphanum))
    int length = s.length();
    if (length < 1 || length > 8) {
        return false;
    }
    return LocaleUtils.isAlphaNumericString(s);
}
//
// Language subtag canonicalization methods
//
/**
 * Canonicalizes the case of a language subtag.
 *
 * @param s the language subtag
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizeLanguage(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of an extlang subtag.
 *
 * @param s the extlang subtag
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizeExtlang(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of a script subtag.
 *
 * @param s the script subtag
 * @return {@code s} as converted by {@code LocaleUtils.toTitleString}
 */
public static String canonicalizeScript(String s) {
return LocaleUtils.toTitleString(s);
}
/**
 * Canonicalizes the case of a region subtag.
 *
 * @param s the region subtag
 * @return {@code s} as converted by {@code LocaleUtils.toUpperString}
 */
public static String canonicalizeRegion(String s) {
return LocaleUtils.toUpperString(s);
}
/**
 * Canonicalizes the case of a variant subtag.
 *
 * @param s the variant subtag
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizeVariant(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of an extension.
 *
 * @param s the extension
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizeExtension(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of an extension singleton.
 *
 * @param s the extension singleton
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizeExtensionSingleton(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of an extension subtag.
 *
 * @param s the extension subtag
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizeExtensionSubtag(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of a private use sequence.
 *
 * @param s the private use sequence
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizePrivateuse(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Canonicalizes the case of a private use subtag.
 *
 * @param s the private use subtag
 * @return {@code s} as converted by {@code LocaleUtils.toLowerString}
 */
public static String canonicalizePrivateuseSubtag(String s) {
return LocaleUtils.toLowerString(s);
}
/**
 * Returns the subtags of this tag joined with {@link #SEP}.
 *
 * <p>Language, extlangs, script, region, variants and extensions are
 * written, in that order, only when the language is non-empty. The private
 * use value is written last whenever it is non-empty.
 *
 * @return the string form of this tag, or the empty string when both the
 *         language and the private use value are empty
 */
@Override
public String toString() {
    var joined = new StringJoiner(SEP);
    if (!language.isEmpty()) {
        joined.add(language);
        extlangs.forEach(joined::add);
        if (!script.isEmpty()) {
            joined.add(script);
        }
        if (!region.isEmpty()) {
            joined.add(region);
        }
        variants.forEach(joined::add);
        extensions.forEach(joined::add);
    }
    if (!privateuse.isEmpty()) {
        joined.add(privateuse);
    }
    return joined.toString();
}
}