def guess_country_from_accept_lang()

in privaterelay/country_utils.py [0:0]


def guess_country_from_accept_lang(accept_lang: str) -> str:
    """
    Guess the user's country from the Accept-Language header.

    Only the first language tag returned by parse_accept_lang_header is
    examined. The country is picked from, in order of preference:

    1. A special-case override for the (primary language, subtag) pair
    2. The first subtag that is a two-letter, non-private region code
    3. The default country for the primary language alone

    Return is a 2-letter ISO 3166 country code.

    If an issue is detected, an AcceptLanguageError is raised. This covers a
    header that yields no language tags, a wildcard, irregular, private-use,
    or one-character primary language, and a primary language that has no
    default country.

    The header may come directly from a web request, or may be the header
    captured by Mozilla Accounts (FxA) at signup.

    Even with all this logic and special casing, it is still more accurate to
    use a GeoIP lookup or a country code provided by the infrastructure.

    See RFC 9110, "HTTP Semantics", section 12.5.4, "Accept-Language"
    See RFC 5646, "Tags for Identifying Languages", and examples in Appendix A
    """
    lang_q_pairs = parse_accept_lang_header(accept_lang.strip())
    if not lang_q_pairs:
        raise AcceptLanguageError("Invalid Accept-Language string", accept_lang)
    top_lang_tag = lang_q_pairs[0][0]

    # Subtags are case-insensitive: compare the primary language in lowercase
    # and candidate regions in uppercase.
    subtags = top_lang_tag.split("-")
    lang = subtags[0].lower()
    if lang == "i":
        raise AcceptLanguageError("Irregular language tag", accept_lang)
    if lang == "x":
        raise AcceptLanguageError("Private-use language tag", accept_lang)
    if lang == "*":
        raise AcceptLanguageError("Wildcard language tag", accept_lang)
    if len(lang) < 2:
        raise AcceptLanguageError("Invalid one-character primary language", accept_lang)
    # Three-character languages "qaa" through "qtz" are reserved for private use
    if len(lang) == 3 and lang[0] == "q" and lang[1] <= "t":
        raise AcceptLanguageError(
            "Private-use language tag (RFC 5646 2.2.1)", accept_lang
        )

    for maybe_region_raw in subtags[1:]:
        maybe_region = maybe_region_raw.upper()

        # Look for a special case. This runs before the length checks, so an
        # override may be keyed on a subtag of any length.
        if override := _LANGUAGE_TAG_TO_COUNTRY_OVERRIDE.get((lang, maybe_region)):
            return override

        if len(maybe_region) <= 1:
            # One-character extension or empty, stop processing
            break
        if (
            len(maybe_region) == 2
            and all(c in ascii_uppercase for c in maybe_region)
            and
            # RFC 5646 2.2.4 "Region Subtag" point 6, reserved subtags:
            # "AA", "QM" to "QZ", "XA" to "XZ", and "ZZ"
            maybe_region != "AA"
            and maybe_region != "ZZ"
            and maybe_region[0] != "X"
            and (maybe_region[0] != "Q" or maybe_region[1] < "M")
        ):
            # Subtag is a non-private ISO 3166 country code
            return maybe_region

        # Subtag is probably a script, like "Hans" in "zh-Hans-CN"
        # Loop to the next subtag, which might be an ISO 3166 country code

    # Guess the country from a simple language tag
    try:
        return _PRIMARY_LANGUAGE_TO_COUNTRY[lang]
    except KeyError:
        # The KeyError is an implementation detail of the lookup table, so
        # suppress it rather than chaining it into the caller's traceback.
        raise AcceptLanguageError("Unknown language", accept_lang) from None