runtime/shared/IntlPunctuation.js (69 lines of code) (raw):

/** * (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. * * This file is shared between www and fbsource and www is the source of truth. * When you make change to this file on www, please make sure you test it on * fbsource and send a diff to update the files too so that the 2 versions are * kept in sync. * * Run the following command to sync the change from www to fbsource. * js1 upgrade www-shared -p fbt --local ~/www * * @format * @flow strict-local * @emails oncall+i18n_fbt_js */ /** * Core javascript localization functions. * * Note: This file is required on every page. Please make sure that only core * functionality is included here. * * Note: Please keep this in sync with www/html/js/mobile/lib/intl-core.js. */ import FbtHooks from 'FbtHooks'; import IntlPhonologicalRewrites from 'IntlPhonologicalRewrites'; import IntlRedundantStops from 'IntlRedundantStops'; /** * Regular expression snippet containing all the characters that we * count as sentence-final punctuation. */ export const PUNCT_CHAR_CLASS = ('[.!?' + '\u3002' + // Chinese/Japanese period '\uFF01' + // Fullwidth exclamation point '\uFF1F' + // Fullwidth question mark '\u0964' + // Hindi "full stop" '\u2026' + // Chinese ellipsis '\u0EAF' + // Laotian ellipsis '\u1801' + // Mongolian ellipsis '\u0E2F' + // Thai ellipsis '\uFF0E' + // Fullwidth full stop ']': string); type Rules = $ReadOnlyArray<[RegExp, (string => string) | string]>; const rulesPerLocale: {[locale: string]: ?Rules, ...} = {}; function _getMemoizedRules(localeArg: ?string): Rules { const locale = localeArg ?? ''; let rules = rulesPerLocale[locale]; if (rules == null) { rules = rulesPerLocale[locale] = _getRules(localeArg); } return rules; } function _getRules(locale: ?string): Rules { const rules = []; const rewrites = IntlPhonologicalRewrites.get(locale); // Process the patterns and replacements by applying metaclasses. for (let pattern in rewrites.patterns) { let replacement = rewrites.patterns[pattern]; // "Metaclasses" are shorthand for larger character classes. For example, // _C may refer to consonants and _V to vowels for a locale. for (const metaclass in rewrites.meta) { const metaclassRegexp = new RegExp(metaclass.slice(1, -1), 'g'); const characterClass = rewrites.meta[metaclass]; pattern = pattern.replace(metaclassRegexp, characterClass); replacement = replacement.replace(metaclassRegexp, characterClass); } if (replacement === 'javascript') { replacement = match => match.slice(1).toLowerCase(); } rules.push([new RegExp(pattern.slice(1, -1), 'g'), replacement]); } return rules; } /** * Applies phonological rules (appropriate to the locale) * at the morpheme boundary when tokens are replaced with values. * For languages like Turkish, we allow translators to use shorthand * for a pattern of inflection (a suffix like '(y)i becomes 'i or 'yi or 'a or * 'ye, etc. depending on context). * * Input: Translated string with each {token} substituted with * "\x01value\x01" (e.g., "\x01Ozgur\x01(y)i..." which was * "{name}(y)i...") * Returns: String with phonological rules applied (e.g., "Ozguri...") */ export function applyPhonologicalRules(text: string): string { const rules = _getMemoizedRules(FbtHooks.getViewerContext().locale); let result = text; for (let i = 0; i < rules.length; i++) { const [regexp, replacement] = rules[i]; result = result.replace(regexp, replacement); } // If we have no rules (or if we already applied them), remove the delimiters. return result.replace(/\x01/g, ''); } /** * Map all equivalencies to the normalized key for the stop category. These * are the entries in the redundancy mapping */ const _normalizedStops = new Map(); for (const norm in IntlRedundantStops.equivalencies) { for (const eq of [norm].concat(IntlRedundantStops.equivalencies[norm])) { _normalizedStops.set(eq, norm); } } const _redundancies = new Map(); for (const prefix in IntlRedundantStops.redundancies) { _redundancies.set(prefix, new Set(IntlRedundantStops.redundancies[prefix])); } function isRedundant(rawPrefix: string, rawSuffix: string): boolean { const prefix = _normalizedStops.get(rawPrefix); const suffix = _normalizedStops.get(rawSuffix); return _redundancies.get(prefix)?.has(suffix) === true; } /** * If suffix is redundant with prefix (as determined by the redundancy map), * return the empty string, otherwise return suffix. */ export function dedupeStops(prefix: string, suffix: string): string { // We can naively grab the last "character" (a general Unicode "no-no") from // our string because we know our set of stops we test against have no // diacritics nor lie outside the BMP return isRedundant(prefix[prefix.length - 1], suffix) ? '' : suffix; }