modules/services/nsi.js (401 lines of code) (raw):
import { Matcher } from 'name-suggestion-index';
import { fileFetcher, locationManager } from '../core';
import { presetManager } from '../presets';
// This service contains all the code related to the **name-suggestion-index** (aka NSI)
// NSI contains the most correct tagging for many commonly mapped features.
// See https://github.com/osmlab/name-suggestion-index and https://nsi.guide
// DATA
// Load state of the NSI data, reported to other code through `status()`.
// Starts as 'loading'; `init()` sets it to 'ok' or 'failed' once its promise chain settles.
let _nsiStatus = 'loading'; // 'loading', 'ok', 'failed'
// Cache of the downloaded NSI data and the indexes built from it.
// Stays an empty object until `loadNsiData()` replaces it; `cache()` returns it as-is.
let _nsi = {};
// Presets that say little more than "this is some kind of building".
// A feature tagged like `building=yes` can sometimes be upgraded to a better tag, so when
// a feature matches one of these presets `gatherKVs()` adds `building/yes` as a fallback pair.
// Lookup table: preset id -> true
const buildingPreset = {};
for (const presetID of [
  'building/commercial',
  'building/government',
  'building/hotel',
  'building/retail',
  'building/office',
  'building/supermarket',
  'building/yes'
]) {
  buildingPreset[presetID] = true;
}
// Exceptions to the namelike regexes.
// Usually a tag suffix contains a language code like `name:en`, `name:ru`
// but we want to exclude things like `operator:type`, `name:etymology`, etc..
// `gatherNames()` tests this against the OSM key (not the value); a key ending in one of
// these suffixes is never treated as namelike. The match is case-insensitive.
const notNames = /:(colou?r|type|forward|backward|left|right|etymology|pronunciation|wikipedia)$/i;
// Exceptions to the branchlike regexes
// `_upgradeTags()` tests this against the leftover "branch" part of a name. If any of these
// words appears anywhere in it (case-insensitive), the original `name` is restored and no
// `branch` tag is created.
const notBranches = /(coop|express|wireless|factory|outlet)/i;
// PRIVATE FUNCTIONS
// `setNsiSources()`
// Registers the name-suggestion-index download URLs in iD's filemap so the data can be fetched.
// A key that already has a truthy entry in the filemap is left untouched.
//
function setNsiSources() {
  const nsiFiles = {
    'nsi_data': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/nsi.min.json',
    'nsi_dissolved': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/dissolved.min.json',
    'nsi_features': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/featureCollection.min.json',
    'nsi_generics': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/genericWords.min.json',
    'nsi_presets': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/presets/nsi-id-presets.min.json',
    'nsi_replacements': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/replacements.min.json',
    'nsi_trees': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@6.0/dist/trees.min.json'
  };

  const knownFiles = fileFetcher.fileMap();
  for (const [fileKey, url] of Object.entries(nsiFiles)) {
    if (knownFiles[fileKey]) continue;   // keep whatever is already registered
    knownFiles[fileKey] = url;
  }
}
// `loadNsiPresets()`
// Downloads the NSI presets and the NSI feature collection, then merges both into iD's presets.
// Returns a Promise fulfilled (with no value) once the merge call has been made.
//
function loadNsiPresets() {
  const downloads = [
    fileFetcher.get('nsi_presets'),
    fileFetcher.get('nsi_features')
  ];

  return Promise.all(downloads).then(([presetsFile, featureCollection]) => {
    // The preset json schema doesn't include `suggestion`, but the iD code still uses it,
    // so flag every NSI preset before merging.
    for (const preset of Object.values(presetsFile.presets)) {
      preset.suggestion = true;
    }

    presetManager.merge({
      presets: presetsFile.presets,
      featureCollection: featureCollection
    });
  });
}
// `loadNsiData()`
// Downloads the remaining NSI files, replaces the `_nsi` cache, builds the matcher indexes,
// and fills the lookup Maps (`kvt`, `qids`, `ids`).
// Returns a Promise fulfilled when all of that processing is done.
//
function loadNsiData() {
  const downloads = [
    fileFetcher.get('nsi_data'),
    fileFetcher.get('nsi_dissolved'),
    fileFetcher.get('nsi_replacements'),
    fileFetcher.get('nsi_trees')
  ];

  return Promise.all(downloads).then(([dataFile, dissolvedFile, replacementsFile, treesFile]) => {
    _nsi = {
      data: dataFile.nsi,                          // the raw name-suggestion-index data
      dissolved: dissolvedFile.dissolved,          // list of dissolved items
      replacements: replacementsFile.replacements, // trivial old->new qid replacements
      trees: treesFile.trees,                      // metadata about trees, main tags
      kvt: new Map(),                              // Map (k -> Map (v -> t) )
      qids: new Map(),                             // Map (wd/wp tag values -> qids)
      ids: new Map()                               // Map (id -> NSI item)
    };

    const matcher = new Matcher();
    _nsi.matcher = matcher;
    matcher.buildMatchIndex(_nsi.data);
    matcher.buildLocationIndex(_nsi.data, locationManager.loco());

    for (const [tkv, category] of Object.entries(_nsi.data)) {
      const [t, k, v] = tkv.split('/', 3);   // tkv = "tree/key/value"

      // Reverse index of keys -> values -> trees present in the name-suggestion-index.
      // Keys are the primary keys (e.g. "amenity", "craft", "shop", "man_made", "route", etc)
      //   "amenity" -> { "restaurant" -> "brands" }
      if (!_nsi.kvt.get(k)) {
        _nsi.kvt.set(k, new Map());
      }
      _nsi.kvt.get(k).set(v, t);

      const mainTag = _nsi.trees[t].mainTag;   // e.g. "brand:wikidata", "operator:wikidata", etc
      const wikipediaTag = mainTag.replace('wikidata', 'wikipedia');

      for (const item of (category.items || [])) {
        // Remember some useful things for later, cache NSI id -> item
        item.tkv = tkv;
        item.mainTag = mainTag;
        _nsi.ids.set(item.id, item);

        // Cache Wikidata/Wikipedia values -> qid, for #6416
        const wd = item.tags[mainTag];
        const wp = item.tags[wikipediaTag];
        if (wd) _nsi.qids.set(wd, wd);
        if (wp && wd) _nsi.qids.set(wp, wd);
      }
    }
  });
}
// `gatherKVs()`
// Collect the key/value pairs of a feature that are worth running through the NSI matcher.
// Only keys present in the NSI reverse index (`_nsi.kvt`) are considered, so things like
//   "tiger:reviewed", "surface", "ref", etc. are ignored.
//
// A pair whose value is a known NSI category, like
//   "amenity/restaurant", "man_made/flagpole"
// is a primary pair. A known key with the value `yes`, like
//   "amenity/yes"
// is an alternate (fallback) pair.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
// Returns
//   `Object` containing kv pairs to test:
//   {
//     'primary': Set(),
//     'alternate': Set()
//   }
//
function gatherKVs(tags) {
  const primary = new Set();
  const alternate = new Set();

  for (const [rawKey, osmvalue] of Object.entries(tags)) {
    if (!osmvalue) continue;

    // Match a 'route_master' as if it were a 'route' - name-suggestion-index#5184
    const osmkey = (rawKey === 'route_master') ? 'route' : rawKey;

    const vmap = _nsi.kvt.get(osmkey);
    if (!vmap) continue;    // not an interesting key

    if (vmap.get(osmvalue)) {            // value is a category in NSI
      primary.add(`${osmkey}/${osmvalue}`);
    } else if (osmvalue === 'yes') {     // fallback key/yes
      alternate.add(`${osmkey}/${osmvalue}`);
    }
  }

  // Can we try a generic building fallback match? - See #6122, #7197
  // Only when the area preset for these tags is one of the plain building presets, i.e. there
  // is nothing else remarkable about the building.
  // A way with `building=yes` + `name=Westfield` may be a Westfield department store, but
  // `building=yes` + `name=Westfield` + `public_transport=station` is a train station
  // for a town named "Westfield".
  const areaPreset = presetManager.matchTags(tags, 'area');
  if (buildingPreset[areaPreset.id]) {
    alternate.add('building/yes');
  }

  return { primary, alternate };
}
// `identifyTree()`
// NSI has a concept of trees: "brands", "operators", "flags", "transit".
// The tree determines things like which tags are namelike, and which tags hold important wikidata.
// Given an Object of tags, work out which tree applies by looking the tags up in `_nsi.kvt`.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
// Returns
//   `string` the name of the tree, taken from the first tag whose key/value is in NSI
//   or 'unknown' if only a generic `key=yes` was found (e.g. amenity/yes - could be several trees)
//   or null if no match
//
function identifyTree(tags) {
  let sawGenericYes = false;

  for (const [rawKey, osmvalue] of Object.entries(tags)) {
    if (!osmvalue) continue;

    // Match a 'route_master' as if it were a 'route' - name-suggestion-index#5184
    const osmkey = (rawKey === 'route_master') ? 'route' : rawKey;

    const vmap = _nsi.kvt.get(osmkey);
    if (!vmap) continue;    // this key is not in nsi

    if (osmvalue === 'yes') {
      sawGenericYes = true;   // remember it, but keep looking for something more specific
      continue;
    }

    const tree = vmap.get(osmvalue);
    if (tree) return tree;    // first specific match wins
  }

  return sawGenericYes ? 'unknown' : null;
}
// `gatherNames()`
// Gather all the namelike values that we will run through the NSI matcher.
// Which keys count as namelike depends on the tree identified for the tags:
// primarily tags like `name`, `name:ru`, `flag:name`, `network`
// with alternates like `brand`, `brand:ru`, `alt_name`.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
// Returns
//   `Object` containing namelike values to test:
//   {
//     'primary': Set(),
//     'alternate': Set()
//   }
//   Both sets are empty if no tree could be identified,
//   or if any namelike value contained a semicolon.
//
function gatherNames(tags) {
  const noNames = () => ({ primary: new Set(), alternate: new Set() });

  const tree = identifyTree(tags);
  if (!tree) return noNames();

  // Patterns for matching OSM keys that might contain namelike values.
  // These roughly correspond to the "trees" concept in name-suggestion-index.
  let patterns;
  let testNameFragments = true;
  switch (tree) {
    case 'transit':
      testNameFragments = false;
      patterns = {
        primary: /^network$/i,
        alternate: /^(operator|operator:\w+|network:\w+|\w+_name|\w+_name:\w+)$/i
      };
      break;
    case 'flags':
      testNameFragments = false;
      patterns = {
        primary: /^(flag:name|flag:name:\w+)$/i,
        alternate: /^(flag|flag:\w+|subject|subject:\w+)$/i  // note: no `country`, we special-case it below
      };
      break;
    case 'brands':
      patterns = {
        primary: /^(name|name:\w+)$/i,
        alternate: /^(brand|brand:\w+|operator|operator:\w+|\w+_name|\w+_name:\w+)/i
      };
      break;
    case 'operators':
      patterns = {
        primary: /^(name|name:\w+|operator|operator:\w+)$/i,
        alternate: /^(brand|brand:\w+|\w+_name|\w+_name:\w+)/i
      };
      break;
    default:   // unknown/multiple
      patterns = {
        primary: /^(name|name:\w+)$/i,
        alternate: /^(brand|brand:\w+|network|network:\w+|operator|operator:\w+|\w+_name|\w+_name:\w+)/i
      };
  }

  // A key is namelike if it fits the tree's pattern and is not one of the known exceptions.
  const isNamelike = (osmkey, which) =>
    osmkey !== 'old_name' && patterns[which].test(osmkey) && !notNames.test(osmkey);

  const primary = new Set();
  const alternate = new Set();
  let foundSemi = false;

  // Test `name` fragments, longest to shortest, to fit them into a "Name Branch" pattern.
  // e.g. "TUI ReiseCenter - Neuss Innenstadt" -> ["TUI", "ReiseCenter", "Neuss", "Innenstadt"]
  if (tags.name && testNameFragments) {
    const nameParts = tags.name.split(/[\s\-\/,.]/);
    for (let count = nameParts.length; count > 0; count--) {
      primary.add(nameParts.slice(0, count).join(' '));   // e.g. "TUI ReiseCenter"
    }
  }

  // Check all tags
  for (const [osmkey, osmvalue] of Object.entries(tags)) {
    if (!osmvalue) continue;

    const isPrimary = isNamelike(osmkey, 'primary');
    if (!isPrimary && (primary.has(osmvalue) || !isNamelike(osmkey, 'alternate'))) continue;

    if (/;/.test(osmvalue)) {
      foundSemi = true;
      continue;
    }

    if (isPrimary) {
      primary.add(osmvalue);
      alternate.delete(osmvalue);   // a value is never both primary and alternate
    } else {
      alternate.add(osmvalue);
    }
  }

  // For flags only, fallback to `country` tag only if no other namelike values were found.
  // See https://github.com/openstreetmap/iD/pull/8305#issuecomment-769174070
  if (tags.man_made === 'flagpole' && !primary.size && !alternate.size && !!tags.country) {
    if (/;/.test(tags.country)) {
      foundSemi = true;
    } else {
      alternate.add(tags.country);
    }
  }

  // If any namelike value contained a semicolon, return empty sets and don't try matching anything.
  return foundSemi ? noNames() : { primary, alternate };
}
// `gatherTuples()`
// Build every [key,value,name] combination that we want to test, in priority order:
// primary names before alternate names, and within each group longer names before shorter ones.
// For each name, primary k/v pairs go before alternate k/v pairs.
//
// Arguments
//   `tryKVs`: `Object` containing primary and alternate k/v pairs to test
//   `tryNames`: `Object` containing primary and alternate names to test
// Returns
//   `Array`: tuple objects `{ k, v, n }` ordered by priority
//
function gatherTuples(tryKVs, tryNames) {
  const priorities = ['primary', 'alternate'];
  const longestFirst = (a, b) => b.length - a.length;
  const tuples = [];

  for (const whichName of priorities) {
    const names = Array.from(tryNames[whichName]).sort(longestFirst);
    for (const n of names) {
      for (const whichKV of priorities) {
        for (const kv of tryKVs[whichKV]) {
          const [k, v] = kv.split('/', 2);
          tuples.push({ k, v, n });
        }
      }
    }
  }

  return tuples;
}
// `_upgradeTags()`
// Try to match a feature to a canonical record in name-suggestion-index
// and upgrade the tags to match.
// The input `tags` are never modified; the upgrades are applied to a shallow copy.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
//   `loc`: Location where this feature exists, as a [lon, lat]
// Returns
//   `Object` containing the result, or `null` if no changes needed:
//   {
//     'newTags': `Object` - The tags that the feature should have
//     'matched': `Object` - The matched item (a deep copy), or `null` if only the trivial
//                           Wikidata/Wikipedia replacements were applied
//   }
//   Also returns `null` if the leftover "branch" part of the name matches a different NSI item.
//
function _upgradeTags(tags, loc) {
  const newTags = Object.assign({}, tags);   // shallow copy
  let changed = false;

  // Before anything, perform trivial Wikipedia/Wikidata replacements
  Object.keys(newTags).forEach(osmkey => {
    const matchTag = osmkey.match(/^(\w+:)?wikidata$/);
    if (matchTag) {                             // Look at '*:wikidata' tags
      const prefix = (matchTag[1] || '');
      const wd = newTags[osmkey];
      const replace = _nsi.replacements[wd];    // If it matches a QID in the replacement list...

      if (replace && replace.wikidata !== undefined) {   // replace or delete `*:wikidata` tag
        changed = true;
        if (replace.wikidata) {
          newTags[osmkey] = replace.wikidata;
        } else {
          delete newTags[osmkey];
        }
      }
      if (replace && replace.wikipedia !== undefined) {  // replace or delete `*:wikipedia` tag
        changed = true;
        const wpkey = `${prefix}wikipedia`;
        if (replace.wikipedia) {
          newTags[wpkey] = replace.wikipedia;
        } else {
          delete newTags[wpkey];
        }
      }
    }
  });

  // Match a 'route_master' as if it were a 'route' - name-suggestion-index#5184
  const isRouteMaster = (tags.type === 'route_master');

  // Gather key/value tag pairs to try to match
  const tryKVs = gatherKVs(tags);
  if (!tryKVs.primary.size && !tryKVs.alternate.size) {
    return changed ? { newTags: newTags, matched: null } : null;
  }

  // Gather namelike tag values to try to match
  const tryNames = gatherNames(tags);

  // Do `wikidata=*` or `wikipedia=*` tags identify this entity as a chain? - See #6416
  // If so, these tags can be swapped to e.g. `brand:wikidata`/`brand:wikipedia`.
  const foundQID = _nsi.qids.get(tags.wikidata) || _nsi.qids.get(tags.wikipedia);
  if (foundQID) tryNames.primary.add(foundQID);  // matcher will recognize the Wikidata QID as name too

  if (!tryNames.primary.size && !tryNames.alternate.size) {
    return changed ? { newTags: newTags, matched: null } : null;
  }

  // Order the [key,value,name] tuples - test primary before alternate
  const tuples = gatherTuples(tryKVs, tryNames);

  for (let i = 0; i < tuples.length; i++) {
    const tuple = tuples[i];
    const hits = _nsi.matcher.match(tuple.k, tuple.v, tuple.n, loc);   // Attempt to match an item in NSI

    if (!hits || !hits.length) continue;  // no match, try next tuple
    if (hits[0].match !== 'primary' && hits[0].match !== 'alternate') break;  // a generic match, stop looking

    // A match may contain multiple results, the first one is likely the best one for this location
    // e.g. `['pfk-a54c14', 'kfc-1ff19c', 'kfc-658eea']`
    let itemID, item;
    for (let j = 0; j < hits.length; j++) {
      const hit = hits[j];
      itemID = hit.itemID;
      if (_nsi.dissolved[itemID]) continue;       // Don't upgrade to a dissolved item

      item = _nsi.ids.get(itemID);
      if (!item) continue;
      const mainTag = item.mainTag;               // e.g. `brand:wikidata`
      const itemQID = item.tags[mainTag];         // e.g. `brand:wikidata` qid
      const notQID = newTags[`not:${mainTag}`];   // e.g. `not:brand:wikidata` qid

      if (                                        // Exceptions, skip this hit
        (!itemQID || itemQID === notQID) ||       // No `*:wikidata` or matched a `not:*:wikidata`
        (newTags.office && !item.tags.office)     // feature may be a corporate office for a brand? - #6416
      ) {
        item = null;
        continue;  // continue looking
      } else {
        break;     // use `item`
      }
    }

    // Can't use any of these hits, try next tuple..
    if (!item) continue;

    // At this point we have matched a canonical item and can suggest tag upgrades..
    item = JSON.parse(JSON.stringify(item));   // deep copy, so the cached NSI item is never modified
    const tkv = item.tkv;
    const parts = tkv.split('/', 3);     // tkv = "tree/key/value"
    const k = parts[1];
    const v = parts[2];
    const category = _nsi.data[tkv];
    const properties = category.properties || {};

    // Preserve some tags that we specifically don't want NSI to overwrite. ('^name', sometimes)
    // Always work on a copy: `properties.preserveTags` lives inside the shared `_nsi.data` cache,
    // and appending to it directly would make that array grow on every call.
    const preserveTags = [...(item.preserveTags || properties.preserveTags || [])];

    // These tags can be toplevel tags -or- attributes - so we generally want to preserve existing values - #8615
    // We'll only _replace_ the tag value if this tag is the toplevel/defining tag for the matched item (`k`)
    ['building', 'emergency', 'internet_access', 'takeaway'].forEach(osmkey => {
      if (k !== osmkey) preserveTags.push(`^${osmkey}$`);
    });

    const regexes = preserveTags.map(s => new RegExp(s, 'i'));

    const keepTags = {};
    Object.keys(newTags).forEach(osmkey => {
      if (regexes.some(regex => regex.test(osmkey))) {
        keepTags[osmkey] = newTags[osmkey];
      }
    });

    // Remove any primary tags ("amenity", "craft", "shop", "man_made", "route", etc) that have a
    // value like `amenity=yes` or `shop=yes` (exceptions have already been added to `keepTags` above)
    _nsi.kvt.forEach((vmap, primaryKey) => {
      if (newTags[primaryKey] === 'yes') delete newTags[primaryKey];
    });

    // Replace mistagged `wikidata`/`wikipedia` with e.g. `brand:wikidata`/`brand:wikipedia`
    if (foundQID) {
      delete newTags.wikipedia;
      delete newTags.wikidata;
    }

    // Do the tag upgrade - later sources win, so preserved tags override the NSI item's tags
    Object.assign(newTags, item.tags, keepTags);

    // Swap `route` back to `route_master` - name-suggestion-index#5184
    if (isRouteMaster) {
      newTags.route_master = newTags.route;
      delete newTags.route;
    }

    // Special `branch` splitting rules - IF..
    // - NSI is suggesting to replace `name`, AND
    // - `branch` doesn't already contain something, AND
    // - original name has not moved to an alternate name (e.g. "Dunkin' Donuts" -> "Dunkin'"), AND
    // - original name is "some name" + "some stuff", THEN
    // consider splitting `name` into `name`/`branch`..
    const origName = tags.name;
    const newName = newTags.name;
    if (newName && origName && newName !== origName && !newTags.branch) {
      const newNames = gatherNames(newTags);
      const newSet = new Set([...newNames.primary, ...newNames.alternate]);
      const isMoved = newSet.has(origName);   // another tag holds the original name now

      if (!isMoved) {
        // Test name fragments, longest to shortest, to fit them into a "Name Branch" pattern.
        // e.g. "TUI ReiseCenter - Neuss Innenstadt" -> ["TUI", "ReiseCenter", "Neuss", "Innenstadt"]
        const nameParts = origName.split(/[\s\-\/,.]/);
        for (let split = nameParts.length; split > 0; split--) {
          const name = nameParts.slice(0, split).join(' ');  // e.g. "TUI ReiseCenter"
          const branch = nameParts.slice(split).join(' ');   // e.g. "Neuss Innenstadt"
          const nameHits = _nsi.matcher.match(k, v, name, loc);
          if (!nameHits || !nameHits.length) continue;       // no match, try next name fragment

          if (nameHits.some(hit => hit.itemID === itemID)) { // matched the name fragment to the same itemID above
            if (branch) {
              if (notBranches.test(branch)) {  // "branch" was detected but is noise ("factory outlet", etc)
                newTags.name = origName;       // Leave `name` alone, this part of the name may be significant..
              } else {
                const branchHits = _nsi.matcher.match(k, v, branch, loc);
                if (branchHits && branchHits.length) {   // if "branch" matched something else in NSI..
                  if (branchHits[0].match === 'primary' || branchHits[0].match === 'alternate') {  // if another brand! (e.g. "KFC - Taco Bell"?)
                    return null;               // bail out - can't suggest tags in this case
                  }                            // else a generic (e.g. "gas", "cafe") - ignore
                } else {                       // "branch" is not noise and not something in NSI
                  newTags.branch = branch;     // Stick it in the `branch` tag..
                }
              }
            }
            break;
          }
        }
      }
    }

    return { newTags: newTags, matched: item };
  }

  return changed ? { newTags: newTags, matched: null } : null;
}
// `_isGenericName()`
// Is the `name` tag generic?
// Only the `name` tag value is tested, against every interesting k/v pair of the feature.
// The matcher is called without a location.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
// Returns
//   `true` if it is generic, `false` if not
//
function _isGenericName(tags) {
  const name = tags.name;
  if (!name) return false;

  // Gather key/value tag pairs to try to match
  const tryKVs = gatherKVs(tags);
  if (!(tryKVs.primary.size || tryKVs.alternate.size)) return false;

  // tryNames just contains the `name` tag value and nothing else
  const tryNames = { primary: new Set([name]), alternate: new Set() };

  // Test the [key,value,name] tuples in priority order - primary before alternate.
  // The first `excludeGeneric` hit means this is a generic name.
  return gatherTuples(tryKVs, tryNames).some(({ k, v, n }) => {
    const hits = _nsi.matcher.match(k, v, n);
    return Boolean(hits && hits.length && hits[0].match === 'excludeGeneric');
  });
}
// PUBLIC INTERFACE
export default {

  // `init()`
  // Kick off preparation of the name-suggestion-index.
  // Nothing is returned; progress is reported through `status()`.
  //
  init() {
    const delay = msec => new Promise(resolve => {
      window.setTimeout(resolve, msec);
    });

    // Note: service.init is called immediately after the presetManager has started loading its data.
    // We expect to chain onto an unfulfilled promise here.
    setNsiSources();
    presetManager.ensureLoaded()
      .then(() => loadNsiPresets())
      .then(() => delay(100))  // wait briefly for locationSets to enter the locationManager queue
      .then(() => locationManager.mergeLocationSets([]))   // wait for locationSets to resolve
      .then(() => loadNsiData())
      .then(() => { _nsiStatus = 'ok'; })
      .catch(() => { _nsiStatus = 'failed'; });   // any failure along the chain ends up here
  },


  // `reset()`
  // Reset is called when user saves data to OSM (does nothing here)
  //
  reset() {},


  // `status()`
  // To let other code know how it's going...
  //
  // Returns
  //   `String`: 'loading', 'ok', 'failed'
  //
  status() {
    return _nsiStatus;
  },


  // `isGenericName()`
  // Is the `name` tag generic?
  //
  // Arguments
  //   `tags`: `Object` containing the feature's OSM tags
  // Returns
  //   `true` if it is generic, `false` if not
  //
  isGenericName(tags) {
    return _isGenericName(tags);
  },


  // `upgradeTags()`
  // Suggest tag upgrades.
  // This function will not modify the input tags, it makes a copy.
  //
  // Arguments
  //   `tags`: `Object` containing the feature's OSM tags
  //   `loc`: Location where this feature exists, as a [lon, lat]
  // Returns
  //   `Object` containing the result, or `null` if no changes needed:
  //   {
  //     'newTags': `Object` - The tags that the feature should have
  //     'matched': `Object` - The matched item
  //   }
  //
  upgradeTags(tags, loc) {
    return _upgradeTags(tags, loc);
  },


  // `cache()`
  // Direct access to the NSI cache, useful for testing or breaking things
  //
  // Returns
  //   `Object`: the internal NSI cache
  //
  cache() {
    return _nsi;
  }
};