in src/js/other-websites/fathom.js [1:966]
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = global || self, factory(global.fathom = {}));
}(this, (function (exports) { 'use strict';
class CycleError extends Error {
}
/**
* Return the passed-in arg. Useful as a default.
*/
function identity(x) {
return x;
}
/*eslint-env browser*/
/**
* From an iterable return the best item, according to an arbitrary comparator
* function. In case of a tie, the first item wins.
*
* @arg by {function} Given an item of the iterable, return a value to compare
* @arg isBetter {function} Return whether its first arg is better than its
* second
*/
function best(iterable, by, isBetter) {
let bestSoFar, bestKeySoFar;
let isFirst = true;
forEach(
function (item) {
const key = by(item);
if (isBetter(key, bestKeySoFar) || isFirst) {
bestSoFar = item;
bestKeySoFar = key;
isFirst = false;
}
},
iterable);
if (isFirst) {
throw new Error('Tried to call best() on empty iterable');
}
return bestSoFar;
}
/**
* Return the maximum item from an iterable, as defined by >.
*
* Works with any type that works with >. If multiple items are equally great,
* return the first.
*
* @arg by {function} Given an item of the iterable, returns a value to
* compare
*/
function max(iterable, by = identity) {
return best(iterable, by, (a, b) => a > b);
}
/**
* Return an Array of maximum items from an iterable, as defined by > and ===.
*
* If an empty iterable is passed in, return [].
*/
function maxes(iterable, by = identity) {
let bests = [];
let bestKeySoFar;
let isFirst = true;
forEach(
function (item) {
const key = by(item);
if (key > bestKeySoFar || isFirst) {
bests = [item];
bestKeySoFar = key;
isFirst = false;
} else if (key === bestKeySoFar) {
bests.push(item);
}
},
iterable);
return bests;
}
/**
* Return the minimum item from an iterable, as defined by <.
*
* If multiple items are equally great, return the first.
*/
function min(iterable, by = identity) {
return best(iterable, by, (a, b) => a < b);
}
/**
* Return the sum of an iterable, as defined by the + operator.
*/
function sum(iterable) {
let total;
let isFirst = true;
forEach(
function assignOrAdd(addend) {
if (isFirst) {
total = addend;
isFirst = false;
} else {
total += addend;
}
},
iterable);
return total;
}
/**
* Return the number of items in an iterable, consuming it as a side effect.
*/
function length(iterable) {
let num = 0;
// eslint-disable-next-line no-unused-vars
for (let item of iterable) {
num++;
}
return num;
}
/**
* Iterate, depth first, over a DOM node. Return the original node first.
*
* @arg shouldTraverse {function} Given a node, say whether we should
* include it and its children. Default: always true.
*/
function *walk(element, shouldTraverse = element => true) {
yield element;
for (let child of element.childNodes) {
if (shouldTraverse(child)) {
for (let w of walk(child, shouldTraverse)) {
yield w;
}
}
}
}
const blockTags = new Set(
['ADDRESS', 'BLOCKQUOTE', 'BODY', 'CENTER', 'DIR', 'DIV', 'DL',
'FIELDSET', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HR',
'ISINDEX', 'MENU', 'NOFRAMES', 'NOSCRIPT', 'OL', 'P', 'PRE',
'TABLE', 'UL', 'DD', 'DT', 'FRAMESET', 'LI', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'HTML']);
/**
* Return whether a DOM element is a block element by default (rather than by
* styling).
*/
function isBlock(element) {
return blockTags.has(element.tagName);
}
/**
* Yield strings of text nodes within a normalized DOM node and its children,
* without venturing into any contained block elements.
*
* @arg shouldTraverse {function} Specify additional elements to exclude by
* returning false
*/
function *inlineTexts(element, shouldTraverse = element => true) {
// TODO: Could we just use querySelectorAll() with a really long
// selector rather than walk(), for speed?
for (let child of walk(element,
element => !(isBlock(element) ||
element.tagName === 'SCRIPT' &&
element.tagName === 'STYLE')
&& shouldTraverse(element))) {
if (child.nodeType === child.TEXT_NODE) {
// wholeText() is not implemented by jsdom, so we use
// textContent(). The result should be the same, since
// we're calling it on only text nodes, but it may be
// slower. On the positive side, it means we don't need to
// normalize the DOM tree first.
yield child.textContent;
}
}
}
/**
* Return the total length of the inline text within an element, with
* whitespace collapsed.
*
* @arg shouldTraverse {function} Specify additional elements to exclude by
* returning false
*/
function inlineTextLength(element, shouldTraverse = element => true) {
return sum(map(text => collapseWhitespace(text).length,
inlineTexts(element, shouldTraverse)));
}
/**
* Return a string with each run of whitespace collapsed to a single space.
*/
function collapseWhitespace(str) {
return str.replace(/\s{2,}/g, ' ');
}
/**
* Return the ratio of the inline text length of the links in an element to the
* inline text length of the entire element.
*
* @arg inlineLength {number} Optionally, the precalculated inline
* length of the fnode. If omitted, we will calculate it ourselves.
*/
function linkDensity(fnode, inlineLength) {
if (inlineLength === undefined) {
inlineLength = inlineTextLength(fnode.element);
}
const lengthWithoutLinks = inlineTextLength(fnode.element,
element => element.tagName !== 'A');
return (inlineLength - lengthWithoutLinks) / inlineLength;
}
/**
* Return whether an element is a text node that consist wholly of whitespace.
*/
function isWhitespace(element) {
return (element.nodeType === element.TEXT_NODE &&
element.textContent.trim().length === 0);
}
/**
* Get a key of a map, first setting it to a default value if it's missing.
*/
function setDefault(map, key, defaultMaker) {
if (map.has(key)) {
return map.get(key);
}
const defaultValue = defaultMaker();
map.set(key, defaultValue);
return defaultValue;
}
/**
* Get a key of a map or, if it's missing, a default value.
*/
function getDefault(map, key, defaultMaker) {
if (map.has(key)) {
return map.get(key);
}
return defaultMaker();
}
/**
* Return an backward iterator over an Array.
*/
function *reversed(array) {
for (let i = array.length - 1; i >= 0; i--) {
yield array[i];
}
}
/**
* Return an Array, the reverse topological sort of the given nodes.
*
* @arg nodes An iterable of arbitrary things
* @arg nodesThatNeed {function} Take a node and returns an Array of nodes
* that depend on it
*/
function toposort(nodes, nodesThatNeed) {
const ret = [];
const todo = new Set(nodes);
const inProgress = new Set();
function visit(node) {
if (inProgress.has(node)) {
throw new CycleError('The graph has a cycle.');
}
if (todo.has(node)) {
inProgress.add(node);
for (let needer of nodesThatNeed(node)) {
visit(needer);
}
inProgress.delete(node);
todo.delete(node);
ret.push(node);
}
}
while (todo.size > 0) {
visit(first(todo));
}
return ret;
}
/**
* A Set with the additional methods it ought to have had
*/
class NiceSet extends Set {
/**
* Remove and return an arbitrary item. Throw an Error if I am empty.
*/
pop() {
for (let v of this.values()) {
this.delete(v);
return v;
}
throw new Error('Tried to pop from an empty NiceSet.');
}
/**
* Union another set or other iterable into myself.
*
* @return myself, for chaining
*/
extend(otherSet) {
for (let item of otherSet) {
this.add(item);
}
return this;
}
/**
* Subtract another set from a copy of me.
*
* @return a copy of myself excluding the elements in ``otherSet``.
*/
minus(otherSet) {
const ret = new NiceSet(this);
for (const item of otherSet) {
ret.delete(item);
}
return ret;
}
/**
* Actually show the items in me.
*/
toString() {
return '{' + Array.from(this).join(', ') + '}';
}
}
/**
* Return the first item of an iterable.
*/
function first(iterable) {
for (let i of iterable) {
return i;
}
}
/**
* Given any node in a DOM tree, return the root element of the tree, generally
* an HTML element.
*/
function rootElement(element) {
return element.ownerDocument.documentElement;
}
/**
* Return the number of times a regex occurs within the string `haystack`.
*
* Caller must make sure `regex` has the 'g' option set.
*/
function numberOfMatches(regex, haystack) {
return (haystack.match(regex) || []).length;
}
/**
* Wrap a scoring callback, and set its element to the page root iff a score is
* returned.
*
* This is used to build rulesets which classify entire pages rather than
* picking out specific elements.
*
* For example, these rules might classify a page as a "login page", influenced
* by whether they have login buttons or username fields:
*
* ``rule(type('loginPage'), score(page(pageContainsLoginButton))),``
* ``rule(type('loginPage'), score(page(pageContainsUsernameField)))``
*/
function page(scoringFunction) {
function wrapper(fnode) {
const scoreAndTypeAndNote = scoringFunction(fnode);
if (scoreAndTypeAndNote.score !== undefined) {
scoreAndTypeAndNote.element = rootElement(fnode.element);
}
return scoreAndTypeAndNote;
}
return wrapper;
}
/**
* Sort the elements by their position in the DOM.
*
* @arg fnodes {iterable} fnodes to sort
* @return {Array} sorted fnodes
*/
function domSort(fnodes) {
function compare(a, b) {
const element = a.element;
const position = element.compareDocumentPosition(b.element);
if (position & element.DOCUMENT_POSITION_FOLLOWING) {
return -1;
} else if (position & element.DOCUMENT_POSITION_PRECEDING) {
return 1;
} else {
return 0;
}
}
return Array.from(fnodes).sort(compare);
}
/* istanbul ignore next */
/**
* @return whether a thing appears to be a DOM element.
*/
function isDomElement(thing) {
return thing.nodeName !== undefined;
}
/* istanbul ignore next */
/**
* Return the DOM element contained in a passed-in fnode. Return passed-in DOM
* elements verbatim.
*
* @arg fnodeOrElement {Node|Fnode}
*/
function toDomElement(fnodeOrElement) {
return isDomElement(fnodeOrElement) ? fnodeOrElement : fnodeOrElement.element;
}
/**
* Checks whether any of the element's attribute values satisfy some condition.
*
* Example::
*
* rule(type('foo'),
* score(attributesMatch(element,
* attr => attr.includes('good'),
* ['id', 'alt']) ? 2 : 1))
*
* @arg element {Node} Element whose attributes you want to search
* @arg predicate {function} A condition to check. Take a string and
* return a boolean. If an attribute has multiple values (e.g. the class
* attribute), attributesMatch will check each one.
* @arg attrs {string[]} An Array of attributes you want to search. If none are
* provided, search all.
* @return Whether any of the attribute values satisfy the predicate function
*/
function attributesMatch(element, predicate, attrs = []) {
const attributes = attrs.length === 0 ? Array.from(element.attributes).map(a => a.name) : attrs;
for (let i = 0; i < attributes.length; i++) {
const attr = element.getAttribute(attributes[i]);
// If the attribute is an array, apply the scoring function to each element
if (attr && ((Array.isArray(attr) && attr.some(predicate)) || predicate(attr))) {
return true;
}
}
return false;
}
/* istanbul ignore next */
/**
* Yield an element and each of its ancestors.
*/
function *ancestors(element) {
yield element;
let parent;
while ((parent = element.parentNode) !== null && parent.nodeType === parent.ELEMENT_NODE) {
yield parent;
element = parent;
}
}
/**
* Return the sigmoid of the argument: 1 / (1 + exp(-x)). This is useful for
* crunching a feature value that may have a wide range into the range (0, 1)
* without a hard ceiling: the sigmoid of even a very large number will be a
* little larger than that of a slightly smaller one.
*
* @arg x {Number} a number to be compressed into the range (0, 1)
*/
function sigmoid(x) {
return 1 / (1 + Math.exp(-x));
}
/* istanbul ignore next */
/**
* Return whether an element is practically visible, considering things like 0
* size or opacity, ``visibility: hidden`` and ``overflow: hidden``.
*
* Merely being scrolled off the page in either horizontally or vertically
* doesn't count as invisible; the result of this function is meant to be
* independent of viewport size.
*/
function isVisible(fnodeOrElement) {
// This could be 5x more efficient if https://github.com/w3c/csswg-drafts/issues/4122 happens.
const element = toDomElement(fnodeOrElement);
const elementRect = element.getBoundingClientRect();
const elementStyle = getComputedStyle(element);
// Alternative to reading ``display: none`` due to Bug 1381071.
if (elementRect.width === 0 && elementRect.height === 0 && elementStyle.overflow !== 'hidden') {
return false;
}
if (elementStyle.visibility === 'hidden') {
return false;
}
// Check if the element is irrevocably off-screen:
if (elementRect.x + elementRect.width < 0 ||
elementRect.y + elementRect.height < 0
) {
return false;
}
for (const ancestor of ancestors(element)) {
const isElement = ancestor === element;
const style = isElement ? elementStyle : getComputedStyle(ancestor);
if (style.opacity === '0') {
return false;
}
if (style.display === 'contents') {
// ``display: contents`` elements have no box themselves, but children are
// still rendered.
continue;
}
const rect = isElement ? elementRect : ancestor.getBoundingClientRect();
if ((rect.width === 0 || rect.height === 0) && elementStyle.overflow === 'hidden') {
// Zero-sized ancestors don’t make descendants hidden unless the descendant
// has ``overflow: hidden``.
return false;
}
}
return true;
}
/**
* Return the extracted [r, g, b, a] values from a string like "rgba(0, 5, 255, 0.8)",
* and scale them to 0..1. If no alpha is specified, return undefined for it.
*/
function rgbaFromString(str) {
const m = str.match(/^rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*(\d+(?:\.\d+)?)\s*)?\)$/i);
if (m) {
return [m[1] / 255, m[2] / 255, m[3] / 255, m[4] === undefined ? undefined : parseFloat(m[4])];
} else {
throw new Error('Color ' + str + ' did not match pattern rgb[a](r, g, b[, a]).');
}
}
/**
* Return the saturation 0..1 of a color defined by RGB values 0..1.
*/
function saturation(r, g, b) {
const cMax = Math.max(r, g, b);
const cMin = Math.min(r, g, b);
const delta = cMax - cMin;
const lightness = (cMax + cMin) / 2;
const denom = (1 - (Math.abs(2 * lightness - 1)));
// Return 0 if it's black (R, G, and B all 0).
return (denom === 0) ? 0 : delta / denom;
}
/**
* Scale a number to the range [0, 1] using a linear slope.
*
* For a rising line, the result is 0 until the input reaches zeroAt, then
* increases linearly until oneAt, at which it becomes 1. To make a falling
* line, where the result is 1 to the left and 0 to the right, use a zeroAt
* greater than oneAt.
*/
function linearScale(number, zeroAt, oneAt) {
const isRising = zeroAt < oneAt;
if (isRising) {
if (number <= zeroAt) {
return 0;
} else if (number >= oneAt) {
return 1;
}
} else {
if (number >= zeroAt) {
return 0;
} else if (number <= oneAt) {
return 1;
}
}
const slope = 1 / (oneAt - zeroAt);
return slope * (number - zeroAt);
}
// -------- Routines below this point are private to the framework. --------
/**
* Flatten out an iterable of iterables into a single iterable of non-
* iterables. Does not consider strings to be iterable.
*/
function *flatten(iterable) {
for (const i of iterable) {
if (typeof i !== 'string' && isIterable(i)) {
yield *(flatten(i));
} else {
yield i;
}
}
}
/**
* A lazy, top-level ``Array.map()`` workalike that works on anything iterable
*/
function *map(fn, iterable) {
for (const i of iterable) {
yield fn(i);
}
}
/**
* A lazy, top-level ``Array.forEach()`` workalike that works on anything
* iterable
*/
function forEach(fn, iterable) {
for (const i of iterable) {
fn(i);
}
}
function isIterable(thing) {
return thing && typeof thing[Symbol.iterator] === 'function';
}
var utilsForFrontend = /*#__PURE__*/Object.freeze({
__proto__: null,
identity: identity,
best: best,
max: max,
maxes: maxes,
min: min,
sum: sum,
length: length,
walk: walk,
isBlock: isBlock,
inlineTexts: inlineTexts,
inlineTextLength: inlineTextLength,
collapseWhitespace: collapseWhitespace,
linkDensity: linkDensity,
isWhitespace: isWhitespace,
setDefault: setDefault,
getDefault: getDefault,
reversed: reversed,
toposort: toposort,
NiceSet: NiceSet,
first: first,
rootElement: rootElement,
numberOfMatches: numberOfMatches,
page: page,
domSort: domSort,
isDomElement: isDomElement,
toDomElement: toDomElement,
attributesMatch: attributesMatch,
ancestors: ancestors,
sigmoid: sigmoid,
isVisible: isVisible,
rgbaFromString: rgbaFromString,
saturation: saturation,
linearScale: linearScale,
flatten: flatten,
map: map,
forEach: forEach
});
/**
* Return the number of stride nodes between 2 DOM nodes *at the same
* level of the tree*, without going up or down the tree.
*
* ``left`` xor ``right`` may also be undefined.
*/
function numStrides(left, right) {
let num = 0;
// Walk right from left node until we hit the right node or run out:
let sibling = left;
let shouldContinue = sibling && sibling !== right;
while (shouldContinue) {
sibling = sibling.nextSibling;
if ((shouldContinue = sibling && sibling !== right) &&
!isWhitespace(sibling)) {
num += 1;
}
}
if (sibling !== right) { // Don't double-punish if left and right are siblings.
// Walk left from right node:
sibling = right;
while (sibling) {
sibling = sibling.previousSibling;
if (sibling && !isWhitespace(sibling)) {
num += 1;
}
}
}
return num;
}
/**
* Return a topological distance between 2 DOM nodes or :term:`fnodes<fnode>`
* weighted according to the similarity of their ancestry in the DOM. For
* instance, if one node is situated inside ``<div><span><b><theNode>`` and the
* other node is at ``<differentDiv><span><b><otherNode>``, they are considered
* close to each other for clustering purposes. This is useful for picking out
* nodes which have similar purposes.
*
* Return ``Number.MAX_VALUE`` if one of the nodes contains the other.
*
* This is largely an implementation detail of :func:`clusters`, but you can
* call it yourself if you wish to implement your own clustering. Takes O(n log
* n) time.
*
* Note that the default costs may change; pass them in explicitly if they are
* important to you.
*
* @arg fnodeA {Node|Fnode}
* @arg fnodeB {Node|Fnode}
* @arg differentDepthCost {number} Cost for each level deeper one node is than
* the other below their common ancestor
* @arg differentTagCost {number} Cost for a level below the common ancestor
* where tagNames differ
* @arg sameTagCost {number} Cost for a level below the common ancestor where
* tagNames are the same
* @arg strideCost {number} Cost for each stride node between A and B. Stride
* nodes are siblings or siblings-of-ancestors that lie between the 2
* nodes. These interposed nodes make it less likely that the 2 nodes
* should be together in a cluster.
* @arg additionalCost {function} Return an additional cost, given 2 fnodes or
* nodes.
*
*/
function distance(fnodeA,
fnodeB,
{differentDepthCost = 2,
differentTagCost = 2,
sameTagCost = 1,
strideCost = 1,
additionalCost = (fnodeA, fnodeB) => 0} = {}) {
// I was thinking of something that adds little cost for siblings. Up
// should probably be more expensive than down (see middle example in the
// Nokia paper).
// TODO: Test and tune default costs. They're off the cuff at the moment.
if (fnodeA === fnodeB) {
return 0;
}
const elementA = isDomElement(fnodeA) ? fnodeA : fnodeA.element;
const elementB = isDomElement(fnodeB) ? fnodeB : fnodeB.element;
// Stacks that go from the common ancestor all the way to A and B:
const aAncestors = [elementA];
const bAncestors = [elementB];
let aAncestor = elementA;
let bAncestor = elementB;
// Ascend to common parent, stacking them up for later reference:
while (!aAncestor.contains(elementB)) { // Note: an element does contain() itself.
aAncestor = aAncestor.parentNode;
aAncestors.push(aAncestor); //aAncestors = [a, b]. aAncestor = b // if a is outer: no loop here; aAncestors = [a]. aAncestor = a.
}
// In compareDocumentPosition()'s opinion, inside implies after. Basically,
// before and after pertain to opening tags.
const comparison = elementA.compareDocumentPosition(elementB);
// If either contains the other, abort. We'd either return a misleading
// number or else walk upward right out of the document while trying to
// make the ancestor stack.
if (comparison & (elementA.DOCUMENT_POSITION_CONTAINS | elementA.DOCUMENT_POSITION_CONTAINED_BY)) {
return Number.MAX_VALUE;
}
// Make an ancestor stack for the right node too so we can walk
// efficiently down to it:
do {
bAncestor = bAncestor.parentNode; // Assumes we've early-returned above if A === B. This walks upward from the outer node and up out of the tree. It STARTS OUT with aAncestor === bAncestor!
bAncestors.push(bAncestor);
} while (bAncestor !== aAncestor);
// Figure out which node is left and which is right, so we can follow
// sibling links in the appropriate directions when looking for stride
// nodes:
let left = aAncestors;
let right = bAncestors;
let cost = 0;
if (comparison & elementA.DOCUMENT_POSITION_FOLLOWING) {
// A is before, so it could contain the other node. What did I mean to do if one contained the other?
left = aAncestors;
right = bAncestors;
} else if (comparison & elementA.DOCUMENT_POSITION_PRECEDING) {
// A is after, so it might be contained by the other node.
left = bAncestors;
right = aAncestors;
}
// Descend to both nodes in parallel, discounting the traversal
// cost iff the nodes we hit look similar, implying the nodes dwell
// within similar structures.
while (left.length || right.length) {
const l = left.pop();
const r = right.pop();
if (l === undefined || r === undefined) {
// Punishment for being at different depths: same as ordinary
// dissimilarity punishment for now
cost += differentDepthCost;
} else {
// TODO: Consider similarity of classList.
cost += l.tagName === r.tagName ? sameTagCost : differentTagCost;
}
// Optimization: strides might be a good dimension to eliminate.
if (strideCost !== 0) {
cost += numStrides(l, r) * strideCost;
}
}
return cost + additionalCost(fnodeA, fnodeB);
}
/**
* Return the spatial distance between 2 fnodes or elements, assuming a
* rendered page.
*
* Specifically, return the distance in pixels between the centers of
* ``fnodeA.element.getBoundingClientRect()`` and
* ``fnodeB.element.getBoundingClientRect()``.
*/
function euclidean(fnodeA, fnodeB) {
/**
* Return the horizontal distance from the left edge of the viewport to the
* center of an element, given a DOMRect object for it. It doesn't matter
* that the distance is affected by the page's scroll offset, since the 2
* elements have the same offset.
*/
function xCenter(domRect) {
return domRect.left + domRect.width / 2;
}
function yCenter(domRect) {
return domRect.top + domRect.height / 2;
}
const elementA = toDomElement(fnodeA);
const elementB = toDomElement(fnodeB);
const aRect = elementA.getBoundingClientRect();
const bRect = elementB.getBoundingClientRect();
return Math.sqrt((xCenter(aRect) - xCenter(bRect)) ** 2 +
(yCenter(aRect) - yCenter(bRect)) ** 2);
}
/** A lower-triangular matrix of inter-cluster distances */
class DistanceMatrix {
/**
* @arg distance {function} Some notion of distance between 2 given nodes
*/
constructor(elements, distance) {
// A sparse adjacency matrix:
// {A => {},
// B => {A => 4},
// C => {A => 4, B => 4},
// D => {A => 4, B => 4, C => 4}
// E => {A => 4, B => 4, C => 4, D => 4}}
//
// A, B, etc. are arrays of [arrays of arrays of...] nodes, each
// array being a cluster. In this way, they not only accumulate a
// cluster but retain the steps along the way.
//
// This is an efficient data structure in terms of CPU and memory, in
// that we don't have to slide a lot of memory around when we delete a
// row or column from the middle of the matrix while merging. Of
// course, we lose some practical efficiency by using hash tables, and
// maps in particular are slow in their early implementations.
this._matrix = new Map();
// Convert elements to clusters:
const clusters = elements.map(el => [el]);
// Init matrix:
for (let outerCluster of clusters) {
const innerMap = new Map();
for (let innerCluster of this._matrix.keys()) {
innerMap.set(innerCluster, distance(outerCluster[0],
innerCluster[0]));
}
this._matrix.set(outerCluster, innerMap);
}
this._numClusters = clusters.length;
}
// Return (distance, a: clusterA, b: clusterB) of closest-together clusters.
// Replace this to change linkage criterion.
closest() {
const self = this;
if (this._numClusters < 2) {
throw new Error('There must be at least 2 clusters in order to return the closest() ones.');
}
// Return the distances between every pair of clusters.
function clustersAndDistances() {
const ret = [];
for (let [outerKey, row] of self._matrix.entries()) {
for (let [innerKey, storedDistance] of row.entries()) {
ret.push({a: outerKey, b: innerKey, distance: storedDistance});
}
}
return ret;
}
// Optimizing this by inlining the loop and writing it less
// functionally doesn't help:
return min(clustersAndDistances(), x => x.distance);
}
// Look up the distance between 2 clusters in me. Try the lookup in the
// other direction if the first one falls in the nonexistent half of the
// triangle.
_cachedDistance(clusterA, clusterB) {
let ret = this._matrix.get(clusterA).get(clusterB);
if (ret === undefined) {
ret = this._matrix.get(clusterB).get(clusterA);
}
return ret;
}
// Merge two clusters.
merge(clusterA, clusterB) {
// An example showing how rows merge:
// A: {}
// B: {A: 1}
// C: {A: 4, B: 4},
// D: {A: 4, B: 4, C: 4}
// E: {A: 4, B: 4, C: 2, D: 4}}
//
// Step 2:
// C: {}
// D: {C: 4}
// E: {C: 2, D: 4}}
// AB: {C: 4, D: 4, E: 4}
//
// Step 3:
// D: {}
// AB: {D: 4}
// CE: {D: 4, AB: 4}
// Construct new row, finding min distances from either subcluster of
// the new cluster to old clusters.
//
// There will be no repetition in the matrix because, after all,
// nothing pointed to this new cluster before it existed.
const newRow = new Map();
for (let outerKey of this._matrix.keys()) {
if (outerKey !== clusterA && outerKey !== clusterB) {
newRow.set(outerKey, Math.min(this._cachedDistance(clusterA, outerKey),
this._cachedDistance(clusterB, outerKey)));
}
}
// Delete the rows of the clusters we're merging.
this._matrix.delete(clusterA);
this._matrix.delete(clusterB);
// Remove inner refs to the clusters we're merging.
for (let inner of this._matrix.values()) {
inner.delete(clusterA);
inner.delete(clusterB);
}
// Attach new row.
this._matrix.set([clusterA, clusterB], newRow);
// There is a net decrease of 1 cluster:
this._numClusters -= 1;
}