in Readability.js [1767:1873]
_getArticleMetadata(jsonld) {
var metadata = {};
var values = {};
var metaElements = this._doc.getElementsByTagName("meta");
// property is a space-separated list of values
var propertyPattern =
/\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi;
// name is a single value
var namePattern =
/^\s*(?:(dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i;
// Find description tags.
this._forEachNode(metaElements, function (element) {
var elementName = element.getAttribute("name");
var elementProperty = element.getAttribute("property");
var content = element.getAttribute("content");
if (!content) {
return;
}
var matches = null;
var name = null;
if (elementProperty) {
matches = elementProperty.match(propertyPattern);
if (matches) {
// Convert to lowercase, and remove any whitespace
// so we can match below.
name = matches[0].toLowerCase().replace(/\s/g, "");
// multiple authors
values[name] = content.trim();
}
}
if (!matches && elementName && namePattern.test(elementName)) {
name = elementName;
if (content) {
// Convert to lowercase, remove any whitespace, and convert dots
// to colons so we can match below.
name = name.toLowerCase().replace(/\s/g, "").replace(/\./g, ":");
values[name] = content.trim();
}
}
});
// get title
metadata.title =
jsonld.title ||
values["dc:title"] ||
values["dcterm:title"] ||
values["og:title"] ||
values["weibo:article:title"] ||
values["weibo:webpage:title"] ||
values.title ||
values["twitter:title"] ||
values["parsely-title"];
if (!metadata.title) {
metadata.title = this._getArticleTitle();
}
const articleAuthor =
typeof values["article:author"] === "string" &&
!this._isUrl(values["article:author"])
? values["article:author"]
: undefined;
// get author
metadata.byline =
jsonld.byline ||
values["dc:creator"] ||
values["dcterm:creator"] ||
values.author ||
values["parsely-author"] ||
articleAuthor;
// get description
metadata.excerpt =
jsonld.excerpt ||
values["dc:description"] ||
values["dcterm:description"] ||
values["og:description"] ||
values["weibo:article:description"] ||
values["weibo:webpage:description"] ||
values.description ||
values["twitter:description"];
// get site name
metadata.siteName = jsonld.siteName || values["og:site_name"];
// get article published time
metadata.publishedTime =
jsonld.datePublished ||
values["article:published_time"] ||
values["parsely-pub-date"] ||
null;
// in many sites the meta value is escaped with HTML entities,
// so here we need to unescape it
metadata.title = this._unescapeHtmlEntities(metadata.title);
metadata.byline = this._unescapeHtmlEntities(metadata.byline);
metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
metadata.siteName = this._unescapeHtmlEntities(metadata.siteName);
metadata.publishedTime = this._unescapeHtmlEntities(metadata.publishedTime);
return metadata;
},