tools/generate-xhtml-entities.js (74 lines of code) (raw):

#!/usr/bin/env node

/*
 * Downloads the W3C XHTML 1 entity declaration files and prints, on standard
 * output, an ES module that exports `XHTMLEntities`: a map from entity name
 * to a JavaScript string literal holding the corresponding character.
 *
 * Any download or parse failure throws, so the script exits with a non-zero
 * status instead of printing an incomplete table.
 */

'use strict';

var https = require('https');

// Entity declaration files, downloaded and parsed in this order.
var ENTITY_SOURCES = [
    'https://www.w3.org/2003/entities/2007/xhtml1-special.ent',
    'https://www.w3.org/2003/entities/2007/xhtml1-lat1.ent',
    'https://www.w3.org/2003/entities/2007/xhtml1-symbol.ent',
    'https://www.w3.org/2003/entities/2007/predefined.ent'
];

// Captures the entity name of a declaration such as `<!ENTITY nbsp "&#160;">`.
var ENTITY_NAME = /^<!ENTITY\s+([A-Za-z0-9]+)/;

// Captures the numeric character reference used as the entity value, either
// hexadecimal (group 1) or decimal (group 2). The optional `38;#` accepts the
// double-escaped form (`"&#38;#60;"`) that XML requires for `lt` and `amp`;
// without it `lt` would be read as code 38 and then discarded as a duplicate
// of `amp`.
var ENTITY_VALUE = /"&#(?:38;#)?(?:x([0-9A-Fa-f]+)|([0-9]+));/;

/**
 * Downloads a text resource over HTTPS.
 *
 * @param {string} uri - Address of the resource.
 * @param {function(string)} callback - Receives the complete response body.
 * @throws {Error} When the request fails or the status is not 200; thrown
 *     from an event handler, so it terminates the script.
 */
function get(uri, callback) {
    https.get(uri, function (response) {
        var content = '';

        // Redirects are not followed, so anything but 200 means the body is
        // not the entity file.
        if (response.statusCode !== 200) {
            throw new Error('Failed to download ' + uri + ': HTTP status ' + response.statusCode);
        }

        // Decode inside the stream so that a multi-byte character split
        // across two chunks is not corrupted.
        response.setEncoding('utf8');
        response.on('data', function (chunk) {
            content += chunk;
        });
        response.on('error', function (err) {
            throw new Error('Failed to download ' + uri + ': ' + err.message);
        });
        response.on('end', function () {
            callback(content);
        });
    }).on('error', function (err) {
        throw new Error('Failed to download ' + uri + ': ' + err.message);
    });
}

/**
 * Extracts the entity declarations from the text of a declaration file.
 * Lines that do not start with `<!ENTITY` are ignored.
 *
 * @param {string} content - Text of the declaration file.
 * @param {string} uri - Origin of the text, used in error messages only.
 * @returns {Array<{name: string, code: number}>} Declarations in file order.
 * @throws {Error} When a `<!ENTITY` line has no recognizable name or value.
 */
function parseEntities(content, uri) {
    var entities = [];

    content.split(/\r?\n/).forEach(function (line) {
        var name, value;

        if (line.indexOf('<!ENTITY') !== 0) {
            return;
        }

        name = ENTITY_NAME.exec(line);
        value = ENTITY_VALUE.exec(line);
        if (name === null || value === null) {
            throw new Error('Unrecognized entity declaration in ' + uri + ': ' + line);
        }

        entities.push({
            name: name[1],
            code: value[1] !== undefined ? parseInt(value[1], 16) : parseInt(value[2], 10)
        });
    });

    return entities;
}

/**
 * Downloads one declaration file and parses it.
 *
 * @param {string} uri - Address of the declaration file.
 * @param {function(Array<{name: string, code: number}>)} callback - Receives
 *     the parsed declarations.
 */
function fetchEntities(uri, callback) {
    get(uri, function (content) {
        callback(parseEntities(content, uri));
    });
}

/**
 * Gathers the declarations of every file in ENTITY_SOURCES, one download at
 * a time, so the combined list always follows the order of that array.
 *
 * @param {function(Array<{name: string, code: number}>)} callback - Receives
 *     the combined list once the last file has been parsed.
 */
function collect(callback) {
    var entities = [];

    function next(index) {
        if (index === ENTITY_SOURCES.length) {
            callback(entities);
            return;
        }
        fetchEntities(ENTITY_SOURCES[index], function (parsed) {
            entities = entities.concat(parsed);
            next(index + 1);
        });
    }

    next(0);
}

/**
 * Formats a 16-bit code unit as four uppercase hexadecimal digits.
 *
 * @param {number} unit - Value in the range 0..0xFFFF.
 * @returns {string} Zero-padded hexadecimal text.
 */
function hex4(unit) {
    var str = unit.toString(16).toUpperCase();
    while (str.length < 4) {
        str = '0' + str;
    }
    return str;
}

/**
 * Renders a code point as the source text of a single-quoted JavaScript
 * string literal made of `\uXXXX` escapes.
 *
 * @param {number} n - Unicode code point.
 * @returns {string} Literal source text, for example `'\u00A0'`.
 */
function cp(n) {
    var offset;

    if (n <= 0xFFFF) {
        return '\'\\u' + hex4(n) + '\'';
    }

    // A `\u` escape takes exactly four digits, so a code point beyond the
    // Basic Multilingual Plane is written as its UTF-16 surrogate pair.
    offset = n - 0x10000;
    return '\'\\u' + hex4(0xD800 + (offset >> 10)) +
        '\\u' + hex4(0xDC00 + (offset & 0x3FF)) + '\'';
}

/**
 * Prints the generated module: one `name: 'escape'` line per distinct code
 * point, in ascending code point order. When several declarations share a
 * code point, only the first one after sorting is kept.
 */
function generate() {
    collect(function (data) {
        var seen = {},
            lines = [];

        data.sort(function (p, q) {
            return p.code - q.code;
        });

        data.forEach(function (u) {
            if (seen[u.code]) {
                return;
            }
            seen[u.code] = true;
            lines.push(' ' + u.name + ': ' + cp(u.code));
        });

        console.log('// Generated by generate-xhtml-entities.js. DO NOT MODIFY!');
        console.log();
        console.log('export const XHTMLEntities = {');
        console.log(lines.join(',\n'));
        console.log('};');
        console.log();
    });
}

generate();