script/js/compare-rule-xml.js (69 lines of code) (raw):
/**
* A script to compare LanguageTool rule files and let the user know if any of
* the supplied rules have been changed.
*
* Accepts two rule definition files to compare
* (e.g. https://github.com/languagetool-org/languagetool/blob/master/languagetool-language-modules/en/src/main/resources/org/languagetool/rules/en/grammar.xml)
* and the path to a text file containing a line-separated list of rule IDs,
* which should be obtainable via the Google Sheet or rule management service.
*
* Usage: node compare-rule-xml.js file-1.xml file-2.xml rules.txt
*
* Example output:
*
* Checking NO_SPACE_CLOSING_QUOTE ...
* NO_SPACE_CLOSING_QUOTE is unchanged
* Checking PUBIC_X ...
* PUBIC_X is unchanged
* Checking IN_PRINCIPAL ...
* IN_PRINCIPAL is unchanged
* Checking CURRENCY ...
* CURRENCY is unchanged
* Checking CURRENCY_SPACE ...
* CURRENCY_SPACE has changed:
* 4.3.xml:
* <rule type="whitespace" id="CURRENCY_SPACE" name="Whitespace after currency symbols: '$ 100' ($100)">
* <pattern>
* <token regexp="yes">[$€£¥\u8371]</token>
* <token regexp="yes" spacebefore="yes">\d+</token>
* </pattern>
* <message>The currency mark is usually written without any whitespace: <suggestion>\1\2</suggestion>.</message>
* <short>Remove whitespace</short>
* <example correction="$100">You owe me <marker>$ 100</marker>.</example>
* </rule>
* 5.3.xml:
* <rule type="whitespace" id="CURRENCY_SPACE" name="Whitespace after currency symbols: '$ 100' ($100)">
* <antipattern>
* <token regexp="yes">\d+</token>
* <token regexp="yes">[$€£¥฿\u8371]</token>
* <token regexp="yes">\d+</token>
* </antipattern>
* <pattern>
* <token regexp="yes">[$€£¥฿\u8371]</token>
* <token regexp="yes" spacebefore="yes">\d+</token>
* </pattern>
* <message>The currency mark is usually written without any whitespace.</message>
* <suggestion>\1\2</suggestion>
* <short>Remove whitespace</short>
* <example correction="$100">You owe me <marker>$ 100</marker>.</example>
* </rule>
*/
import * as fs from 'fs';
import { XmlDocument } from 'libxml2-wasm';
// The first two argv entries are the node executable and the script path;
// the three positional arguments the script needs come after them.
const [filePath1, filePath2, ruleFilePath] = process.argv.slice(2);

// All three arguments are mandatory: print usage and bail out otherwise.
const hasAllArguments = Boolean(filePath1 && filePath2 && ruleFilePath);
if (!hasAllArguments) {
  const received = { filePath1, filePath2, ruleStr: ruleFilePath };
  console.error(
    "Incorrect arguments. Usage: node compare-rule-xml.js file-1.xml file2.xml rules.txt. \nrules.txt should be a line-separated list of rules, e.g: \n\nNO_SPACE_CLOSING_QUOTE\nCURRENCY_SPACE\n..etc\n\nThe script received: ",
    received
  );
  process.exit(1);
}
/**
 * Reads the list of rule IDs to check from a text file, one ID per line.
 *
 * Lines are split on both LF and CRLF endings, surrounding whitespace is
 * trimmed, and blank lines (including the one produced by a trailing
 * newline) are discarded so they are not looked up as empty rule IDs.
 *
 * Logs an error and exits the process with code 1 if the file cannot be read.
 *
 * @param {string} filePath path to the line-separated rule list
 * @returns {string[]} the non-empty rule IDs, in file order
 */
const getRuleIds = (filePath) => {
  try {
    return fs
      .readFileSync(filePath, "utf-8")
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter((line) => line !== "");
  } catch (/** @type {any}*/e) {
    console.error(`Error reading ${filePath}: ${e.message}`);
    process.exit(1);
  }
};
/**
 * Loads a file as UTF-8 text and pairs the text with the path it came from,
 * so later stages can still report which file they are working on.
 *
 * Logs an error and exits the process with code 1 if the read fails.
 *
 * @param {string} filePath
 * @returns {[string, string]} tuple of `[filePath, fileContents]`
 */
const getFileStrFromPath = (filePath) => {
  try {
    const contents = fs.readFileSync(filePath, "utf-8");
    return [filePath, contents];
  } catch (/** @type {any}*/readError) {
    const reason = readError.message;
    console.error(`Error reading ${filePath}: ${reason}`);
    process.exit(1);
  }
};
/**
 * Parses the text of a file into an XML document, keeping the originating
 * path alongside it for later reporting.
 *
 * Logs an error to stderr (like the file-reading helpers in this script) and
 * exits the process with code 1 if the text cannot be parsed.
 *
 * @param {[string, string]} file tuple of `[filePath, fileContents]`
 * @returns {[string, XmlDocument]} tuple of `[filePath, parsedDocument]`
 */
const getXMLFromFile = ([filePath, str]) => {
  try {
    return [filePath, XmlDocument.fromString(str)];
  } catch (/** @type {any}*/e) {
    // Report on stderr so failures are not mixed into the comparison output.
    console.error(`Error parsing ${filePath}: ${e.message}`);
    process.exit(1);
  }
};
/**
 * Looks up a rule by ID in a parsed rule document and returns its serialized
 * form. `<rule>` elements are preferred; if none match, the first `<rulegroup>`
 * with that ID is used instead.
 *
 * NOTE(review): `ruleId` is interpolated directly into the XPath expression,
 * so an ID containing a single quote would produce an invalid query — IDs are
 * presumably plain identifiers; confirm against the rule list source.
 *
 * @param {XmlDocument} doc
 * @param {string} ruleId
 * @returns {{ ruleId: string, rule: string | undefined }} the ID together with
 *   the matching node as a string, or `undefined` when nothing matches
 */
const getRuleNodeFromDoc = (doc, ruleId) => {
  const rulePredicate = `//rule[@id='${ruleId}']`;
  const ruleGroupPredicate = `//rulegroup[@id='${ruleId}']`;

  const rules = doc.find(rulePredicate) || [];
  const ruleGroups = doc.find(ruleGroupPredicate) || [];

  return {
    ruleId,
    // Both branches must *call* toString(): returning the method reference
    // would make every rule group look present but compare as a function.
    rule: rules[0]?.toString() || ruleGroups[0]?.toString() || undefined,
  };
};
// Read both files first, then parse both, so read errors surface before parse errors.
const docs = [filePath1, filePath2].map(getFileStrFromPath).map(getXMLFromFile);
const ruleIds = getRuleIds(ruleFilePath);

// Report, for every requested rule ID, whether it was introduced, removed,
// missing from both files, changed, or left unchanged between the two files.
for (const ruleId of ruleIds) {
  console.log(`Checking ${ruleId} ...`);

  const [before, after] = docs.map(([path, doc]) => ({
    path,
    ...getRuleNodeFromDoc(doc, ruleId),
  }));
  const existsBefore = Boolean(before.rule);
  const existsAfter = Boolean(after.rule);

  if (existsAfter && !existsBefore) {
    console.log(`${ruleId} introduced in ${after.path}`);
    continue;
  }
  if (!existsAfter && existsBefore) {
    console.log(`${ruleId} removed in ${after.path}`);
    continue;
  }
  if (!existsAfter && !existsBefore) {
    console.log(`${ruleId} not found in either file`);
    continue;
  }

  // Both sides are present from here on; compare their serialized forms.
  const beforeText = before.rule.toString();
  const afterText = after.rule.toString();
  if (afterText !== beforeText) {
    console.log(
      `${ruleId} has changed: \n${before.path}:\n${beforeText}\n${after.path}:\n${afterText}`
    );
    continue;
  }

  console.log(`${ruleId} is unchanged`);
}