// scripts/link-checker.js

const fs = require('fs');
const path = require('path');
const process = require('process');
const Listr = require('listr');
const axios = require('axios');

const common = require('./common.js');

const { projectPaths } = common;

// Give up on any external request after 5000 ms so one slow host cannot stall the run.
axios.defaults.timeout = 5000;

/**
 * Pipeline of link-checker steps. Each step reads/writes fields on the shared
 * Listr context object `ctx`:
 *   - allDocuments:       [{ files, path, project }] collected from projectPaths
 *   - externalLinks:      links whose (possibly rewritten) URL is http(s)
 *   - internalLinks:      links resolved to a local file path
 *   - externalBrokenList: external links whose GET did not answer with status 200
 *   - internalBrokenList: internal links whose target path does not exist
 */
const tasks = new Listr([
  {
    title: 'Start Link Checker',
    task: () => {},
  },
  {
    title: 'Scan document files',
    task: (ctx) => {
      ctx.allDocuments = [];
      projectPaths.forEach((project) => {
        Object.values(project.latestDocs).forEach((p) => {
          // Skip documentation folders that are not present on disk.
          if (!fs.existsSync(p)) return;
          ctx.allDocuments.push({
            files: scanFolder(p),
            path: p,
            project: project.name,
          });
        });
      });
    },
  },
  {
    title: 'Scan all links',
    task: (ctx) => {
      ctx.externalLinks = []; // links to other sites
      ctx.internalLinks = []; // links to other Markdown files or anchor
      ctx.allDocuments.forEach((documents) => {
        documents.files.forEach((file) => {
          const scanResult = scanLinkInMDFile(file, documents.project);
          ctx.externalLinks.push(...scanResult.links);
          ctx.internalLinks.push(...scanResult.filteredLinks);
        });
      });
      console.log(`[Link Scanner] Scan result: ${ctx.externalLinks.length} external links, ${ctx.internalLinks.length} internal links`);
    },
  },
  {
    title: 'Start external link check',
    task: async (ctx) => {
      ctx.externalBrokenList = [];
      // linkValidate never rejects, so Promise.all always yields one result per link.
      const result = await Promise.all(
        ctx.externalLinks.map((link) => linkValidate(link)),
      );
      ctx.externalBrokenList.push(...result.filter((item) => item.status !== 200));
    },
  },
  {
    title: 'Start internal link check',
    task: (ctx) => {
      ctx.internalBrokenList = [];
      ctx.internalLinks.forEach((link) => {
        if (!fs.existsSync(link.url)) {
          ctx.internalBrokenList.push(link);
        }
      });
    },
  },
  {
    title: 'Write broken list to file',
    task: (ctx) => {
      fs.writeFileSync('./brokenLinks.json', JSON.stringify({
        external: ctx.externalBrokenList,
        internal: ctx.internalBrokenList,
      }));
    },
  },
]);

tasks.run()
  .then(() => {
    console.log('[Finish] Link Checker finished');
  })
  .catch((err) => {
    console.error(err);
    process.exit(1);
  });

/**
 * Recursively collect the paths of all files below a directory.
 *
 * @param {string} tarDir - Directory to walk.
 * @returns {string[]} Paths (joined onto `tarDir`) of every non-directory entry.
 */
function scanFolder(tarDir) {
  const filePaths = [];
  fs.readdirSync(tarDir).forEach((file) => {
    const tarPath = path.join(tarDir, file);
    if (fs.statSync(tarPath).isDirectory()) {
      filePaths.push(...scanFolder(tarPath));
    } else {
      filePaths.push(tarPath);
    }
  });
  return filePaths;
}

/**
 * Work out what a non-http(s) Markdown link target points at.
 *
 * @param {string} url - Trimmed link target as written in the document.
 * @param {string} filePath - Path of the Markdown file containing the link.
 * @param {string} project - Project name, used to build GitHub URLs.
 * @returns {{ url: string, anchor?: string }} Either a local file path or a
 *   GitHub URL, plus the `#fragment` when the target carried one.
 */
function resolveLinkTarget(url, filePath, project) {
  const hashIndex = url.indexOf('#');
  if (hashIndex >= 0) {
    const filePart = url.slice(0, hashIndex);
    const anchor = url.slice(hashIndex);
    if (filePart === '') {
      // such as "#abcd": an anchor inside the current document
      return { url: filePath, anchor };
    }
    // such as "other.md#abcd": the fragment must not be part of the path that
    // is later tested with fs.existsSync, otherwise the link always looks broken
    return {
      url: path.normalize(path.dirname(filePath) + path.sep + filePart),
      anchor,
    };
  }

  if (url === 'LICENSE' || url === 'logos/apache-apisix.png') {
    return { url: `https://github.com/apache/${project}/blob/master/${url}` };
  }

  if (!url.endsWith('.md')) {
    // not end with ".md": point the link at the file in the project's GitHub repository
    const isTranslated = filePath.includes('i18n');
    const lang = !isTranslated
      ? 'en'
      : filePath.split(`i18n${path.sep}`)[1].split(path.sep)[0];
    let subPath = !isTranslated
      ? path.dirname(filePath.split(`docs${path.sep}${project}${path.sep}`)[1])
      : path.dirname(filePath.split(`docs-${project}${path.sep}current${path.sep}`)[1]);
    subPath = subPath !== '.' ? subPath + path.sep : '';
    const originPath = path
      .normalize(`docs${path.sep}${lang}${path.sep}latest${path.sep}${subPath}${url}`)
      .replace(/\\/g, '/');
    return { url: `https://github.com/apache/${project}/blob/master/${originPath}` };
  }

  // such as "./abcd", "../abcd", "../../../abcd"
  return { url: path.normalize(path.dirname(filePath) + path.sep + url) };
}

/**
 * Extract every `[text](target)` link from a Markdown file and classify it.
 *
 * Links that get converted to a GitHub URL are also rewritten in the file on
 * disk, so running this function can modify `filePath`.
 *
 * @param {string} filePath - Markdown file to scan.
 * @param {string} project - Project name the file belongs to.
 * @returns {{ links: object[], filteredLinks: object[] }} `links` holds the
 *   http(s) links, `filteredLinks` the links resolved to local file paths.
 *   Each entry is `{ url, text, file }` with an optional `anchor`.
 */
function scanLinkInMDFile(filePath, project) {
  const fileContent = fs.readFileSync(filePath, 'utf-8');
  const regex = /\[[\s\S]*?\]\([\s\S]*?\)/g;
  const matches = fileContent.match(regex) || [];

  const externalLinks = []; // web links
  const internalLinks = []; // local files
  let updatedContent = fileContent;

  matches.forEach((item) => {
    const textHrefDivide = item.split('](');
    const text = textHrefDivide[0].replace('[', '');
    const originLink = textHrefDivide[1].replace(')', '');
    const link = { url: originLink, text, file: filePath };

    const trimmed = originLink.trim();
    if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) {
      link.url = trimmed;
      externalLinks.push(link);
      return;
    }

    const target = resolveLinkTarget(trimmed, filePath, project);
    link.url = target.url;
    if (target.anchor !== undefined) {
      link.anchor = target.anchor;
    }

    if (!link.url.startsWith('http://') && !link.url.startsWith('https://')) {
      internalLinks.push(link);
      return;
    }

    // Replace the original target with the converted link in the document.
    // The replacement is literal and anchored on the "](target)" token so that
    // regex metacharacters in the target, or the same text appearing in prose
    // or inside an already converted URL, cannot corrupt the file.
    updatedContent = updatedContent
      .split(`](${originLink})`)
      .join(`](${link.url})`);
    externalLinks.push(link);
  });

  // Write the document back once, and only when a link was actually converted.
  if (updatedContent !== fileContent) {
    fs.writeFileSync(filePath, updatedContent, 'utf8');
  }

  return {
    links: externalLinks,
    filteredLinks: internalLinks,
  };
}

/**
 * Request an external link and report the outcome. Never rejects.
 *
 * @param {{ url: string }} link - Link descriptor; copied into the result.
 * @returns {Promise<object>} The link plus `status`/`statusText` from the
 *   response, or `status: 0` / `statusText: 'FAIL'` when the request failed.
 */
async function linkValidate(link) {
  console.log('checking external link: ', link.url);
  try {
    const res = await axios.get(link.url);
    return {
      ...link,
      status: res.status,
      statusText: res.statusText,
    };
  } catch (err) {
    // Deliberately best-effort: any request error marks the link as failed
    // instead of aborting the whole check.
    return {
      ...link,
      status: 0,
      statusText: 'FAIL',
    };
  }
}