scripts/merge_md.js (244 lines of code) (raw):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
const fs = require('fs');
const path = require('path');
const i18nJsonFile = require('../i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2.json');
// ---------------------------------------------------------------------------
// Build configuration.
// Exactly one definition of sidebarPath / docsBaseDir / outputPath / excludes
// may be active at a time; swap the commented lines to build another variant.
// ---------------------------------------------------------------------------
// Variant: version 2.0, Chinese docs
// const sidebarPath = 'versioned_sidebars/version-2.0-sidebars.json';
// const docsBaseDir = 'i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0';
// const outputPath = 'doc-2.0.md';
// const excludes = [];
// Variant: version 1.2 (sidebar and excludes are shared by the zh and en builds below)
const sidebarPath = 'versioned_sidebars/version-1.2-sidebars.json';
// Sidebar category labels that are removed before the documents are merged.
const excludes = ['SQL Manual'];
// Version 1.2, Chinese docs (currently active)
const docsBaseDir = 'i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2';
const outputPath = 'doc-1.2.md';
// Version 1.2, English docs
// const docsBaseDir = 'versioned_docs/version-1.2';
// const outputPath = 'doc-1.2-en.md';
// NOTE(review): not referenced anywhere in the visible part of this file — confirm before removing.
const fileLinkName = {};
/**
 * Load a JSON document from disk.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {*} The value obtained by parsing the file's UTF-8 text as JSON.
 * @throws {Error} When the file cannot be read or its content is not valid JSON.
 */
function readJSON(filePath) {
  return JSON.parse(fs.readFileSync(filePath, { encoding: 'utf-8' }));
}
/**
 * Read a Markdown file and hand back its text.
 *
 * @param {string} filePath - Path of the Markdown file.
 * @returns {string} The file content decoded as UTF-8.
 */
function readMarkdownFile(filePath) {
  const text = fs.readFileSync(filePath, { encoding: 'utf-8' });
  return text;
}
/**
 * Write text to a file as UTF-8, replacing any previous content.
 *
 * @param {string} filePath - Destination path.
 * @param {string} content - Text to store.
 * @returns {void}
 */
function writeMarkdownContent(filePath, content) {
  fs.writeFileSync(filePath, content, { encoding: 'utf-8' });
}
/**
 * Rewrite the inline Markdown links of one document so that they still resolve
 * after all documents have been concatenated into a single file.
 *
 * Rules, applied per `[text](target)` occurrence:
 *  - targets starting with `http` are returned unchanged;
 *  - image targets (png/jpeg/svg/gif/jpg) get `/images/` replaced by
 *    `./static/images/`;
 *  - `something.md#Heading` targets become a link to the in-page anchor built
 *    from the fragment (whitespace runs -> `-`, lower-cased);
 *  - every other target is treated as a document path below `docsBaseDir` and
 *    becomes a link to the anchor built from that document's front-matter title.
 *
 * Links that consist of a fragment only (`[x](#anchor)`) do not match the
 * pattern and are therefore left as they are.
 *
 * @param {string} chapter - Markdown text of one document.
 * @returns {string} The text with its links rewritten.
 */
function replaceLinkWrap(chapter) {
  // Group 1: link text. Group 2: target WITHOUT fragment. Group 3: optional `#fragment`.
  const hyperLinkPattern = /\[([^\]]+)\]\(([^#)]+)(#[^)]+)?\)/g;
  const imagePattern = /\.(png|jpeg|svg|gif|jpg)$/;
  const toAnchor = text => text.replace(/[\s]+/g, '-').toLowerCase();

  function replaceLink(match, linkName, link, frag) {
    if (link.startsWith('http')) {
      return match;
    }
    if (imagePattern.test(link)) {
      const imgLink = link.replace(/\/images\//, './static/images/');
      return `[${linkName}](${imgLink})`;
    }
    // The fragment is captured separately from the target, so the target can
    // never contain '#'. Test the captured fragment instead, and keep the
    // link text so the result is still a Markdown link.
    if (frag && link.endsWith('.md')) {
      return `[${linkName}](${toAnchor(frag)})`;
    }
    let fullPath = path.join(docsBaseDir, customResolve(link));
    if (!link.endsWith('.md')) {
      fullPath += '.md';
    }
    return `[${linkName}](#${toAnchor(getMainTitleFromFile(fullPath))})`;
  }

  return chapter.replace(hyperLinkPattern, replaceLink);
}
/**
 * Collapse `.` and `..` segments of a `/`-separated relative path.
 *
 * A `..` drops the previously kept segment (and is ignored when nothing is
 * left to drop); a `.` is discarded; every other segment, including an empty
 * one, is kept verbatim.
 *
 * @param {string} relativePath - Path using `/` as separator.
 * @returns {string} The remaining segments joined with `/`.
 */
function customResolve(relativePath) {
  return relativePath
    .split('/')
    .reduce((kept, segment) => {
      if (segment === '..') {
        kept.pop();
      } else if (segment !== '.') {
        kept.push(segment);
      }
      return kept;
    }, [])
    .join('/');
}
/**
 * Read the `title` field from the JSON front matter of a Markdown file.
 *
 * The front matter is taken to be the first `{ ... }` span of the file. It is
 * parsed as written first, so valid JSON whose values contain an apostrophe
 * (e.g. "What's Apache Doris") is handled. Only when that fails is the
 * single-quote -> double-quote rewrite tried, for front matter written with
 * single-quoted strings.
 *
 * @param {string} filePath - Path of the Markdown file.
 * @returns {string} The title, or `''` when the file does not exist, has no
 *   `{ ... }` span, or the span carries no string `title`.
 * @throws {SyntaxError} When the span is not parseable either way.
 */
function getMainTitleFromFile(filePath) {
  if (!fs.existsSync(filePath)) {
    return '';
  }
  const mdContent = fs.readFileSync(filePath, 'utf8');
  const match = mdContent.match(/{[^}]*}/);
  if (!match) {
    return '';
  }
  let frontMatter;
  try {
    frontMatter = JSON.parse(match[0]);
  } catch {
    // Legacy front matter uses single-quoted strings; normalise and retry.
    frontMatter = JSON.parse(match[0].replace(/'/g, '"'));
  }
  const { title } = frontMatter;
  // Callers immediately call string methods on the result, so never hand back undefined.
  return typeof title === 'string' ? title : '';
}
/**
 * Turn a list of sidebar items into merged Markdown.
 *
 * String items are document ids: the matching `<id>.md` file below
 * `docsBaseDir` is read (and silently skipped when it does not exist), its
 * links, code fences, admonitions, duplicate title and heading levels are
 * normalised, and the result is appended followed by a blank line.
 * Object items that carry an `items` list are categories: a heading with
 * `level + 1` hashes is emitted and the children are processed one level deeper.
 * Anything else is ignored.
 *
 * @param {Array<string|object>} items - Sidebar entries to render.
 * @param {number} level - Nesting depth of the enclosing category.
 * @returns {string} Concatenated Markdown for all entries.
 */
function processItems(items, level) {
  const pieces = [];
  for (const entry of items) {
    if (typeof entry === 'string') {
      const docPath = path.join(docsBaseDir, entry + '.md');
      if (!fs.existsSync(docPath)) {
        continue;
      }
      const withLinks = replaceLinkWrap(readMarkdownFile(docPath));
      const withFences = trimCodeFunc(withLinks);
      const withQuotes = adjustTips(withFences);
      const withoutTitle = removeDuplicateTitle(withQuotes);
      pieces.push(adjustHeaders(withoutTitle, level) + '\n\n');
    } else if (typeof entry === 'object' && entry.items) {
      pieces.push(`${'#'.repeat(level + 1)} ${entry.label}\n\n`);
      pieces.push(processItems(entry.items, level + 1));
    }
  }
  return pieces.join('');
}
/**
 * Convert `:::` admonition blocks into Markdown block quotes.
 *
 * The opening `:::` line is replaced by `> <word>` where `<word>` is the
 * second space-separated token of the trimmed line, or by an empty line when
 * there is no such token. Every following line is prefixed with `> ` until the
 * next line starting with `:::`, which is replaced by an empty line. A block
 * that is never closed runs to the end of the text.
 *
 * @param {string} mdContent - Markdown text.
 * @returns {string} The text with admonitions rewritten.
 */
function adjustTips(mdContent) {
  if (!mdContent.includes(':::')) return mdContent;

  let insideTip = false;
  const rewritten = mdContent.split('\n').map(rawLine => {
    const trimmed = rawLine.trim();
    if (!trimmed.startsWith(':::')) {
      return insideTip ? `> ${rawLine}` : rawLine;
    }
    if (insideTip) {
      // Closing marker.
      insideTip = false;
      return '';
    }
    // Opening marker.
    insideTip = true;
    const label = trimmed.split(' ')[1];
    return label ? `> ${label}` : '';
  });
  return rewritten.join('\n');
}
/**
 * Strip surrounding whitespace from every code-fence line.
 *
 * A fence line is one whose trimmed text starts with three backticks; such
 * lines are replaced by their trimmed form, all other lines are untouched.
 *
 * @param {string} mdContent - Markdown text.
 * @returns {string} The text with fence lines trimmed.
 */
function trimCodeFunc(mdContent) {
  if (!mdContent.includes('```')) return mdContent;

  return mdContent
    .split('\n')
    .map(rawLine => {
      const trimmed = rawLine.trim();
      return trimmed.startsWith('```') ? trimmed : rawLine;
    })
    .join('\n');
}
/**
 * Blank out the first level-1 heading of a document.
 *
 * @example
 *
 * ---
 * {
 *   "title": "快速体验",
 *   "language": "zh-CN"
 * }
 *
 * ---
 *
 * # 快速体验
 *
 * The front-matter title is emitted as the document heading during merging, so
 * the literal "# 快速体验" line would show up as a second, duplicate title.
 *
 * Lines inside fenced code blocks are skipped: a shell or Python comment such
 * as "# start the server" is not a heading and must not be deleted when the
 * document happens to have no level-1 heading of its own.
 *
 * @param {string} mdContent - Markdown text of one document.
 * @returns {string} The text with the first level-1 heading replaced by an empty line.
 */
function removeDuplicateTitle(mdContent) {
  if (!/#\s/.test(mdContent)) return mdContent;
  const lines = mdContent.split('\n');
  let inCodeFence = false;
  for (let i = 0; i < lines.length; i++) {
    if (lines[i].trim().startsWith('```')) {
      inCodeFence = !inCodeFence;
      continue;
    }
    if (!inCodeFence && lines[i].startsWith('# ')) {
      lines[i] = '';
      break;
    }
  }
  return lines.join('\n');
}
/**
 * Replace heading texts that have an entry in the i18n map by their translation.
 *
 * Only lines starting with one to six `#` followed by a space are considered.
 * The line is split on `"# "`; the piece right after the hashes is looked up
 * in the map and, when a non-empty translation exists, swapped in.
 *
 * @param {string} mdContent - Merged Markdown text.
 * @returns {string} The text with translatable headings replaced.
 */
function translateTitle(mdContent) {
  const translations = getI18nMap();
  const headingPrefix = /^#{1,6} /;

  return mdContent
    .split('\n')
    .map(line => {
      if (!headingPrefix.test(line)) {
        return line;
      }
      const pieces = line.split('# ');
      const translated = translations.get(pieces[1]);
      if (!translated) {
        return line;
      }
      pieces[1] = translated;
      return pieces.join('# ');
    })
    .join('\n');
}
/**
 * Push every heading of a document `level` steps deeper and, when the document
 * has no level-1 heading before the end of its front matter, insert the
 * front-matter title as a heading with `level + 1` hashes.
 *
 * Differences from a plain "prefix every line starting with #" pass:
 *  - lines inside fenced code blocks are left alone, so shell/Python comments
 *    in code samples are not turned into headings;
 *  - the front matter is only parsed when a title actually has to be inserted,
 *    so documents without a `{ ... }` span no longer fail;
 *  - the front matter is parsed as written first (valid JSON may contain
 *    apostrophes), falling back to the single-quote -> double-quote rewrite.
 *
 * @param {string} mdContent - Markdown text of one document.
 * @param {number} level - Number of `#` to add to every heading.
 * @returns {string} The adjusted Markdown text.
 * @throws {SyntaxError} When a title must be inserted and the front matter is
 *   not parseable either way.
 */
function adjustHeaders(mdContent, level) {
  // Returns the front-matter title, or undefined when there is none.
  const readFrontMatterTitle = () => {
    const match = mdContent.match(/{[^}]*}/);
    if (!match) {
      return undefined;
    }
    let frontMatter;
    try {
      frontMatter = JSON.parse(match[0]);
    } catch {
      frontMatter = JSON.parse(match[0].replace(/'/g, '"'));
    }
    return frontMatter.title;
  };

  const lines = mdContent.split('\n');

  // Look for a level-1 heading that appears before the closing `---` of the front matter.
  let hasMainTitle = false;
  let firstSeparatorIndex = -1;
  let secondSeparatorIndex = -1;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (line.startsWith('# ')) {
      hasMainTitle = true;
      break;
    }
    if (line.trim() === '---') {
      if (firstSeparatorIndex === -1) {
        firstSeparatorIndex = i;
      } else {
        secondSeparatorIndex = i;
        break;
      }
    }
  }

  let inCodeFence = false;
  const adjustedLines = lines.map(line => {
    if (line.trim().startsWith('```')) {
      inCodeFence = !inCodeFence;
      return line;
    }
    if (inCodeFence || !line.startsWith('#')) {
      return line;
    }
    const numHashes = line.match(/^#+/)[0].length;
    return '#'.repeat(numHashes + level) + line.slice(numHashes);
  });

  if (!hasMainTitle && secondSeparatorIndex !== -1) {
    const mainTitle = readFrontMatterTitle();
    if (mainTitle != null) {
      // Two lines below the closing separator: after the blank line that follows it.
      adjustedLines.splice(secondSeparatorIndex + 2, 0, `${'#'.repeat(level + 1)} ${mainTitle}`);
    }
  }
  return adjustedLines.join('\n');
}
/**
 * Recursively prune excluded categories from a sidebar node, in place.
 *
 * When the node's own label is excluded it is only flagged with
 * `needExclude = true`; removing it from its parent's list is the caller's
 * job. Otherwise every child is visited and children flagged that way are
 * removed from `node.items`.
 *
 * Entries that are not objects (plain document ids) and objects without an
 * `items` array (e.g. link or doc items) have nothing to prune and are left
 * untouched instead of raising a TypeError.
 *
 * @param {object|string} node - Sidebar entry to process.
 * @param {string[]} excludes - Category labels to remove.
 * @returns {void}
 */
function traverseSidebarTree(node, excludes) {
  if (node === null || typeof node !== 'object') {
    return;
  }
  if (excludes.includes(node.label)) {
    node.needExclude = true;
    return;
  }
  if (!Array.isArray(node.items)) {
    return;
  }
  for (const child of node.items) {
    traverseSidebarTree(child, excludes);
  }
  // Walk backwards so splicing does not shift the entries still to be checked.
  for (let i = node.items.length - 1; i >= 0; i--) {
    if (node.items[i]?.needExclude) {
      node.items.splice(i, 1);
    }
  }
}
/**
 * Remove one or more categories from a sidebar, at any depth, in place.
 *
 * Matching is done on the category label, so labels are assumed to be unique.
 * Every top-level entry is pruned recursively first; top-level entries that
 * ended up flagged with `needExclude` are then dropped from `sidebar.docs`.
 *
 * @param {{docs: Array}} sidebar - Sidebar object whose `docs` list is pruned.
 * @param {string[]} excludes - Category labels to remove.
 * @returns {void}
 */
function filterSidebarTree(sidebar, excludes) {
  sidebar.docs.forEach(entry => {
    traverseSidebarTree(entry, excludes);
  });
  // Walk backwards so removals do not disturb the indexes still to be visited.
  for (let idx = sidebar.docs.length - 1; idx >= 0; idx--) {
    if (sidebar.docs[idx].needExclude) {
      sidebar.docs.splice(idx, 1);
    }
  }
}
/**
 * Build a lookup from short i18n keys to their translated messages.
 *
 * Each key of the loaded i18n JSON is reduced to the part after its last `.`;
 * the value stored is that entry's `message`. When two keys share the same
 * short form, the one listed later wins.
 *
 * @returns {Map<string, string>} Short key -> translated message.
 */
function getI18nMap() {
  const pairs = Object.entries(i18nJsonFile).map(([fullKey, entry]) => {
    const shortKey = fullKey.split('.').pop();
    return [shortKey, entry.message];
  });
  return new Map(pairs);
}
/**
 * Build the single merged Markdown file.
 *
 * Reads the sidebar definition, prunes the excluded categories (when any are
 * configured), renders every top-level category as a level-1 heading followed
 * by its documents, runs the heading translation over the result and writes it
 * to `outputPath`.
 *
 * @returns {void}
 */
function mergeMarkdownFiles() {
  const sidebarData = readJSON(sidebarPath);
  if (excludes?.length) {
    filterSidebarTree(sidebarData, excludes);
  }

  const merged = sidebarData.docs
    .map(category => `# ${category.label}\n\n` + processItems(category.items, 1))
    .join('');

  // Headings are always passed through translateTitle before being written.
  writeMarkdownContent(outputPath, translateTitle(merged));
}
// Script entry point: the merge runs as soon as this file is executed.
mergeMarkdownFiles();
// Only reached when the merge above completed without throwing.
console.log('successfully');