kit/preprocessors/mdsvex/index.js (299 lines of code) (raw):
import hljs from "highlight.js";
import { mdsvex } from "mdsvex";
import katex from "katex";
import { visit } from "unist-util-visit";
import htmlTags from "html-tags";
import { readdir } from "fs/promises";
import path from "path";
import cheerio from "cheerio";
import { renderSvelteChars } from "../utils.js";
/**
 * Undo double-escaped HTML entities inside `<code>` elements.
 *
 * Inside `<code>` html elements, `&amp;` has to be turned back into `&` so that
 * escaped characters such as `&lt;` or `&#123;` render as `<` and `{` instead of
 * showing up as literal entity text.
 * used for Doc
 * @param {string} code - markup to post-process
 * @returns {string} the markup with `&amp;` replaced by `&` inside every `<code>…</code>` pair
 */
function renderCode(code) {
	// Matches `<code>…</code>` whose content contains no further `<code>` opener;
	// the `s` flag lets `.` span newlines. Only attribute-less `<code>` tags match.
	const REGEX_CODE_TAG = /<code>(((?!<code>).)*)<\/code>/gms;
	return code.replace(
		REGEX_CODE_TAG,
		(_, inner) => `<code>${inner.replace(/&amp;/g, "&")}</code>`
	);
}
// Comment a page can contain; whether it is present is stored in `wrapCodeBlocks`.
const WRAP_CODE_BLOCKS_FLAG = "<!-- WRAP CODE BLOCKS -->";
// Module-level flag, refreshed for every `+page.svelte` file that gets processed.
let wrapCodeBlocks = false;

/**
 * Svelte preprocessor entry point.
 *
 * Files whose name ends with `+page.svelte` are run through the KaTeX marking,
 * conditional isolation, mdsvex and post-render steps; every other file is
 * returned unchanged as `{ code: content }`.
 */
export const mdsvexPreprocess = {
	markup: async ({ content, filename }) => {
		if (!filename.endsWith("+page.svelte")) {
			return { code: content };
		}

		// (A course-specific import injection step used to live here and is disabled.)
		wrapCodeBlocks = content.includes(WRAP_CODE_BLOCKS_FLAG);

		// Math is swapped for placeholders before mdsvex runs and rendered afterwards.
		const markedKatex = {};
		const withMathMarkers = markKatex(content, markedKatex);
		const prepared = escapeSvelteConditionals(withMathMarkers);

		const processed = await _mdsvexPreprocess.markup({ content: prepared, filename });
		const withMath = renderKatex(processed.code, markedKatex);
		processed.code = renderCode(withMath, filename);
		return processed;
	},
};
/**
 * Latex support in mdsvex: swap math spans for placeholders.
 *
 * Display math (`$$…$$` preceded by a newline) is replaced first, then inline
 * math (whitespace followed by `\\(…\\)`). Each span becomes a
 * `KATEXPARSE<n>MARKER` token (the matched leading newline/whitespace is
 * consumed too) and its source is recorded in `markedKatex`.
 * @param {string} content
 * @param {Record<any, any>} markedKatex - filled in place: marker -> `{ tex, displayMode }`
 * @returns {string} content with every math span replaced by its marker
 */
function markKatex(content, markedKatex) {
	const REGEX_LATEX_DISPLAY = /\n\$\$([\s\S]+?)\$\$/g;
	const REGEX_LATEX_INLINE = /\s\\\\\(([\s\S]+?)\\\\\)/g;
	let nextId = 0;

	// Builds a replace callback that records the captured tex under a fresh marker.
	const stash = (displayMode) => (_match, tex) => {
		const marker = `KATEXPARSE${nextId++}MARKER`;
		markedKatex[marker] = { tex, displayMode };
		return marker;
	};

	const withoutDisplayMath = content.replace(REGEX_LATEX_DISPLAY, stash(true));
	return withoutDisplayMath.replace(REGEX_LATEX_INLINE, stash(false));
}
/**
 * Replace every `KATEXPARSE<n>MARKER` placeholder with KaTeX-rendered HTML,
 * wrapped in a Svelte `{@html …}` expression.
 *
 * Marker-shaped tokens that have no entry in `markedKatex` are left untouched.
 * @param {string} code - markup containing placeholders
 * @param {Record<any, any>} markedKatex - marker -> `{ tex, displayMode }`
 * @returns {string} the markup with known placeholders rendered
 * @throws {Error} when the rendered output contains a `katex-error`
 */
function renderKatex(code, markedKatex) {
	return code.replace(/KATEXPARSE[0-9]+MARKER/g, (marker) => {
		const entry = markedKatex[marker];
		if (!entry) {
			// Not one of our placeholders (e.g. literal text in the page): keep as is.
			return marker;
		}
		const { tex, displayMode } = entry;
		// KaTeX needs the raw characters, so HTML-escaped braces and angle
		// brackets in the captured source are decoded first.
		const decodedTex = tex.replaceAll("&#123;", "{").replaceAll("&lt;", "<");
		const html = katex.renderToString(renderSvelteChars(decodedTex), {
			displayMode,
			// KaTeX renders errors inline; they are detected and rethrown below.
			throwOnError: false,
		});
		if (html.includes(`katex-error`)) {
			throw new Error(`[KaTeX] Error while parsing markdown\n ${html}`);
		}
		// JSON.stringify yields a quoted JS string literal for the Svelte expression.
		return `{@html ${JSON.stringify(html)}}`;
	});
}
/**
 * Recursively list the Svelte components found under a directory.
 * @param {string} startDir - directory to scan (sub-directories included)
 * @returns {Promise<string[]>} file names of every `.svelte` file, without
 *   directory and without the `.svelte` extension, in traversal order
 */
async function findSvelteComponentNames(startDir) {
	// Returns the component names below `directory`, depth-first in readdir order.
	async function collect(directory) {
		let names = [];
		const entries = await readdir(directory, { withFileTypes: true });
		for (const entry of entries) {
			if (entry.isDirectory()) {
				const nested = await collect(path.join(directory, entry.name));
				names = names.concat(nested);
				continue;
			}
			if (path.extname(entry.name) === ".svelte") {
				names.push(path.basename(entry.name, ".svelte"));
			}
		}
		return names;
	}

	return collect(startDir);
}
// Directory scanned for Svelte components; being relative, it is resolved
// against the working directory of the process.
const dirPath = "./src/lib";
// Component names are collected once, at module load (top-level await).
const svelteTags = await findSvelteComponentNames(dirPath);
// Tag names accepted as real markup: standard HTML tags plus the components above.
const validTags = [...htmlTags, ...svelteTags];
// Module-level markers recording whether the doc-body start / end marker has
// been encountered while visiting nodes.
let hfDocBodyStart = false;
let hfDocBodyEnd = false;
/**
 * Insert a heading node into a nested outline.
 *
 * Starting at the top level, the walk descends into the `sections` of the last
 * entry for as long as that entry is shallower than `node`; the node is then
 * appended at the level reached.
 * @param {Array<object>} tree - outline to extend (mutated in place)
 * @param {object} node - entry carrying a numeric `depth`
 * @returns {Array<object>} the same `tree` array
 */
function addToTree(tree, node) {
	let level = tree;
	while (level.length !== 0 && level[level.length - 1].depth < node.depth) {
		const last = level[level.length - 1];
		if (!last.sections) {
			last.sections = [];
		}
		level = last.sections;
	}
	level.push(node);
	return tree;
}
/**
 * Extract the plain text of a node.
 *
 * A node without children yields its trimmed `value` (or "" when it has none);
 * otherwise the texts of its children are joined with single spaces and trimmed.
 * @param {object} node - node with an optional `value` and optional `children`
 * @returns {string}
 */
function getTitleText(node) {
	const { children, value } = node;
	if (children && children.length !== 0) {
		const parts = [];
		for (const child of children) {
			parts.push(getTitleText(child));
		}
		return parts.join(" ").trim();
	}
	return value ? value.trim() : "";
}
/**
 * Remark plugin: returns the transformer applied to each markdown syntax tree.
 *
 * The transformer
 *  - replaces every heading with a `<Heading …/>` html node and collects the
 *    headings into a nested outline,
 *  - escapes `{` and `<` in text / html nodes located inside the doc body so
 *    that Svelte does not parse them as expressions or tags,
 *  - turns `[!tip]` / `[!warning]` blockquotes into `<Tip>` components,
 *  - prepends a module script exporting the outline as `metadata`.
 */
function treeVisitor() {
	return transform;

	/**
	 * Mutates `tree` in place.
	 * NOTE(review): `hfDocBodyStart` / `hfDocBodyEnd` are module-level and are not
	 * reset here, so their values carry over between the visit passes below and
	 * between files — confirm this is intended.
	 */
	function transform(tree) {
		let headings = [];
		visit(tree, "heading", (node, index, parent) => {
			const depth = node.depth;
			let title = getTitleText(node);
			let local = "";
			// An explicit anchor may close the heading text as `[ anchor ]`
			// (bracket, whitespace, anchor, whitespace, bracket).
			const match = title.match(/\[\s(.*?)\s\]$/);
			if (match && match[1]) {
				local = match[1];
				title = title.replace(match[0], "").trim();
			} else {
				// No explicit anchor: derive a slug from the title.
				local = title
					.trim()
					.toLowerCase()
					.replace(/\s+/g, "-") // Replace spaces with hyphens
					.replace(/[^\p{L}\p{N}-]+/gu, ""); // Keep letters, numbers, and hyphens only
			}
			headings = addToTree(headings, { title, local, sections: [], depth });
			// Create a svelte node (in remark grammar, the type is "html").
			// `{` is written as an entity so Svelte does not open an expression.
			const svelteNode = {
				type: "html",
				value: `<Heading title="${title.replaceAll(
					"{",
					"&#123;"
				)}" local="${local}" headingTag="h${depth}"/>`,
			};
			// Replace the old node with the new Svelte node
			parent.children[index] = svelteNode;
		});
		visit(tree, "text", onText);
		visit(tree, "html", onHtml);
		visit(tree, "blockquote", onBlockquote);
		// Only the first top-level heading (and its nested sections) is exported.
		let jsonString = JSON.stringify(headings[0]);
		if (jsonString) {
			// The JSON is embedded in a single-quoted JS string below.
			jsonString = jsonString.replaceAll("'", "\\'");
		}
		tree.children.unshift({
			type: "html",
			value: `<script context="module">export const metadata = '${jsonString}';</script>`,
		});
	}

	/**
	 * Update the doc-body flags from `node` and report whether it lies inside
	 * the body, i.e. a start marker has been seen and no end marker since.
	 * The plain-text markers are blanked; the html comment markers are kept.
	 */
	function isWithinDocBody(node) {
		if (["<!--HF DOCBUILD BODY START-->", "HF_DOC_BODY_START"].includes(node.value)) {
			hfDocBodyStart = true;
			hfDocBodyEnd = false;
			// delete the marker
			if (node.value === "HF_DOC_BODY_START") {
				node.value = "";
			}
		}
		if (["<!--HF DOCBUILD BODY END-->", "HF_DOC_BODY_END"].includes(node.value)) {
			hfDocBodyEnd = true;
			// delete the marker
			if (node.value === "HF_DOC_BODY_END") {
				node.value = "";
			}
		}
		return hfDocBodyStart && !hfDocBodyEnd;
	}

	/** Escape `{` and `<` in text nodes inside the doc body. */
	function onText(node) {
		if (!isWithinDocBody(node)) {
			return;
		}
		node.value = node.value.replaceAll("{", "&#123;");
		node.value = node.value.replaceAll("<", "&lt;");
	}

	/**
	 * Inside the doc body: neutralise html nodes whose tag is neither an HTML
	 * tag nor a known component, and escape `{` in the text of well-formed
	 * standard HTML elements.
	 */
	function onHtml(node) {
		if (!isWithinDocBody(node)) {
			return;
		}
		const RE_TAG_NAME = /<\/?(\w+)/;
		const match = node.value.match(RE_TAG_NAME);
		// A single element: opening tag, any content, matching closing tag.
		const REGEX_VALID_START_END_TAG = /^<(\w+)[^>]*>.*<\/\1>$/s;
		if (match) {
			const tagName = match[1];
			if (!validTags.includes(tagName)) {
				// Unknown tag: render it as literal text.
				node.value = node.value.replaceAll("<", "&lt;");
			} else if (htmlTags.includes(tagName) && REGEX_VALID_START_END_TAG.test(node.value.trim())) {
				const $ = cheerio.load(node.value);
				// Go through each text node in the HTML and replace "{" with "&#123;"
				$("*")
					.contents()
					.each((index, element) => {
						if (element.type === "text") {
							element.data = element.data.replaceAll("{", "&#123;");
						}
					});
				// Update the remark HTML node with the modified HTML
				node.value = $("body").html();
			}
		}
	}

	/**
	 * Replace a blockquote that starts with `[!tip]` / `[!warning]` by its
	 * content wrapped in `<Tip>` html nodes.
	 */
	function onBlockquote(node, index, parent) {
		// use github-like Tip & Warning syntax
		// see https://github.com/orgs/community/discussions/16925
		const { children: childrenLevel1 } = node;
		if (!childrenLevel1.length || childrenLevel1[0].type !== "paragraph") {
			return;
		}
		const { children: childrenLevel2 } = childrenLevel1[0];
		if (!childrenLevel2.length || childrenLevel2[0].type !== "linkReference") {
			return;
		}
		const TIP_MARKERS = ["!tip", "!warning"];
		const { identifier } = childrenLevel2[0];
		if (!TIP_MARKERS.includes(identifier)) {
			return;
		}
		if (!parent) {
			return;
		}
		// Drop the marker itself from the first paragraph.
		childrenLevel1[0].children = childrenLevel1[0].children.slice(1);
		const nodeTagOpen = {
			type: "html",
			value: `<Tip warning={${identifier === "!warning"}}>\n\n`,
		};
		const nodeTagClose = {
			type: "html",
			value: "\n\n</Tip>",
		};
		const nodes = [nodeTagOpen, ...childrenLevel1, nodeTagClose];
		parent.children.splice(index, 1, ...nodes);
	}
}
/**
 * mdsvex preprocessor configured with the `treeVisitor` remark plugin and a
 * custom highlighter that emits `<CodeBlock>` / `<CodeBlockFw>` components.
 */
const _mdsvexPreprocess = mdsvex({
	remarkPlugins: [treeVisitor],
	extensions: ["svelte"],
	highlight: {
		/**
		 * Render a fenced code block as a Svelte component.
		 *
		 * A snippet containing a `===PT-TF-SPLIT===` or
		 * `===STRINGAPI-READINSTRUCTION-SPLIT===` separator becomes a two-group
		 * `<CodeBlockFw>`; anything else becomes a `<CodeBlock>`. The highlighted
		 * markup is built from the full snippet, while the `code` prop only keeps
		 * the input lines when the snippet looks like an interactive session.
		 * @param {string} code
		 * @param {string} lang
		 * @returns {string} Svelte markup
		 */
		highlighter(code, lang) {
			const REGEX_CODE_INPUT = /^(>>>\s|\.\.\.\s)/m;
			const REGEX_FRAMEWORKS_SPLIT = /\s*===(PT-TF|STRINGAPI-READINSTRUCTION)-SPLIT===\s*/gm;

			// Use the requested language when highlight.js knows it, else auto-detect.
			const toHighlightedHtml = (snippet) => {
				if (lang && hljs.getLanguage(lang)) {
					return hljs.highlight(lang, snippet, true).value;
				}
				return hljs.highlightAuto(snippet).value;
			};
			const toBase64 = (val) => btoa(encodeURIComponent(val));
			// The highlighted html ends up inside a JS template literal within a
			// Svelte expression, hence the escaping of \, `, } and $.
			const escapeForTemplate = (html) =>
				html.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/}/g, "\\}").replace(/\$/g, "\\$");
			// filter out outputs if the code was generated interactively
			// `>>> for i in range(5):` becomes `for i in range(5):`
			const keepInputLinesOnly = (snippet) => {
				if (!snippet.match(REGEX_CODE_INPUT)) {
					return snippet;
				}
				return snippet
					.split("\n")
					.filter((line) => line.match(REGEX_CODE_INPUT) || !line)
					.map((line) => line.replace(REGEX_CODE_INPUT, ""))
					.join("\n");
			};

			const source = renderSvelteChars(code);
			const separators = source.match(REGEX_FRAMEWORKS_SPLIT);

			if (!separators) {
				const highlighted = toHighlightedHtml(source);
				const inputOnly = keepInputLinesOnly(source);
				return `
<CodeBlock
code={\`${toBase64(inputOnly)}\`}
highlighted={\`${escapeForTemplate(highlighted)}\`}
wrap={${wrapCodeBlocks}}
/>`;
			}

			const isPtTf = separators[0].includes("PT-TF");
			// The separator has a capture group, so split() yields
			// [first snippet, captured name, second snippet, …].
			const segments = source.split(REGEX_FRAMEWORKS_SPLIT);
			const highlightedFirst = toHighlightedHtml(segments[0]);
			const highlightedSecond = toHighlightedHtml(segments[2]);
			const firstInputOnly = keepInputLinesOnly(segments[0]);
			const secondInputOnly = keepInputLinesOnly(segments[2]);
			return `
<CodeBlockFw
group1={{
id: '${isPtTf ? "pt" : "stringapi"}',
code: \`${toBase64(firstInputOnly)}\`,
highlighted: \`${escapeForTemplate(highlightedFirst)}\`
}}
group2={{
id: '${isPtTf ? "tf" : "readinstruction"}',
code: \`${toBase64(secondInputOnly)}\`,
highlighted: \`${escapeForTemplate(highlightedSecond)}\`
}}
wrap={${wrapCodeBlocks}}
/>`;
		},
	},
});
/**
 * Put every Svelte conditional tag (`{#if …}`, `{:else}`, `{/if}`) on its own
 * paragraph by surrounding it with blank lines.
 * Presumably this keeps the markdown parser from merging the tags into the
 * neighbouring text — TODO confirm.
 * @param {string} code
 * @returns {string}
 */
function escapeSvelteConditionals(code) {
	const isolate = (tag) => `\n\n${tag}\n\n`;
	return code
		.replace(/(\{#if[^}]+\})/g, (ifTag) => isolate(ifTag))
		.split("{:else}")
		.join(isolate("{:else}"))
		.split("{/if}")
		.join(isolate("{/if}"));
}