website/plugins/image-ssr-plugin.js (167 lines of code) (raw):
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
const path = require("path");
const fs = require("fs-extra");
const axios = require("axios");
const { createHash } = require("crypto");
const cheerio = require("cheerio");
const url = require("url");
module.exports = function (context) {
const processedImages = new Map();
/**
 * Builds a deterministic local filename for an external image URL.
 *
 * The name is the MD5 hex digest of the full URL followed by an extension:
 * ".svg" for badge-style URLs (shields.io / GitHub Actions), otherwise the
 * extension found in the URL path when it is a plain alphanumeric suffix,
 * otherwise ".jpg".
 *
 * @param {string} imageUrl - URL of the remote image.
 * @returns {string} Filename of the form `<md5-hex><ext>`.
 */
function getImageFilename(imageUrl) {
  // MD5 only provides a stable, collision-unlikely name here; it is not a
  // security measure.
  const hash = createHash("md5").update(imageUrl).digest("hex");

  // Badge endpoints are treated as SVG regardless of what their path looks
  // like (e.g. ".../badge/v1.2.3-blue" has no real file extension).
  const isBadgeUrl =
    imageUrl.includes("img.shields.io") ||
    imageUrl.includes("actions?query") ||
    imageUrl.includes("github/actions/workflow");
  if (isBadgeUrl) {
    return `${hash}.svg`;
  }

  let pathname = "";
  try {
    // The base is only consulted for relative inputs, so those keep yielding
    // their path extension; absolute URLs ignore it.
    pathname = new URL(imageUrl, "http://localhost").pathname;
  } catch (error) {
    // Unparseable URL: intentionally fall through to the default extension.
    pathname = "";
  }

  // Only accept simple extensions. Anything containing "%", ";", "$" etc.
  // would produce a filename whose URL form does not round-trip (for example
  // "%20" is decoded by the server and the file is then not found).
  const pathExt = path.extname(pathname);
  const ext = /^\.[A-Za-z0-9]+$/.test(pathExt) ? pathExt : ".jpg";
  return `${hash}${ext}`;
}
/**
 * Downloads an external image into `<buildDir>/img/external` and returns the
 * site-relative URL (`/img/external/<filename>`) that replaces the original.
 *
 * Results are memoised in `processedImages`. While a download is running the
 * map holds its promise, so concurrent requests for the same URL share one
 * HTTP request and one file write instead of racing each other. Once the
 * download settles, the entry becomes the local URL string on success and is
 * removed on failure, so a later call may retry.
 *
 * This function does not reject: on any error it logs the problem and
 * resolves to the original `imageUrl`.
 *
 * @param {string} imageUrl - URL of the remote image.
 * @param {string} buildDir - Root of the build output directory.
 * @returns {Promise<string>} Local URL on success, `imageUrl` on failure.
 */
async function downloadImage(imageUrl, buildDir) {
  // A hit is either a finished local URL or the promise of an in-flight
  // download; returning a promise from an async function resolves through it.
  if (processedImages.has(imageUrl)) {
    return processedImages.get(imageUrl);
  }

  const pendingDownload = (async () => {
    try {
      const filename = getImageFilename(imageUrl);
      const buildImagesDir = path.join(buildDir, "img/external");
      const buildOutputPath = path.join(buildImagesDir, filename);
      fs.ensureDirSync(buildImagesDir);

      // Skip the network round trip when the file is already on disk.
      if (!fs.existsSync(buildOutputPath)) {
        console.log(`Downloading image: ${imageUrl}`);
        const response = await axios({
          url: imageUrl,
          responseType: "arraybuffer",
          timeout: 20000,
          headers: {
            "User-Agent":
              "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            Accept: "image/webp,image/apng,image/*,*/*;q=0.8",
          },
          maxRedirects: 5,
          // Treat every status below 400 as success.
          validateStatus: (status) => status < 400,
        });
        await fs.writeFile(buildOutputPath, response.data);
      }

      return `/img/external/${filename}`;
    } catch (error) {
      console.error(`Error downloading image ${imageUrl}: ${error.message}`);
      // Resolving to the untouched URL signals "leave the reference as is".
      return imageUrl;
    }
  })();

  // Publish the in-flight promise before yielding so concurrent callers
  // join this download instead of starting their own.
  processedImages.set(imageUrl, pendingDownload);

  const resolvedUrl = await pendingDownload;
  if (resolvedUrl === imageUrl) {
    // Failure: forget the attempt so it is not counted as processed and a
    // later call can retry.
    processedImages.delete(imageUrl);
  } else {
    processedImages.set(imageUrl, resolvedUrl);
  }
  return resolvedUrl;
}
/**
 * Rewrites external image URLs found inside the JavaScript files under
 * `outDir` so they point at locally downloaded copies.
 *
 * Every `.js` file below `outDir` is scanned for single- or double-quoted
 * string literals that look like image URLs (common image extensions,
 * shields.io badges, GitHub Actions workflow badges). Each URL is passed to
 * `downloadImage`; when that yields a different (local) URL, every occurrence
 * of the quoted literal in the file is replaced. Files without any successful
 * replacement are not rewritten.
 *
 * @param {string} outDir - Root of the build output directory.
 * @returns {Promise<void>}
 */
async function processJSFiles(outDir) {
  console.log("Processing JS files for external images...");

  // Quoted-literal patterns; capture group 1 is the URL without its quotes.
  // A URL may match more than one pattern (e.g. a shields.io ".svg" URL).
  const urlPatterns = [
    /"(https?:\/\/[^"]+\.(png|jpg|jpeg|gif|svg|webp))"/g,
    /"(https?:\/\/img\.shields\.io\/[^"]+)"/g,
    /"(https?:\/\/github\.com\/[^"]+\/actions\/workflow[^"]+)"/g,
    /'(https?:\/\/[^']+\.(png|jpg|jpeg|gif|svg|webp))'/g,
    /'(https?:\/\/img\.shields\.io\/[^']+)'/g,
    /'(https?:\/\/github\.com\/[^']+\/actions\/workflow[^']+)'/g,
  ];

  const jsFiles = [];

  // Recursively collects the paths of all ".js" files below `dir`.
  async function findJSFiles(dir) {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        await findJSFiles(fullPath);
      } else if (entry.name.endsWith(".js")) {
        jsFiles.push(fullPath);
      }
    }
  }

  await findJSFiles(outDir);

  for (const jsFile of jsFiles) {
    const content = await fs.readFile(jsFile, "utf8");

    // Quoted literal -> rewritten quoted literal. Keyed by the literal so a
    // URL matched by several patterns, or occurring several times, is
    // handled exactly once.
    const replacements = new Map();

    for (const pattern of urlPatterns) {
      for (const match of content.matchAll(pattern)) {
        const literal = match[0];
        const imageUrl = match[1];
        if (!imageUrl || replacements.has(literal)) continue;

        try {
          const localUrl = await downloadImage(imageUrl, outDir);
          if (localUrl !== imageUrl) {
            // Function replacer: the local URL is inserted verbatim, without
            // "$&"/"$1"-style replacement patterns being interpreted.
            replacements.set(
              literal,
              literal.replace(imageUrl, () => localUrl),
            );
          }
        } catch (error) {
          console.error(`Error processing URL in JS file: ${error.message}`);
        }
      }
    }

    if (replacements.size === 0) continue;

    // Apply replacements from longest to shortest to avoid partial replacements
    const ordered = [...replacements].sort(
      ([literalA], [literalB]) => literalB.length - literalA.length,
    );

    let newContent = content;
    for (const [literal, rewritten] of ordered) {
      // split/join replaces every occurrence of the exact literal text.
      newContent = newContent.split(literal).join(rewritten);
    }

    await fs.writeFile(jsFile, newContent);
  }
}
return {
name: "docusaurus-ssr-image-plugin",
async postBuild({ outDir }) {
console.log("Processing HTML files for external images...");
const htmlFiles = [];
async function findHtmlFiles(dir) {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
await findHtmlFiles(fullPath);
} else if (entry.name.endsWith(".html")) {
htmlFiles.push(fullPath);
}
}
}
await findHtmlFiles(outDir);
for (const htmlFile of htmlFiles) {
const html = await fs.readFile(htmlFile, "utf8");
let $ = cheerio.load(html);
let modified = false;
const externalImages = $("img").filter((_, el) => {
const src = $(el).attr("src");
return src && src.startsWith("http");
});
if (externalImages.length === 0) continue;
const downloadPromises = [];
externalImages.each((_, img) => {
const element = $(img);
const imageUrl = element.attr("src");
if (!imageUrl || !imageUrl.startsWith("http")) return;
downloadPromises.push(
downloadImage(imageUrl, outDir)
.then((localUrl) => {
if (localUrl !== imageUrl) {
element.attr("src", localUrl);
modified = true;
}
})
.catch(() => {}),
);
});
await Promise.all(downloadPromises);
if (modified) {
await fs.writeFile(htmlFile, $.html());
}
}
// Process JS files to update image references in bundled JavaScript
await processJSFiles(outDir);
console.log(`Processed ${processedImages.size} external images`);
},
};
};