library/markdown-parser/models/article.ts (127 lines of code) (raw):

/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import { decode } from 'html-entities'; import marked, { Token, Tokens } from 'marked'; import { Profile, resolveProfile } from './profile'; import { Configuration } from '../config'; import { getValidDate } from '../utils/date'; import { githubSlugger, parseMarkdownToHTML, parseMarkdownWithYAML } from '../utils/markdown'; import { pickRandom } from '../utils/pick-random'; import { generateUUID } from '../utils/uuid'; type ExtendToken = Token & { tokens: ExtendToken[] }; export type ExtendTokensList = ExtendToken[]; export interface TOC { depth: number; text: string; id: string | null; } /** * Raw data parsed from yaml & markdown */ export class Article { constructor( readonly name: string, readonly categories: string[], readonly authors: string[], readonly date: string, readonly content: string, readonly languages: string[] // 'zh' | 'en' ) {} } /** * Resolved article used to render in web */ export class ResolvedArticle { public pre?: Partial<ResolvedArticle>; public next?: Partial<ResolvedArticle>; constructor( readonly id: string, readonly name: string, readonly abstract: string, readonly thumbnail: string, readonly content: string, readonly categories: string[], readonly authors: Profile[], readonly toc: TOC[], readonly date: Date | null, readonly languages: string[] ) {} } export type BriefArticle = Omit<ResolvedArticle, 'content'>; /** * parse article from buffer with yaml & markdown * @param name * @param buffer */ export function parseArticleFromBuffer(name: string, buffer: Buffer): Article { try { const { authors, categories, date, __content, languages } = parseMarkdownWithYAML(buffer); const categoriesString = categories?.map((c: string | number) => c.toString()) || []; return new Article(name, categoriesString, authors || [], date, __content, languages); } catch (e) { console.log(e); throw new Error(name); } } /** * Correct and add more properties to article * @param article * @param profileCollection */ export function resolveArticle(article: Article, profileCollection: Profile[]): ResolvedArticle { const { name, content, categories, authors, date, languages } = article; checkArticleData(name, authors); const tokensList = new marked.Lexer().lex(content) as unknown as ExtendTokensList; const contentString = parseMarkdownToHTML(content); const toc = generateTOC(tokensList); const image = getThumbnail(tokensList) || pickRandom(Configuration.image.article); const id = generateUUID(name); const abstract = generateAbstract(tokensList, Configuration.article.abstract.minLength); const resolvedAuthors = (authors || []).map(author => resolveProfile(author, profileCollection)); const validDate = getValidDate(name, date); return new ResolvedArticle( id, name, abstract, image, contentString, categories, resolvedAuthors, toc, validDate, languages ); } function checkArticleData(name: string, authors: string[]): void { if (authors && !Array.isArray(authors)) { throw new Error(`${name}: authors should be array`); } } /** * generate abstract from markdown tokens, recursive when result less than minLength * @param tokensList * @param minLength */ function generateAbstract(tokensList: ExtendTokensList, minLength: number): string { let abstractContent = ''; tokensList.forEach(token => { if (abstractContent.length < minLength) { if (token.type === 'heading') { return; } else if (token.type === 'text') { abstractContent += decode(token.text); } else if (token.tokens) { abstractContent += generateAbstract(token.tokens, minLength); } } }); return abstractContent; } /** * generate toc from markdown tokens, recursive when result less than maxDepth * @param tokensList */ export function generateTOC(tokensList: ExtendTokensList): TOC[] { githubSlugger.reset(); return ( tokensList.filter(token => token.type === 'heading' && token.depth > 1 && token.depth <= 3) as Tokens.Heading[] ).map(item => { return { depth: item.depth, text: item.text, id: githubSlugger.slug(item.text) }; }); } /** * get first image url from markdown tokens as thumbnail * @param tokensList */ function getThumbnail(tokensList: ExtendTokensList): string | null { let imageUrl: string | null = null; tokensList.forEach(token => { if (!imageUrl) { if (token.type === 'image' && token.href) { imageUrl = token.href; } else if (token.tokens) { imageUrl = getThumbnail(token.tokens); } } }); return imageUrl; } /** * Build list without content * @param articles */ export function buildBriefArticleList(articles: ResolvedArticle[]): BriefArticle[] { return articles.map(article => { const { ...rest } = article; return rest; }); }