internal/mdplain/mdplain.go (34 lines of code) (raw):

package mdplain import ( "regexp" ) type replacement struct { re *regexp.Regexp sub string } var replacements = []replacement{ // rules heavily inspired by: https://github.com/stiang/remove-markdown/blob/master/index.js // back references were removed // Header {regexp.MustCompile(`\n={2,}`), "\n"}, // Fenced codeblocks {regexp.MustCompile(`~{3}.*\n`), ""}, // Strikethrough {regexp.MustCompile("~~"), ""}, // Fenced codeblocks {regexp.MustCompile("`{3}.*\\n"), ""}, // Remove HTML tags {regexp.MustCompile(`<[^>]*>`), ""}, // Remove setext-style headers {regexp.MustCompile(`^[=\-]{2,}\s*$`), ""}, // Remove footnotes? {regexp.MustCompile(`\[\^.+?\](\: .*?$)?`), ""}, {regexp.MustCompile(`\s{0,2}\[.*?\]: .*?$`), ""}, // Remove images {regexp.MustCompile(`\!\[(.*?)\][\[\(].*?[\]\)]`), "$1"}, // Remove inline links {regexp.MustCompile(`\[(.*?)\][\[\(].*?[\]\)]`), "$1"}, // Remove blockquotes {regexp.MustCompile(`^\s{0,3}>\s?`), ""}, // Remove reference-style links? {regexp.MustCompile(`^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$`), ""}, // Remove atx-style headers {regexp.MustCompile(`^(\n)?\s{0,}#{1,6}\s+| {0,}(\n)?\s{0,}#{0,} {0,}(\n)?\s{0,}$`), "$1$2$3"}, // Remove emphasis (repeat the line to remove double emphasis) {regexp.MustCompile(`([*_]{1,3})([^\t\n\f\r *_].*?[^\t\n\f\r *_]{0,1})([*_]{1,3})`), "$2"}, {regexp.MustCompile(`([*_]{1,3})([^\t\n\f\r *_].*?[^\t\n\f\r *_]{0,1})([*_]{1,3})`), "$2"}, // Remove code blocks {regexp.MustCompile("(`{3,})(.*?)(`{3,})"), "$2"}, // Remove inline code {regexp.MustCompile("`(.+?)`"), "$1"}, // Replace two or more newlines with exactly two? Not entirely sure this belongs here... {regexp.MustCompile(`\n{2,}`), "\n\n"}, } // Clean runs a VERY naive cleanup of markdown text to make it more palatable as plain text. func Clean(markdown string) string { // TODO: maybe use https://github.com/russross/blackfriday/tree/v2, write custom renderer or // generate HTML then process that to plaintext using https://github.com/jaytaylor/html2text for _, r := range replacements { markdown = r.re.ReplaceAllString(markdown, r.sub) } return string(markdown) }