internal/parsers/markdown.go (205 lines of code) (raw):

package parsers import ( "errors" "fmt" "regexp" "strconv" "strings" "github.com/Azure/InnovationEngine/internal/logging" "github.com/yuin/goldmark" meta "github.com/yuin/goldmark-meta" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/text" ) var markdownParser = goldmark.New( goldmark.WithExtensions(extension.GFM, meta.New(meta.WithStoresInDocument())), goldmark.WithParserOptions( parser.WithAutoHeadingID(), parser.WithBlockParsers(), ), goldmark.WithRendererOptions( html.WithXHTML(), ), ) // Parses a markdown file into an AST representing the markdown document. func ParseMarkdownIntoAst(source []byte) ast.Node { document := markdownParser.Parser().Parse(text.NewReader(source)) return document } // Extract the metadata from the AST of a markdown document. func ExtractYamlMetadataFromAst(node ast.Node) map[string]interface{} { return node.OwnerDocument().Meta() } // The representation of an expected output block in a markdown file. This is // for scenarios that have expected output that should be validated against the // actual output. type ExpectedOutputBlock struct { Language string `json:"language"` Content string `json:"content"` ExpectedSimilarity float64 `json:"expectedSimilarityScore"` ExpectedRegex *regexp.Regexp `json:"expectedRegexPattern"` } // The representation of a code block in a markdown file. type CodeBlock struct { Language string `json:"language"` Content string `json:"content"` Header string `json:"header"` Description string `json:"description"` ExpectedOutput ExpectedOutputBlock `json:"resultBlock"` } // Assumes the title of the scenario is the first h1 header in the // markdown file. func ExtractScenarioTitleFromAst(node ast.Node, source []byte) (string, error) { header := "" ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) { if entering { switch n := node.(type) { case *ast.Heading: if n.Level == 1 { header = string(extractTextFromMarkdown(&n.BaseBlock, source)) return ast.WalkStop, nil } } } return ast.WalkContinue, nil }) if header == "" { return "", fmt.Errorf("no h1 header found to use as the scenario title") } return header, nil } var expectedSimilarityRegex = regexp.MustCompile( `<!--\s*expected_similarity=\s*(\d+\.?\d*)|"(.*)"\s*-->`, ) // Extracts the code blocks from a provided markdown AST that match the // languagesToExtract. func ExtractCodeBlocksFromAst( node ast.Node, source []byte, languagesToExtract []string, ) []CodeBlock { var lastHeader string var commands []CodeBlock var nextBlockIsExpectedOutput bool var lastExpectedSimilarityScore float64 var lastExpectedRegex *regexp.Regexp var lastNode ast.Node ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) { if entering { switch n := node.(type) { // Set the last header when we encounter a heading. case *ast.Heading: lastHeader = string(extractTextFromMarkdown(&n.BaseBlock, source)) lastNode = node case *ast.Paragraph: lastNode = node // Extract the code block if it matches the language. case *ast.HTMLBlock: content := extractTextFromMarkdown(&n.BaseBlock, source) matches := expectedSimilarityRegex.FindStringSubmatch(content) if len(matches) < 3 { break } match := matches[1] if match != "" { score, err := strconv.ParseFloat(match, 64) logging.GlobalLogger.Debugf("Simalrity score of %f found", score) if err != nil { return ast.WalkStop, err } lastExpectedSimilarityScore = score } else { match = matches[2] logging.GlobalLogger.Debugf("Regex %q found", match) if match == "" { return ast.WalkStop, errors.New("No regex found") } re, err := regexp.Compile(match) if err != nil { return ast.WalkStop, fmt.Errorf("Cannot compile the following regex: %q", match) } lastExpectedRegex = re } nextBlockIsExpectedOutput = true case *ast.FencedCodeBlock: language := string(n.Language((source))) content := extractTextFromMarkdown(&n.BaseBlock, source) description := "" if lastNode != nil { switch n := lastNode.(type) { case *ast.Paragraph: description = string(extractTextFromMarkdown(&n.BaseBlock, source)) default: logging.GlobalLogger.Warnf("The node before the codeblock `%s` is not a paragraph, it is a %s", content, n.Kind()) } } else { logging.GlobalLogger.Warnf("There are no markdown elements before the last codeblock `%s`", content) } lastNode = node for _, desiredLanguage := range languagesToExtract { if language == desiredLanguage { command := CodeBlock{ Language: language, Content: content, Header: lastHeader, Description: description, } commands = append(commands, command) break } else if nextBlockIsExpectedOutput { // Map the expected output to the last command. If there // are no commands, then we ignore the expected output. if len(commands) > 0 { expectedOutputBlock := ExpectedOutputBlock{ Language: language, Content: extractTextFromMarkdown(&n.BaseBlock, source), ExpectedSimilarity: lastExpectedSimilarityScore, ExpectedRegex: lastExpectedRegex, } commands[len(commands)-1].ExpectedOutput = expectedOutputBlock // Reset the expected output state. nextBlockIsExpectedOutput = false lastExpectedSimilarityScore = 0 lastExpectedRegex = nil } break } } } } return ast.WalkContinue, nil }) return commands } // This regex matches HTML comments within markdown blocks that contain // variables to use within the scenario. var variableCommentBlockRegex = regexp.MustCompile("(?s)<!--.*?```variables(.*?)```.*?") // Extracts the variables from a provided markdown AST. func ExtractScenarioVariablesFromAst(node ast.Node, source []byte) map[string]string { scenarioVariables := make(map[string]string) ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) { if entering && node.Kind() == ast.KindHTMLBlock { htmlNode := node.(*ast.HTMLBlock) blockContent := extractTextFromMarkdown(&htmlNode.BaseBlock, source) logging.GlobalLogger.Debugf("Found HTML block with the content: %s\n", blockContent) match := variableCommentBlockRegex.FindStringSubmatch(blockContent) // Extract the variables from the comment block. if len(match) > 1 { variables := convertScenarioVariablesToMap(match[1]) for key, value := range variables { scenarioVariables[key] = value } } } return ast.WalkContinue, nil }) return scenarioVariables } // Converts a string of shell variable exports into a map of key/value pairs. // I.E. `export FOO=bar\nexport BAZ=qux` becomes `{"FOO": "bar", "BAZ": "qux"}` func convertScenarioVariablesToMap(variableBlock string) map[string]string { variableMap := make(map[string]string) // Only process statements that begin with export. for _, variable := range strings.Split(variableBlock, "\n") { if strings.HasPrefix(variable, "export") { parts := strings.SplitN(variable, "=", 2) if len(parts) == 2 { key := strings.TrimPrefix(parts[0], "export ") value := parts[1] logging.GlobalLogger.Debugf("Found variable: %s=%s\n", key, value) variableMap[key] = value } } } return variableMap } // Extract the text from a code blocks base block and return it as a string. func extractTextFromMarkdown(baseBlock *ast.BaseBlock, source []byte) string { lines := baseBlock.Lines() var command strings.Builder for i := 0; i < lines.Len(); i++ { line := lines.At(i) command.WriteString(string(line.Value(source))) } return command.String() }