cqlprotodoc/spec/spec.go (187 lines of code) (raw):

/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Package spec implements parser for Cassandra protocol specification. package spec import ( "fmt" "github.com/mvdan/xurls" "regexp" "strings" ) type Document struct { License []Text Title string TOC []TOCEntry Sections []Section } type TOCEntry struct { Number string Title string } type Section struct { Number string Title string Body []Text } func (s Section) Empty() bool { return s.Number == "" && s.Title == "" && len(s.Body) == 0 } // Text token in section body. type Text struct { // Text that is displayed. Text string // SectionRef is the number of section this text links to. SectionRef string // Href is URL this text links to. Href string } var commentRegexp = regexp.MustCompile("^# ?(.*)$") var emptyRegexp = regexp.MustCompile(`^\s*$`) var titleRegexp = regexp.MustCompile(`^\s+(.*)\s*$`) var headingRegexp = regexp.MustCompile(`^(\s*)(\d+(?:\.\d+)*)\.? (.*)$`) const ( mhSpaces = 1 mhNumber = 2 mhTitle = 3 ) func Parse(data string) (Document, error) { lines := strings.Split(data, "\n") var license strings.Builder var doc Document l := 0 // license for l < len(lines) { m := commentRegexp.FindStringSubmatch(lines[l]) if len(m) != 2 { break } license.WriteString(m[1]) license.WriteString("\n") l++ } doc.License = parseBody(strings.Trim(license.String(), "\n ")) // empty lines for l < len(lines) && emptyRegexp.MatchString(lines[l]) { l++ } // title if l >= len(lines) { return Document{}, fmt.Errorf("missing title") } m := titleRegexp.FindStringSubmatch(lines[l]) if len(m) != 2 { return Document{}, fmt.Errorf("line %d: title expected on line", l) } doc.Title = m[1] l++ // empty lines for l < len(lines) && emptyRegexp.MatchString(lines[l]) { l++ } // table of contents header if lines[l] != "Table of Contents" { return Document{}, fmt.Errorf("line %d: expected table of contents", l) } l++ // empty lines for l < len(lines) && emptyRegexp.MatchString(lines[l]) { l++ } // toc entries for l < len(lines) { if emptyRegexp.MatchString(lines[l]) { // end of toc break } mh := headingRegexp.FindStringSubmatch(lines[l]) if len(mh) != 4 { return Document{}, fmt.Errorf("line %d: expected toc entry", l) } doc.TOC = append(doc.TOC, TOCEntry{ Number: mh[mhNumber], Title: mh[mhTitle], }) l++ } // empty lines for l < len(lines) && emptyRegexp.MatchString(lines[l]) { l++ } // content tocIdx := 0 var section Section var body []string for l < len(lines) { var sectionStart bool var newSection Section sectionStart, tocIdx, newSection = checkSectionStart(doc.TOC, tocIdx, lines[l]) if sectionStart { section.Body = parseBody(strings.Join(body, "\n")) doc.Sections = append(doc.Sections, section) section = newSection body = nil l++ // Eat empty lines for l < len(lines) && emptyRegexp.MatchString(lines[l]) { l++ } continue } body = append(body, lines[l]) l++ } if len(body) > 0 || !section.Empty() { section.Body = parseBody(strings.Join(body, "\n")) doc.Sections = append(doc.Sections, section) } return doc, nil } // checkSectionStart checks if the line starts a new section and returns a new tocIdx. func checkSectionStart(toc []TOCEntry, tocIdx int, line string) (bool, int, Section) { mh := headingRegexp.FindStringSubmatch(line) if len(mh) != 4 || tocIdx >= len(toc) { return false, tocIdx, Section{} } if mh[mhSpaces] == "" { if mh[mhNumber] == toc[tocIdx].Number { tocIdx++ } return true, tocIdx, Section{ Number: mh[mhNumber], Title: mh[mhTitle], } } t := strings.ToLower(mh[3]) for i := tocIdx; i < len(toc); i++ { t2 := strings.ToLower(toc[i].Title) if mh[mhNumber] == toc[i].Number && (strings.Contains(t, t2) || strings.Contains(t2, t)) { return true, i + 1, Section{ Number: mh[mhNumber], Title: mh[mhTitle], } } } return false, tocIdx, Section{} } var linkifyRegexp *regexp.Regexp var sectionSubexpIdx int var sectionsSubexpIdx int func init() { s := xurls.Strict.String() r := `(?:<URL>)|[Ss]ection (\d+(?:\.\d+)*)|[Ss]ections (\d+(?:\.\d+)*(?:(?:, (?:and )?| and )\d+(?:\.\d+)*)*)` linkifyRegexp = regexp.MustCompile(strings.ReplaceAll(r, "<URL>", s)) sectionSubexpIdx = xurls.Strict.NumSubexp()*2 + 2 sectionsSubexpIdx = (xurls.Strict.NumSubexp()+1)*2 + 2 } var sectionsSplitRegexp = regexp.MustCompile("(?:, (?:and )?| and )") func parseBody(s string) []Text { var body []Text lastIdx := 0 for _, m := range linkifyRegexp.FindAllStringSubmatchIndex(s, -1) { body = append(body, Text{Text: s[lastIdx:m[0]]}) switch { case m[sectionSubexpIdx] != -1: sectionNo := s[m[sectionSubexpIdx]:m[sectionSubexpIdx+1]] body = append(body, Text{Text: s[m[0]:m[1]], SectionRef: sectionNo}) case m[sectionsSubexpIdx] != -1: body = append(body, Text{Text: s[m[0]:m[sectionsSubexpIdx]]}) sections := s[m[sectionsSubexpIdx]:m[sectionsSubexpIdx+1]] lastIdx2 := 0 for _, m2 := range sectionsSplitRegexp.FindAllStringIndex(sections, -1) { sectionNo := sections[lastIdx2:m2[0]] body = append(body, Text{Text: sectionNo, SectionRef: sectionNo}) // separator body = append(body, Text{Text: sections[m2[0]:m2[1]]}) lastIdx2 = m2[1] } sectionNo := sections[lastIdx2:] body = append(body, Text{Text: sectionNo, SectionRef: sectionNo}) default: href := s[m[0]:m[1]] body = append(body, Text{Text: href, Href: href}) } lastIdx = m[1] } body = append(body, Text{Text: s[lastIdx:]}) return body }