func readBitBox()

in ppc64/ppc64spec/spec.go [274:397]


// readBitBox reconstructs the instruction-encoding box described by text and
// the rectangles in content.
//
// text must start with the row of field names (recognized by font and size via
// match), immediately followed by the row of bit numbers, whose first entry is
// the string "0". All entries sharing the Y coordinate of the first entry of
// each row are consumed as part of that row.
//
// The box is then located from the thin rectangles in content.Rect: horizontal
// lines just below the bit numbers and just above the field names, and the
// vertical bars that run between them. For each pair of adjacent bars, the
// text lying between them is passed through findWords, the resulting strings
// are joined with "@", and a "|" is appended; the hard-coded special cases
// below show the resulting shape, e.g. "31@0|TO@6|...".
//
// It returns the encoding string and the index into text of the first entry
// after the two consumed rows. If the field row or bit-number row cannot be
// found, a diagnostic mentioning pageNum and headline is written to os.Stderr
// and ("", 0) is returned. If no fields are found between bars, a diagnostic
// is written and the empty string is returned together with the index.
func readBitBox(headline string, content pdf.Content, text []pdf.Text, pageNum int) (string, int) {
	// fields
	i := 0
	if len(text) == 0 || (!match(text[i], "Helvetica", 9, "") && !match(text[i], "Helvetica", 7.26, "") && !match(text[i], "Arial", 9, "") && !match(text[i], "Arial", 7.98, "") && !match(text[i], "Arial", 7.2, "")) {
		fmt.Fprintf(os.Stderr, "page %d: no bit fields for %q\n", pageNum, headline)
		if len(text) > 0 {
			fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[0])
		}
		return "", 0
	}
	sz := text[i].FontSize
	y2 := text[i].Y
	x2 := 0.0
	// Consume every entry on the field-name row, tracking its right edge.
	for i < len(text) && text[i].Y == y2 {
		if x2 < text[i].X+text[i].W {
			x2 = text[i].X + text[i].W
		}
		i++
	}
	// Raise the upper bound by half the font size so that the field names
	// themselves fall inside the (y1, y2) band used further down.
	y2 += sz / 2

	// bit numbers
	if i >= len(text) || text[i].S != "0" {
		if headline == "Transaction Abort Doubleword Conditional X-form" {
			// Split across the next page.
			return "31@0|TO@6|RA@11|RB@16|814@21|1@31|", i
		}
		if headline == "Add Scaled Immediate SCI8-form" {
			// Very strange fonts.
			return "06@0|RT@6|RA@11|8@16|Rc@20|F@21|SCL@22|UI8@24|", i
		}
		fmt.Fprintf(os.Stderr, "page %d: no bit numbers for %s\n", pageNum, headline)
		if i < len(text) {
			fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[i])
		}
		return "", 0
	}
	sz = text[i].FontSize
	y1 := text[i].Y
	x1 := text[i].X
	// Consume every entry on the bit-number row, widening the right edge if
	// this row extends further than the field-name row.
	for i < len(text) && text[i].Y == y1 {
		if x2 < text[i].X+text[i].W {
			x2 = text[i].X + text[i].W
		}
		i++
	}

	if debugPage > 0 {
		// The bit-number row may be the last thing in text, in which case
		// i == len(text) and text[i] must not be indexed.
		if i < len(text) {
			fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], text[i])
		} else {
			fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], "<end of text>")
		}
	}

	// Find lines (thin rectangles) separating bit fields.
	var bottom, top pdf.Rect
	const (
		yMargin = 0.25 * 72
		xMargin = 1 * 72
	)

	// isBoxEdge reports whether r is a thin horizontal rectangle in the same
	// column as the box (split at x = 306) that starts within xMargin to the
	// left of x1 and ends within xMargin to the right of x2.
	isBoxEdge := func(r pdf.Rect) bool {
		if (x1 < 306) != (r.Max.X < 306) {
			return false
		}
		return r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin
	}

	for _, r := range content.Rect {
		if !isBoxEdge(r) {
			continue
		}
		if y1-yMargin < r.Min.Y && r.Min.Y < y1 {
			bottom = r
		}
		if y2 < r.Min.Y && r.Min.Y < y2+8 {
			top = r
		}
	}

	if bottom.Min.X == 0 {
		// maybe bit numbers are outside box; see doze, nap, sleep, rvwinkle.
		for _, r := range content.Rect {
			if !isBoxEdge(r) {
				continue
			}
			if y1+sz/2 < r.Min.Y && r.Min.Y < y2 {
				bottom = r
			}
		}
	}

	if debugPage > 0 {
		fmt.Println("top", top, "bottom", bottom)
	}

	// Collect the thin vertical rectangles that span from bottom to top
	// (within ε) and sit in the same column as bottom.
	const ε = 0.1 * 72
	var bars []pdf.Rect
	for _, r := range content.Rect {
		if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε && (bottom.Min.X < 306) == (r.Max.X < 306) {
			bars = append(bars, r)
		}
	}
	sort.Sort(RectHorizontal(bars))

	// Emit one "|"-terminated entry per pair of adjacent bars. The loop
	// variables are deliberately distinct from i, x1 and x2 above: the outer
	// i is the value returned to the caller.
	out := ""
	for b := 0; b < len(bars)-1; b++ {
		var sub []pdf.Text
		left, right := bars[b].Min.X, bars[b+1].Min.X
		for _, t := range content.Text {
			// Classify each piece of text by a point near its center.
			tx := t.X + t.W/2
			ty := t.Y + t.FontSize/4
			if left < tx && tx < right && y1 < ty && ty < y2 {
				sub = append(sub, t)
			}
		}
		var str []string
		for _, t := range findWords(sub) {
			str = append(str, t.S)
		}
		out += strings.Join(str, "@") + "|"
	}

	if out == "" {
		fmt.Fprintf(os.Stderr, "page %d: no bit encodings for %s\n", pageNum, headline)
	}
	return out, i
}