in ppc64/ppc64spec/spec.go [274:397]
// readBitBox extracts the encoding (bit field) box for the instruction named
// by headline.
//
// text is expected to begin with the row of field contents, immediately
// followed by the row of bit numbers whose first entry is "0". content
// supplies the page's rectangles (used to locate the box outline and the
// vertical bars separating fields) and the full page text (used to collect the
// words that fall inside each field).
//
// The result is a string with one "words|" segment per field, where the words
// found between two adjacent bars are joined with "@" (for example
// "31@0|TO@6|"), together with the number of entries of text that were
// consumed. If the field row or the bit-number row cannot be recognized, a
// diagnostic is written to os.Stderr and ("", 0) is returned. pageNum is used
// only in diagnostics.
func readBitBox(headline string, content pdf.Content, text []pdf.Text, pageNum int) (string, int) {
	// fields
	i := 0
	if len(text) == 0 || (!match(text[i], "Helvetica", 9, "") && !match(text[i], "Helvetica", 7.26, "") && !match(text[i], "Arial", 9, "") && !match(text[i], "Arial", 7.98, "") && !match(text[i], "Arial", 7.2, "")) {
		fmt.Fprintf(os.Stderr, "page %d: no bit fields for %q\n", pageNum, headline)
		if len(text) > 0 {
			fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[0])
		}
		return "", 0
	}

	// Consume every text item on the same baseline as the first field,
	// tracking the rightmost extent seen so far.
	sz := text[i].FontSize
	y2 := text[i].Y
	x2 := 0.0
	for i < len(text) && text[i].Y == y2 {
		if x2 < text[i].X+text[i].W {
			x2 = text[i].X + text[i].W
		}
		i++
	}
	y2 += sz / 2

	// bit numbers
	if i >= len(text) || text[i].S != "0" {
		if headline == "Transaction Abort Doubleword Conditional X-form" {
			// Split across the next page.
			return "31@0|TO@6|RA@11|RB@16|814@21|1@31|", i
		}
		if headline == "Add Scaled Immediate SCI8-form" {
			// Very strange fonts.
			return "06@0|RT@6|RA@11|8@16|Rc@20|F@21|SCL@22|UI8@24|", i
		}
		fmt.Fprintf(os.Stderr, "page %d: no bit numbers for %s\n", pageNum, headline)
		if i < len(text) {
			fmt.Fprintf(os.Stderr, "\tlast text: %v\n", text[i])
		}
		return "", 0
	}
	sz = text[i].FontSize
	y1 := text[i].Y
	x1 := text[i].X
	for i < len(text) && text[i].Y == y1 {
		if x2 < text[i].X+text[i].W {
			x2 = text[i].X + text[i].W
		}
		i++
	}
	if debugPage > 0 {
		// The bit-number row may be the last text available, in which case
		// i == len(text) and there is no following item to print.
		if i < len(text) {
			fmt.Println("encoding box", x1, y1, x2, y2, i, text[0], text[i])
		} else {
			fmt.Println("encoding box", x1, y1, x2, y2, i, text[0])
		}
	}

	// Find lines (thin rectangles) separating bit fields.
	var bottom, top pdf.Rect
	const (
		// NOTE(review): presumably PDF points (72 per inch), i.e. a quarter
		// inch and one inch of slack — confirm.
		yMargin = 0.25 * 72
		xMargin = 1 * 72
	)
	// isBoxLine reports whether r is a thin horizontal rectangle in the same
	// column as the bit numbers that starts just left of x1 and ends just
	// right of x2.
	// NOTE(review): 306 looks like the horizontal midpoint of a 612pt-wide
	// page, used as the column divider — confirm.
	isBoxLine := func(r pdf.Rect) bool {
		// Only consider lines in the same column.
		if (x1 < 306) != (r.Max.X < 306) {
			return false
		}
		return r.Max.Y-r.Min.Y < 2 && x1-xMargin < r.Min.X && r.Min.X < x1 && x2 < r.Max.X && r.Max.X < x2+xMargin
	}
	for _, r := range content.Rect {
		if !isBoxLine(r) {
			continue
		}
		if y1-yMargin < r.Min.Y && r.Min.Y < y1 {
			bottom = r
		}
		if y2 < r.Min.Y && r.Min.Y < y2+8 {
			top = r
		}
	}
	if bottom.Min.X == 0 {
		// maybe bit numbers are outside box; see doze, nap, sleep, rvwinkle.
		for _, r := range content.Rect {
			if !isBoxLine(r) {
				continue
			}
			if y1+sz/2 < r.Min.Y && r.Min.Y < y2 {
				bottom = r
			}
		}
	}
	if debugPage > 0 {
		fmt.Println("top", top, "bottom", bottom)
	}

	// Collect the thin vertical rectangles that run from the bottom line to
	// the top line (within ε) in the same column; these delimit the fields.
	const ε = 0.1 * 72
	var bars []pdf.Rect
	for _, r := range content.Rect {
		if r.Max.X-r.Min.X < 2 && math.Abs(r.Min.Y-bottom.Min.Y) < ε && math.Abs(r.Max.Y-top.Min.Y) < ε && (bottom.Min.X < 306) == (r.Max.X < 306) {
			bars = append(bars, r)
		}
	}
	sort.Sort(RectHorizontal(bars))

	// For each pair of adjacent bars, gather the page text whose reference
	// point lies between them horizontally and between the bit-number row and
	// the top of the field row vertically.
	var sb strings.Builder
	for b := 0; b < len(bars)-1; b++ {
		var sub []pdf.Text
		left, right := bars[b].Min.X, bars[b+1].Min.X
		for _, t := range content.Text {
			tx := t.X + t.W/2
			ty := t.Y + t.FontSize/4
			if left < tx && tx < right && y1 < ty && ty < y2 {
				sub = append(sub, t)
			}
		}
		var str []string
		for _, t := range findWords(sub) {
			str = append(str, t.S)
		}
		sb.WriteString(strings.Join(str, "@"))
		sb.WriteString("|")
	}
	out := sb.String()
	if out == "" {
		fmt.Fprintf(os.Stderr, "page %d: no bit encodings for %s\n", pageNum, headline)
	}
	return out, i
}