in arm64/arm64spec/spec.go [170:426]
// parsePage extracts the instruction described on page num of the manual.
//
// p is the page itself and f is the reader it was taken from; f is used to
// pull in page num+1 (and possibly num+2) when the description does not end
// on p. The returned name is the second word left on the page after footers
// and body text have been dropped, and table holds one Inst per encoding
// variant found (two when the syntax contains "{2}"). A page whose leading
// words do not match the instruction and section patterns, or that has too
// few words to be inspected, yields "", nil.
func parsePage(num int, p pdf.Page, f *pdf.Reader) (name string, table []Inst) {
	// words returns the words of one page. The Text slice is copied before it
	// is handed to findWords, so the content kept for readBitBox is a
	// separate slice.
	words := func(c pdf.Content) []pdf.Text {
		return findWords(append([]pdf.Text(nil), c.Text...))
	}
	// startsSection reports whether the third word of a page matches
	// sectionRE. Pages with fewer than three words never do.
	startsSection := func(w []pdf.Text) bool {
		return len(w) > 2 && sectionRE.MatchString(w[2].S)
	}
	// isVariant reports whether t is a "variant" heading that is not also an
	// "encoding" heading.
	isVariant := func(t pdf.Text) bool {
		return match(t, "Arial-BoldItalicMT", 9, "variant") && !match(t, "Arial-BoldItalicMT", 9, "encoding")
	}

	content := p.Content()
	text := words(content)
	// The length is checked first: text[1] and text[2] are read below.
	if len(text) < 3 || !startsSection(text) || !(instRE.MatchString(text[1].S) || instRE_A.MatchString(text[1].S)) {
		return "", nil
	}

	// The description is taken to continue on the next page unless the
	// "Assembler symbols" heading is already present on this one.
	crossTwoPage := true
	for _, t := range text {
		if match(t, "Arial,Bold", 10, "Assembler symbols") {
			crossTwoPage = false
			break
		}
	}

	// Pull in the next page unless it is empty or starts a new section.
	var ncontent pdf.Content
	npagebox := false
	noffset := ""
	crossThreePage := false
	if crossTwoPage {
		ncontent = f.Page(num + 1).Content()
		if next := words(ncontent); len(next) != 0 && !startsSection(next) {
			for _, t := range next {
				// Remember the offset heading whose bit box is drawn on
				// the second page.
				if match(t, "Arial,Bold", 10, "offset") {
					noffset = t.S
					npagebox = true
				}
				// NOTE(review): this flag is overwritten for every word,
				// so after the loop it only says whether the LAST word on
				// the page is the "Assembler symbols" heading. "Any word"
				// may have been intended; behaviour is kept unchanged
				// here - confirm before altering.
				crossThreePage = !match(t, "Arial,Bold", 10, "Assembler symbols")
				text = append(text, t)
			}
		}
	}
	// Same treatment for a third page.
	if crossThreePage {
		if next := words(f.Page(num + 2).Content()); len(next) != 0 && !startsSection(next) {
			text = append(text, next...)
		}
	}

	// Record the alias sentence and drop text that must be ignored. The
	// alias check runs before the skips, so it also sees skipped text.
	alias := ""
	kept := text[:0]
	for _, t := range text {
		if strings.Contains(t.S, "instruction is used by the alias") || strings.Contains(t.S, "instruction is an alias of") {
			// Keep everything up to and including the first ".".
			alias = strings.SplitAfter(t.S, ".")[0]
		}
		// Skip page footer.
		if match(t, "Arial-ItalicMT", 8, "") || match(t, "ArialMT", 8, "") {
			if debugPage > 0 {
				fmt.Println("==the skip page footer is:==", t)
			}
			continue
		}
		// Skip the body text.
		if match(t, "TimesNewRoman", 9, "") || match(t, "TimesNewRomanPS-ItalicMT", 9, "") {
			if debugPage > 0 {
				fmt.Println("==the skip body text is:==", t)
			}
			continue
		}
		kept = append(kept, t)
	}
	text = kept

	// The first remaining word must be the section title and the second one
	// is the instruction name, so at least two words are required.
	if len(text) < 2 || !sectionRE.MatchString(text[0].S) {
		return "", nil
	}
	name = text[1].S
	inst := Inst{
		Name:  name,
		Alias: alias,
	}
	text = text[2:]

	// Skip everything before the first bit-box label "31".
	k := 0
	for k < len(text) && !match(text[k], "Arial", 8, "31") {
		k++
	}
	// If the word just before the bit box is a bold heading, keep it: it is
	// used below as the offset label of that box.
	offsetMark := k > 0 && match(text[k-1], "Arial,Bold", 10, "")
	if offsetMark {
		text = text[k-1:]
	} else {
		text = text[k:]
	}

	// Encodings follow. Without offset headings the bit box is read only
	// once and reused for every variant.
	bitMark := false
	bits := ""
	for i := 0; i < len(text); {
		inst.Bits = ""
		offset := ""
		if offsetMark {
			// Advance to the next bit box; the word before it is its
			// offset label. Each offset has its own box, so force a re-read.
			for i < len(text) && !match(text[i], "Arial", 8, "") {
				i++
			}
			if i >= len(text) {
				break
			}
			if i > 0 {
				offset = text[i-1].S
			}
			bitMark = false
			bits = ""
		}
		if !bitMark {
			// Use the second page's content when that is where the box
			// for this offset was seen.
			box := content
			if npagebox && noffset == offset {
				box = ncontent
			}
			bits, i = readBitBox(name, box, text, i)
			bitMark = true

			// After the bits, look for a "then SEE" line in the code that
			// follows the next "encoding" heading.
			if i < len(text)-1 {
				m := i
				for m < len(text)-1 && !match(text[m], "Arial-BoldItalicMT", 9, "encoding") {
					m++
				}
				if m < len(text) && match(text[m], "Arial-BoldItalicMT", 9, "encoding") {
					for m++; m < len(text) && !match(text[m], "Arial,Bold", 10, "") && match(text[m], "LucidaSansTypewriteX", 6.48, ""); m++ {
						if strings.Contains(text[m].S, "then SEE") {
							inst.Code = text[m].S
							break
						}
					}
				}
			}
		}

		// Possible subarchitecture notes: one entry per "variant" heading.
		for i < len(text) {
			if !isVariant(text[i]) {
				i++
				continue
			}
			inst.Arch = strings.TrimSpace(offset + " " + text[i].S)
			i++

			// Encoding syntaxes. Lines containing "==" or "!=" are field
			// conditions (the last one wins); the first other line is the
			// syntax. Stopping there avoids picking up an "equivalent to"
			// syntax that may follow.
			sign := ""
			for i < len(text) && match(text[i], "LucidaSansTypewriteX", 6.48, "") {
				s := text[i].S
				i++
				if strings.Contains(s, "==") || strings.Contains(s, "!=") {
					sign = s
					continue
				}
				inst.Syntax = s
				break
			}

			// Substitute fixed field values from the condition into the
			// bits, e.g. sf==1. Conditions containing "!" and values
			// containing "x" are left alone.
			abits := bits
			for _, cond := range strings.Split(sign, "&&") {
				if !strings.Contains(cond, "==") || strings.Contains(cond, "!") {
					continue
				}
				parts := strings.Split(cond, "==")
				field := strings.TrimSpace(parts[0])
				value := strings.TrimSpace(parts[1])
				if strings.Contains(bits, field) && !strings.Contains(value, "x") {
					abits = strings.Replace(abits, field, value, -1)
				}
			}

			// A syntax containing "{2}" expands into two entries: Q=0
			// with "{2}" removed, and Q=1 with "{2}" replaced by "2".
			if strings.Contains(inst.Syntax, "{2}") {
				if !strings.Contains(abits, "Q") {
					fmt.Fprintf(os.Stderr, "instruction%s - syntax%s: is wrong!!\n", name, inst.Syntax)
				}
				syn := inst.Syntax
				inst.Bits = strings.Replace(abits, "Q", "0", -1)
				inst.Syntax = strings.Replace(syn, "{2}", "", -1)
				table = append(table, inst)
				inst.Bits = strings.Replace(abits, "Q", "1", -1)
				inst.Syntax = strings.Replace(syn, "{2}", "2", -1)
				table = append(table, inst)
			} else {
				inst.Bits = abits
				table = append(table, inst)
			}

			// With offset headings, consecutive variants share the current
			// bit box; anything else goes back to the outer loop.
			if !(offsetMark && i < len(text) && isVariant(text[i])) {
				break
			}
		}
	}
	return name, table
}