func parsePage()

in arm64/arm64spec/spec.go [170:426]


// parsePage parses page p (page number num of f) as the first page of an
// instruction description. It returns the instruction name and one Inst for
// every encoding variant it finds.
//
// A page is accepted only if, after findWords, its second word matches
// instRE or instRE_A and its third word matches sectionRE; otherwise
// parsePage returns "", nil. When the page carries no "Assembler symbols"
// heading, the words of page num+1 are appended, and if that page carries no
// such heading either, the words of page num+2 are appended as well. A
// following page is skipped when it is empty or when its third word matches
// sectionRE.
//
// Pages with fewer words than the checks need are rejected (first page) or
// treated as plain continuation text (following pages) instead of causing an
// index-out-of-range panic.
func parsePage(num int, p pdf.Page, f *pdf.Reader) (name string, table []Inst) {
	content := p.Content()
	text := findWords(append([]pdf.Text(nil), content.Text...))

	// The length check must come first: the regexps look at text[1] and text[2].
	if len(text) < 3 ||
		!(instRE.MatchString(text[1].S) || instRE_A.MatchString(text[1].S)) ||
		!sectionRE.MatchString(text[2].S) {
		return "", nil
	}

	// The description continues on the next page unless this page already
	// holds the "Assembler symbols" heading.
	crossTwoPage := true
	for _, t := range text {
		if match(t, "Arial,Bold", 10, "Assembler symbols") {
			crossTwoPage = false
			break
		}
	}

	// Pull in the words of the next page when the description crosses pages.
	var nextContent pdf.Content
	nextPageBox := false
	crossThreePage := false
	nextOffset := ""
	if crossTwoPage {
		nextContent = f.Page(num + 1).Content()
		nextText := findWords(append([]pdf.Text(nil), nextContent.Text...))
		startsSection := len(nextText) > 2 && sectionRE.MatchString(nextText[2].S)
		if len(nextText) > 0 && !startsSection {
			// The description reaches a third page unless the heading shows
			// up anywhere on the second page. The flag is only ever cleared
			// inside the loop so that later words cannot turn it back on.
			crossThreePage = true
			for _, t := range nextText {
				if match(t, "Arial,Bold", 10, "offset") {
					// Remember the last offset heading of the next page; a
					// bit box under that heading is read from nextContent.
					nextOffset = t.S
					nextPageBox = true
				}
				if match(t, "Arial,Bold", 10, "Assembler symbols") {
					crossThreePage = false
				}
				text = append(text, t)
			}
		}
	}
	if crossThreePage {
		thirdContent := f.Page(num + 2).Content()
		thirdText := findWords(append([]pdf.Text(nil), thirdContent.Text...))
		startsSection := len(thirdText) > 2 && sectionRE.MatchString(thirdText[2].S)
		if len(thirdText) > 0 && !startsSection {
			text = append(text, thirdText...)
		}
	}

	// Get alias and remove text we should ignore. The filter reuses the
	// backing array of text; out never grows past the read position.
	out := text[:0]
	alias := ""
	for _, t := range text {
		if strings.Contains(t.S, "instruction is used by the alias") || strings.Contains(t.S, "instruction is an alias of") {
			// Keep the text up to and including the first ".".
			alias = strings.SplitAfter(t.S, ".")[0]
		}
		// Skip page footer.
		if match(t, "Arial-ItalicMT", 8, "") || match(t, "ArialMT", 8, "") {
			if debugPage > 0 {
				fmt.Println("==the skip page footer is:==", t)
			}
			continue
		}
		// Skip the body text.
		if match(t, "TimesNewRoman", 9, "") || match(t, "TimesNewRomanPS-ItalicMT", 9, "") {
			if debugPage > 0 {
				fmt.Println("==the skip body text is:==", t)
			}
			continue
		}
		out = append(out, t)
	}
	text = out

	// Page header must be child title, followed by the instruction name.
	if len(text) < 2 || !sectionRE.MatchString(text[0].S) {
		return "", nil
	}

	name = text[1].S
	inst := Inst{
		Name:  name,
		Alias: alias,
	}
	text = text[2:]

	// Skip everything before the first bit box, which starts at the "31"
	// column label.
	k := 0
	for k < len(text) && !match(text[k], "Arial", 8, "31") {
		k++
	}
	// A bold heading directly in front of the bit box is kept as the
	// first element so the encoding loop can pick it up as the offset.
	offsetMark := k > 0 && match(text[k-1], "Arial,Bold", 10, "")
	if offsetMark {
		text = text[k-1:]
	} else {
		text = text[k:]
	}

	// isVariant reports whether t is a variant heading that is not also an
	// encoding heading.
	isVariant := func(t pdf.Text) bool {
		return match(t, "Arial-BoldItalicMT", 9, "variant") && !match(t, "Arial-BoldItalicMT", 9, "encoding")
	}

	// Encodings follow.
	bitMark := false
	bits := ""
	for i := 0; i < len(text); {
		inst.Bits = ""
		offset := ""
		if offsetMark {
			// With offset headings, each pass re-reads a bit box: advance to
			// the next bit-box label and take the word before it as offset.
			for i < len(text) && !match(text[i], "Arial", 8, "") {
				i++
			}
			if i >= len(text) {
				break
			}
			if i > 0 {
				offset = text[i-1].S
			}
			bitMark = false
		}
		// Without offset headings the bits are read only one time.
		if !bitMark {
			if nextPageBox && nextOffset == offset {
				bits, i = readBitBox(name, nextContent, text, i)
			} else {
				bits, i = readBitBox(name, content, text, i)
			}
			bitMark = true
			// Every time, get "then SEE" after get bits: look for the next
			// encoding heading and scan the typewriter text right after it.
			if i < len(text)-1 {
				m := i
				for m < len(text)-1 && !match(text[m], "Arial-BoldItalicMT", 9, "encoding") {
					m++
				}
				// m <= len(text)-1 here, so text[m] is in range.
				if match(text[m], "Arial-BoldItalicMT", 9, "encoding") {
					m++
					for m < len(text) && !match(text[m], "Arial,Bold", 10, "") && match(text[m], "LucidaSansTypewriteX", 6.48, "") {
						if strings.Contains(text[m].S, "then SEE") {
							inst.Code = text[m].S
							break
						}
						m++
					}
				}
			}
		}

		// Possible subarchitecture notes.
		for i < len(text) {
			if !isVariant(text[i]) {
				i++
				continue
			}
			inst.Arch = strings.TrimSpace(offset + " " + text[i].S)
			i++

			// Encoding syntaxes: remember the last field condition and take
			// the first other typewriter line as the syntax. Stopping there
			// avoids picking up an "equivalent to" syntax.
			sign := ""
			for i < len(text) && match(text[i], "LucidaSansTypewriteX", 6.48, "") {
				s := text[i].S
				i++
				if strings.Contains(s, "==") || strings.Contains(s, "!=") {
					sign = s
					continue
				}
				inst.Syntax = s
				break
			}

			// Analyse and replace some bits value, e.g. sf == 1. Only pure
			// equality terms with a value free of "x" are substituted. A
			// condition without "&&" is handled as a single term.
			abits := bits
			for _, term := range strings.Split(sign, "&&") {
				if !strings.Contains(term, "==") || strings.Contains(term, "!") {
					continue
				}
				kv := strings.Split(term, "==")
				field := strings.TrimSpace(kv[0])
				value := strings.TrimSpace(kv[1])
				// An empty field name would make Replace insert value
				// between every character of abits.
				if field != "" && strings.Contains(bits, field) && !strings.Contains(value, "x") {
					abits = strings.Replace(abits, field, value, -1)
				}
			}

			// Deal with syntax contains {2}: emit one entry with Q=0 and the
			// marker removed, and one with Q=1 and the marker replaced by "2".
			if strings.Contains(inst.Syntax, "{2}") {
				if !strings.Contains(abits, "Q") {
					fmt.Fprintf(os.Stderr, "instruction%s - syntax%s: is wrong!!\n", name, inst.Syntax)
				}
				syn := inst.Syntax
				inst.Bits = strings.Replace(abits, "Q", "0", -1)
				inst.Syntax = strings.Replace(syn, "{2}", "", -1)
				table = append(table, inst)
				inst.Bits = strings.Replace(abits, "Q", "1", -1)
				inst.Syntax = strings.Replace(syn, "{2}", "2", -1)
				table = append(table, inst)
			} else {
				inst.Bits = abits
				table = append(table, inst)
			}

			// With offset headings, a directly following variant shares the
			// bit box just read; anything else goes back to the outer loop.
			if offsetMark && i < len(text) && isVariant(text[i]) {
				continue
			}
			break
		}
	}
	return name, table
}