in x86/x86spec/cleanup.go [820:1139]
// cleanup normalizes the instruction entries in insts and returns the
// cleaned list. The returned slice reuses the backing array of insts, and
// the instructions themselves are modified in place.
//
// The work happens in several passes:
//
//  1. Per-instruction cleanup: record the original position in inst.seq,
//     drop the second copy of each duplicated "JZ rel..." entry, normalize
//     the syntax, opcode and encoding details, derive the read/write action
//     string, add the modrm_regonly / modrm_memonly tags, and append
//     missing opcode suffixes (" /r", " cw", " cd", " cm"). Inconsistencies
//     that cannot be repaired are reported on standard error.
//  2. After sorting by opcode, tag pairs that differ only in operand size
//     with operand16 / operand32.
//  3. Tag pseudo-ops (entries subsumed by more general ones), including the
//     REX and REX.W variants (pseudo64, ignoreREXW).
//  4. Undo the temporary "XX " opcode prefix given to half of the XCHG
//     forms, apply the manual fixup table, and sort each tag list.
//  5. Restore the original order (by seq) and append extraInsts; when
//     onlySomePages is set, only the extra instructions whose op was seen
//     in insts are appended.
func cleanup(insts []*instruction) []*instruction {
	var haveOp map[string]bool
	if onlySomePages {
		haveOp = map[string]bool{}
	}

	// Intel CMPXCHG16B and CMPXCHG8B have surprise " m64" or " m128" at end of encoding.
	// The list does not depend on the instruction, so build it once.
	surprises := []string{
		" m64",
		" m128",
	}

	// Clean individual instruction encodings and opcode sequences.
	sawJZ := map[string]bool{}
	out := insts[:0] // filter in place, reusing the backing array of insts
	for seq, inst := range insts {
		inst.seq = seq

		// There are two copies each of JZ rel16 and JZ rel32. Delete the second.
		if strings.HasPrefix(inst.syntax, "JZ rel") {
			if sawJZ[inst.syntax] {
				continue
			}
			sawJZ[inst.syntax] = true
		}
		out = append(out, inst)

		// Strip the surprise operand-size suffix from the opcode when the
		// syntax ends with the same text.
		for _, s := range surprises {
			if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) {
				inst.opcode = strings.TrimSuffix(inst.opcode, s)
			}
		}

		// Drop footnote markers ("*") from the op name.
		op, args := splitSyntax(inst.syntax)
		op = strings.TrimRight(op, "*")
		inst.syntax = joinSyntax(op, args)

		// Check argument names in syntax against encoding details.
		// An entry in the encodings table overrides the parsed details.
		if enc, ok := encodings[inst.syntax]; ok {
			inst.args = enc
		}
		if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" {
			// The encoding details are missing only a trailing imm8; add it.
			fixed := make([]string, len(args))
			copy(fixed, inst.args)
			fixed[len(args)-1] = "imm8"
			inst.args = fixed
		} else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" {
			// A lone "NA" encoding detail stands for "no arguments".
			inst.args = []string{}
		} else if len(args) != len(inst.args) {
			fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; "))
			inst.syntax = joinSyntax(op, args)
			continue
		}

		var action []string
		for i, arg := range args {
			arg = strings.TrimSpace(arg)
			arg = strings.TrimRight(arg, "*")
			if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") {
				arg = "r32" + strings.TrimPrefix(arg, "reg")
			}

			// Normalize spelling variations in the encoding detail.
			enc := inst.args[i]
			enc = strings.TrimSpace(enc)
			switch {
			case strings.HasSuffix(enc, " (r))"):
				enc = strings.TrimSuffix(enc, ")")
			case strings.HasSuffix(enc, " (R)"):
				enc = strings.TrimSuffix(enc, " (R)") + " (r)"
			case strings.HasSuffix(enc, " (W)"):
				enc = strings.TrimSuffix(enc, " (W)") + " (w)"
			case strings.HasSuffix(enc, " (r,w)"):
				enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)"
			case enc == "Imm8":
				enc = "imm8"
			case enc == "imm8/26/32":
				enc = "imm8/16/32"
			case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index":
				enc = "vsib (r)"
			}
			inst.args[i] = enc

			// Split the r/w annotation off the encoding detail and record
			// it as this argument's action.
			switch {
			case strings.HasSuffix(enc, " (r)"):
				action = append(action, "r")
				enc = strings.TrimSuffix(enc, " (r)")
			case strings.HasSuffix(enc, " (w)"):
				action = append(action, "w")
				enc = strings.TrimSuffix(enc, " (w)")
			case strings.HasSuffix(enc, " (r, w)"):
				action = append(action, "rw")
				enc = strings.TrimSuffix(enc, " (r, w)")
			case strings.HasPrefix(enc, "imm"), enc == "Offset", enc == "iw", arg == "1", arg == "0", arg == "3":
				// Immediates, offsets and literal constants are only read.
				action = append(action, "r")
			case i < len(opAction[op]):
				// Fall back to the per-op action table.
				action = append(action, opAction[op][i])
			default:
				fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg)
				action = append(action, "?")
			}

			// Per-instruction and table-driven argument name corrections.
			if arg == "mem" && op == "LDDQU" {
				arg = "m128"
			}
			if arg == "reg" && op == "LAR" {
				arg = "r32"
			}
			if actual := encodeReplace[[2]string{arg, enc}]; actual != "" {
				arg = actual
			}

			// A register-only or memory-only argument encoded in ModRM:r/m
			// restricts which ModRM forms are valid; record that as a tag.
			if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" {
				addTag(inst, "modrm_regonly")
				arg = "rmr" + arg[1:]
			}
			if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" {
				addTag(inst, "modrm_regonly")
			}
			if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" {
				addTag(inst, "modrm_memonly")
			}
			if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") {
				arg = "rmr64"
				addTag(inst, "modrm_regonly")
			}
			if arg == "CR8" {
				enc = ""
			}

			if !encodeOK[[2]string{arg, enc}] {
				// The second line of the message is formatted as a table
				// entry for the {arg, enc} pair.
				fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc)
			}
			args[i] = arg

			// Intel SETcc and others are missing the /r.
			// But CALL rel16 and CALL rel32 have a bad encoding table so ignore the ModRM there.
			if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" {
				inst.opcode += " /r"
			}
			if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") {
				// The opcode is taken up with something else. Bug in table.
				fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode)
			}

			// XBEGIN is missing cw cd.
			if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") {
				inst.opcode += " cw"
			}
			if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") {
				inst.opcode += " cd"
			}
			if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") {
				inst.opcode += " cm"
			}

			// Kept inside the loop so that an instruction without
			// arguments keeps whatever action it already had.
			inst.action = strings.Join(action, ",")
		}
		inst.syntax = joinSyntax(op, args)

		// The Intel manual lists each XCHG form with arguments in both orders.
		// While this is technically correct, it confuses lots of the analysis.
		// Change half of them to start with a fake "XX" byte.
		// (The length check guards against an XCHG entry with no arguments.)
		if op == "XCHG" && len(args) > 0 && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") {
			inst.opcode = "XX " + inst.opcode
		}

		// Intel manual is not great about disabling REX instructions on 32-bit systems.
		if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" {
			inst.valid32 = "N.E."
		}

		// Derive the minimum CPU from the compatibility notes.
		if inst.valid32 == "V" {
			switch {
			case containsAll(inst.compat, "not supported", "earlier than the Intel486"):
				inst.cpuid = "486"
			case containsAll(inst.compat, "not supported", "earlier than the Pentium"),
				containsAll(inst.compat, "were introduced", "with the Pentium"):
				inst.cpuid = "Pentium"
			case containsAll(inst.compat, "were introduced", "in the Pentium II"):
				inst.cpuid = "PentiumII"
			case containsAll(inst.compat, "were introduced", "in the P6 family"),
				containsAll(inst.compat, "were introduced in P6 family"):
				addTag(inst, "P6")
			}
		}

		// Remember which ops were seen, for filtering extraInsts below.
		if onlySomePages {
			op, _ := splitSyntax(inst.syntax)
			haveOp[op] = true
		}
	}
	insts = out

	sort.Sort(byOpcode(insts))

	// Detect operand size dependencies.
	var last *instruction
	for _, inst := range insts {
		if last != nil {
			f1, _ := splitOpcode(last.opcode)
			f2, _ := splitOpcode(inst.opcode)
			if f1 == f2 {
				// Conflict: cannot distinguish instructions based on fixed prefix.
				if is16vs32pair(last, inst) {
					addTag(last, "operand16")
					addTag(inst, "operand32")
					continue
				}
				if is16vs32pair(inst, last) {
					addTag(last, "operand32")
					addTag(inst, "operand16")
					last = inst
					continue
				}
			}
		}
		last = inst
	}

	// Detect pseudo-ops, defined as opcode entries subsumed by more general ones.
	seen := map[string]*instruction{}
	// Start this pass without a predecessor: last still points at an
	// instruction left over from the operand-size pass above, and comparing
	// the first entry of this pass against it would be meaningless.
	last = nil
	for _, inst := range insts {
		if strings.HasPrefix(inst.opcode, "9B ") { // FWAIT prefix
			addTag(inst, "pseudo")
			continue
		}
		if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" {
			addTag(inst, "pseudo")
			continue
		}
		if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") {
			addTag(inst, "pseudo")
			continue
		}
		if strings.HasPrefix(inst.syntax, "SAL ") { // SHL is canonical
			addTag(inst, "pseudo")
			continue
		}
		if old := seen[inst.opcode]; old != nil {
			// Two entries share an opcode; only one of them stays non-pseudo.
			if condLess(old.syntax, inst.syntax) {
				addTag(inst, "pseudo")
				continue
			}
			if xchgLess(inst.syntax, old.syntax) {
				// Tag through addTag like every other site in this function.
				addTag(old, "pseudo")
				seen[inst.opcode] = inst
				continue
			}
		}
		seen[inst.opcode] = inst
		if last != nil && canGenerate(last.opcode, inst.opcode) {
			addTag(inst, "pseudo")
			continue
		}
		last = inst
	}

	// Relate REX and REX.W forms to the entry with the same opcode minus
	// the prefix, if one was recorded in seen.
	for _, inst := range insts {
		if strings.Contains(inst.opcode, "REX ") {
			if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax {
				addTag(inst, "pseudo64")
				continue
			} else if old != nil && hasTag(old, "pseudo") {
				addTag(inst, "pseudo")
				continue
			}
		}
		if strings.Contains(inst.opcode, "REX.W ") {
			if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax {
				addTag(old, "ignoreREXW")
				addTag(inst, "pseudo")
				continue
			} else if old != nil && hasTag(old, "pseudo") {
				addTag(inst, "pseudo")
				continue
			} else if old != nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") {
				// There is a 64-bit form of this instruction.
				// Mark the form without REX.W (old) as only valid in the
				// non-64-bit operand modes.
				addTag(old, "operand16")
				addTag(old, "operand32")
				continue
			}
		}
	}

	// Undo XCHG hack above.
	for _, inst := range insts {
		if strings.HasPrefix(inst.opcode, "XX ") {
			inst.opcode = strings.TrimPrefix(inst.opcode, "XX ")
			addTag(inst, "pseudo")
			removeTag(inst, "pseudo64")
		}
	}

	// Last ditch effort. Manual fixes.
	// Some things are too hard to infer.
	for _, inst := range insts {
		for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] {
			fix(inst)
		}
		sort.Strings(inst.tags)
	}

	// Restore the original order recorded in inst.seq.
	sort.Sort(bySeq(insts))

	if onlySomePages {
		// Only add the extra instructions whose op appeared in the input.
		for _, inst := range extraInsts {
			op, _ := splitSyntax(inst.syntax)
			if haveOp[op] {
				insts = append(insts, inst)
			}
		}
	} else {
		insts = append(insts, extraInsts...)
	}
	return insts
}