func()

in util/fipstools/delocate/delocate.go [504:720]


// processAarch64Instruction processes a single AArch64 instruction statement
// and writes either the original statement or a rewritten form of it to
// d.output.
//
// statement is the node for the whole statement and instruction is its
// ruleInstructionName node; the instruction's arguments are read from
// instruction.next.
//
// The handling falls into these groups:
//   - Instructions taking condition codes or special register names are
//     written through unmodified.
//   - adrp is delegated to d.loadAarch64Address.
//   - bl targets ending in "_bss_get" are recorded in d.bssAccessorsNeeded;
//     the instruction is then processed like any other.
//   - For everything else each argument is inspected: branch targets are
//     renamed to local targets or redirectors (recorded in d.redirectors),
//     and the :lo12: / :got_lo12: halves of an adrp pair are neutralised.
//
// It returns the statement node on success. An error is returned when an
// instruction has too few arguments for the rewrite that it requires; other
// unexpected input shapes panic, as elsewhere in this function.
func (d *delocation) processAarch64Instruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	switch instructionName {
	case "ccmn", "ccmp", "cinc", "cinv", "cneg", "csel", "cset", "csetm", "csinc", "csinv", "csneg", "fcsel":
		// These functions are special because they take a condition-code name as
		// an argument and that looks like a symbol reference.
		d.writeNode(statement)
		return statement, nil

	case "mrs":
		// Functions that take special register names also look like a symbol
		// reference to the parser.
		d.writeNode(statement)
		return statement, nil

	case "adrp":
		// adrp always generates a relocation, even when the target symbol is in the
		// same segment, because the page-offset of the code isn't known until link
		// time. Thus adrp instructions are turned into either adr instructions
		// (limiting the module to 1MiB offsets) or calls to helper functions, both of
		// which load the full address. Later instructions, which add the low 12 bits
		// of offset, are tweaked to remove the offset since it's already included.
		// Loads of GOT symbols are slightly more complex because it's not possible to
		// avoid dereferencing a GOT entry with Clang's assembler. Thus the later ldr
		// instruction, which would normally do the dereferencing, is dropped
		// completely. (Or turned into a mov if it targets a different register.)
		if len(argNodes) < 2 {
			// Report malformed input rather than crashing on an index expression.
			return nil, fmt.Errorf("adrp instruction has %d argument(s), but at least 2 are required", len(argNodes))
		}

		assertNodeType(argNodes[0], ruleRegisterOrConstant)
		targetReg := d.contents(argNodes[0])
		if !strings.HasPrefix(targetReg, "x") {
			panic("adrp targetting register " + targetReg + ", which has the wrong size")
		}

		var symbol, offset string
		switch argNodes[1].pegRule {
		case ruleGOTSymbolOffset:
			symbol = d.contents(argNodes[1].up)
		case ruleMemoryRef:
			assertNodeType(argNodes[1].up, ruleSymbolRef)
			// Offsets may only follow the symbol; anything gathered before it is
			// unsupported.
			node, empty := d.gatherOffsets(argNodes[1].up.up, "")
			if len(empty) != 0 {
				panic("prefix offsets found for adrp")
			}
			symbol = d.contents(node)
			_, offset = d.gatherOffsets(node.next, "")
		default:
			panic("Unhandled adrp argument type " + rul3s[argNodes[1].pegRule])
		}

		return d.loadAarch64Address(statement, targetReg, symbol, offset)

	case "bl":
		// We were relying on symbols defined with ".comm" to populate bssAccessorsNeeded,
		// but the gcc release build does not use ".comm" to define common symbols. The
		// symbols requiring accessor functions (i.e. with a suffix "_bss_get") are defined
		// with a ".type $symbol %object" followed with a ".size $symbol $symbol_size"
		// instead. These definition methods are generic and do not only apply to symbols
		// that need accessors. Thus we attempt to reverse engineer the accessor symbols
		// by populating bssAccessorsNeeded with labels from "bl" that have the accessor
		// "_bss_get" at the suffix.
		if len(argNodes) == 0 {
			return nil, fmt.Errorf("%s instruction has no target argument", instructionName)
		}

		callTarget := d.contents(argNodes[0])
		if strings.HasSuffix(callTarget, "_bss_get") {
			bssSymbol := strings.TrimSuffix(callTarget, "_bss_get")
			d.bssAccessorsNeeded[bssSymbol] = bssSymbol
		}
		// Deliberately no return: the bl is still rewritten by the generic
		// argument handling below.
	}

	var args []string
	changed := false

	for _, arg := range argNodes {
		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef, ruleARMConstantTweak:
			args = append(args, d.contents(arg))

		case ruleGOTSymbolOffset:
			// These should only be arguments to adrp and thus unreachable.
			panic("unreachable")

		case ruleMemoryRef:
			ref := arg.up

			switch ref.pegRule {
			case ruleSymbolRef:
				// This is a branch. Either the target needs to be written to a local
				// version of the symbol to ensure that no relocations are emitted, or
				// it needs to jump to a redirector function.
				symbol, offset, _, didChange, symbolIsLocal, _ := d.parseMemRef(ref)
				// Accumulate instead of assigning so that a rewrite requested by an
				// earlier argument of the same instruction is never discarded.
				changed = changed || didChange

				if isFipsScopeMarkers(symbol) {
					// fips scope markers are known. But they challenge the adr
					// reach, so go through GOT via an adrp outside the scope.
					redirector := redirectorName(symbol)
					d.redirectors[symbol] = redirector
					symbol = redirector
					changed = true
				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol, aarch64) {
					redirector := redirectorName(symbol)
					d.redirectors[symbol] = redirector
					symbol = redirector
					changed = true
				} else if didChange && symbolIsLocal && len(offset) > 0 {
					// didChange is set when the inputFile index is not 0; which is the index of the
					// first file copied to the output, which is the generated assembly of bcm.c.
					// In subsequently copied assembly files, local symbols are changed by appending (BCM_ + index)
					// in order to ensure they don't collide. `index` gets incremented per file.
					// If there is offset after the symbol, append the `offset`.
					symbol = symbol + offset
				}

				args = append(args, symbol)

			case ruleARMBaseIndexScale:
				parts := ref.up
				assertNodeType(parts, ruleARMRegister)
				baseAddrReg := d.contents(parts)
				parts = skipWS(parts.next)

				// Only two forms need special handling. First there's memory references
				// like "[x*, :got_lo12:foo]". The base register here will have been the
				// target of an adrp instruction to load the page address, but the adrp
				// will have turned into loading the full address *and dereferencing it*,
				// above. Thus this instruction needs to be dropped otherwise we'll be
				// dereferencing twice.
				//
				// Second there are forms like "[x*, :lo12:foo]" where the code has used
				// adrp to load the page address into x*. That adrp will have been turned
				// into loading the full address so just the offset needs to be dropped.

				if parts != nil {
					if parts.pegRule == ruleARMGOTLow12 {
						if instructionName != "ldr" && instructionName != "ldrsw" {
							panic("Symbol reference outside of ldr/ldrsw instruction")
						}

						if skipWS(parts.next) != nil || parts.up.next != nil {
							panic("can't handle tweak or post-increment with symbol references")
						}

						if len(args) == 0 {
							// The destination register must precede the memory operand.
							return nil, fmt.Errorf("%q instruction with a GOT reference has no destination register", instructionName)
						}

						// The GOT helper already dereferenced the entry so, at most, just a mov
						// is needed to put things in the right register.
						d.writeCommentedNode(statement)
						if baseAddrReg != args[0] {
							d.output.WriteString("\tmov " + args[0] + ", " + baseAddrReg + "\n")
						}
						return statement, nil
					} else if parts.pegRule == ruleLow12BitsSymbolRef {
						if instructionName != "ldr" && instructionName != "ldrsw" {
							panic("Symbol reference outside of ldr/ldrsw instruction")
						}

						// The check for "parts.up.next != nil" was removed because gcc/release appends an
						// offset to the symbol reference. ex: #:lo12:.LC9+8
						if skipWS(parts.next) != nil {
							panic("can't handle tweak with symbol references")
						}

						// Suppress the offset; adrp loaded the full address.
						args = append(args, "["+baseAddrReg+"]")
						changed = true
						continue
					}
				}

				// Any other base/index/scale form is passed through verbatim.
				args = append(args, d.contents(arg))

			case ruleLow12BitsSymbolRef:
				// These are the second instruction in a pair:
				//   adrp x0, symbol           // Load the page address into x0
				//   add x1, x0, :lo12:symbol  // Adds the page offset.
				//
				// The adrp instruction will have been turned into a sequence that loads
				// the full address, above, thus the offset is turned into zero. If that
				// results in the instruction being a nop, then it is deleted.
				if instructionName != "add" {
					panic(fmt.Sprintf("unsure how to handle %q instruction using lo12", instructionName))
				}

				if len(args) < 2 {
					// Both the destination and source registers must come first.
					return nil, fmt.Errorf("%q instruction using lo12 has %d preceding argument(s), but 2 are required", instructionName, len(args))
				}

				if !strings.HasPrefix(args[0], "x") || !strings.HasPrefix(args[1], "x") {
					panic("address arithmetic with incorrectly sized register")
				}

				if args[0] == args[1] {
					// "add xN, xN, #0" would be a nop, so only the commented form of
					// the statement is written.
					d.writeCommentedNode(statement)
					return statement, nil
				}

				args = append(args, "#0")
				changed = true

			default:
				panic(fmt.Sprintf("unhandled MemoryRef type %s", rul3s[ref.pegRule]))
			}

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		d.output.WriteString(replacement)
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}