static uint32_t GetInstructionSize()

in runtime/arch/x86/fault_handler_x86.cc [93:234]


// Computes the length, in bytes, of the x86 / x86-64 instruction starting at `pc`.
//
// Only a small set of opcodes is decoded (see the two opcode switches below);
// for any other opcode a message is logged and 0 is returned.
//
// The decoder walks the instruction in encoding order:
//   legacy prefixes -> optional REX (x86-64 only) -> optional 0x0f escape ->
//   opcode -> optional ModRM -> optional SIB -> displacement -> immediate.
//
// Limitation: ModRM is interpreted using the 32/64-bit addressing forms only;
// 16-bit addressing (0x67 prefix in 32-bit mode) uses a different ModRM layout
// and is not decoded here.
//
// `pc` must point at readable instruction bytes; no bounds checking is done.
static uint32_t GetInstructionSize(const uint8_t* pc) {
#if defined(__x86_64)
  const bool x86_64 = true;
#else
  const bool x86_64 = false;
#endif

  const uint8_t* startpc = pc;

  uint8_t opcode = *pc++;
  uint8_t modrm = 0;
  bool has_modrm = false;
  bool two_byte = false;
  uint32_t displacement_size = 0;
  uint32_t immediate_size = 0;
  bool operand_size_prefix = false;
  bool rex_w = false;

  // Prefixes. Any number of legacy prefixes may precede the opcode; consume
  // them one at a time until a non-prefix byte is found.
  while (true) {
    bool prefix_present = false;
    switch (opcode) {
      // Group 3
      case 0x66:
        operand_size_prefix = true;
        FALLTHROUGH_INTENDED;

      // Group 1
      case 0xf0:
      case 0xf2:
      case 0xf3:

      // Group 2
      case 0x2e:
      case 0x36:
      case 0x3e:
      case 0x26:
      case 0x64:
      case 0x65:

      // Group 4
      case 0x67:
        opcode = *pc++;
        prefix_present = true;
        break;
    }
    if (!prefix_present) {
      break;
    }
  }

  // REX prefix (x86-64 only). Bit 3 is REX.W, which selects a 64-bit operand
  // size and overrides the 0x66 operand-size prefix.
  if (x86_64 && opcode >= 0x40 && opcode <= 0x4f) {
    rex_w = (opcode & 0x08) != 0;
    opcode = *pc++;
  }

  if (opcode == 0x0f) {
    // Two byte opcode
    two_byte = true;
    opcode = *pc++;
  }

  // Size of a "full width" immediate: 2 bytes with a 16-bit operand size,
  // otherwise 4 bytes (64-bit operations still use a sign-extended imm32).
  const uint32_t full_immediate_size = (operand_size_prefix && !rex_w) ? 2 : 4;

  bool unhandled_instruction = false;

  if (two_byte) {
    switch (opcode) {
      case 0x10:        // vmovsd/ss
      case 0x11:        // vmovsd/ss
      case 0xb6:        // movzx
      case 0xb7:
      case 0xbe:        // movsx
      case 0xbf:
        modrm = *pc++;
        has_modrm = true;
        break;
      default:
        unhandled_instruction = true;
        break;
    }
  } else {
    switch (opcode) {
      case 0x88:        // mov byte
      case 0x89:        // mov
      case 0x8b:
      case 0x38:        // cmp with memory.
      case 0x39:
      case 0x3a:
      case 0x3b:
      case 0x85:        // test.
        modrm = *pc++;
        has_modrm = true;
        break;

      case 0x3c:        // cmp al, imm8: no ModRM, byte immediate only.
        immediate_size = 1;
        break;

      case 0x3d:        // cmp (e/r)ax, imm16/32: no ModRM, immediate only.
        immediate_size = full_immediate_size;
        break;

      case 0x80:        // group 1, byte immediate.
      case 0x83:
      case 0xc6:
        modrm = *pc++;
        has_modrm = true;
        immediate_size = 1;
        break;

      case 0x81:        // group 1, word immediate.
      case 0xc7:        // mov
        modrm = *pc++;
        has_modrm = true;
        immediate_size = full_immediate_size;
        break;

      default:
        unhandled_instruction = true;
        break;
    }
  }

  if (unhandled_instruction) {
    VLOG(signals) << "Unhandled x86 instruction with opcode " << static_cast<int>(opcode);
    return 0;
  }

  if (has_modrm) {
    const uint8_t mod = (modrm >> 6) & 3U /* 0b11 */;
    const uint8_t rm = modrm & 7U /* 0b111 */;

    // mod == 0b11 is a register operand: no SIB and no displacement.
    if (mod != 3U /* 0b11 */) {
      if (rm == 4U /* 0b100 */) {
        // r/m == 0b100 means a SIB byte follows the ModRM byte.
        const uint8_t sib = *pc++;
        // With mod == 0b00, SIB base == 0b101 means "no base register" and a
        // 32-bit displacement follows.
        if (mod == 0U && (sib & 7U /* 0b111 */) == 5U) {
          displacement_size = 4;
        }
      } else if (mod == 0U && rm == 5U /* 0b101 */) {
        // mod == 0b00, r/m == 0b101: disp32 (absolute on x86, RIP-relative on
        // x86-64) with no base register.
        displacement_size = 4;
      }

      if (mod == 1U /* 0b01 */) {
        displacement_size = 1;
      } else if (mod == 2U /* 0b10 */) {
        displacement_size = 4;
      }
    }
  }

  // Skip displacement and immediate.
  pc += displacement_size + immediate_size;

  VLOG(signals) << "x86 instruction length calculated as " << (pc - startpc);
  return static_cast<uint32_t>(pc - startpc);
}