size_t DisassemblerArm::DumpThumb16()

in disassembler/disassembler_arm.cc [1597:1946]


// Disassembles the Thumb instruction at `instr_ptr` and writes one formatted line to `os`.
//
// A halfword whose top bits are 0b1111 or 0b11101 starts a 32-bit encoding; such instructions
// are forwarded to DumpThumb32() and its result is returned unchanged. Everything else is decoded
// here as a 16-bit instruction. Encodings that no branch below recognizes are still printed, but
// with an empty mnemonic and no arguments.
//
// If it_conditions_ is non-empty, its last entry is appended to the mnemonic and removed, so that
// instructions following an IT instruction pick up their condition suffix.
//
// Returns 2 for a 16-bit instruction, otherwise whatever DumpThumb32() returns.
size_t DisassemblerArm::DumpThumb16(std::ostream& os, const uint8_t* instr_ptr) {
  const uint16_t instr = ReadU16(instr_ptr);
  const bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
  if (is_32bit) {
    return DumpThumb32(os, instr_ptr);
  }

  std::ostringstream opcode;
  std::ostringstream args;
  const uint16_t opcode1 = instr >> 10;  // The top six bits select the instruction group.
  if (opcode1 < 0x10) {
    // shift (immediate), add, subtract, move, and compare
    const uint16_t opcode2 = instr >> 9;
    switch (opcode2) {
      case 0x0: case 0x1: case 0x2: case 0x3: case 0x4: case 0x5: case 0x6: case 0x7:
      case 0x8: case 0x9: case 0xA: case 0xB: {
        // Logical shift left     - 00 000xx iii mmm ddd
        // Logical shift right    - 00 001xx iii mmm ddd
        // Arithmetic shift right - 00 010xx iii mmm ddd
        const uint16_t imm5 = (instr >> 6) & 0x1F;
        ThumbRegister rm(instr, 3);
        ThumbRegister Rd(instr, 0);
        if (opcode2 <= 3) {
          opcode << "lsls";
        } else if (opcode2 <= 7) {
          opcode << "lsrs";
        } else {
          opcode << "asrs";
        }
        args << Rd << ", " << rm << ", #" << imm5;
        break;
      }
      case 0xC: case 0xD: case 0xE: case 0xF: {
        // Add register        - 00 01100 mmm nnn ddd
        // Sub register        - 00 01101 mmm nnn ddd
        // Add 3-bit immediate - 00 01110 iii nnn ddd
        // Sub 3-bit immediate - 00 01111 iii nnn ddd
        const uint16_t imm3_or_Rm = (instr >> 6) & 7;
        ThumbRegister Rn(instr, 3);
        ThumbRegister Rd(instr, 0);
        const bool is_immediate = (opcode2 & 2) != 0;
        const bool is_sub = (opcode2 & 1) != 0;
        if (is_immediate && !is_sub && imm3_or_Rm == 0) {
          // Only the add form with a zero immediate is shown as a register move; the
          // subtract form is a different instruction and keeps its own mnemonic.
          opcode << "mov";
          args << Rd << ", " << Rn;
        } else {
          opcode << (is_sub ? "subs" : "adds");
          args << Rd << ", " << Rn;
          if (is_immediate) {
            args << ", #" << imm3_or_Rm;
          } else {
            ArmRegister Rm(imm3_or_Rm);
            args << ", " << Rm;
          }
        }
        break;
      }
      case 0x10: case 0x11: case 0x12: case 0x13:
      case 0x14: case 0x15: case 0x16: case 0x17:
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F: {
        // MOVS Rd, #imm8 - 00100 ddd iiiiiiii
        // CMP  Rn, #imm8 - 00101 nnn iiiiiiii
        // ADDS Rn, #imm8 - 00110 nnn iiiiiiii
        // SUBS Rn, #imm8 - 00111 nnn iiiiiiii
        ThumbRegister Rn(instr, 8);
        const uint16_t imm8 = instr & 0xFF;
        switch (opcode2 >> 2) {
          case 4: opcode << "movs"; break;
          case 5: opcode << "cmp"; break;
          case 6: opcode << "adds"; break;
          case 7: opcode << "subs"; break;
        }
        args << Rn << ", #" << imm8;
        break;
      }
      default:
        break;
    }
  } else if (opcode1 == 0x10) {
    // Data-processing: the mnemonic is looked up by the 4-bit opcode in bits 9:6.
    const uint16_t opcode2 = (instr >> 6) & 0xF;
    ThumbRegister rm(instr, 3);
    ThumbRegister rdn(instr, 0);
    opcode << kThumbDataProcessingOperations[opcode2];
    args << rdn << ", " << rm;
  } else if (opcode1 == 0x11) {
    // Special data instructions and branch and exchange
    const uint16_t opcode2 = (instr >> 6) & 0x0F;
    switch (opcode2) {
      case 0x0: case 0x1: case 0x2: case 0x3: {
        // Add low registers  - 010001 0000 xxxxxx
        // Add high registers - 010001 0001/001x xxxxxx
        const uint16_t DN = (instr >> 7) & 1;
        ArmRegister rm(instr, 3);
        const uint16_t Rdn = instr & 7;
        ArmRegister DN_Rdn((DN << 3) | Rdn);
        opcode << "add";
        args << DN_Rdn << ", " << rm;
        break;
      }
      case 0x8: case 0x9: case 0xA: case 0xB: {
        // Move low registers  - 010001 1000 xxxxxx
        // Move high registers - 010001 1001/101x xxxxxx
        const uint16_t DN = (instr >> 7) & 1;
        ArmRegister rm(instr, 3);
        const uint16_t Rdn = instr & 7;
        ArmRegister DN_Rdn((DN << 3) | Rdn);
        opcode << "mov";
        args << DN_Rdn << ", " << rm;
        break;
      }
      case 0x5: case 0x6: case 0x7: {
        // Compare high registers - 010001 0101/011x xxxxxx
        const uint16_t N = (instr >> 7) & 1;
        ArmRegister rm(instr, 3);
        const uint16_t Rn = instr & 7;
        ArmRegister N_Rn((N << 3) | Rn);
        opcode << "cmp";
        args << N_Rn << ", " << rm;
        break;
      }
      case 0xC: case 0xD: case 0xE: case 0xF: {
        // Branch and exchange           - 010001 110x xxxxxx
        // Branch with link and exchange - 010001 111x xxxxxx
        ArmRegister rm(instr, 3);
        opcode << ((opcode2 & 0x2) == 0 ? "bx" : "blx");
        args << rm;
        break;
      }
      default:
        break;
    }
  } else if (opcode1 == 0x12 || opcode1 == 0x13) {  // 01001x
    // Load from literal pool - 01001 ttt iiiiiiii
    const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
    const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
    ThumbRegister Rt(instr, 8);
    const uint16_t imm8 = instr & 0xFF;
    opcode << "ldr";
    args << Rt << ", [pc, #" << (imm8 << 2) << "]";
    DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, /*U*/ 1u, imm8 << 2, kT2LitHexWord);
  } else if ((opcode1 >= 0x14 && opcode1 <= 0x17) ||  // 0101xx
             (opcode1 >= 0x18 && opcode1 <= 0x1f) ||  // 011xxx
             (opcode1 >= 0x20 && opcode1 <= 0x27)) {  // 100xxx
    // Load/store single data item
    const uint16_t opA = (instr >> 12) & 0xF;
    if (opA == 0x5) {
      // Register offset: Rt, [Rn, Rm]
      const uint16_t opB = (instr >> 9) & 0x7;
      ThumbRegister Rm(instr, 6);
      ThumbRegister Rn(instr, 3);
      ThumbRegister Rt(instr, 0);
      switch (opB) {
        case 0: opcode << "str"; break;
        case 1: opcode << "strh"; break;
        case 2: opcode << "strb"; break;
        case 3: opcode << "ldrsb"; break;
        case 4: opcode << "ldr"; break;
        case 5: opcode << "ldrh"; break;
        case 6: opcode << "ldrb"; break;
        case 7: opcode << "ldrsh"; break;
      }
      args << Rt << ", [" << Rn << ", " << Rm << "]";
    } else if (opA == 9) {
      // SP-relative: Rt, [sp, #imm8 * 4]
      const uint16_t opB = (instr >> 11) & 1;
      ThumbRegister Rt(instr, 8);
      const uint16_t imm8 = instr & 0xFF;
      opcode << (opB == 0 ? "str" : "ldr");
      args << Rt << ", [sp, #" << (imm8 << 2) << "]";
    } else {
      // Immediate offset: Rt, [Rn, #imm5 * access size]
      uint16_t imm5 = (instr >> 6) & 0x1F;
      const uint16_t opB = (instr >> 11) & 1;
      ThumbRegister Rn(instr, 3);
      ThumbRegister Rt(instr, 0);
      switch (opA) {
        case 6:  // Word access: offset is scaled by 4.
          imm5 <<= 2;
          opcode << (opB == 0 ? "str" : "ldr");
          break;
        case 7:  // Byte access: offset is used unscaled.
          opcode << (opB == 0 ? "strb" : "ldrb");
          break;
        case 8:  // Halfword access: offset is scaled by 2.
          imm5 <<= 1;
          opcode << (opB == 0 ? "strh" : "ldrh");
          break;
      }
      args << Rt << ", [" << Rn << ", #" << imm5 << "]";
    }
  } else if (opcode1 >= 0x34 && opcode1 <= 0x37) {  // 1101xx
    const uint32_t cond = (instr >> 8) & 0xF;
    if (cond == 0xF) {
      // Supervisor call - 1101 1111 iiiiiiii
      opcode << "svc";
      args << "#" << (instr & 0xFF);
    } else if (cond == 0xE) {
      // Permanently undefined - 1101 1110 iiiiiiii
      opcode << "udf";
      args << "#" << (instr & 0xFF);
    } else {
      // Conditional branch - 1101 cccc iiiiiiii
      int8_t imm8 = instr & 0xFF;  // Reinterpreted as signed to get a signed branch offset.
      opcode << "b";
      DumpCond(opcode, cond);
      DumpBranchTarget(args, instr_ptr + 4, (imm8 << 1));
    }
  } else if ((instr & 0xF800) == 0xA800) {
    // Generate SP-relative address
    ThumbRegister rd(instr, 8);
    const int imm8 = instr & 0xFF;
    opcode << "add";
    args << rd << ", sp, #" << (imm8 << 2);
  } else if ((instr & 0xF000) == 0xB000) {
    // Miscellaneous 16-bit instructions
    const uint16_t opcode2 = (instr >> 5) & 0x7F;
    switch (opcode2) {
      case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: {
        // Add immediate to SP        - 1011 00000 ii iiiii
        // Subtract immediate from SP - 1011 00001 ii iiiii
        const int imm7 = instr & 0x7F;
        opcode << ((opcode2 & 4) == 0 ? "add" : "sub");
        args << "sp, sp, #" << (imm7 << 2);
        break;
      }
      case 0x08: case 0x09: case 0x0A: case 0x0B:  // 0001xxx
      case 0x0C: case 0x0D: case 0x0E: case 0x0F:
      case 0x18: case 0x19: case 0x1A: case 0x1B:  // 0011xxx
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
      case 0x48: case 0x49: case 0x4A: case 0x4B:  // 1001xxx
      case 0x4C: case 0x4D: case 0x4E: case 0x4F:
      case 0x58: case 0x59: case 0x5A: case 0x5B:  // 1011xxx
      case 0x5C: case 0x5D: case 0x5E: case 0x5F: {
        // CBNZ, CBZ
        const uint16_t op = (instr >> 11) & 1;
        const uint16_t i = (instr >> 9) & 1;
        const uint16_t imm5 = (instr >> 3) & 0x1F;
        ThumbRegister Rn(instr, 0);
        opcode << (op != 0 ? "cbnz" : "cbz");
        const uint32_t imm32 = (i << 6) | (imm5 << 1);
        args << Rn << ", ";
        DumpBranchTarget(args, instr_ptr + 4, imm32);
        break;
      }
      case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27:
      case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: {
        // Bit 8 of the instruction is moved to bit 14 of the register list.
        opcode << "push";
        args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 6));
        break;
      }
      case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
      case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: {
        // Bit 8 of the instruction is moved to bit 15 of the register list.
        opcode << "pop";
        args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 7));
        break;
      }
      case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: {
        opcode << "bkpt";
        args << "#" << (instr & 0xFF);
        break;
      }
      case 0x50: case 0x51:    // 101000x
      case 0x52: case 0x53:    // 101001x
      case 0x56: case 0x57: {  // 101011x
        // Byte-reverse family: the mnemonic is looked up by bits 7:6.
        const uint16_t op = (instr >> 6) & 3;
        opcode << kThumbReverseOperations[op];
        ThumbRegister Rm(instr, 3);
        ThumbRegister Rd(instr, 0);
        args << Rd << ", " << Rm;
        break;
      }
      case 0x78: case 0x79: case 0x7A: case 0x7B:  // 1111xxx
      case 0x7C: case 0x7D: case 0x7E: case 0x7F: {
        // If-Then, and hints
        const uint16_t opA = (instr >> 4) & 0xF;
        const uint16_t opB = instr & 0xF;
        if (opB == 0) {
          // Hints - 1011 1111 aaaa 0000
          switch (opA) {
            case 0: opcode << "nop"; break;
            case 1: opcode << "yield"; break;
            case 2: opcode << "wfe"; break;
            case 3: opcode << "wfi"; break;
            case 4: opcode << "sev"; break;
            default: break;
          }
        } else {
          const uint32_t first_cond = opA;
          const uint32_t mask = opB;
          opcode << "it";

          // Flesh out the base "it" opcode with the specific collection of 't's and 'e's,
          // and store up the actual condition codes we'll want to add to the next few opcodes.
          //
          // Entries are consumed from the back of it_conditions_ (one per dumped instruction),
          // so the layout from the back is: "" for the IT itself, the implicit 't' for the first
          // instruction of the block, then the explicit slots in program order. The explicit
          // slot for loop index i therefore lives at index (count - 1 - i).
          const size_t count = 3 - CTZ(mask);
          it_conditions_.resize(count + 2);  // Plus the implicit 't', plus the "" for the IT itself.
          for (size_t i = 0; i < count; ++i) {
            const bool positive_cond = ((first_cond & 1) != 0);
            const bool positive_mask = ((mask & (1 << (3 - i))) != 0);
            const size_t slot = count - 1 - i;
            if (positive_mask == positive_cond) {
              opcode << 't';
              it_conditions_[slot] = kConditionCodeNames[first_cond];
            } else {
              opcode << 'e';
              it_conditions_[slot] = kConditionCodeNames[first_cond ^ 1];
            }
          }
          it_conditions_[count] = kConditionCodeNames[first_cond];  // The implicit 't'.

          it_conditions_[count + 1] = "";  // No condition code for the IT itself...
          DumpCond(args, first_cond);  // ...because it's considered an argument.
        }
        break;
      }
      default:
        break;
    }
  } else if (opcode1 == 0x38 || opcode1 == 0x39) {
    // Unconditional branch - 11100 iiiiiiiiiii
    int32_t imm32 = (instr & 0x7FF) << 1;
    if ((imm32 & 0x800) != 0) {
      imm32 -= 0x1000;  // Sign extend the 12-bit byte offset.
    }
    opcode << "b";
    DumpBranchTarget(args, instr_ptr + 4, imm32);
  }

  // Apply any IT-block conditions to the opcode if necessary.
  if (!it_conditions_.empty()) {
    opcode << it_conditions_.back();
    it_conditions_.pop_back();
  }

  os << FormatInstructionPointer(instr_ptr)
     << StringPrintf(": %04x    \t%-7s ", instr, opcode.str().c_str())
     << args.str() << '\n';
  return 2;
}