__m256i SequenceVector::loadSIMDValueBufferAt()

in velox/vector/SequenceVector-inl.h [125:245]


__m256i SequenceVector<T>::loadSIMDValueBufferAt(size_t byteOffset) const {
  // Assembles a 256-bit integer register from consecutive values of this
  // vector. `byteOffset` is divided by sizeof(T) to obtain the index of the
  // first value; that value ends up in the lowest lane and each following
  // value in the next higher lane.
  //
  // Throws std::runtime_error when T is not an 8/16/32/64-bit integer type
  // and the broadcast shortcut below was not taken.

  // Width classes of T, evaluated at compile time.
  constexpr bool kIs64Bit =
      std::is_same_v<T, int64_t> || std::is_same_v<T, uint64_t>;
  constexpr bool kIs32Bit =
      std::is_same_v<T, int32_t> || std::is_same_v<T, uint32_t>;
  constexpr bool kIs16Bit =
      std::is_same_v<T, int16_t> || std::is_same_v<T, uint16_t>;
  constexpr bool kIs8Bit =
      std::is_same_v<T, int8_t> || std::is_same_v<T, uint8_t>;

  const auto first = byteOffset / sizeof(T);

  // Shortcut: when checkLoadRange() accepts the whole register-wide window,
  // the register is produced from the first value alone.
  // NOTE(review): presumably this means every lane would hold the same value
  // -- confirm against checkLoadRange() and simd::setAll256i().
  if (checkLoadRange(first, simd::Vectors<T>::VSize)) {
    return simd::setAll256i(valueAtFast(first));
  }

  // In every branch below the values are read one statement at a time in
  // ascending index order (this ordering matters for performance), and are
  // then handed to the _mm256_set_* intrinsic, which expects its arguments
  // from the highest lane down to the lowest.
  if constexpr (kIs64Bit) {
    // 4 lanes x 64 bits.
    const auto e0 = valueAtFast(first);
    const auto e1 = valueAtFast(first + 1);
    const auto e2 = valueAtFast(first + 2);
    const auto e3 = valueAtFast(first + 3);
    return _mm256_set_epi64x(e3, e2, e1, e0);
  } else if constexpr (kIs32Bit) {
    // 8 lanes x 32 bits.
    const auto e0 = valueAtFast(first);
    const auto e1 = valueAtFast(first + 1);
    const auto e2 = valueAtFast(first + 2);
    const auto e3 = valueAtFast(first + 3);
    const auto e4 = valueAtFast(first + 4);
    const auto e5 = valueAtFast(first + 5);
    const auto e6 = valueAtFast(first + 6);
    const auto e7 = valueAtFast(first + 7);
    return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0);
  } else if constexpr (kIs16Bit) {
    // 16 lanes x 16 bits.
    const auto e0 = valueAtFast(first);
    const auto e1 = valueAtFast(first + 1);
    const auto e2 = valueAtFast(first + 2);
    const auto e3 = valueAtFast(first + 3);
    const auto e4 = valueAtFast(first + 4);
    const auto e5 = valueAtFast(first + 5);
    const auto e6 = valueAtFast(first + 6);
    const auto e7 = valueAtFast(first + 7);
    const auto e8 = valueAtFast(first + 8);
    const auto e9 = valueAtFast(first + 9);
    const auto e10 = valueAtFast(first + 10);
    const auto e11 = valueAtFast(first + 11);
    const auto e12 = valueAtFast(first + 12);
    const auto e13 = valueAtFast(first + 13);
    const auto e14 = valueAtFast(first + 14);
    const auto e15 = valueAtFast(first + 15);
    return _mm256_set_epi16(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
  } else if constexpr (kIs8Bit) {
    // 32 lanes x 8 bits.
    const auto e0 = valueAtFast(first);
    const auto e1 = valueAtFast(first + 1);
    const auto e2 = valueAtFast(first + 2);
    const auto e3 = valueAtFast(first + 3);
    const auto e4 = valueAtFast(first + 4);
    const auto e5 = valueAtFast(first + 5);
    const auto e6 = valueAtFast(first + 6);
    const auto e7 = valueAtFast(first + 7);
    const auto e8 = valueAtFast(first + 8);
    const auto e9 = valueAtFast(first + 9);
    const auto e10 = valueAtFast(first + 10);
    const auto e11 = valueAtFast(first + 11);
    const auto e12 = valueAtFast(first + 12);
    const auto e13 = valueAtFast(first + 13);
    const auto e14 = valueAtFast(first + 14);
    const auto e15 = valueAtFast(first + 15);
    const auto e16 = valueAtFast(first + 16);
    const auto e17 = valueAtFast(first + 17);
    const auto e18 = valueAtFast(first + 18);
    const auto e19 = valueAtFast(first + 19);
    const auto e20 = valueAtFast(first + 20);
    const auto e21 = valueAtFast(first + 21);
    const auto e22 = valueAtFast(first + 22);
    const auto e23 = valueAtFast(first + 23);
    const auto e24 = valueAtFast(first + 24);
    const auto e25 = valueAtFast(first + 25);
    const auto e26 = valueAtFast(first + 26);
    const auto e27 = valueAtFast(first + 27);
    const auto e28 = valueAtFast(first + 28);
    const auto e29 = valueAtFast(first + 29);
    const auto e30 = valueAtFast(first + 30);
    const auto e31 = valueAtFast(first + 31);
    return _mm256_set_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e9, e8,
        e7, e6, e5, e4, e3, e2, e1, e0);
  }

  // Only reached when T matched none of the integer widths above.
  throw std::runtime_error(
      "Sequence encoding only supports SIMD operations on integers");
}