static void WhereN()

in XForm/XForm.Native/Comparer16.cpp [137:235]


// Compares left[k] with right[k] for the rows in [0, length) and merges one match bit per row
// into matchVector (64 rows per 64-bit word; row k of a full block lands in bit k % 64 of word k / 64).
//
//  bOp          - And: existing bits are ANDed with the new results; Or: they are ORed.
//                 For any other value the full-block loop leaves matchVector untouched.
//  sign         - Signed: values are compared as stored; anything else: values are biased by
//                 -32768 before the signed compare instructions are used.
//  left, right  - the two arrays of 16-bit values to compare, read with unaligned loads.
//  length       - number of rows to compare.
//  matchVector  - bit vector updated in place.
//
// Full blocks of 64 rows are handled with 256-bit intrinsics; any remaining rows (length % 64)
// are forwarded to WhereSingle together with the word they belong to.
//
// NOTE(review): cOp is not declared in the visible signature; it is used as a template argument
// to WhereSingle, so it is presumably a non-type template parameter declared outside this excerpt - confirm.
static void WhereN(BooleanOperatorN bOp, SigningN sign, unsigned __int16* left, int length, unsigned __int16* right, unsigned __int64* matchVector)
{
	// PEXT selector keeping the odd-numbered bits (binary 1010... = 0xA...)
	const unsigned int oddBits = 0xAAAAAAAA;

	// Value subtracted from every element before comparing: zero for signed input,
	// otherwise -32768 so that unsigned ordering is preserved by the signed compares
	const __m256i bias = (sign == SigningN::Signed) ? _mm256_set1_epi16(0) : _mm256_set1_epi16(-32768);

	// These operators are computed as the complement of the compare that is actually executed
	const bool invert = (cOp == CompareOperatorN::LessThanOrEqual)
		|| (cOp == CompareOperatorN::GreaterThanOrEqual)
		|| (cOp == CompareOperatorN::NotEqual);

	// Number of rows covered by whole 64-row blocks
	const int blockLength = length & ~63;

	int row = 0;
	while (row < blockLength)
	{
		unsigned __int64 blockBits = 0;

		// A block is four groups of sixteen 2-byte values; each group yields 16 result bits
		for (int lane = 0; lane < 4; ++lane)
		{
			const int offset = row + 16 * lane;
			const __m256i lhs = _mm256_sub_epi16(_mm256_loadu_si256(reinterpret_cast<__m256i*>(left + offset)), bias);
			const __m256i rhs = _mm256_sub_epi16(_mm256_loadu_si256(reinterpret_cast<__m256i*>(right + offset)), bias);

			// 0xFFFF in every 16-bit element where the executed compare holds, 0x0000 elsewhere
			__m256i laneMask;
			switch (cOp)
			{
			case CompareOperatorN::GreaterThan:
			case CompareOperatorN::LessThanOrEqual:
				laneMask = _mm256_cmpgt_epi16(lhs, rhs);
				break;
			case CompareOperatorN::LessThan:
			case CompareOperatorN::GreaterThanOrEqual:
				laneMask = _mm256_cmpgt_epi16(rhs, lhs);
				break;
			case CompareOperatorN::Equal:
			case CompareOperatorN::NotEqual:
				laneMask = _mm256_cmpeq_epi16(lhs, rhs);
				break;
			}

			// movemask produces one bit per byte (two identical bits per row); PEXT keeps one of each pair
			const unsigned int byteBits = _mm256_movemask_epi8(laneMask);
			blockBits |= static_cast<unsigned __int64>(_pext_u32(byteBits, oddBits)) << (16 * lane);
		}

		if (invert)
		{
			blockBits = ~blockBits;
		}

		// Fold the 64 results into the word that holds these rows
		const int wordIndex = row >> 6;
		if (bOp == BooleanOperatorN::And)
		{
			matchVector[wordIndex] &= blockBits;
		}
		else if (bOp == BooleanOperatorN::Or)
		{
			matchVector[wordIndex] |= blockBits;
		}

		row += 64;
	}

	// Nothing left over when length is a multiple of 64
	if ((length & 63) == 0)
	{
		return;
	}

	// Hand the leftover rows to the one-at-a-time implementation, typed according to the requested signing
	if (sign == SigningN::Unsigned)
	{
		WhereSingle<cOp, unsigned __int16>(left + row, length - row, right + row, bOp, matchVector + (row >> 6));
	}
	else
	{
		WhereSingle<cOp, __int16>(reinterpret_cast<__int16*>(left + row), length - row, reinterpret_cast<__int16*>(right + row), bOp, matchVector + (row >> 6));
	}
}