pdq/cpp/hashing/pdqhashing.cpp (421 lines of code) (raw):
// ================================================================
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
// ================================================================
#include <pdq/cpp/hashing/pdqhashing.h>
#include <pdq/cpp/hashing/torben.h>
#include <pdq/cpp/downscaling/downscaling.h>
// ================================================================
// PDQ algorithm description:
// https://github.com/facebookexternal/ThreatExchange-PDQ/blob/main/pdqhash-2017-10-09.pdf
// ================================================================
#if defined(_WIN32)
#define _USE_MATH_DEFINES
#endif
#include <cassert>
#include <chrono>
#include <cmath>
using namespace std;
namespace facebook {
namespace pdq {
namespace hashing {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// From Wikipedia: standard RGB to luminance (the 'Y' in 'YUV').
const float luma_from_R_coeff = 0.299;
const float luma_from_G_coeff = 0.587;
const float luma_from_B_coeff = 0.114;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Minimum size tested.
const int MIN_HASHABLE_DIM = 5;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Tent filter.
const int PDQ_NUM_JAROSZ_XY_PASSES = 2;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Christoph Zauner 'Implementation and Benchmarking of Perceptual
// Image Hash Functions' 2010
static float* fill_dct_matrix_64_cached();
// ----------------------------------------------------------------
void fillFloatLumaFromRGB(
uint8_t* pRbase,
uint8_t* pGbase,
uint8_t* pBbase,
int numRows,
int numCols,
int rowStride,
int colStride,
float* luma // numRows x numCols, row-major
) {
uint8_t* pRrow = pRbase;
uint8_t* pGrow = pGbase;
uint8_t* pBrow = pBbase;
for (int i = 0; i < numRows; i++) {
uint8_t* pR = pRrow;
uint8_t* pG = pGrow;
uint8_t* pB = pBrow;
for (int j = 0; j < numCols; j++) {
float yval = luma_from_R_coeff * (*pR) + luma_from_G_coeff * (*pG) +
luma_from_B_coeff * (*pB);
luma[i * numCols + j] = yval;
pR += colStride;
pG += colStride;
pB += colStride;
}
pRrow += rowStride;
pGrow += rowStride;
pBrow += rowStride;
}
}
// ----------------------------------------------------------------
void fillFloatLumaFromGrey(
uint8_t* pbase,
int numRows,
int numCols,
int rowStride,
int colStride,
float* luma // numRows x numCols, row-major
) {
uint8_t* prow = pbase;
for (int i = 0; i < numRows; i++) {
uint8_t* p = prow;
for (int j = 0; j < numCols; j++) {
luma[i * numCols + j] = (float)(*p);
p += colStride;
}
prow += rowStride;
}
}
// ----------------------------------------------------------------
void pdqHash256FromFloatLuma(
float* fullBuffer1, // numRows x numCols, row-major
float* fullBuffer2, // numRows x numCols, row-major
int numRows,
int numCols,
float buffer64x64[64][64],
float buffer16x64[16][64],
float buffer16x16[16][16],
Hash256& hash,
int& quality) {
if (numRows < MIN_HASHABLE_DIM || numCols < MIN_HASHABLE_DIM) {
hash.clear();
quality = 0;
return;
}
pdqFloat256FromFloatLuma(
fullBuffer1,
fullBuffer2,
numRows,
numCols,
buffer64x64,
buffer16x64,
buffer16x16,
quality);
// Output bits
pdqBuffer16x16ToBits(buffer16x16, &hash);
}
// ----------------------------------------------------------------
void pdqFloat256FromFloatLuma(
float* fullBuffer1, // numRows x numCols, row-major
float* fullBuffer2, // numRows x numCols, row-major
int numRows,
int numCols,
float buffer64x64[64][64],
float buffer16x64[16][64],
float output_buffer16x16[16][16],
int& quality) {
if (numRows == 64 && numCols == 64) {
// E.g. for video-frame processing when we've already used ffmpeg
// to downsample for us.
int i, j, k;
for (k = 0, i = 0; i < 64; i++) {
for (j = 0; j < 64; j++, k++) {
buffer64x64[i][j] = fullBuffer1[k];
}
}
} else {
// Downsample (blur and decimate)
int windowSizeAlongRows =
facebook::pdq::downscaling::computeJaroszFilterWindowSize(numCols, 64);
int windowSizeAlongCols =
facebook::pdq::downscaling::computeJaroszFilterWindowSize(numRows, 64);
facebook::pdq::downscaling::jaroszFilterFloat(
fullBuffer1,
fullBuffer2,
numRows,
numCols,
windowSizeAlongRows,
windowSizeAlongCols,
PDQ_NUM_JAROSZ_XY_PASSES);
facebook::pdq::downscaling::decimateFloat(
fullBuffer1, numRows, numCols, &buffer64x64[0][0], 64, 64);
}
// Quality metric. Reuse the 64x64 image-domain downsample
// since we already have it.
quality = pdqImageDomainQualityMetric(buffer64x64);
// 2D DCT
dct64To16(buffer64x64, buffer16x64, output_buffer16x16);
}
// ----------------------------------------------------------------
bool pdqDihedralHash256esFromFloatLuma(
float* fullBuffer1, // numRows x numCols, row-major
float* fullBuffer2, // numRows x numCols, row-major
int numRows,
int numCols,
float buffer64x64[64][64],
float buffer16x64[16][64],
float buffer16x16[16][16],
float buffer16x16Aux[16][16],
Hash256* hashptrOriginal,
Hash256* hashptrRotate90,
Hash256* hashptrRotate180,
Hash256* hashptrRotate270,
Hash256* hashptrFlipX,
Hash256* hashptrFlipY,
Hash256* hashptrFlipPlus1,
Hash256* hashptrFlipMinus1,
int& quality) {
if (numRows < MIN_HASHABLE_DIM || numCols < MIN_HASHABLE_DIM) {
if (hashptrOriginal != nullptr) {
hashptrOriginal->clear();
}
if (hashptrRotate90 != nullptr) {
hashptrRotate90->clear();
}
if (hashptrRotate180 != nullptr) {
hashptrRotate180->clear();
}
if (hashptrRotate270 != nullptr) {
hashptrRotate270->clear();
}
if (hashptrFlipX != nullptr) {
hashptrFlipX->clear();
}
if (hashptrFlipY != nullptr) {
hashptrFlipY->clear();
}
if (hashptrFlipPlus1 != nullptr) {
hashptrFlipPlus1->clear();
}
if (hashptrFlipMinus1 != nullptr) {
hashptrFlipMinus1->clear();
}
quality = 0;
return true;
}
// Downsample (blur and decimate)
int windowSizeAlongRows =
facebook::pdq::downscaling::computeJaroszFilterWindowSize(numCols, 64);
int windowSizeAlongCols =
facebook::pdq::downscaling::computeJaroszFilterWindowSize(numRows, 64);
facebook::pdq::downscaling::jaroszFilterFloat(
fullBuffer1,
fullBuffer2,
numRows,
numCols,
windowSizeAlongRows,
windowSizeAlongCols,
PDQ_NUM_JAROSZ_XY_PASSES);
facebook::pdq::downscaling::decimateFloat(
fullBuffer1, numRows, numCols, &buffer64x64[0][0], 64, 64);
// Quality metric. Reuse the 64x64 image-domain downsample
// since we already have it.
quality = pdqImageDomainQualityMetric(buffer64x64);
// 2D DCT
dct64To16(buffer64x64, buffer16x64, buffer16x16);
// Output bits
if (hashptrOriginal != nullptr) {
pdqBuffer16x16ToBits(buffer16x16, hashptrOriginal);
}
if (hashptrRotate90 != nullptr) {
dct16OriginalToRotate90(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrRotate90);
}
if (hashptrRotate180 != nullptr) {
dct16OriginalToRotate180(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrRotate180);
}
if (hashptrRotate270 != nullptr) {
dct16OriginalToRotate270(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrRotate270);
}
if (hashptrFlipX != nullptr) {
dct16OriginalToFlipX(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrFlipX);
}
if (hashptrFlipY != nullptr) {
dct16OriginalToFlipY(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrFlipY);
}
if (hashptrFlipPlus1 != nullptr) {
dct16OriginalToFlipPlus1(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrFlipPlus1);
}
if (hashptrFlipMinus1 != nullptr) {
dct16OriginalToFlipMinus1(buffer16x16, buffer16x16Aux);
pdqBuffer16x16ToBits(buffer16x16Aux, hashptrFlipMinus1);
}
return true;
}
// ----------------------------------------------------------------
// This is all heuristic (see the PDQ hashing doc). Quantization matters since
// we want to count *significant* gradients, not just the some of many small
// ones. The constants are all manually selected, and tuned as described in the
// document.
int pdqImageDomainQualityMetric(float buffer64x64[64][64]) {
int gradient_sum = 0;
for (int i = 0; i < 63; i++) {
for (int j = 0; j < 64; j++) {
float u = buffer64x64[i][j];
float v = buffer64x64[i + 1][j];
int d = ((u - v) * 100) / 255;
gradient_sum += (int)std::abs(d);
}
}
for (int i = 0; i < 64; i++) {
for (int j = 0; j < 63; j++) {
float u = buffer64x64[i][j];
float v = buffer64x64[i][j + 1];
int d = ((u - v) * 100) / 255;
gradient_sum += (int)std::abs(d);
}
}
// Heuristic scaling factor.
int quality = gradient_sum / 90;
if (quality > 100) {
quality = 100;
}
return quality;
}
// ----------------------------------------------------------------
// Full 64x64 to 64x64 can be optimized e.g. the Lee algorithm. But here we
// only want slots (1-16)x(1-16) of the full 64x64 output. Careful experiments
// showed that using Lee along all 64 slots in one dimension, then Lee along 16
// slots in the second, followed by extracting slots 1-16 of the output, was
// actually slower than the current implementation which is completely
// non-clever/non-Lee but computes only what is needed.
void dct64To16(float A[64][64], float T[16][64], float B[16][16]) {
// DCT matrix:
// * numRows is 16.
// * numCols is 64.
// * Storage is row-major
// * Element i,j at row i column j is at offset i*16+j.
float* D = fill_dct_matrix_64_cached();
// B = D A Dt
// B = (D A) Dt
// with intermediate T = D A
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 64; j++) {
float* pd = &D[i * 64]; // ith row
float* pa = &A[0][j];
float sumk = 0.0;
for (int k = 0; k < 64;) {
sumk += pd[k++] * pa[0];
sumk += pd[k++] * pa[1 << 6];
sumk += pd[k++] * pa[2 << 6];
sumk += pd[k++] * pa[3 << 6];
sumk += pd[k++] * pa[4 << 6];
sumk += pd[k++] * pa[5 << 6];
sumk += pd[k++] * pa[6 << 6];
sumk += pd[k++] * pa[7 << 6];
sumk += pd[k++] * pa[8 << 6];
sumk += pd[k++] * pa[9 << 6];
sumk += pd[k++] * pa[10 << 6];
sumk += pd[k++] * pa[11 << 6];
sumk += pd[k++] * pa[12 << 6];
sumk += pd[k++] * pa[13 << 6];
sumk += pd[k++] * pa[14 << 6];
sumk += pd[k++] * pa[15 << 6];
pa += 1024;
}
T[i][j] = sumk;
}
}
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
float sumk = 0.0;
float* pd = &D[j * 64]; // jth row
float* pt = &T[i][0];
for (int k = 0; k < 64;) {
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
sumk += pt[k] * pd[k];
k++;
}
B[i][j] = sumk;
}
}
}
// ----------------------------------------------------------------
// orig rot90 rot180 rot270
// noxpose xpose noxpose xpose
// + + + + - + - + + - + - - - - -
// + + + + - + - + - + - + + + + +
// + + + + - + - + + - + - - - - -
// + + + + - + - + - + - + + + + +
//
// flipx flipy flipplus flipminus
// noxpose noxpose xpose xpose
// - - - - - + - + + + + + + - + -
// + + + + - + - + + + + + - + - +
// - - - - - + - + + + + + + - + -
// + + + + - + - + + + + + - + - +
// ----------------------------------------------------------------
void dct16OriginalToRotate90(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if (j & 1) {
B[j][i] = A[i][j];
} else {
B[j][i] = -A[i][j];
}
}
}
}
void dct16OriginalToRotate180(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if ((i + j) & 1) {
B[i][j] = -A[i][j];
} else {
B[i][j] = A[i][j];
}
}
}
}
void dct16OriginalToRotate270(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if (i & 1) {
B[j][i] = A[i][j];
} else {
B[j][i] = -A[i][j];
}
}
}
}
void dct16OriginalToFlipX(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if (i & 1) {
B[i][j] = A[i][j];
} else {
B[i][j] = -A[i][j];
}
}
}
}
void dct16OriginalToFlipY(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if (j & 1) {
B[i][j] = A[i][j];
} else {
B[i][j] = -A[i][j];
}
}
}
}
void dct16OriginalToFlipPlus1(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
B[j][i] = A[i][j];
}
}
}
void dct16OriginalToFlipMinus1(float A[16][16], float B[16][16]) {
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if ((i + j) & 1) {
B[j][i] = -A[i][j];
} else {
B[j][i] = A[i][j];
}
}
}
}
// ----------------------------------------------------------------
// Each bit of the 16x16 output hash is for whether the given frequency
// component is greater than the median frequency component or not.
void pdqBuffer16x16ToBits(float dctOutput16x16[16][16], Hash256* hashptr) {
float dct_median = torben(&dctOutput16x16[0][0], 16 * 16);
hashptr->clear();
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 16; j++) {
if (dctOutput16x16[i][j] > dct_median) {
hashptr->setBit(i * 16 + j);
}
}
}
}
// ----------------------------------------------------------------
// See comments on dct64To16. Input is (0..63)x(0..63); output is
// (1..16)x(1..16) with the latter indexed as (0..15)x(0..15).
//
// * numRows is 16.
// * numCols is 64.
// * Storage is row-major
// * Element i,j at row i column j is at offset i*16+j.
static float* fill_dct_matrix_64_cached() {
static bool initialized = false;
static float buffer[16 * 64];
if (!initialized) {
const float matrix_scale_factor = std::sqrt(2.0 / 64.0);
for (int i = 0; i < 16; i++) {
for (int j = 0; j < 64; j++) {
buffer[i * 64 + j] = matrix_scale_factor *
cos((M_PI / 2 / 64.0) * (i + 1) * (2 * j + 1));
}
}
initialized = false;
}
return &buffer[0];
}
} // namespace hashing
} // namespace pdq
} // namespace facebook