pdq/python/pdqhashing/hasher/pdq_hasher.py (444 lines of code) (raw):
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import math
import time
from typing import List
from PIL import Image
from pdqhashing.types.containers import HashAndQuality, HashesAndQuality
from pdqhashing.types.hash256 import Hash256
from pdqhashing.utils.matrix import MatrixUtil
class PDQHasher:
""" The only class state is the DCT matrix, so this class may either be
instantiated once per image, or instantiated once and used for all images;
the latter will be slightly faster as the DCT matrix will not need to be
recomputed once per image. Methods are threadsafe."""
# From Wikipedia: standard RGB to luminance (the 'Y' in 'YUV').
LUMA_FROM_R_COEFF = float(0.299)
LUMA_FROM_G_COEFF = float(0.587)
LUMA_FROM_B_COEFF = float(0.114)
DCT_MATRIX_SCALE_FACTOR = float(math.sqrt(2.0 / 64.0))
# Wojciech Jarosz 'Fast Image Convolutions' ACM SIGGRAPH 2001:
# X,Y,X,Y passes of 1-D box filters produces a 2D tent filter.
PDQ_NUM_JAROSZ_XY_PASSES = 2
# Since PDQ uses 64x64 blocks, 1/64th of the image height/width
# respectively is a full block. But since we use two passes, we want half
# that window size per pass. Example: 1024x1024 full-resolution input. PDQ
# downsamples to 64x64. Each 16x16 block of the input produces a single
# downsample pixel. X,Y passes with window size 8 (= 1024/128) average
# pixels with 8x8 neighbors. The second X,Y pair of 1D box-filter passes
# accumulate data from all 16x16.
PDQ_JAROSZ_WINDOW_SIZE_DIVISOR = 128
# Flags for which dihedral-transforms are desired to be produced.
PDQ_DO_DIH_ORIGINAL = 0x01
PDQ_DO_DIH_ROTATE_90 = 0x02
PDQ_DO_DIH_ROTATE_180 = 0x04
PDQ_DO_DIH_ROTATE_270 = 0x08
PDQ_DO_DIH_FLIPX = 0x10
PDQ_DO_DIH_FLIPY = 0x20
PDQ_DO_DIH_FLIP_PLUS1 = 0x40
PDQ_DO_DIH_FLIP_MINUS1 = 0x80
PDQ_DO_DIH_ALL = 0xFF
DCT_matrix: List[List[float]] = []
def compute_dct_matrix(self):
matrix_scale_factor = math.sqrt(2.0 / 64.0)
d = [0] * 16
for i in range(0, 16):
di = [0] * 64
for j in range(0, 64):
di[j] = math.cos((math.pi / 2 / 64.0) * (i + 1) * (2 * j + 1))
d[i] = di
return d
def __init__(self) -> None:
""" Christoph Zauner 'Implementation and Benchmarking of Perceptual
Image Hash Functions' 2010
See also comments on dct64To16. Input is (0..63)x(0..63); output is
(1..16)x(1..16) with the latter indexed as (0..15)x(0..15).
Returns 16x64 matrix."""
self.DCT_matrix = self.compute_dct_matrix()
class HashingMetadata:
def __init__(self) -> None:
self.readSeconds = float(-1.0)
self.hashSeconds = float(-1.0)
self.imageHeightTimesWidth = -1
def fromFile(self, filepath, hashingMetadata=None):
t1 = time.time()
img = None
try:
img = Image.open(filepath)
# resizing the image proportionally to max 512px width and max 512px height
img.thumbnail((512, 512))
except IOError as e:
raise e
t2 = time.time()
readSeconds = t2 - t1
numCols, numRows = img.size
buffer1 = MatrixUtil.allocateMatrixAsRowMajorArray(numRows, numCols)
buffer2 = MatrixUtil.allocateMatrixAsRowMajorArray(numRows, numCols)
buffer64x64 = MatrixUtil.allocateMatrix(64, 64)
buffer16x64 = MatrixUtil.allocateMatrix(16, 64)
buffer16x16 = MatrixUtil.allocateMatrix(16, 16)
t1 = time.time()
rv = self.fromImage(
img, buffer1, buffer2, buffer64x64, buffer16x64, buffer16x16
)
t2 = time.time()
if hashingMetadata is not None:
hashingMetadata.imageHeightTimesWidth = numRows * numCols
hashingMetadata.readSeconds = readSeconds
hashingMetadata.hashSeconds = t2 - t1
return rv
def fromBufferedImage(self, img_bytes):
try:
img = Image.open(img_bytes)
# resizing the image proportionally to max 512px width and max 512px height
img.thumbnail((512, 512))
except IOError as e:
raise e
numCols, numRows = img.size
buffer1 = MatrixUtil.allocateMatrixAsRowMajorArray(numRows, numCols)
buffer2 = MatrixUtil.allocateMatrixAsRowMajorArray(numRows, numCols)
buffer64x64 = MatrixUtil.allocateMatrix(64, 64)
buffer16x64 = MatrixUtil.allocateMatrix(16, 64)
buffer16x16 = MatrixUtil.allocateMatrix(16, 16)
return self.fromImage(
img, buffer1, buffer2, buffer64x64, buffer16x64, buffer16x16
)
def fromImage(self, img, buffer1, buffer2, buffer64x64, buffer16x64, buffer16x16):
numCols, numRows = img.size
self.fillFloatLumaFromBufferImage(img, buffer1)
return self.pdqHash256FromFloatLuma(
buffer1, buffer2, numRows, numCols, buffer64x64, buffer16x64, buffer16x16
)
def fillFloatLumaFromBufferImage(self, img, luma):
numCols, numRows = img.size
rgb_image = img.convert("RGB")
numCols, numRows = img.size
for i in range(numRows):
for j in range(numCols):
r, g, b = rgb_image.getpixel((j, i))
luma[i * numCols + j] = (
self.LUMA_FROM_R_COEFF * r
+ self.LUMA_FROM_G_COEFF * g
+ self.LUMA_FROM_B_COEFF * b
)
def pdqHash256FromFloatLuma(
self,
fullBuffer1,
fullBuffer2,
numRows,
numCols,
buffer64x64,
buffer16x64,
buffer16x16,
):
windowSizeAlongRows = self.computeJaroszFilterWindowSize(numCols)
windowSizeAlongCols = self.computeJaroszFilterWindowSize(numRows)
self.jaroszFilterFloat(
fullBuffer1,
fullBuffer2,
numRows,
numCols,
windowSizeAlongRows,
windowSizeAlongCols,
self.PDQ_NUM_JAROSZ_XY_PASSES,
)
self.decimateFloat(fullBuffer1, numRows, numCols, buffer64x64)
quality = self.computePDQImageDomainQualityMetric(buffer64x64)
self.dct64To16(buffer64x64, buffer16x64, buffer16x16)
hash = self.pdqBuffer16x16ToBits(buffer16x16)
return HashAndQuality(hash, quality)
def dihedralFromFile(self, filename, hashingMetadata, dihFlags):
t1 = time.time()
img = None
try:
img = Image.open(filename)
except IOError as e:
raise e
t2 = time.time()
hashingMetadata.readSeconds = t2 - t1
numCols, numRows = img.size
hashingMetadata.imageHeightTimesWidth = numRows * numCols
buffer1 = MatrixUtil.allocateMatrixAsRowMajorArray(numRows, numCols)
buffer2 = MatrixUtil.allocateMatrixAsRowMajorArray(numRows, numCols)
buffer64x64 = MatrixUtil.allocateMatrix(64, 64)
buffer16x64 = MatrixUtil.allocateMatrix(16, 64)
buffer16x16 = MatrixUtil.allocateMatrix(16, 16)
buffer16x16Aux = MatrixUtil.allocateMatrix(16, 16)
t1 = time.time()
rv = self.dihedralFromBufferedImage(
img,
buffer1,
buffer2,
buffer64x64,
buffer16x64,
buffer16x16,
buffer16x16Aux,
dihFlags,
)
t2 = time.time()
hashingMetadata.hashSeconds = t2 - t1
return rv
def dihedralFromBufferedImage(
self,
img,
buffer1,
buffer2,
buffer64x64,
buffer16x64,
buffer16x16,
buffer16x16Aux,
dihFlags,
):
numCols, numRows = img.size
self.fillFloatLumaFromBufferImage(img, buffer1)
return self.pdqHash256esFromFloatLuma(
buffer1,
buffer2,
numRows,
numCols,
buffer64x64,
buffer16x64,
buffer16x16,
buffer16x16Aux,
dihFlags,
)
def pdqHash256esFromFloatLuma(
self,
fullBuffer1,
fullBuffer2,
numRows,
numCols,
buffer64x64,
buffer16x64,
buffer16x16,
buffer16x16Aux,
dihFlags,
):
windowSizeAlongRows = self.computeJaroszFilterWindowSize(numCols)
windowSizeAlongCols = self.computeJaroszFilterWindowSize(numRows)
self.jaroszFilterFloat(
fullBuffer1,
fullBuffer2,
numRows,
numCols,
windowSizeAlongRows,
windowSizeAlongCols,
self.PDQ_NUM_JAROSZ_XY_PASSES,
)
self.decimateFloat(fullBuffer1, numRows, numCols, buffer64x64)
quality = self.computePDQImageDomainQualityMetric(buffer64x64)
self.dct64To16(buffer64x64, buffer16x64, buffer16x16)
hash = None
hashRotate90 = None
hashRotate180 = None
hashRotate270 = None
hashFlipX = None
hashFlipY = None
hashFlipPlus1 = None
hashFlipMinus1 = None
if (dihFlags & self.PDQ_DO_DIH_ORIGINAL) != 0:
hash = self.pdqBuffer16x16ToBits(buffer16x16)
if (dihFlags & self.PDQ_DO_DIH_ROTATE_90) != 0:
self.dct16OriginalToRotate90(buffer16x16, buffer16x16Aux)
hashRotate90 = self.pdqBuffer16x16ToBits(buffer16x16Aux)
if (dihFlags & self.PDQ_DO_DIH_ROTATE_180) != 0:
self.dct16OriginalToRotate180(buffer16x16, buffer16x16Aux)
hashRotate180 = self.pdqBuffer16x16ToBits(buffer16x16Aux)
if (dihFlags & self.PDQ_DO_DIH_ROTATE_270) != 0:
self.dct16OriginalToRotate270(buffer16x16, buffer16x16Aux)
hashRotate270 = self.pdqBuffer16x16ToBits(buffer16x16Aux)
if (dihFlags & self.PDQ_DO_DIH_FLIPX) != 0:
self.dct16OriginalToFlipX(buffer16x16, buffer16x16Aux)
hashFlipX = self.pdqBuffer16x16ToBits(buffer16x16Aux)
if (dihFlags & self.PDQ_DO_DIH_FLIPY) != 0:
self.dct16OriginalToFlipY(buffer16x16, buffer16x16Aux)
hashFlipY = self.pdqBuffer16x16ToBits(buffer16x16Aux)
if (dihFlags & self.PDQ_DO_DIH_FLIP_PLUS1) != 0:
self.dct16OriginalToFlipPlus1(buffer16x16, buffer16x16Aux)
hashFlipPlus1 = self.pdqBuffer16x16ToBits(buffer16x16Aux)
if (dihFlags & self.PDQ_DO_DIH_FLIP_MINUS1) != 0:
self.dct16OriginalToFlipMinus1(buffer16x16, buffer16x16Aux)
hashFlipMinus1 = self.pdqBuffer16x16ToBits(buffer16x16Aux)
return HashesAndQuality(
hash,
hashRotate90,
hashRotate180,
hashRotate270,
hashFlipX,
hashFlipY,
hashFlipPlus1,
hashFlipMinus1,
quality,
)
@classmethod
def decimateFloat(
cls, in_, inNumRows, inNumCols, out # numRows x numCols in row-major order
):
for i in range(64):
ini = int(((i + 0.5) * inNumRows) / 64)
for j in range(64):
inj = int(((j + 0.5) * inNumCols) / 64)
out[i][j] = in_[ini * inNumCols + inj]
@classmethod
def computePDQImageDomainQualityMetric(cls, buffer64x64):
""" This is all heuristic (see the PDQ hashing doc). Quantization
matters since we want to count *significant* gradients, not just the
some of many small ones. The constants are all manually selected, and
tuned as described in the document.
"""
gradientSum = 0
for i in range(63):
for j in range(64):
u = buffer64x64[i][j]
v = buffer64x64[i + 1][j]
d = int(((u - v) * 100) / 255)
gradientSum += int(abs(d))
for i in range(64):
for j in range(63):
u = buffer64x64[i][j]
v = buffer64x64[i][j + 1]
d = int(((u - v) * 100) / 255)
gradientSum += int(abs(d))
quality = int(gradientSum / 90)
if quality > 100:
quality = 100
return quality
def dct64To16(self, A, T, B):
""" Full 64x64 to 64x64 can be optimized e.g. the Lee algorithm.
But here we only want slots (1-16)x(1-16) of the full 64x64 output.
Careful experiments showed that using Lee along all 64 slots in one
dimension, then Lee along 16 slots in the second, followed by
extracting slots 1-16 of the output, was actually slower than the
current implementation which is completely non-clever/non-Lee but
computes only what is needed."""
D = self.DCT_matrix
# B = D A Dt
# B = (D A) Dt ; T = D A
# T is 16x64;
# T = D A
# Tij = sum {k} Dik Akj
T = [0] * 16
for i in range(0, 16):
ti = [0] * 64
for j in range(0, 64):
tij = 0.0
for k in range(0, 64):
tij += D[i][k] * A[k][j]
ti[j] = tij
T[i] = ti
# B = T Dt
# Bij = sum {k} Tik Djk
for i in range(16):
for j in range(16):
sumk = float(0.0)
for k in range(64):
sumk += T[i][k] * D[j][k]
B[i][j] = sumk
"""
-------------------------------------
orig rot90 rot180 rot270
noxpose xpose noxpose xpose
+ + + + - + - + + - + - - - - -
+ + + + - + - + - + - + + + + +
+ + + + - + - + + - + - - - - -
+ + + + - + - + - + - + + + + +
flipx flipy flipplus flipminus
noxpose noxpose xpose xpose
- - - - - + - + + + + + + - + -
+ + + + - + - + + + + + - + - +
- - - - - + - + + + + + + - + -
+ + + + - + - + + + + + - + - +
-------------------------------------
"""
def dct16OriginalToRotate90(self, A, B):
for i in range(16):
for j in range(16):
if (j & 1) != 0:
B[j][i] = A[i][j]
else:
B[j][i] = -A[i][j]
def dct16OriginalToRotate180(self, A, B):
for i in range(16):
for j in range(16):
if ((i + j) & 1) != 0:
B[i][j] = -A[i][j]
else:
B[i][j] = A[i][j]
def dct16OriginalToRotate270(self, A, B):
for i in range(16):
for j in range(16):
if (i & 1) != 0:
B[j][i] = A[i][j]
else:
B[j][i] = -A[i][j]
def dct16OriginalToFlipX(self, A, B):
i = 0
for i in range(16):
for j in range(16):
if (i & 1) != 0:
B[i][j] = A[i][j]
else:
B[i][j] = -A[i][j]
def dct16OriginalToFlipY(self, A, B):
for i in range(16):
for j in range(16):
if (j & 1) != 0:
B[i][j] = A[i][j]
else:
B[i][j] = -A[i][j]
def dct16OriginalToFlipPlus1(self, A, B):
for i in range(16):
for j in range(16):
B[j][i] = A[i][j]
def dct16OriginalToFlipMinus1(self, A, B):
for i in range(16):
for j in range(16):
if ((i + j) & 1) != 0:
B[j][i] = -A[i][j]
else:
B[j][i] = A[i][j]
def pdqBuffer16x16ToBits(self, dctOutput16x16):
"""
Each bit of the 16x16 output hash is for whether the given frequency
component is greater than the median frequency component or not.
"""
hash = Hash256()
dctMedian = MatrixUtil.torben(dctOutput16x16, 16, 16)
for i in range(16):
for j in range(16):
if dctOutput16x16[i][j] > dctMedian:
hash.setBit(i * 16 + j)
return hash
@classmethod
def computeJaroszFilterWindowSize(cls, dimension):
""" Round up. See comments at top of file for details. """
return int(
(dimension + cls.PDQ_JAROSZ_WINDOW_SIZE_DIVISOR - 1)
/ cls.PDQ_JAROSZ_WINDOW_SIZE_DIVISOR
)
@classmethod
def jaroszFilterFloat(
cls,
buffer1,
buffer2,
numRows,
numCols,
windowSizeAlongRows,
windowSizeAlongCols,
nreps,
):
for _i in range(nreps):
cls.boxAlongRowsFloat(
buffer1, buffer2, numRows, numCols, windowSizeAlongRows
)
cls.boxAlongColsFloat(
buffer2, buffer1, numRows, numCols, windowSizeAlongCols
)
"""
----------------------------------------------------------------
# 7 and 4
#
# 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
#
# . PHASE 1: ONLY ADD, NO WRITE, NO SUBTRACT
# . .
# . . .
#
# 0 * . . . PHASE 2: ADD, WRITE, WITH NO SUBTRACTS
# 1 . * . . .
# 2 . . * . . .
# 3 . . . * . . .
#
# 4 . . . * . . . PHASE 3: WRITES WITH ADD & SUBTRACT
# 5 . . . * . . .
# 6 . . . * . . .
# 7 . . . * . . .
# 8 . . . * . . .
# 9 . . . * . . .
# 10 . . . * . . .
# 11 . . . * . . .
# 12 . . . * . . .
#
# 13 . . . * . . PHASE 4: FINAL WRITES WITH NO ADDS
# 14 . . . * .
# 15 . . . *
#
# = 0 = 0 PHASE 1
# = 0+1 = 1
# = 0+1+2 = 3
#
# out[ 0] = 0+1+2+3 = 6 PHASE 2
# out[ 1] = 0+1+2+3+4 = 10
# out[ 2] = 0+1+2+3+4+5 = 15
# out[ 3] = 0+1+2+3+4+5+6 = 21
#
# out[ 4] = 1+2+3+4+5+6+7 = 28 PHASE 3
# out[ 5] = 2+3+4+5+6+7+8 = 35
# out[ 6] = 3+4+5+6+7+8+9 = 42
# out[ 7] = 4+5+6+7+8+9+10 = 49
# out[ 8] = 5+6+7+8+9+10+11 = 56
# out[ 9] = 6+7+8+9+10+11+12 = 63
# out[10] = 7+8+9+10+11+12+13 = 70
# out[11] = 8+9+10+11+12+13+14 = 77
# out[12] = 9+10+11+12+13+14+15 = 84
#
# out[13] = 10+11+12+13+14+15 = 75 PHASE 4
# out[14] = 11+12+13+14+15 = 65
# out[15] = 12+13+14+15 = 54
# ----------------------------------------------------------------
# ----------------------------------------------------------------
# 8 and 5
#
# 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
#
# . PHASE 1: ONLY ADD, NO WRITE, NO SUBTRACT
# . .
# . . .
# . . . .
#
# 0 * . . . . PHASE 2: ADD, WRITE, WITH NO SUBTRACTS
# 1 . * . . . .
# 2 . . * . . . .
# 3 . . . * . . . .
#
# 4 . . . * . . . . PHASE 3: WRITES WITH ADD & SUBTRACT
# 5 . . . * . . . .
# 6 . . . * . . . .
# 7 . . . * . . . .
# 8 . . . * . . . .
# 9 . . . * . . . .
# 10 . . . * . . . .
# 11 . . . * . . . .
#
# 12 . . . * . . . PHASE 4: FINAL WRITES WITH NO ADDS
# 13 . . . * . .
# 14 . . . * .
# 15 . . . *
#
# = 0 = 0 PHASE 1
# = 0+1 = 1
# = 0+1+2 = 3
# = 0+1+2+3 = 6
#
# out[ 0] = 0+1+2+3+4 = 10
# out[ 1] = 0+1+2+3+4+5 = 15
# out[ 2] = 0+1+2+3+4+5+6 = 21
# out[ 3] = 0+1+2+3+4+5+6+7 = 28
#
# out[ 4] = 1+2+3+4+5+6+7+8 = 36 PHASE 3
# out[ 5] = 2+3+4+5+6+7+8+9 = 44
# out[ 6] = 3+4+5+6+7+8+9+10 = 52
# out[ 7] = 4+5+6+7+8+9+10+11 = 60
# out[ 8] = 5+6+7+8+9+10+11+12 = 68
# out[ 9] = 6+7+8+9+10+11+12+13 = 76
# out[10] = 7+8+9+10+11+12+13+14 = 84
# out[11] = 8+9+10+11+12+13+14+15 = 92
#
# out[12] = 9+10+11+12+13+14+15 = 84 PHASE 4
# out[13] = 10+11+12+13+14+15 = 75 PHASE 4
# out[14] = 11+12+13+14+15 = 65
# out[15] = 12+13+14+15 = 54
----------------------------------------------------------------
"""
@classmethod
def box1DFloat(
cls,
invec,
inStartOffset,
outvec,
outStartOffset,
vectorLength,
stride,
fullWindowSize,
):
halfWindowSize = int((fullWindowSize + 2) / 2) # 7->4, 8->5
phase_1_nreps = int(halfWindowSize - 1)
phase_2_nreps = int(fullWindowSize - halfWindowSize + 1)
phase_3_nreps = int(vectorLength - fullWindowSize)
phase_4_nreps = int(halfWindowSize - 1)
li = 0 # Index of left edge of read window, for subtracts
ri = 0 # Index of right edge of read windows, for adds
oi = 0 # Index into output vector
sum = float(0.0)
currentWindowSize = 0
# PHASE 1: ACCUMULATE FIRST SUM NO WRITES
i = 0
while i < phase_1_nreps:
sum += invec[inStartOffset + ri]
currentWindowSize += 1
ri += stride
i += 1
# PHASE 2: INITIAL WRITES WITH SMALL WINDOW
i = 0
while i < phase_2_nreps:
sum += invec[inStartOffset + ri]
currentWindowSize += 1
outvec[outStartOffset + oi] = sum / currentWindowSize
ri += stride
oi += stride
i += 1
# PHASE 3: WRITES WITH FULL WINDOW
i = 0
while i < phase_3_nreps:
sum += invec[inStartOffset + ri]
sum -= invec[inStartOffset + li]
outvec[outStartOffset + oi] = sum / currentWindowSize
li += stride
ri += stride
oi += stride
i += 1
# PHASE 4: FINAL WRITES WITH SMALL WINDOW
i = 0
while i < phase_4_nreps:
sum -= invec[inStartOffset + li]
currentWindowSize -= 1
outvec[outStartOffset + oi] = sum / currentWindowSize
li += stride
oi += stride
i += 1
@classmethod
def boxAlongRowsFloat(cls, input, output, numRows, numCols, windowSize):
"""
input - matrix as numRows x numCols in row-major order
output - matrix as numRows x numCols in row-major order
"""
i = 0
while i < numRows:
cls.box1DFloat(
input,
i * numCols,
output,
i * numCols,
numCols,
1, # stride
windowSize,
)
i += 1
@classmethod
def boxAlongColsFloat(cls, input, output, numRows, numCols, windowSize):
"""
input - matrix as numRows x numCols in row-major order
out - matrix as numRows x numCols in row-major order
"""
j = 0
while j < numCols:
cls.box1DFloat(input, j, output, j, numRows, numCols, windowSize)
j += 1