pdq/python/pypdq.cpp (226 lines of code) (raw):

// ================================================================ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved // ================================================================ /* * Wraps the PDQ hashing algorithm so that it can be accessed from Python. * See `pdq_test.py` for example usage from Python. */ #include "common/encode/Base64.h" #include "common/logging/logging.h" #include "pdq/common/pdqhashtypes.h" #include "pdq/io/pdqio.h" #include <Python.h> #include <folly/CppAttributes.h> #include <folly/Format.h> #include <istream> namespace facebook { namespace pdq { namespace python { PyObject* FOLLY_NULLABLE to_py_list(hashing::Hash256& hash) { PyObject* hash_obj = PyList_New(hash.getNumWords()); for (auto i = 0; i < hash.getNumWords(); i++) { #if PY_MAJOR_VERSION >= 3 PyList_SetItem(hash_obj, i, PyLong_FromLong(hash.w[i])); #else PyList_SetItem(hash_obj, i, PyInt_FromLong(hash.w[i])); #endif // PY_MAJOR_VERSION >= 3 } return hash_obj; } bool from_py_list(PyObject* l, hashing::Hash256& hash) { if (!PyList_Check(l)) { PyErr_SetString(PyExc_ValueError, "invalid argument; expected a list"); return false; } if (PyList_Size(l) != hash.getNumWords()) { auto fmt_string = folly::format( "got invalid length {}; hashes must be {} integers", PyList_Size(l), hash.getNumWords()); PyErr_SetString(PyExc_ValueError, fmt_string.str().c_str()); return false; } for (auto i = 0; i < hash.getNumWords(); i++) { #if PY_MAJOR_VERSION >= 3 hash.w[i] = PyLong_AsLong(PyList_GetItem(l, i)); #else hash.w[i] = PyInt_AsLong(PyList_GetItem(l, i)); #endif // PY_MAJOR_VERSION >= 3 } PyObject* exception = PyErr_Occurred(); if (exception != nullptr) { auto fmt_string = folly::format( "error parsing pdq hash: {}", PyUnicode_AsUnicode(PyObject_Repr(exception))); PyErr_SetString(PyExc_ValueError, fmt_string.str().c_str()); return false; } return true; } PyObject* FOLLY_NULLABLE py_get_hash(PyObject* /* unused */, PyObject* args) { const char* file_path; hashing::Hash256 hash; int quality; if (!PyArg_ParseTuple(args, "s", &file_path)) { auto fmt_string = folly::format( "invalid arguments; expected a file path, got {}", PyUnicode_AsUnicode(PyObject_Repr(args))); PyErr_SetString(PyExc_ValueError, fmt_string.str().c_str()); return nullptr; } // Gets the file pointer. FILE* fp; fp = fopen(file_path, "rb"); if (!fp) { PyErr_SetString( #if PY_MAJOR_VERSION >= 3 PyExc_FileNotFoundError, #else PyExc_IOError, #endif folly::format("file not found: {}", file_path).str().c_str()); return nullptr; } // Gets the PDQ hash and quality. try { pdqHash256FromFile(file_path, fp, hash, quality); } catch (const std::exception& e) { PyErr_SetString( PyExc_RuntimeError, folly::format("error getting hash: {}", e.what()).str().c_str()); fclose(fp); return nullptr; } fclose(fp); PyObject* result = PyTuple_New(2); PyTuple_SetItem(result, 0, to_py_list(hash)); #if PY_MAJOR_VERSION >= 3 PyTuple_SetItem(result, 1, PyLong_FromLong(quality)); #else PyTuple_SetItem(result, 1, PyInt_FromLong(quality)); #endif // PY_MAJOR_VERSION >= 3 return result; } PyObject* FOLLY_NULLABLE py_get_all_hashes(PyObject* /* unused */, PyObject* args) { const char* file_path; hashing::Hash256 hash, hash_rotate_90, hash_rotate_180, hash_rotate_270, hash_left_right, hash_top_bottom, hash_transpose, hash_transverse; int quality; if (!PyArg_ParseTuple(args, "s", &file_path)) { auto fmt_string = folly::format( "invalid arguments; expected a file path, got {}", PyUnicode_AsUnicode(PyObject_Repr(args))); PyErr_SetString(PyExc_ValueError, fmt_string.str().c_str()); return nullptr; } // Gets the file pointer. FILE* fp; fp = fopen(file_path, "rb"); if (!fp) { PyErr_SetString( #if PY_MAJOR_VERSION >= 3 PyExc_FileNotFoundError, #else PyExc_IOError, #endif folly::format("file not found: {}", file_path).str().c_str()); return nullptr; } // Gets the PDQ hash and quality. try { pdqDihedralHash256esFromFile( file_path, fp, &hash, &hash_rotate_90, &hash_rotate_180, &hash_rotate_270, &hash_left_right, &hash_top_bottom, &hash_transpose, &hash_transverse, quality); } catch (const std::exception& e) { PyErr_SetString( PyExc_RuntimeError, folly::format("error getting hash: {}", e.what()).str().c_str()); fclose(fp); return nullptr; } fclose(fp); PyObject* result = PyTuple_New(9); PyTuple_SetItem(result, 0, to_py_list(hash)); PyTuple_SetItem(result, 1, to_py_list(hash_rotate_90)); PyTuple_SetItem(result, 2, to_py_list(hash_rotate_180)); PyTuple_SetItem(result, 3, to_py_list(hash_rotate_270)); PyTuple_SetItem(result, 4, to_py_list(hash_left_right)); PyTuple_SetItem(result, 5, to_py_list(hash_top_bottom)); PyTuple_SetItem(result, 6, to_py_list(hash_transpose)); PyTuple_SetItem(result, 7, to_py_list(hash_transverse)); #if PY_MAJOR_VERSION >= 3 PyTuple_SetItem(result, 8, PyLong_FromLong(quality)); #else PyTuple_SetItem(result, 8, PyInt_FromLong(quality)); #endif // PY_MAJOR_VERSION >= 3 return result; } PyObject* FOLLY_NULLABLE py_distance(PyObject* /* unused */, PyObject* args) { PyObject *a_list, *b_list; if (!PyArg_ParseTuple(args, "OO", &a_list, &b_list)) { auto fmt_string = folly::format( "invalid arguments; expected two lists, got {}", PyUnicode_AsUnicode(PyObject_Repr(args))); PyErr_SetString(PyExc_ValueError, fmt_string.str().c_str()); return nullptr; } hashing::Hash256 a, b; if (!from_py_list(a_list, a) || !from_py_list(b_list, b)) { return nullptr; } int result = a.hammingDistance(b); #if PY_MAJOR_VERSION >= 3 return PyLong_FromLong(result); #else return PyInt_FromLong(result); #endif // PY_MAJOR_VERSION >= 3 } PyObject* FOLLY_NULLABLE py_norm(PyObject* /* unused */, PyObject* args) { PyObject* a_list; if (!PyArg_ParseTuple(args, "O", &a_list)) { auto fmt_string = folly::format( "invalid arguments; expected a list, got {}", PyUnicode_AsUnicode(PyObject_Repr(args))); PyErr_SetString(PyExc_ValueError, fmt_string.str().c_str()); return nullptr; } hashing::Hash256 a; if (!from_py_list(a_list, a)) { return nullptr; } int result = a.hammingNorm(); #if PY_MAJOR_VERSION >= 3 return PyLong_FromLong(result); #else return PyInt_FromLong(result); #endif // PY_MAJOR_VERSION >= 3 } static PyMethodDef _PDQMethods[] = { {"get_hash", py_get_hash, METH_VARARGS, "Gets PDQ hash from a given file"}, {"get_all_hashes", py_get_all_hashes, METH_VARARGS, "Gets all PDQ hashes from a given file"}, {"distance", py_distance, METH_VARARGS, "Gets hashes Hamming distance"}, {"norm", py_norm, METH_VARARGS, "Gets hash Hamming norm"}, {nullptr, nullptr, 0, nullptr}}; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef _PDQDef = { PyModuleDef_HEAD_INIT, "pdq", "PDQ hashing function callers", -1, _PDQMethods, }; #endif // PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3 PyMODINIT_FUNC PyInit_pdq() { return PyModule_Create(&_PDQDef); } #else PyMODINIT_FUNC initpdq() { Py_InitModule("pdq", _PDQMethods); } #endif // PY_MAJOR_VERSION >= 3 } // namespace python } // namespace pdq } // namespace facebook