core/maxframe/lib/mmh3_src/mmh3module.cpp (308 lines of code) (raw):

//----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. mmh3 Python module was written by Hajime Senuma, // and is also placed in the public domain. // The authors hereby disclaim copyright to these source codes. // To handle 64-bit data; see https://docs.python.org/2.7/c-api/arg.html #ifndef PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN #endif #include <stdio.h> #include <string.h> #include <Python.h> #include "MurmurHash3.h" #if defined(_MSC_VER) typedef signed __int8 int8_t; typedef signed __int32 int32_t; typedef signed __int64 int64_t; typedef unsigned __int8 uint8_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; // Other compilers #else // defined(_MSC_VER) #include <stdint.h> #endif // !defined(_MSC_VER) static int _GetMemoryViewDataAndSize(PyObject *mview, const char **target_str, Py_ssize_t *target_str_len) { Py_buffer *mview_buffer = NULL; if (!PyMemoryView_Check(mview)) { PyErr_Format(PyExc_TypeError, "key must be byte-like object " "or memoryview, not '%.200s'", mview->ob_type->tp_name); return 0; } mview_buffer = PyMemoryView_GET_BUFFER(mview); *target_str = (const char *)mview_buffer->buf; *target_str_len = mview_buffer->len; return 1; } static PyObject * mmh3_hash(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; PyObject *target_mview = NULL; uint32_t seed = 0; int32_t result[1]; long long_result = 0; int is_signed = 1; static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"signed", NULL}; #ifndef _MSC_VER static uint64_t mask[] = {0x0ffffffff, 0xffffffffffffffff}; #endif if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IB", kwlist, &target_str, &target_str_len, &seed, &is_signed)) { if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|IB", kwlist, &target_mview, &seed, &is_signed)) { return NULL; } PyErr_Clear(); Py_INCREF(target_mview); if (!_GetMemoryViewDataAndSize(target_mview, &target_str, &target_str_len)) { Py_DECREF(target_mview); return NULL; } } MurmurHash3_x86_32(target_str, target_str_len, seed, result); if (target_mview) { Py_DECREF(target_mview); } #if defined(_MSC_VER) /* for Windows envs */ long_result = result[0]; if (is_signed == 1) { return PyLong_FromLong(long_result); } else { return PyLong_FromUnsignedLong(long_result); } #else /* for standard envs */ long_result = result[0] & mask[is_signed]; return PyLong_FromLong(long_result); #endif } static PyObject * mmh3_hash_from_buffer(PyObject *self, PyObject *args, PyObject *keywds) { Py_buffer target_buf; Py_buffer *target_buf_ptr; PyObject *target_mview = NULL; uint32_t seed = 0; int32_t result[1]; long long_result = 0; int is_signed = 1; static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"signed", NULL}; #ifndef _MSC_VER static uint64_t mask[] = {0x0ffffffff, 0xffffffffffffffff}; #endif if (!PyArg_ParseTupleAndKeywords(args, keywds, "s*|IB", kwlist, &target_buf, &seed, &is_signed)) { if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|IB", kwlist, &target_mview, &seed, &is_signed)) { return NULL; } PyErr_Clear(); Py_INCREF(target_mview); if (!PyMemoryView_Check(target_mview)) { PyErr_Format(PyExc_TypeError, "key must be byte-like object " "or memoryview, not '%.200s'", target_mview->ob_type->tp_name); Py_DECREF(target_mview); return NULL; } target_buf_ptr = PyMemoryView_GET_BUFFER(target_mview); } else { target_buf_ptr = &target_buf; } MurmurHash3_x86_32(target_buf_ptr->buf, target_buf_ptr->len, seed, result); if (target_mview) { Py_DECREF(target_mview); } #if defined(_MSC_VER) /* for Windows envs */ long_result = result[0]; if (is_signed == 1) { return PyLong_FromLong(long_result); } else { return PyLong_FromUnsignedLong(long_result); } #else /* for standard envs */ long_result = result[0] & mask[is_signed]; return PyLong_FromLong(long_result); #endif } static PyObject * mmh3_hash64(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; PyObject *target_mview = NULL; uint32_t seed = 0; uint64_t result[2]; char x64arch = 1; int is_signed = 1; static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch", (char *)"signed", NULL}; static char *valflag[] = {(char *) "KK", (char *) "LL"}; if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IBB", kwlist, &target_str, &target_str_len, &seed, &x64arch, &is_signed)) { if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|IBB", kwlist, &target_mview, &seed, &x64arch, &is_signed)) { return NULL; } PyErr_Clear(); Py_INCREF(target_mview); if (!_GetMemoryViewDataAndSize(target_mview, &target_str, &target_str_len)) { Py_DECREF(target_mview); return NULL; } } if (x64arch == 1) { MurmurHash3_x64_128(target_str, target_str_len, seed, result); } else { MurmurHash3_x86_128(target_str, target_str_len, seed, result); } if (target_mview) { Py_DECREF(target_mview); } PyObject *retval = Py_BuildValue(valflag[is_signed], result[0], result[1]); return retval; } static PyObject * mmh3_hash128(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str; Py_ssize_t target_str_len; PyObject *target_mview = NULL; uint32_t seed = 0; uint64_t result[2]; char x64arch = 1; char is_signed = 0; static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch", (char *)"signed", NULL}; if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IBB", kwlist, &target_str, &target_str_len, &seed, &x64arch, &is_signed)) { if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|IBB", kwlist, &target_mview, &seed, &x64arch, &is_signed)) { return NULL; } PyErr_Clear(); Py_INCREF(target_mview); if (!_GetMemoryViewDataAndSize(target_mview, &target_str, &target_str_len)) { Py_DECREF(target_mview); return NULL; } } if (x64arch == 1) { MurmurHash3_x64_128(target_str, target_str_len, seed, result); } else { MurmurHash3_x86_128(target_str, target_str_len, seed, result); } if (target_mview) { Py_DECREF(target_mview); } /** * _PyLong_FromByteArray is not a part of official Python/C API * and can be displaced (although it is practically stable). cf. * https://mail.python.org/pipermail/python-list/2006-August/372368.html */ PyObject *retval = _PyLong_FromByteArray((unsigned char *)result, 16, 1, is_signed); return retval; } static PyObject * mmh3_hash_bytes(PyObject *self, PyObject *args, PyObject *keywds) { const char *target_str = NULL; Py_ssize_t target_str_len; PyObject *target_mview = NULL; uint32_t seed = 0; uint32_t result[4]; char x64arch = 1; static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch", NULL}; if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|IB", kwlist, &target_str, &target_str_len, &seed, &x64arch)) { if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|IB", kwlist, &target_mview, &seed, &x64arch)) { return NULL; } PyErr_Clear(); Py_INCREF(target_mview); if (!_GetMemoryViewDataAndSize(target_mview, &target_str, &target_str_len)) { Py_DECREF(target_mview); return NULL; } } if (x64arch == 1) { MurmurHash3_x64_128(target_str, target_str_len, seed, result); } else { MurmurHash3_x86_128(target_str, target_str_len, seed, result); } if (target_mview) { Py_DECREF(target_mview); } char bytes[16]; memcpy(bytes, result, 16); return PyBytes_FromStringAndSize(bytes, 16); } struct module_state { PyObject *error; }; #if PY_MAJOR_VERSION >= 3 #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) #else #define GETSTATE(m) (&_state) static struct module_state _state; #endif static PyMethodDef Mmh3Methods[] = { {"hash", (PyCFunction)mmh3_hash, METH_VARARGS | METH_KEYWORDS, "hash(key[, seed=0, signed=True]) -> hash value\n Return a 32 bit integer."}, {"hash_from_buffer", (PyCFunction)mmh3_hash_from_buffer, METH_VARARGS | METH_KEYWORDS, "hash_from_buffer(key[, seed=0, signed=True]) -> hash value from a memory buffer\n Return a 32 bit integer. Designed for large memory-views such as numpy arrays."}, {"hash64", (PyCFunction)mmh3_hash64, METH_VARARGS | METH_KEYWORDS, "hash64(key[, seed=0, x64arch=True, signed=True]) -> (hash value 1, hash value 2)\n Return a tuple of two 64 bit integers for a string. Optimized for the x64 bit architecture when x64arch=True, otherwise for x86."}, {"hash128", (PyCFunction)mmh3_hash128, METH_VARARGS | METH_KEYWORDS, "hash128(key[, seed=0, x64arch=True, signed=False]]) -> hash value\n Return a 128 bit long integer. Optimized for the x64 bit architecture when x64arch=True, otherwise for x86."}, {"hash_bytes", (PyCFunction)mmh3_hash_bytes, METH_VARARGS | METH_KEYWORDS, "hash_bytes(key[, seed=0, x64arch=True]) -> bytes\n Return a 128 bit hash value as bytes for a string. Optimized for the x64 bit architecture when x64arch=True, otherwise for the x86."}, {NULL, NULL, 0, NULL} }; #if PY_MAJOR_VERSION >= 3 static int mmh3_traverse(PyObject *m, visitproc visit, void *arg) { Py_VISIT(GETSTATE(m)->error); return 0; } static int mmh3_clear(PyObject *m) { Py_CLEAR(GETSTATE(m)->error); return 0; } static struct PyModuleDef mmh3module = { PyModuleDef_HEAD_INIT, "mmh3", "mmh3 is a Python front-end to MurmurHash3, a fast and robust hash library created by Austin Appleby (http://code.google.com/p/smhasher/).\n Ported by Hajime Senuma <hajime.senuma@gmail.com>\n Try hash('foobar') or hash('foobar', 1984).\n If you find any bugs, please submit an issue via https://github.com/hajimes/mmh3", sizeof(struct module_state), Mmh3Methods, NULL, mmh3_traverse, mmh3_clear, NULL }; #define INITERROR return NULL extern "C" { PyMODINIT_FUNC PyInit_mmh3(void) #else // PY_MAJOR_VERSION >= 3 #define INITERROR return extern "C" { void initmmh3(void) #endif // PY_MAJOR_VERSION >= 3 { #if PY_MAJOR_VERSION >= 3 PyObject *module = PyModule_Create(&mmh3module); #else PyObject *module = Py_InitModule("mmh3", Mmh3Methods); #endif if (module == NULL) INITERROR; PyModule_AddStringConstant(module, "__version__", "2.5.1"); struct module_state *st = GETSTATE(module); st->error = PyErr_NewException((char *) "mmh3.Error", NULL, NULL); if (st->error == NULL) { Py_DECREF(module); INITERROR; } #if PY_MAJOR_VERSION >= 3 return module; #endif } } // extern "C"