cext/cutils.c (141 lines of code) (raw):

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * _cutils: CPython extension module exposing two functions,
 * hashcode(obj) and schema_id(obj).
 *
 * All hashes are computed with 32-bit wrap-around arithmetic and returned
 * to Python as signed 32-bit values.
 */

#include <Python.h>

#ifdef _MSC_VER
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/*
 * Constants used by schema_id(). They are kept unsigned so that the
 * multiply/xor steps wrap modulo 2^32 instead of overflowing a signed type
 * (signed overflow is undefined behaviour in C).
 */
static const uint32_t FNV1_OFFSET_BASIS = 0x811c9dc5u;
static const uint32_t FNV1_PRIME = 0x01000193u;

PyObject* hashcode(PyObject* self, PyObject *args);
PyObject* schema_id(PyObject* self, PyObject *args);
PyObject* str_hashcode(PyObject* data);
int32_t str_hashcode_(PyObject* data, int lower);
PyObject* b_hashcode(PyObject* data);

static PyMethodDef methods[] = {
    {"hashcode", (PyCFunction) hashcode, METH_VARARGS, ""},
    {"schema_id", (PyCFunction) schema_id, METH_VARARGS, ""},
    {NULL, NULL, 0, NULL}       /* Sentinel */
};

static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "_cutils",
    0,                          /* m_doc */
    -1,                         /* m_size */
    methods,                    /* m_methods */
    NULL,                       /* m_slots */
    NULL,                       /* m_traverse */
    NULL,                       /* m_clear */
    NULL,                       /* m_free */
};

static const char *hashcode_input_err = "supported only strings, bytearrays, bytes and memoryview";
static const char *schema_id_input_err = "input argument must be dict or int";
static const char *schema_field_type_err = "schema keys must be strings";

/*
 * Reinterpret a 32-bit unsigned value as a two's-complement signed value.
 *
 * Written arithmetically (rather than as a plain cast) so the result does
 * not depend on the implementation-defined behaviour of out-of-range
 * unsigned-to-signed conversion.
 */
static int32_t to_int32(uint32_t v)
{
    if (v & 0x80000000u) {
        /* v represents v - 2^32, which equals -(~v) - 1 in 32 bits. */
        return -(int32_t)(~v & 0x7fffffffu) - 1;
    }
    return (int32_t)v;
}

/* Module initialisation entry point. */
PyMODINIT_FUNC PyInit__cutils(void)
{
    return PyModule_Create(&moduledef);
}

/*
 * hashcode(obj) -> int
 *
 * Dispatches on the type of the single positional argument:
 *   - None            -> 0
 *   - exact str       -> str_hashcode()
 *   - anything else   -> b_hashcode(), which raises ValueError for
 *                        unsupported types.
 */
PyObject* hashcode(PyObject* self, PyObject *args)
{
    PyObject* data;

    (void)self;

    if (!PyArg_ParseTuple(args, "O", &data)) {
        return NULL;
    }

    if (data == Py_None) {
        return PyLong_FromLong(0);
    }
    if (PyUnicode_CheckExact(data)) {
        return str_hashcode(data);
    }
    return b_hashcode(data);
}

/* Case-sensitive string hash, boxed as a Python int. */
PyObject* str_hashcode(PyObject* data)
{
    return PyLong_FromLong(str_hashcode_(data, 0));
}

/*
 * Hash a unicode object as h = 31 * h + code_point over all code points,
 * starting from 0, with 32-bit wrap-around.
 *
 * str   - must be a unicode object (the caller is responsible for checking).
 * lower - when non-zero, each code point is passed through
 *         Py_UNICODE_TOLOWER before being mixed in.
 *
 * Returns the hash as a signed 32-bit value; the empty string hashes to 0.
 */
int32_t str_hashcode_(PyObject *str, int lower)
{
    uint32_t hash = 0;
    Py_ssize_t sz = PyUnicode_GET_LENGTH(str);

    if (!sz) {
        return 0;
    }

    int kind = PyUnicode_KIND(str);
    void* buf = PyUnicode_DATA(str);

    Py_ssize_t i;
    for (i = 0; i < sz; i++) {
        Py_UCS4 ch = PyUnicode_READ(kind, buf, i);
        if (lower) {
            ch = Py_UNICODE_TOLOWER(ch);
        }
        /* Unsigned arithmetic: wraps modulo 2^32, never overflows. */
        hash = 31u * hash + (uint32_t)ch;
    }

    return to_int32(hash);
}

/*
 * Hash a bytes-like object as h = 31 * h + byte over all bytes, starting
 * from 1, with 32-bit wrap-around. Each byte is treated as a signed 8-bit
 * value.
 *
 * Accepts exact bytes, exact bytearray, and memoryview objects. Any other
 * type raises ValueError. A memoryview is accessed through the buffer
 * protocol with PyBUF_SIMPLE, so a released or non-contiguous view makes
 * PyObject_GetBuffer fail and its exception is propagated, instead of the
 * raw buffer pointer being read as if it were contiguous.
 *
 * Returns a new reference to a Python int, or NULL with an exception set.
 */
PyObject* b_hashcode(PyObject* data)
{
    Py_buffer view;
    int have_view = 0;
    const char* buf;
    Py_ssize_t sz;

    if (PyBytes_CheckExact(data)) {
        sz = PyBytes_GET_SIZE(data);
        buf = PyBytes_AS_STRING(data);
    }
    else if (PyByteArray_CheckExact(data)) {
        sz = PyByteArray_GET_SIZE(data);
        buf = PyByteArray_AS_STRING(data);
    }
    else if (PyMemoryView_Check(data)) {
        if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) {
            return NULL;
        }
        have_view = 1;
        sz = view.len;
        buf = (const char*)view.buf;
    }
    else {
        PyErr_SetString(PyExc_ValueError, hashcode_input_err);
        return NULL;
    }

    uint32_t hash = 1;
    Py_ssize_t i;
    for (i = 0; i < sz; i++) {
        uint32_t b = (unsigned char)buf[i];
        if (b & 0x80u) {
            /* Sign-extend so the byte contributes as a signed 8-bit value. */
            b |= 0xffffff00u;
        }
        hash = 31u * hash + b;
    }

    if (have_view) {
        PyBuffer_Release(&view);
    }

    return PyLong_FromLong(to_int32(hash));
}

/*
 * schema_id(obj) -> int
 *
 *   - exact int  -> returned as an int via PyNumber_Long
 *   - None       -> 0
 *   - dict       -> 0 when empty; otherwise a 32-bit hash over the keys in
 *                   iteration order. Every key must be an exact str, else
 *                   ValueError is raised.
 *   - other      -> ValueError
 *
 * For each key, the lower-cased string hash is fed into the running value
 * one byte at a time, least significant byte first; each step xors the
 * byte in and then multiplies by FNV1_PRIME (the xor-then-multiply order).
 */
PyObject* schema_id(PyObject* self, PyObject *args)
{
    PyObject* data;

    (void)self;

    if (!PyArg_ParseTuple(args, "O", &data)) {
        return NULL;
    }

    if (PyLong_CheckExact(data)) {
        return PyNumber_Long(data);
    }
    if (data == Py_None) {
        return PyLong_FromLong(0);
    }
    if (!PyDict_Check(data)) {
        PyErr_SetString(PyExc_ValueError, schema_id_input_err);
        return NULL;
    }

    if (PyDict_Size(data) == 0) {
        return PyLong_FromLong(0);
    }

    uint32_t s_id = FNV1_OFFSET_BASIS;

    PyObject *key, *value;
    Py_ssize_t pos = 0;

    while (PyDict_Next(data, &pos, &key, &value)) {
        if (!PyUnicode_CheckExact(key)) {
            PyErr_SetString(PyExc_ValueError, schema_field_type_err);
            return NULL;
        }

        uint32_t field_id = (uint32_t)str_hashcode_(key, 1);

        int shift;
        for (shift = 0; shift < 32; shift += 8) {
            s_id ^= (field_id >> shift) & 0xffu;
            s_id *= FNV1_PRIME;
        }
    }

    return PyLong_FromLong(to_int32(s_id));
}