tools/dev/hash-test.c (138 lines of code) (raw):

/* * ==================================================================== * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * ==================================================================== */ /* gcc hash-test.c -I/usr/include/apr-1 -Isubversion/include -lsvn_subr-1 -lapr-1 Shows how bad the standard APR hash function can be for 4/8-byte svn_revnum_t keys. Putting the first 1,000,000 revisions into a hash table reveals that 96% of the keys end up in chains with 6 or 7 hash collisions, that means almost all hash lookups degrade to a linked list scan. Subversion has a different hash function available, accessed by using svn_hash__make() instead of apr_hash_make(), that doesn't use a seed and it is much better for svn_revnum_t keys. Another option would be to use the svn_revnum_t values directly as keys with a no-op hash function. */ #include <apr_pools.h> #include <apr_hash.h> #include <stdlib.h> #include <stdio.h> #include "private/svn_subr_private.h" struct e_t { struct e_t *next; unsigned int hash; const void *key; apr_ssize_t klen; const void *val; }; struct i_t { struct h_t *h; struct e_t *t; struct e_t *n; unsigned int i; }; struct h_t { apr_pool_t *pool; struct e_t **array; struct i_t it; unsigned int count; unsigned int max; unsigned int seed; void *func; struct e_t *free; }; static void test_hash(apr_hash_t *hash, const char *name) { struct h_t *hack = (struct h_t *)hash; int num = 0, max = 0, running = 0, i; const int hist_len = 15; int hist[hist_len]; for (i = 0; i < hist_len; ++i) hist[i] = 0; for (i = 0; i <= hack->max; ++i) { struct e_t *e = hack->array[i]; int j = 0; while (e) { ++j; ++num; e = e->next; } if (j) { if (j > max) max = j; if (j < hist_len) ++hist[j - 1]; } } printf("--\n%s\n--\nalloc:%d entries:%d seed:%0x\nhistogram\n", name, hack->max, hack->count, hack->seed); for (i = 0; i < hist_len; ++i) printf("%d ", hist[i]); printf("\ncummulative\n"); for (i = 0; i < hist_len && running < hack->count; ++i) { running += (i + 1) * hist[i]; printf("%0.2f ", ((float)running)/num); } printf("\nlongest:%d found:%d\n", max, num); } unsigned int hash_simple64(const char *key, apr_ssize_t *klen) { unsigned int *p = (unsigned int *)key; return p[0] ^ p[1]; } int main(int argc, char *argv[]) { apr_pool_t *pool; apr_hash_t *hash; long i; long min = 1; long max = 1000000; if (argc > 1) min = atol(argv[1]); if (argc > 2) max = atol(argv[2]); apr_initialize(); apr_pool_create(&pool, NULL); hash = apr_hash_make(pool); for (i = min; i <= max; ++i) { apr_int32_t *mapped = apr_palloc(pool, sizeof(apr_int32_t) * 2); mapped[0] = i; mapped[1] = i + 1; apr_hash_set(hash, mapped, sizeof(apr_int32_t), mapped + 1); } test_hash(hash, "apr 32-bit keys"); apr_pool_clear(pool); hash = apr_hash_make(pool); for (i = min; i <= max; ++i) { apr_int64_t *mapped = apr_palloc(pool, sizeof(apr_int64_t) * 2); mapped[0] = i; mapped[1] = i + 1; apr_hash_set(hash, mapped, sizeof(apr_int64_t), mapped + 1); } test_hash(hash, "apr 64-bit keys"); apr_pool_clear(pool); hash = svn_hash__make(pool); for (i = min; i <= max; ++i) { apr_int32_t *mapped = apr_palloc(pool, sizeof(apr_int32_t) * 2); mapped[0] = i; mapped[1] = i + 1; apr_hash_set(hash, mapped, sizeof(apr_int32_t), mapped + 1); } test_hash(hash, "svn 32-bit keys"); apr_pool_clear(pool); hash = svn_hash__make(pool); for (i = min; i <= max; ++i) { apr_int64_t *mapped = apr_palloc(pool, sizeof(apr_int64_t) * 2); mapped[0] = i; mapped[1] = i + 1; apr_hash_set(hash, mapped, sizeof(apr_int64_t), mapped + 1); } test_hash(hash, "svn 64-bit keys"); apr_pool_clear(pool); hash = apr_hash_make_custom(pool, hash_simple64); for (i = min; i <= max; ++i) { apr_int64_t *mapped = apr_palloc(pool, sizeof(apr_int64_t) * 2); mapped[0] = i; mapped[1] = i + 1; apr_hash_set(hash, mapped, sizeof(apr_int64_t), mapped + 1); } test_hash(hash, "simple 64-bit keys"); apr_pool_clear(pool); apr_pool_destroy(pool); return 0; }