root/branches/append/server/assoc.c

Revision 591, 19.5 kB (checked in by plindner, 2 years ago)

gcc -pedantic changes, comments, signed/unsigned changes. also convert expanded to bool

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 * Hash table
4 *
5 * The hash function used here is by Bob Jenkins, 1996:
6 *    <http://burtleburtle.net/bob/hash/doobs.html>
7 *       "By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.
8 *       You may use this code any way you wish, private, educational,
9 *       or commercial.  It's free."
10 *
11 * The rest of the file is licensed under the BSD license.  See LICENSE.
12 *
13 * $Id$
14 */
15
16#include "memcached.h"
17#include <sys/stat.h>
18#include <sys/socket.h>
19#include <sys/signal.h>
20#include <sys/resource.h>
21#include <fcntl.h>
22#include <netinet/in.h>
23#include <errno.h>
24#include <stdlib.h>
25#include <stdio.h>
26#include <string.h>
27#include <assert.h>
28
29/*
30 * Since the hash function does bit manipulation, it needs to know
31 * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG
32 * are set in the configure script.
33 */
34#if ENDIAN_BIG == 1
35# define HASH_LITTLE_ENDIAN 0
36# define HASH_BIG_ENDIAN 1
37#else
38# if ENDIAN_LITTLE == 1
39#  define HASH_LITTLE_ENDIAN 1
40#  define HASH_BIG_ENDIAN 0
41# else
42#  define HASH_LITTLE_ENDIAN 0
43#  define HASH_BIG_ENDIAN 0
44# endif
45#endif
46
47#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
48
49/*
50-------------------------------------------------------------------------------
51mix -- mix 3 32-bit values reversibly.
52
53This is reversible, so any information in (a,b,c) before mix() is
54still in (a,b,c) after mix().
55
56If four pairs of (a,b,c) inputs are run through mix(), or through
57mix() in reverse, there are at least 32 bits of the output that
58are sometimes the same for one pair and different for another pair.
59This was tested for:
60* pairs that differed by one bit, by two bits, in any combination
61  of top bits of (a,b,c), or in any combination of bottom bits of
62  (a,b,c).
63* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
64  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
65  is commonly produced by subtraction) look like a single 1-bit
66  difference.
67* the base values were pseudorandom, all zero but one bit set, or
68  all zero plus a counter that starts at zero.
69
70Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
71satisfy this are
72    4  6  8 16 19  4
73    9 15  3 18 27 15
74   14  9  3  7 17  3
75Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
76for "differ" defined as + with a one-bit base and a two-bit delta.  I
77used http://burtleburtle.net/bob/hash/avalanche.html to choose
78the operations, constants, and arrangements of the variables.
79
80This does not achieve avalanche.  There are input bits of (a,b,c)
81that fail to affect some output bits of (a,b,c), especially of a.  The
82most thoroughly mixed value is c, but it doesn't really even achieve
83avalanche in c.
84
85This allows some parallelism.  Read-after-writes are good at doubling
86the number of bits affected, so the goal of mixing pulls in the opposite
87direction as the goal of parallelism.  I did what I could.  Rotates
88seem to cost as much as shifts on every machine I could lay my hands
89on, and rotates are much kinder to the top and bottom bits, so I used
90rotates.
91-------------------------------------------------------------------------------
92*/
93#define mix(a,b,c) \
94{ \
95  a -= c;  a ^= rot(c, 4);  c += b; \
96  b -= a;  b ^= rot(a, 6);  a += c; \
97  c -= b;  c ^= rot(b, 8);  b += a; \
98  a -= c;  a ^= rot(c,16);  c += b; \
99  b -= a;  b ^= rot(a,19);  a += c; \
100  c -= b;  c ^= rot(b, 4);  b += a; \
101}
102
103/*
104-------------------------------------------------------------------------------
105final -- final mixing of 3 32-bit values (a,b,c) into c
106
107Pairs of (a,b,c) values differing in only a few bits will usually
108produce values of c that look totally different.  This was tested for
109* pairs that differed by one bit, by two bits, in any combination
110  of top bits of (a,b,c), or in any combination of bottom bits of
111  (a,b,c).
112* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
113  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
114  is commonly produced by subtraction) look like a single 1-bit
115  difference.
116* the base values were pseudorandom, all zero but one bit set, or
117  all zero plus a counter that starts at zero.
118
119These constants passed:
120 14 11 25 16 4 14 24
121 12 14 25 16 4 14 24
122and these came close:
123  4  8 15 26 3 22 24
124 10  8 15 26 3 22 24
125 11  8 15 26 3 22 24
126-------------------------------------------------------------------------------
127*/
128#define final(a,b,c) \
129{ \
130  c ^= b; c -= rot(b,14); \
131  a ^= c; a -= rot(c,11); \
132  b ^= a; b -= rot(a,25); \
133  c ^= b; c -= rot(b,16); \
134  a ^= c; a -= rot(c,4);  \
135  b ^= a; b -= rot(a,14); \
136  c ^= b; c -= rot(b,24); \
137}
138
139#if HASH_LITTLE_ENDIAN == 1
140uint32_t hash(
141  const void *key,       /* the key to hash */
142  size_t      length,    /* length of the key */
143  const uint32_t    initval)   /* initval */
144{
145  uint32_t a,b,c;                                          /* internal state */
146  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
147
148  /* Set up the internal state */
149  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
150
151  u.ptr = key;
152  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
153    const uint32_t *k = key;                           /* read 32-bit chunks */
154#ifdef VALGRIND
155    const uint8_t  *k8;
156#endif /* ifdef VALGRIND */
157
158    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
159    while (length > 12)
160    {
161      a += k[0];
162      b += k[1];
163      c += k[2];
164      mix(a,b,c);
165      length -= 12;
166      k += 3;
167    }
168
169    /*----------------------------- handle the last (probably partial) block */
170    /*
171     * "k[2]&0xffffff" actually reads beyond the end of the string, but
172     * then masks off the part it's not allowed to read.  Because the
173     * string is aligned, the masked-off tail is in the same word as the
174     * rest of the string.  Every machine with memory protection I've seen
175     * does it on word boundaries, so is OK with this.  But VALGRIND will
176     * still catch it and complain.  The masking trick does make the hash
177     * noticably faster for short strings (like English words).
178     */
179#ifndef VALGRIND
180
181    switch(length)
182    {
183    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
184    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
185    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
186    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
187    case 8 : b+=k[1]; a+=k[0]; break;
188    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
189    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
190    case 5 : b+=k[1]&0xff; a+=k[0]; break;
191    case 4 : a+=k[0]; break;
192    case 3 : a+=k[0]&0xffffff; break;
193    case 2 : a+=k[0]&0xffff; break;
194    case 1 : a+=k[0]&0xff; break;
195    case 0 : return c;  /* zero length strings require no mixing */
196    }
197
198#else /* make valgrind happy */
199
200    k8 = (const uint8_t *)k;
201    switch(length)
202    {
203    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
204    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
205    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
206    case 9 : c+=k8[8];                   /* fall through */
207    case 8 : b+=k[1]; a+=k[0]; break;
208    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
209    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
210    case 5 : b+=k8[4];                   /* fall through */
211    case 4 : a+=k[0]; break;
212    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
213    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
214    case 1 : a+=k8[0]; break;
215    case 0 : return c;  /* zero length strings require no mixing */
216    }
217
218#endif /* !valgrind */
219
220  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
221    const uint16_t *k = key;                           /* read 16-bit chunks */
222    const uint8_t  *k8;
223
224    /*--------------- all but last block: aligned reads and different mixing */
225    while (length > 12)
226    {
227      a += k[0] + (((uint32_t)k[1])<<16);
228      b += k[2] + (((uint32_t)k[3])<<16);
229      c += k[4] + (((uint32_t)k[5])<<16);
230      mix(a,b,c);
231      length -= 12;
232      k += 6;
233    }
234
235    /*----------------------------- handle the last (probably partial) block */
236    k8 = (const uint8_t *)k;
237    switch(length)
238    {
239    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
240             b+=k[2]+(((uint32_t)k[3])<<16);
241             a+=k[0]+(((uint32_t)k[1])<<16);
242             break;
243    case 11: c+=((uint32_t)k8[10])<<16;     /* @fallthrough */
244    case 10: c+=k[4];                       /* @fallthrough@ */
245             b+=k[2]+(((uint32_t)k[3])<<16);
246             a+=k[0]+(((uint32_t)k[1])<<16);
247             break;
248    case 9 : c+=k8[8];                      /* @fallthrough */
249    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
250             a+=k[0]+(((uint32_t)k[1])<<16);
251             break;
252    case 7 : b+=((uint32_t)k8[6])<<16;      /* @fallthrough */
253    case 6 : b+=k[2];
254             a+=k[0]+(((uint32_t)k[1])<<16);
255             break;
256    case 5 : b+=k8[4];                      /* @fallthrough */
257    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
258             break;
259    case 3 : a+=((uint32_t)k8[2])<<16;      /* @fallthrough */
260    case 2 : a+=k[0];
261             break;
262    case 1 : a+=k8[0];
263             break;
264    case 0 : return c;  /* zero length strings require no mixing */
265    }
266
267  } else {                        /* need to read the key one byte at a time */
268    const uint8_t *k = key;
269
270    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
271    while (length > 12)
272    {
273      a += k[0];
274      a += ((uint32_t)k[1])<<8;
275      a += ((uint32_t)k[2])<<16;
276      a += ((uint32_t)k[3])<<24;
277      b += k[4];
278      b += ((uint32_t)k[5])<<8;
279      b += ((uint32_t)k[6])<<16;
280      b += ((uint32_t)k[7])<<24;
281      c += k[8];
282      c += ((uint32_t)k[9])<<8;
283      c += ((uint32_t)k[10])<<16;
284      c += ((uint32_t)k[11])<<24;
285      mix(a,b,c);
286      length -= 12;
287      k += 12;
288    }
289
290    /*-------------------------------- last block: affect all 32 bits of (c) */
291    switch(length)                   /* all the case statements fall through */
292    {
293    case 12: c+=((uint32_t)k[11])<<24;
294    case 11: c+=((uint32_t)k[10])<<16;
295    case 10: c+=((uint32_t)k[9])<<8;
296    case 9 : c+=k[8];
297    case 8 : b+=((uint32_t)k[7])<<24;
298    case 7 : b+=((uint32_t)k[6])<<16;
299    case 6 : b+=((uint32_t)k[5])<<8;
300    case 5 : b+=k[4];
301    case 4 : a+=((uint32_t)k[3])<<24;
302    case 3 : a+=((uint32_t)k[2])<<16;
303    case 2 : a+=((uint32_t)k[1])<<8;
304    case 1 : a+=k[0];
305             break;
306    case 0 : return c;  /* zero length strings require no mixing */
307    }
308  }
309
310  final(a,b,c);
311  return c;             /* zero length strings require no mixing */
312}
313
314#elif HASH_BIG_ENDIAN == 1
315/*
316 * hashbig():
317 * This is the same as hashword() on big-endian machines.  It is different
318 * from hashlittle() on all machines.  hashbig() takes advantage of
319 * big-endian byte ordering.
320 */
321uint32_t hash( const void *key, size_t length, const uint32_t initval)
322{
323  uint32_t a,b,c;
324  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
325
326  /* Set up the internal state */
327  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
328
329  u.ptr = key;
330  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
331    const uint32_t *k = key;                           /* read 32-bit chunks */
332#ifdef VALGRIND
333    const uint8_t  *k8;
334#endif /* ifdef VALGRIND */
335
336    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
337    while (length > 12)
338    {
339      a += k[0];
340      b += k[1];
341      c += k[2];
342      mix(a,b,c);
343      length -= 12;
344      k += 3;
345    }
346
347    /*----------------------------- handle the last (probably partial) block */
348    /*
349     * "k[2]<<8" actually reads beyond the end of the string, but
350     * then shifts out the part it's not allowed to read.  Because the
351     * string is aligned, the illegal read is in the same word as the
352     * rest of the string.  Every machine with memory protection I've seen
353     * does it on word boundaries, so is OK with this.  But VALGRIND will
354     * still catch it and complain.  The masking trick does make the hash
355     * noticably faster for short strings (like English words).
356     */
357#ifndef VALGRIND
358
359    switch(length)
360    {
361    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
362    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
363    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
364    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
365    case 8 : b+=k[1]; a+=k[0]; break;
366    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
367    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
368    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
369    case 4 : a+=k[0]; break;
370    case 3 : a+=k[0]&0xffffff00; break;
371    case 2 : a+=k[0]&0xffff0000; break;
372    case 1 : a+=k[0]&0xff000000; break;
373    case 0 : return c;              /* zero length strings require no mixing */
374    }
375
376#else  /* make valgrind happy */
377
378    k8 = (const uint8_t *)k;
379    switch(length)                   /* all the case statements fall through */
380    {
381    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
382    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
383    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
384    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
385    case 8 : b+=k[1]; a+=k[0]; break;
386    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
387    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
388    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
389    case 4 : a+=k[0]; break;
390    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
391    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
392    case 1 : a+=((uint32_t)k8[0])<<24; break;
393    case 0 : return c;
394    }
395
396#endif /* !VALGRIND */
397
398  } else {                        /* need to read the key one byte at a time */
399    const uint8_t *k = key;
400
401    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
402    while (length > 12)
403    {
404      a += ((uint32_t)k[0])<<24;
405      a += ((uint32_t)k[1])<<16;
406      a += ((uint32_t)k[2])<<8;
407      a += ((uint32_t)k[3]);
408      b += ((uint32_t)k[4])<<24;
409      b += ((uint32_t)k[5])<<16;
410      b += ((uint32_t)k[6])<<8;
411      b += ((uint32_t)k[7]);
412      c += ((uint32_t)k[8])<<24;
413      c += ((uint32_t)k[9])<<16;
414      c += ((uint32_t)k[10])<<8;
415      c += ((uint32_t)k[11]);
416      mix(a,b,c);
417      length -= 12;
418      k += 12;
419    }
420
421    /*-------------------------------- last block: affect all 32 bits of (c) */
422    switch(length)                   /* all the case statements fall through */
423    {
424    case 12: c+=k[11];
425    case 11: c+=((uint32_t)k[10])<<8;
426    case 10: c+=((uint32_t)k[9])<<16;
427    case 9 : c+=((uint32_t)k[8])<<24;
428    case 8 : b+=k[7];
429    case 7 : b+=((uint32_t)k[6])<<8;
430    case 6 : b+=((uint32_t)k[5])<<16;
431    case 5 : b+=((uint32_t)k[4])<<24;
432    case 4 : a+=k[3];
433    case 3 : a+=((uint32_t)k[2])<<8;
434    case 2 : a+=((uint32_t)k[1])<<16;
435    case 1 : a+=((uint32_t)k[0])<<24;
436             break;
437    case 0 : return c;
438    }
439  }
440
441  final(a,b,c);
442  return c;
443}
444#else // HASH_XXX_ENDIAN == 1
445#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN
446#endif // hash_XXX_ENDIAN == 1
447
448typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
449typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
450
451/* how many powers of 2's worth of buckets we use */
452static unsigned int hashpower = 16;
453
454#define hashsize(n) ((ub4)1<<(n))
455#define hashmask(n) (hashsize(n)-1)
456
457/* Main hash table. This is where we look except during expansion. */
458static item** primary_hashtable = 0;
459
460/*
461 * Previous hash table. During expansion, we look here for keys that haven't
462 * been moved over to the primary yet.
463 */
464static item** old_hashtable = 0;
465
466/* Number of items in the hash table. */
467static unsigned int hash_items = 0;
468
469/* Flag: Are we in the middle of expanding now? */
470static bool expanding = false;
471
472/*
473 * During expansion we migrate values with bucket granularity; this is how
474 * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1.
475 */
476static unsigned int expand_bucket = 0;
477
478void assoc_init(void) {
479    unsigned int hash_size = hashsize(hashpower) * sizeof(void*);
480    primary_hashtable = malloc(hash_size);
481    if (! primary_hashtable) {
482        fprintf(stderr, "Failed to init hashtable.\n");
483        exit(EXIT_FAILURE);
484    }
485    memset(primary_hashtable, 0, hash_size);
486}
487
488item *assoc_find(const char *key, const size_t nkey) {
489    uint32_t hv = hash(key, nkey, 0);
490    item *it;
491    unsigned int oldbucket;
492
493    if (expanding &&
494        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
495    {
496        it = old_hashtable[oldbucket];
497    } else {
498        it = primary_hashtable[hv & hashmask(hashpower)];
499    }
500
501    while (it) {
502        if ((nkey == it->nkey) &&
503            (memcmp(key, ITEM_key(it), nkey) == 0)) {
504            return it;
505        }
506        it = it->h_next;
507    }
508    return 0;
509}
510
511/* returns the address of the item pointer before the key.  if *item == 0,
512   the item wasn't found */
513
514static item** _hashitem_before (const char *key, const size_t nkey) {
515    uint32_t hv = hash(key, nkey, 0);
516    item **pos;
517    unsigned int oldbucket;
518
519    if (expanding &&
520        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
521    {
522        pos = &old_hashtable[oldbucket];
523    } else {
524        pos = &primary_hashtable[hv & hashmask(hashpower)];
525    }
526
527    while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) {
528        pos = &(*pos)->h_next;
529    }
530    return pos;
531}
532
533/* grows the hashtable to the next power of 2. */
534static void assoc_expand(void) {
535    old_hashtable = primary_hashtable;
536
537    primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
538    if (primary_hashtable) {
539        if (settings.verbose > 1)
540            fprintf(stderr, "Hash table expansion starting\n");
541        hashpower++;
542        expanding = true;
543        expand_bucket = 0;
544        do_assoc_move_next_bucket();
545    } else {
546        primary_hashtable = old_hashtable;
547        /* Bad news, but we can keep running. */
548    }
549}
550
551/* migrates the next bucket to the primary hashtable if we're expanding. */
552void do_assoc_move_next_bucket(void) {
553    item *it, *next;
554    int bucket;
555
556    if (expanding) {
557        for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
558            next = it->h_next;
559
560            bucket = hash(ITEM_key(it), it->nkey, 0) & hashmask(hashpower);
561            it->h_next = primary_hashtable[bucket];
562            primary_hashtable[bucket] = it;
563        }
564
565        old_hashtable[expand_bucket] = NULL;
566
567        expand_bucket++;
568        if (expand_bucket == hashsize(hashpower - 1)) {
569            expanding = false;
570            free(old_hashtable);
571            if (settings.verbose > 1)
572                fprintf(stderr, "Hash table expansion done\n");
573        }
574    }
575}
576
577/* Note: this isn't an assoc_update.  The key must not already exist to call this */
578int assoc_insert(item *it) {
579    uint32_t hv;
580    unsigned int oldbucket;
581
582    assert(assoc_find(ITEM_key(it), it->nkey) == 0);  /* shouldn't have duplicately named things defined */
583
584    hv = hash(ITEM_key(it), it->nkey, 0);
585    if (expanding &&
586        (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
587    {
588        it->h_next = old_hashtable[oldbucket];
589        old_hashtable[oldbucket] = it;
590    } else {
591        it->h_next = primary_hashtable[hv & hashmask(hashpower)];
592        primary_hashtable[hv & hashmask(hashpower)] = it;
593    }
594
595    hash_items++;
596    if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
597        assoc_expand();
598    }
599
600    return 1;
601}
602
603void assoc_delete(const char *key, const size_t nkey) {
604    item **before = _hashitem_before(key, nkey);
605
606    if (*before) {
607        item *nxt = (*before)->h_next;
608        (*before)->h_next = 0;   /* probably pointless, but whatever. */
609        *before = nxt;
610        hash_items--;
611        return;
612    }
613    /* Note:  we never actually get here.  the callers don't delete things
614       they can't find. */
615    assert(*before != 0);
616}
Note: See TracBrowser for help on using the browser.