/* ** Act - Abstract Container Type Library ** Copyright (c) 1999-2000 Ralf S. Engelschall ** ** This file is part of Act, a library for dealing with Abstract ** Container Types which can be found at http://www.ossp.org/pkg/act/. ** ** Permission to use, copy, modify, and distribute this software for ** any purpose with or without fee is hereby granted, provided that ** the above copyright notice and this permission notice appear in all ** copies. ** ** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED ** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR ** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ** SUCH DAMAGE. ** ** act_hash_lh.c: Linear Hashing (dynamic) */ /* ** This module implements a Dynamic Hash Table, based on WITOLD LITWIN ** and PER-AKE LARSON's ``Linear Hashing'' algorithm (1980/1988), ** implemented on top of a two-level virtual array with separate ** collision chains as the backend data structure. Some ideas were ** taken over from MIKAEL PETTERSON's Linear Hashing enhancements ** (1993). ** ** Linear Hashing can be summarized as following: ** ** o drawback of classical hashing is that the hash table is of ** fixed size; if table is filled, the table has to be expanded; ** unfortunately this requires re-hashing of all existing elements. ** ** o Extendible Hashing solves this problem by allowing the table ** to grow without complete re-hashing of elements every time; ** unfortunately the table must double in size to grow - not a ** problem for small tables, but a disadvantage for large ones. ** ** o Linear Hashing allows the table to grow one bucket at a time: ** table is extended when its load factor passes a critical value; buckets ** are split "in sequence" starting at index 0 and ending at 2^n; two ** hashing functions are always "active" - one for split buckets and one ** for unsplit; when last bucket (the 2^n-th one) is split, the table has ** been doubled in size, and a new cycle of splitting begins starting at ** bucket 0. ** ** The internal structure of the linear hashing table is illustrated ** in the following figure: ** ** -----BEGIN EMBEDDED OBJECT----- ** Content-type: application/fig ** Description: linear hashing structure layout ** Version: eo/1.0 ** H4sIACfenzkCA62WTYvbMBCGz+tfIeixJOhb9rnQsrC3XntxE7ExdZ0ldhf233dG ** sr6aNllRE2K90USPpNGMRx8+P34hYs+bp346zof+xTaf7LTYS/M4HU52bp7sAr8I ** aRile0qbr8P0PNpmxxvGKSW84QQ/jOwYMURRQlFx+CtK9wMb1TwQBlYB3R1NLX6x ** P7TQX0OkK5IbmguNNGdLagXDT/jyzoM5PCLYRC6HzRIJ/+BKZYIJVGhLCoyVXNhm ** 4EXFJV15SaG1Eu1Z5UPIsNSk0FDhZt6Gxaq4Aa86JDprUn8su725bIFdTOM68fgz ** 1foTLZRWNWgd0UYVqo0xklQVWvKAlkIVCoHemlQduo3oThXKAVtaKI9mKxqOLJwj ** zc8RbbJ5QMGgn2i0EshfTGjIIskV8Y4xmfDRmUXizWnMvVm06/KZHloMSPcakVvs ** gwvYh49vE9PJxCCgmzlMUHSPgdmEKJVWZTz970Q64rtSubDgm00kecBLVSo30VWs ** 7djNmfi7fIeK822oyVF6O2rmFb4htY3UNlHD60HQe+9LlzVGpdasRTC01yWJ/RsY ** 0m896SBczXS2pGR90XA56EpFplp1/WC0zgnU+c2XikyZUCoyVedfHdFGFSpeLDJV ** hXbp6oCuVGTKhFKRqTp0G9GdKpQJpSJTIYx9KKdjZLU1I5bQKGR5Ldpgji6go/BX ** m3iNkd4L+GAaotU7CMDuPqkZ5BoDjovg43Cxh+V8eftGKbseyfbK0DaM1OFWgzey ** 2T7/hHvx/PeB5ZTS1U6fOMv5ZTfaVzveGwgn28FAoXCtuPHDeRyHeThP5HDqh+k9 ** E7P19efi/od9+3jsl/4dEzNm0O04EkP9dbgsv/qRnPr5RJb++2gd4zf8ksj2KwwA ** AA== ** -----END EMBEDDED OBJECT----- ** ** As you can see, it consists of four classes of memory chunks: ** 1. a top-level structure which acts as the primary handle ** 2. the virtual hash table consisting of a single (growing) ** directory and one or more (fixed size) directory segments ** 3. the collision chains consisting of element structures ** 4. the actual elements consisting of key+value structures */ #include "act_p.h" /* fixed size (number of pointers) of the directory and of each segment */ #define INITDIRSIZE 256 /* can be an arbitrary value */ #define SEGMENTSIZE 512 /* has to be a power of 2 for below arithmetic */ /* the borders for the hash table load */ #define MINLOADFCTR 1 /* should be between 0 and 1 */ #define MAXLOADFCTR 2 /* should be between 2 and 4 */ /* the per-element structure (keep as small as possible!) */ typedef struct element_st element_t; struct element_st { element_t *e_next; /* pointer to next element in collision chain */ unsigned long e_hash; /* cached hash value of element (rehashing optimization) */ void *e_keyptr; /* pointer to key (= also pointer to key+data memory chunk) */ void *e_datptr; /* pointer to data in key+data memory chunk */ void *e_endptr; /* pointer to end of key+data memory chunk */ }; /* the hash table segments */ typedef struct segment_st segment_t; struct segment_st { element_t *s_element[SEGMENTSIZE]; /* array of pointers to elements */ }; /* the top-level hash table structure */ typedef struct act_hash_lh_st act_hash_lh_t; struct act_hash_lh_st { act_ctx_t *h_ctx; /* context structure */ act_hash_fct_t h_func; /* hash function (copied from context) */ unsigned int h_p; /* pointer to start of unsplit region */ unsigned int h_pmax; /* pointer to end of unsplit region */ int h_slack; /* grow/shrink indicator */ unsigned h_dirsize; /* current size of directory */ segment_t **h_dir; /* pointer to directory */ }; /* on-the-fly calculate index into directory and segment from virtual array index */ #define DIRINDEX(addr) (int)((addr) / SEGMENTSIZE) #define SEGINDEX(addr) (int)((addr) % SEGMENTSIZE) /* on-the-fly calculate lengths of key and data to reduce memory in element_t */ #define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr)) #define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr)) /* create the hash table structure */ static act_hash_lh_t * act_hash_lh_new( act_ctx_t *ctx) { act_hash_lh_t *h; /* allocate hash structure */ if ((h = (act_hash_lh_t *)act_mem_alloc_ctx(ctx, sizeof(act_hash_lh_t))) == NULL) return NULL; h->h_ctx = ctx; /* allocate and clear hash table directory */ h->h_dirsize = INITDIRSIZE; if ((h->h_dir = (segment_t **)act_mem_alloc_ctx( ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) { errno_safe(act_mem_free_ctx(ctx, h)); return NULL; } act_mem_set(h->h_dir, 0, h->h_dirsize * sizeof(segment_t *)); /* allocate and clear first segment of hash table array */ if ((h->h_dir[0] = (segment_t *)act_mem_alloc_ctx(ctx, sizeof(segment_t))) == NULL) { errno_safe(act_mem_free_ctx(ctx, h->h_dir); act_mem_free_ctx(ctx, h)); return NULL; } act_mem_set(h->h_dir[0], 0, sizeof(segment_t)); /* initialize hash table control attributes */ h->h_p = 0; h->h_pmax = SEGMENTSIZE; h->h_slack = SEGMENTSIZE * MAXLOADFCTR; /* inline values from context (for performance reasons only) */ if (!act_ctx_get(ctx, ACT_HASH_FUNC, &h->h_func)) h->h_func = act_hash_fct(djbx33a); return h; } /* expand the hash table */ static void act_hash_lh_expand( act_ctx_t *ctx, act_hash_lh_t *h) { unsigned int pmax0; unsigned int newaddr; segment_t *seg; element_t **pelold; element_t *el, *headofold, *headofnew, *next; unsigned int n; /* calculate next new address */ pmax0 = h->h_pmax; newaddr = pmax0 + h->h_p; /* have to enlarge directory? */ if (h->h_dirsize <= DIRINDEX(newaddr)) { n = h->h_dirsize * sizeof(segment_t *); h->h_dirsize *= 2; if ((h->h_dir = (segment_t **)act_mem_realloc_ctx(ctx, h->h_dir, h->h_dirsize*sizeof(segment_t *))) == NULL) return; act_mem_set((char *)h->h_dir+n, 0, n); } /* have to create a new table segment? */ if (SEGINDEX(newaddr) == 0) { if ((seg = (segment_t *)act_mem_alloc_ctx(ctx, sizeof(segment_t))) == NULL) return; act_mem_set(seg, 0, sizeof(segment_t)); h->h_dir[DIRINDEX(newaddr)] = seg; } /* locate P-element */ pelold = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)]; /* adjust the state variables */ if (++(h->h_p) >= h->h_pmax) { h->h_pmax = (h->h_pmax << 1); /* == h->h_pmax * 2 */ h->h_p = 0; } h->h_slack += MAXLOADFCTR; /* relocate and split between P-element new element */ headofold = NULL; headofnew = NULL; for (el = *pelold; el != NULL; el = next) { next = el->e_next; if (el->e_hash & pmax0) { el->e_next = headofnew; headofnew = el; } else { el->e_next = headofold; headofold = el; } } *pelold = headofold; h->h_dir[DIRINDEX(newaddr)]->s_element[SEGINDEX(newaddr)] = headofnew; return; } /* shrink hash table */ static void act_hash_lh_shrink( act_ctx_t *ctx, act_hash_lh_t *h) { segment_t *lastseg; element_t **pel; unsigned int oldlast; unsigned int dirsize; void *dir; /* calculate old last element */ oldlast = h->h_p + h->h_pmax - 1; /* we cannot shrink below one segment */ if (oldlast == 0) return; /* adjust the state variables */ if (h->h_p == 0) { h->h_pmax = (h->h_pmax >> 1); /* == h->h_pmax / 2 */; h->h_p = h->h_pmax - 1; } else h->h_p--; h->h_slack -= MAXLOADFCTR; /* relocate the lost old last element to the end of the P-element */ pel = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)]; while (*pel != NULL) pel = &((*pel)->e_next); lastseg = h->h_dir[DIRINDEX(oldlast)]; *pel = lastseg->s_element[SEGINDEX(oldlast)]; lastseg->s_element[SEGINDEX(oldlast)] = NULL; /* if possible, deallocate the last segment */ if (SEGINDEX(oldlast) == 0) { h->h_dir[DIRINDEX(oldlast)] = NULL; free(lastseg); } /* if possible, deallocate the end of the directory */ if (h->h_dirsize > INITDIRSIZE && DIRINDEX(oldlast) < h->h_dirsize/2) { dirsize = (h->h_dirsize >> 1); /* == h->h_dirsize / 2 */ if ((dir = (segment_t **)act_mem_realloc_ctx(ctx, h->h_dir, dirsize*sizeof(segment_t *))) != NULL) { h->h_dirsize = dirsize; h->h_dir = dir; } } return; } /* insert element into hash table */ static int act_hash_lh_insert( act_ctx_t *ctx, act_hash_lh_t *h, void *keyptr, int keylen, void *datptr, int datlen, int override) { unsigned int hash, addr; element_t *el, **pel; int bFound; void *vp; /* argument consistency check */ if (h == NULL || keyptr == NULL || keylen <= 0) return_errno(FALSE, EINVAL); /* calculate hash address */ hash = h->h_func(keyptr, keylen); addr = hash % h->h_pmax; /* unsplit region */ if (addr < h->h_p) addr = hash % (h->h_pmax << 1); /* split region */ /* locate hash element's collision list */ pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)]; /* check whether element is already in the hash table */ bFound = FALSE; for (el = *pel; el != NULL; el = el->e_next) { if ( el->e_hash == hash && el_keylen(el) == keylen && act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) { bFound = TRUE; break; } } /* only override on request */ if (bFound && !override) return FALSE; /* create a duplicate of key and data */ if ((vp = act_mem_alloc_ctx(ctx, keylen+datlen)) == NULL) return FALSE; act_mem_move(vp, keyptr, keylen); act_mem_move((char *)vp+keylen, datptr, datlen); keyptr = vp; datptr = (char *)vp+keylen; if (bFound) { /* reuse existing element by freeing old contents */ if (el->e_keyptr != NULL) act_mem_free_ctx(ctx, el->e_keyptr); } else { /* allocate new element and chain into list */ if ((el = (element_t *)act_mem_alloc_ctx(ctx, sizeof(element_t))) == 0) { act_mem_free_ctx(ctx, vp); return FALSE; } while (*pel != NULL) pel = &((*pel)->e_next); el->e_next = *pel; *pel = el; } /* insert contents into element structure */ el->e_keyptr = keyptr; el->e_datptr = datptr; el->e_endptr = (char *)datptr+datlen; el->e_hash = hash; /* do we need to expand the table now? */ if (--(h->h_slack) < 0) act_hash_lh_expand(ctx, h); return TRUE; } /* lookup an element in hash table */ static int act_hash_lh_lookup( act_ctx_t *ctx, act_hash_lh_t *h, void *keyptr, int keylen, void **datptr, int *datlen) { unsigned int hash, addr; element_t *el, **pel; /* argument consistency check */ if (h == NULL || keyptr == NULL || keylen <= 0) return_errno(FALSE, EINVAL); /* calculate hash address */ hash = h->h_func(keyptr, keylen); addr = hash % h->h_pmax; /* unsplit region */ if (addr < h->h_p) addr = hash % (h->h_pmax << 1); /* split region */ /* locate hash element collision list */ pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)]; /* search for element in collision list */ for (el = *pel; el != NULL; el = el->e_next) { if ( el->e_hash == hash && el_keylen(el) == keylen && act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) { /* provide results */ if (datptr != NULL) *datptr = el->e_datptr; if (datlen != NULL) *datlen = el_datlen(el); return TRUE; } } return FALSE; } /* delete an element in hash table */ static int act_hash_lh_delete( act_ctx_t *ctx, act_hash_lh_t *h, void *keyptr, int keylen) { unsigned int hash, addr; element_t *el, *lel, **pel; int rv; /* argument consistency check */ if (h == NULL || keyptr == NULL || keylen <= 0) return_errno(FALSE, EINVAL); /* calculate hash address */ hash = h->h_func(keyptr, keylen); addr = hash % h->h_pmax; /* unsplit region */ if (addr < h->h_p) addr = hash % (h->h_pmax << 1); /* split region */ /* locate hash element collision chain */ pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)]; /* search for element in collision chain */ rv = FALSE; for (lel = NULL, el = *pel; el != NULL; lel = el, el = el->e_next) { if ( el->e_hash == hash && el_keylen(el) == keylen && act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) { /* free key+data memory chunk */ if (el->e_keyptr != NULL) act_mem_free_ctx(ctx, el->e_keyptr); /* remove element from collision chain */ if (lel == NULL) *pel = el->e_next; else lel->e_next = el->e_next; /* deallocate element structure */ act_mem_free_ctx(ctx, el); rv = TRUE; break; } } /* do we need to shrink the table now? */ if (++(h->h_slack) > ((h->h_pmax + h->h_p) * (MAXLOADFCTR-MINLOADFCTR))) act_hash_lh_shrink(ctx, h); return rv; } /* calculate total size of hash table */ static int act_hash_lh_size( act_ctx_t *ctx, act_hash_lh_t *h, long *pbytes, long *pelements) { long bytes, elements; int i, j; element_t *el; /* argument consistency check */ if (h == NULL) return_errno(FALSE, EINVAL); /* start with 0 bytes and elements */ bytes = 0; elements = 0; /* add bytes for top-level structure and directory */ bytes += sizeof(act_hash_lh_t); bytes += h->h_dirsize * sizeof(segment_t *); /* add size for segments */ for (i = 0; i < h->h_dirsize; i++) { if (h->h_dir[i] == NULL) continue; bytes += sizeof(segment_t); /* add size of elements */ for (j = 0; j < SEGMENTSIZE; j++) { if (h->h_dir[i]->s_element[j] == NULL) continue; el = h->h_dir[i]->s_element[j]; for (; el != NULL; el = el->e_next) { elements++; /* add size of key+data */ bytes += sizeof(element_t); bytes += el_keylen(el); bytes += el_datlen(el); } } } /* provide results */ if (pbytes != NULL) *pbytes = bytes; if (pelements != NULL) *pelements = elements; return TRUE; } /* calculate total size of hash table */ static int act_hash_lh_status( act_ctx_t *ctx, act_hash_lh_t *h, char *pstatus, int nstatus) { int bytes, elements; int dirsize, segsize; int segments; int chains; int chainlen_min, chainlen_max; int chainlen_all, chainlen_avg; double used; int i, j, k; element_t *el; /* argument consistency check */ if (h == NULL) return_errno(FALSE, EINVAL); /* initial values */ bytes = 0; elements = 0; dirsize = h->h_dirsize; segsize = SEGMENTSIZE; segments = 0; chains = 0; chainlen_min = 0; chainlen_max = 0; chainlen_all = 0; chainlen_avg = 0; used = 0; /* add bytes for top-level structure and directory */ bytes += sizeof(act_hash_lh_t); bytes += h->h_dirsize * sizeof(segment_t *); /* add size for segments */ for (i = 0; i < h->h_dirsize; i++) { if (h->h_dir[i] == NULL) continue; segments++; bytes += sizeof(segment_t); /* add size of elements */ for (j = 0; j < SEGMENTSIZE; j++) { if (h->h_dir[i]->s_element[j] == NULL) continue; chains++; el = h->h_dir[i]->s_element[j]; k = 0; for (; el != NULL; el = el->e_next) { elements++; k++; /* add size of key+data */ bytes += sizeof(element_t); bytes += el_keylen(el); bytes += el_datlen(el); } chainlen_all += k; if (chainlen_min == 0 || chainlen_min > k) chainlen_min = k; if (chainlen_max == 0 || chainlen_max < k) chainlen_max = k; } } /* calculate total table usage */ used = ((double)chains / (double)(segments*SEGMENTSIZE)) * 100; if (chains == 0) chainlen_avg = 0; else chainlen_avg = chainlen_all / chains; /* provide results */ act_snprintf(pstatus, nstatus, "size=%d@%dB dir=1*%dB seg=%d*%dB chains=%d@%d/%d/%d usage=%0.1f", elements, bytes, dirsize, segments, segsize, chains, chainlen_min, chainlen_avg, chainlen_max, used); return TRUE; } /* destroy the whole hash table */ static int act_hash_lh_free( act_ctx_t *ctx, act_hash_lh_t *h) { element_t *el, **pel; unsigned int i, j; /* argument consistency check */ if (h == NULL) return_errno(FALSE, EINVAL); /* deallocate all segment's entries */ for (i = 0; i < h->h_dirsize; i++) { if (h->h_dir[i] == NULL) continue; /* deallocate all collision chains */ for (j = 0; j < SEGMENTSIZE; j++) { if (h->h_dir[i]->s_element[j] == NULL) continue; /* deallocate all elements in collision chain */ pel = &h->h_dir[i]->s_element[j]; for (el = *pel; el != NULL; el = el->e_next) { /* deallocate key+data memory chunk */ if (el->e_keyptr != NULL) act_mem_free_ctx(ctx, el->e_keyptr); } } act_mem_free_ctx(ctx, h->h_dir[i]); } /* deallocate hash table directory */ act_mem_free_ctx(ctx, h->h_dir); /* deallocate hash table top-level structure */ act_mem_free_ctx(ctx, h); return TRUE; } /* the ACT dispatch structure for the hashing API */ intern act_hash_method_t act_hash_lh = { ACT_HASH_METHOD_TAG, (act_hash_new_t) act_hash_lh_new, (act_hash_insert_t) act_hash_lh_insert, (act_hash_lookup_t) act_hash_lh_lookup, (act_hash_delete_t) act_hash_lh_delete, (act_hash_size_t) act_hash_lh_size, (act_hash_status_t) act_hash_lh_status, (act_hash_free_t) act_hash_lh_free };