ossp-pkg/act/act_hash_lh.c
/*
** OSSP act - Abstract Container Types
** Copyright (c) 1999-2003 Ralf S. Engelschall <rse@engelschall.com>
** Copyright (c) 1999-2003 The OSSP Project <http://www.ossp.org/>
**
** This file is part of OSSP act, an abstract container type library
** which can be found at http://www.ossp.org/pkg/lib/act/.
**
** Permission to use, copy, modify, and distribute this software for
** any purpose with or without fee is hereby granted, provided that
** the above copyright notice and this permission notice appear in all
** copies.
**
** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
**
** act_hash_lh.c: Linear Hashing (dynamic)
*/
/*
** This module implements a Dynamic Hash Table, based on WITOLD LITWIN
** and PER-AKE LARSON's "Linear Hashing" algorithm (1980/1988),
** implemented on top of a two-level virtual array with separate
** collision chains as the backend data structure. Some ideas were
** taken over from MIKAEL PETTERSON's Linear Hashing enhancements
** (1993).
**
** Linear Hashing can be summarized as following:
**
** o drawback of classical hashing is that the hash table is of
** fixed size; if table is filled, the table has to be expanded;
** unfortunately this requires re-hashing of all existing elements.
**
** o Extendible Hashing solves this problem by allowing the table
** to grow without complete re-hashing of elements every time;
** unfortunately the table must double in size to grow - not a
** problem for small tables, but a disadvantage for large ones.
**
** o Linear Hashing allows the table to grow one bucket at a time:
** table is extended when its load factor passes a critical value; buckets
** are split "in sequence" starting at index 0 and ending at 2^n; two
** hashing functions are always "active" - one for split buckets and one
** for unsplit; when last bucket (the 2^n-th one) is split, the table has
** been doubled in size, and a new cycle of splitting begins starting at
** bucket 0.
**
** The internal structure of the linear hashing table is illustrated
** in the following figure:
**
** -----BEGIN EMBEDDED OBJECT-----
** Content-type: application/fig
** Description: linear hashing structure layout
** Version: eo/1.0
** H4sIACfenzkCA62WTYvbMBCGz+tfIeixJOhb9rnQsrC3XntxE7ExdZ0ldhf233dG
** sr6aNllRE2K90USPpNGMRx8+P34hYs+bp346zof+xTaf7LTYS/M4HU52bp7sAr8I
** aRile0qbr8P0PNpmxxvGKSW84QQ/jOwYMURRQlFx+CtK9wMb1TwQBlYB3R1NLX6x
** P7TQX0OkK5IbmguNNGdLagXDT/jyzoM5PCLYRC6HzRIJ/+BKZYIJVGhLCoyVXNhm
** 4EXFJV15SaG1Eu1Z5UPIsNSk0FDhZt6Gxaq4Aa86JDprUn8su725bIFdTOM68fgz
** 1foTLZRWNWgd0UYVqo0xklQVWvKAlkIVCoHemlQduo3oThXKAVtaKI9mKxqOLJwj
** zc8RbbJ5QMGgn2i0EshfTGjIIskV8Y4xmfDRmUXizWnMvVm06/KZHloMSPcakVvs
** gwvYh49vE9PJxCCgmzlMUHSPgdmEKJVWZTz970Q64rtSubDgm00kecBLVSo30VWs
** 7djNmfi7fIeK822oyVF6O2rmFb4htY3UNlHD60HQe+9LlzVGpdasRTC01yWJ/RsY
** 0m896SBczXS2pGR90XA56EpFplp1/WC0zgnU+c2XikyZUCoyVedfHdFGFSpeLDJV
** hXbp6oCuVGTKhFKRqTp0G9GdKpQJpSJTIYx9KKdjZLU1I5bQKGR5Ldpgji6go/BX
** m3iNkd4L+GAaotU7CMDuPqkZ5BoDjovg43Cxh+V8eftGKbseyfbK0DaM1OFWgzey
** 2T7/hHvx/PeB5ZTS1U6fOMv5ZTfaVzveGwgn28FAoXCtuPHDeRyHeThP5HDqh+k9
** E7P19efi/od9+3jsl/4dEzNm0O04EkP9dbgsv/qRnPr5RJb++2gd4zf8ksj2KwwA
** AA==
** -----END EMBEDDED OBJECT-----
**
** As you can see, it consists of four classes of memory chunks:
** 1. a top-level structure which acts as the primary handle
** 2. the virtual hash table consisting of a single (growing)
** directory and one or more (fixed size) directory segments
** 3. the collision chains consisting of element structures
** 4. the actual elements consisting of key+value structures
*/
#include "act_p.h"
/* fixed size (number of pointers) of the directory and of each segment */
#define INITDIRSIZE 256 /* can be an arbitrary value */
#define SEGMENTSIZE 512 /* has to be a power of 2 for below arithmetic */
/* the borders for the hash table load */
#define MINLOADFCTR 1 /* should be between 0 and 1 */
#define MAXLOADFCTR 2 /* should be between 2 and 4 */
/* the per-element structure (keep as small as possible!) */
typedef struct element_st element_t;
struct element_st {
element_t *e_next; /* pointer to next element in collision chain */
unsigned long e_hash; /* cached hash value of element (rehashing optimization) */
void *e_keyptr; /* pointer to key (= also pointer to key+data memory chunk) */
void *e_datptr; /* pointer to data in key+data memory chunk */
void *e_endptr; /* pointer to end of key+data memory chunk */
};
/* the hash table segments */
typedef struct segment_st segment_t;
struct segment_st {
element_t *s_element[SEGMENTSIZE]; /* array of pointers to elements */
};
/* the top-level hash table structure */
typedef struct act_hash_lh_st act_hash_lh_t;
struct act_hash_lh_st {
act_ctx_t *h_ctx; /* context structure */
act_grid_t *h_grid_seg; /* memory grid for segments */
act_grid_t *h_grid_el; /* memory grid for elements */
act_hash_fct_t h_func; /* hash function (copied from context) */
unsigned int h_p; /* pointer to start of unsplit region */
unsigned int h_pmax; /* pointer to end of unsplit region */
int h_slack; /* grow/shrink indicator */
unsigned h_dirsize; /* current size of directory */
segment_t **h_dir; /* pointer to directory */
};
/* on-the-fly calculate index into directory and segment from virtual array index */
#define DIRINDEX(addr) (int)((addr) / SEGMENTSIZE)
#define SEGINDEX(addr) (int)((addr) % SEGMENTSIZE)
/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))
/* create the hash table structure */
static act_hash_lh_t *
act_hash_lh_new(
act_ctx_t *ctx)
{
act_hash_lh_t *h;
/* allocate hash structure */
if ((h = (act_hash_lh_t *)act_mem_alloc_ctx(ctx, sizeof(act_hash_lh_t))) == NULL)
return NULL;
h->h_ctx = ctx;
/* create the internal memory grid for segments */
if (act_grid_create(&h->h_grid_seg, ctx, sizeof(segment_t), 1) != ACT_OK) {
errno_safe(act_mem_free_ctx(ctx, h));
return NULL;
}
/* create the internal memory grid for elements */
if (act_grid_create(&h->h_grid_el, ctx, sizeof(element_t), SEGMENTSIZE) != ACT_OK) {
errno_safe(act_grid_destroy(h->h_grid_seg));
errno_safe(act_mem_free_ctx(ctx, h));
return NULL;
}
/* allocate and clear hash table directory */
h->h_dirsize = INITDIRSIZE;
if ((h->h_dir = (segment_t **)act_mem_alloc_ctx(
ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) {
errno_safe(act_grid_destroy(h->h_grid_seg));
errno_safe(act_grid_destroy(h->h_grid_el));
errno_safe(act_mem_free_ctx(ctx, h));
return NULL;
}
act_mem_set(h->h_dir, 0, h->h_dirsize * sizeof(segment_t *));
/* allocate and clear first segment of hash table array */
if (act_grid_alloc(h->h_grid_seg, (void **)&(h->h_dir[0])) != ACT_OK) {
errno_safe(act_grid_destroy(h->h_grid_seg));
errno_safe(act_grid_destroy(h->h_grid_el));
errno_safe(act_mem_free_ctx(ctx, h->h_dir));
errno_safe(act_mem_free_ctx(ctx, h));
return NULL;
}
act_mem_set(h->h_dir[0], 0, sizeof(segment_t));
/* initialize hash table control attributes */
h->h_p = 0;
h->h_pmax = SEGMENTSIZE;
h->h_slack = SEGMENTSIZE * MAXLOADFCTR;
/* inline values from context (for performance reasons only) */
if (!act_ctx_get(ctx, ACT_HASH_FUNC, &h->h_func))
h->h_func = act_hash_fct(djbx33a);
return h;
}
/* expand the hash table */
static void
act_hash_lh_expand(
act_ctx_t *ctx,
act_hash_lh_t *h)
{
unsigned int pmax0;
unsigned int newaddr;
segment_t *seg;
element_t **pelold;
element_t *el, *headofold, *headofnew, *next;
unsigned int n;
/* calculate next new address */
pmax0 = h->h_pmax;
newaddr = pmax0 + h->h_p;
/* have to enlarge directory? */
if (h->h_dirsize <= DIRINDEX(newaddr)) {
n = h->h_dirsize * sizeof(segment_t *);
h->h_dirsize *= 2;
if ((h->h_dir = (segment_t **)act_mem_realloc_ctx(ctx,
h->h_dir, h->h_dirsize*sizeof(segment_t *))) == NULL)
return;
act_mem_set((char *)h->h_dir+n, 0, n);
}
/* have to create a new table segment? */
if (SEGINDEX(newaddr) == 0) {
if (act_grid_alloc(h->h_grid_seg, (void **)&seg) != ACT_OK)
return;
act_mem_set(seg, 0, sizeof(segment_t));
h->h_dir[DIRINDEX(newaddr)] = seg;
}
/* locate P-element */
pelold = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)];
/* adjust the state variables */
if (++(h->h_p) >= h->h_pmax) {
h->h_pmax = (h->h_pmax << 1); /* == h->h_pmax * 2 */
h->h_p = 0;
}
h->h_slack += MAXLOADFCTR;
/* relocate and split between P-element new element */
headofold = NULL;
headofnew = NULL;
for (el = *pelold; el != NULL; el = next) {
next = el->e_next;
if (el->e_hash & pmax0) {
el->e_next = headofnew;
headofnew = el;
} else {
el->e_next = headofold;
headofold = el;
}
}
*pelold = headofold;
h->h_dir[DIRINDEX(newaddr)]->s_element[SEGINDEX(newaddr)] = headofnew;
return;
}
/* shrink hash table */
static void
act_hash_lh_shrink(
act_ctx_t *ctx,
act_hash_lh_t *h)
{
segment_t *lastseg;
element_t **pel;
unsigned int oldlast;
unsigned int dirsize;
void *dir;
/* calculate old last element */
oldlast = h->h_p + h->h_pmax - 1;
/* we cannot shrink below one segment */
if (oldlast == 0)
return;
/* adjust the state variables */
if (h->h_p == 0) {
h->h_pmax = (h->h_pmax >> 1); /* == h->h_pmax / 2 */;
h->h_p = h->h_pmax - 1;
} else
h->h_p--;
h->h_slack -= MAXLOADFCTR;
/* relocate the lost old last element to the end of the P-element */
pel = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)];
while (*pel != NULL)
pel = &((*pel)->e_next);
lastseg = h->h_dir[DIRINDEX(oldlast)];
*pel = lastseg->s_element[SEGINDEX(oldlast)];
lastseg->s_element[SEGINDEX(oldlast)] = NULL;
/* if possible, deallocate the last segment */
if (SEGINDEX(oldlast) == 0) {
h->h_dir[DIRINDEX(oldlast)] = NULL;
act_grid_free(h->h_grid_seg, lastseg);
}
/* if possible, deallocate the end of the directory */
if (h->h_dirsize > INITDIRSIZE && DIRINDEX(oldlast) < h->h_dirsize/2) {
dirsize = (h->h_dirsize >> 1); /* == h->h_dirsize / 2 */
if ((dir = (segment_t **)act_mem_realloc_ctx(ctx,
h->h_dir, dirsize*sizeof(segment_t *))) != NULL) {
h->h_dirsize = dirsize;
h->h_dir = dir;
}
}
return;
}
/* insert element into hash table */
static int
act_hash_lh_insert(
act_ctx_t *ctx,
act_hash_lh_t *h,
void *keyptr, int keylen,
void *datptr, int datlen,
int override)
{
unsigned int hash, addr;
element_t *el, **pel;
int bFound;
void *vp;
/* argument consistency check */
if (h == NULL || keyptr == NULL || keylen <= 0)
return_errno(FALSE, EINVAL);
/* calculate hash address */
hash = h->h_func(keyptr, keylen);
addr = hash % h->h_pmax; /* unsplit region */
if (addr < h->h_p)
addr = hash % (h->h_pmax << 1); /* split region */
/* locate hash element's collision list */
pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];
/* check whether element is already in the hash table */
bFound = FALSE;
for (el = *pel; el != NULL; el = el->e_next) {
if ( el->e_hash == hash
&& el_keylen(el) == keylen
&& act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
bFound = TRUE;
break;
}
}
/* only override on request */
if (bFound && !override)
return FALSE;
/* create a duplicate of key and data */
if ((vp = act_mem_alloc_ctx(ctx, keylen+datlen)) == NULL)
return FALSE;
act_mem_move(vp, keyptr, keylen);
act_mem_move((char *)vp+keylen, datptr, datlen);
keyptr = vp;
datptr = (char *)vp+keylen;
if (bFound) {
/* reuse existing element by freeing old contents */
if (el->e_keyptr != NULL)
act_mem_free_ctx(ctx, el->e_keyptr);
}
else {
/* allocate new element and chain into list */
if (act_grid_alloc(h->h_grid_el, (void **)&el) != ACT_OK) {
act_mem_free_ctx(ctx, vp);
return FALSE;
}
while (*pel != NULL)
pel = &((*pel)->e_next);
el->e_next = *pel;
*pel = el;
}
/* insert contents into element structure */
el->e_keyptr = keyptr;
el->e_datptr = datptr;
el->e_endptr = (char *)datptr+datlen;
el->e_hash = hash;
/* do we need to expand the table now? */
if (--(h->h_slack) < 0)
act_hash_lh_expand(ctx, h);
return TRUE;
}
/* lookup an element in hash table */
static int
act_hash_lh_lookup(
act_ctx_t *ctx,
act_hash_lh_t *h,
void *keyptr, int keylen,
void **datptr, int *datlen)
{
unsigned int hash, addr;
element_t *el, **pel;
/* argument consistency check */
if (h == NULL || keyptr == NULL || keylen <= 0)
return_errno(FALSE, EINVAL);
/* calculate hash address */
hash = h->h_func(keyptr, keylen);
addr = hash % h->h_pmax; /* unsplit region */
if (addr < h->h_p)
addr = hash % (h->h_pmax << 1); /* split region */
/* locate hash element collision list */
pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];
/* search for element in collision list */
for (el = *pel; el != NULL; el = el->e_next) {
if ( el->e_hash == hash
&& el_keylen(el) == keylen
&& act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
/* provide results */
if (datptr != NULL)
*datptr = el->e_datptr;
if (datlen != NULL)
*datlen = el_datlen(el);
return TRUE;
}
}
return FALSE;
}
/* delete an element in hash table */
static int
act_hash_lh_delete(
act_ctx_t *ctx,
act_hash_lh_t *h,
void *keyptr, int keylen)
{
unsigned int hash, addr;
element_t *el, *lel, **pel;
int rv;
/* argument consistency check */
if (h == NULL || keyptr == NULL || keylen <= 0)
return_errno(FALSE, EINVAL);
/* calculate hash address */
hash = h->h_func(keyptr, keylen);
addr = hash % h->h_pmax; /* unsplit region */
if (addr < h->h_p)
addr = hash % (h->h_pmax << 1); /* split region */
/* locate hash element collision chain */
pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];
/* search for element in collision chain */
rv = FALSE;
for (lel = NULL, el = *pel; el != NULL; lel = el, el = el->e_next) {
if ( el->e_hash == hash
&& el_keylen(el) == keylen
&& act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
/* free key+data memory chunk */
if (el->e_keyptr != NULL)
act_mem_free_ctx(ctx, el->e_keyptr);
/* remove element from collision chain */
if (lel == NULL)
*pel = el->e_next;
else
lel->e_next = el->e_next;
/* deallocate element structure */
act_grid_free(h->h_grid_el, el);
rv = TRUE;
break;
}
}
/* do we need to shrink the table now? */
if (++(h->h_slack) > ((h->h_pmax + h->h_p) * (MAXLOADFCTR-MINLOADFCTR)))
act_hash_lh_shrink(ctx, h);
return rv;
}
/* calculate total size of hash table */
static int
act_hash_lh_size(
act_ctx_t *ctx,
act_hash_lh_t *h,
long *pbytes, long *pelements)
{
long bytes, elements;
int i, j;
element_t *el;
/* argument consistency check */
if (h == NULL)
return_errno(FALSE, EINVAL);
/* start with 0 bytes and elements */
bytes = 0;
elements = 0;
/* add bytes for top-level structure and directory */
bytes += sizeof(act_hash_lh_t);
bytes += h->h_dirsize * sizeof(segment_t *);
/* add size for segments */
for (i = 0; i < h->h_dirsize; i++) {
if (h->h_dir[i] == NULL)
continue;
bytes += sizeof(segment_t);
/* add size of elements */
for (j = 0; j < SEGMENTSIZE; j++) {
if (h->h_dir[i]->s_element[j] == NULL)
continue;
el = h->h_dir[i]->s_element[j];
for (; el != NULL; el = el->e_next) {
elements++;
/* add size of key+data */
bytes += sizeof(element_t);
bytes += el_keylen(el);
bytes += el_datlen(el);
}
}
}
/* provide results */
if (pbytes != NULL)
*pbytes = bytes;
if (pelements != NULL)
*pelements = elements;
return TRUE;
}
/* calculate total size of hash table */
static int
act_hash_lh_status(
act_ctx_t *ctx,
act_hash_lh_t *h,
char *pstatus,
int nstatus)
{
int bytes, elements;
int dirsize, segsize;
int segments;
int chains;
int chainlen_min, chainlen_max;
int chainlen_all, chainlen_avg;
double used;
int i, j, k;
element_t *el;
/* argument consistency check */
if (h == NULL)
return_errno(FALSE, EINVAL);
/* initial values */
bytes = 0;
elements = 0;
dirsize = h->h_dirsize;
segsize = SEGMENTSIZE;
segments = 0;
chains = 0;
chainlen_min = 0;
chainlen_max = 0;
chainlen_all = 0;
chainlen_avg = 0;
used = 0;
/* add bytes for top-level structure and directory */
bytes += sizeof(act_hash_lh_t);
bytes += h->h_dirsize * sizeof(segment_t *);
/* add size for segments */
for (i = 0; i < h->h_dirsize; i++) {
if (h->h_dir[i] == NULL)
continue;
segments++;
bytes += sizeof(segment_t);
/* add size of elements */
for (j = 0; j < SEGMENTSIZE; j++) {
if (h->h_dir[i]->s_element[j] == NULL)
continue;
chains++;
el = h->h_dir[i]->s_element[j];
k = 0;
for (; el != NULL; el = el->e_next) {
elements++;
k++;
/* add size of key+data */
bytes += sizeof(element_t);
bytes += el_keylen(el);
bytes += el_datlen(el);
}
chainlen_all += k;
if (chainlen_min == 0 || chainlen_min > k)
chainlen_min = k;
if (chainlen_max == 0 || chainlen_max < k)
chainlen_max = k;
}
}
/* calculate total table usage */
used = ((double)chains / (double)(segments*SEGMENTSIZE)) * 100;
if (chains == 0)
chainlen_avg = 0;
else
chainlen_avg = chainlen_all / chains;
/* provide results */
act_snprintf(pstatus, nstatus,
"size=%d@%dB dir=1*%dB seg=%d*%dB chains=%d@%d/%d/%d usage=%0.1f",
elements, bytes, dirsize, segments, segsize,
chains, chainlen_min, chainlen_avg, chainlen_max, used);
return TRUE;
}
/* destroy the whole hash table */
static int
act_hash_lh_free(
act_ctx_t *ctx,
act_hash_lh_t *h)
{
element_t *el, **pel, *el_next;
unsigned int i, j;
/* argument consistency check */
if (h == NULL)
return_errno(FALSE, EINVAL);
/* deallocate all segment's entries */
for (i = 0; i < h->h_dirsize; i++) {
if (h->h_dir[i] == NULL)
continue;
/* deallocate all collision chains */
for (j = 0; j < SEGMENTSIZE; j++) {
if (h->h_dir[i]->s_element[j] == NULL)
continue;
/* deallocate all elements in collision chain */
pel = &h->h_dir[i]->s_element[j];
for (el = *pel; el != NULL; ) {
/* deallocate key+data memory chunk */
if (el->e_keyptr != NULL)
act_mem_free_ctx(ctx, el->e_keyptr);
el_next = el->e_next;
act_grid_free(h->h_grid_el, el);
el = el_next;
}
}
act_grid_free(h->h_grid_seg, h->h_dir[i]);
}
/* deallocate hash table directory */
act_mem_free_ctx(ctx, h->h_dir);
/* deallocate hash table top-level structure */
act_mem_free_ctx(ctx, h);
return TRUE;
}
/* the ACT dispatch structure for the hashing API */
intern act_hash_method_t act_hash_lh = {
ACT_HASH_METHOD_TAG,
(act_hash_new_t) act_hash_lh_new,
(act_hash_insert_t) act_hash_lh_insert,
(act_hash_lookup_t) act_hash_lh_lookup,
(act_hash_delete_t) act_hash_lh_delete,
(act_hash_size_t) act_hash_lh_size,
(act_hash_status_t) act_hash_lh_status,
(act_hash_free_t) act_hash_lh_free
};