OSSP CVS Repository

ossp - ossp-pkg/act/act_hash_lh.c 1.4
Not logged in
[Honeypot]  [Browse]  [Directory]  [Home]  [Login
[Reports]  [Search]  [Ticket]  [Timeline
  [Raw

ossp-pkg/act/act_hash_lh.c 1.4
/* 
**  Act - Abstract Container Type Library
**  Copyright (c) 1999-2000 Ralf S. Engelschall <rse@engelschall.com>
**
**  This file is part of Act, a library for dealing with Abstract 
**  Container Types which can be found at http://www.ossp.org/pkg/act/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
**
**  act_hash_lh.c: Linear Hashing (dynamic)
*/

/*
**  This module implements a Dynamic Hash Table, based on WITOLD LITWIN
**  and PER-AKE LARSON's ``Linear Hashing'' algorithm (1980/1988),
**  implemented on top of a two-level virtual array with separate
**  collision chains as the backend data structure. Some ideas were
**  taken over from MIKAEL PETTERSON's Linear Hashing enhancements
**  (1993).
*/

#include "act_p.h"

/* fixed size (number of pointers) of the directory and of each segment */
#define INITDIRSIZE  256 /* can be an arbitrary value */
#define SEGMENTSIZE  512 /* = 2^9, must be a power of 2 */

/* calculate index in directory and segments from virtual array index */
#define DIRINDEX(addr)  ((addr) >> 9)      /* == ((addr) / SEGMENTSIZE) */
#define SEGINDEX(addr)  ((addr) & (512-1)) /* == ((addr) % SEGMENTSIZE) */

/* load of hash table (maximum should be between 2 and 4) */
#define MINLOADFCTR  1
#define MAXLOADFCTR  2

/* the per-element structure (keep this as small as possible!) */
typedef struct element_st element_t;
struct element_st {
    element_t    *e_next;    /* pointer to next element in collision chain */
    unsigned long e_hash;    /* cached hash value of element (rehashing optimization) */
    void         *e_keyptr;  /* pointer to key (= also pointer to key+data memory chunk) */
    void         *e_datptr;  /* pointer to data in key+data memory chunk */
    void         *e_endptr;  /* pointer to end of key+data memory chunk */
};

/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))

/* the hash table segments */
typedef struct segment_st segment_t;
struct segment_st {
    element_t *s_element[SEGMENTSIZE]; /* array of pointers to elements */
}; 

/* the top-level hash table structure */
typedef struct act_hash_lh_st act_hash_lh_t;
struct act_hash_lh_st {
    act_ctx_t     *h_ctx;       /* context structure */
    act_hash_fct_t h_func;      /* hash function (copied from context) */
    unsigned int   h_p;         /* pointer to start of unsplit region */
    unsigned int   h_pmax;      /* pointer to end of unsplit region */
    int            h_slack;     /* grow/shrink indicator */
    unsigned       h_dirsize;   /* current size of directory */
    segment_t    **h_dir;       /* pointer to directory */
};

/* create the hash table structure */
static act_hash_lh_t *act_hash_lh_new(act_ctx_t *ctx)
{
    act_hash_lh_t *h;

    /* allocate hash structure */
    if ((h = (act_hash_lh_t *)act_mem_alloc_ctx(ctx, sizeof(act_hash_lh_t))) == NULL)
        return NULL;
    h->h_ctx = ctx;

    /* allocate and clear hash table directory */
    h->h_dirsize = INITDIRSIZE;
    if ((h->h_dir = (segment_t **)act_mem_alloc_ctx(ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) {
        errno_safe( act_mem_free_ctx(ctx, h) );
        return NULL;
    }
    act_mem_set(h->h_dir, 0, h->h_dirsize * sizeof(segment_t *));

    /* allocate and clear first segment of hash table array */
    if ((h->h_dir[0] = (segment_t *)act_mem_alloc_ctx(ctx, sizeof(segment_t))) == NULL) {
        errno_safe( act_mem_free_ctx(ctx, h->h_dir); act_mem_free_ctx(ctx, h) );
        return NULL;
    }
    act_mem_set(h->h_dir[0], 0, sizeof(segment_t));

    /* initialize hash table control attributes */
    h->h_p     = 0;
    h->h_pmax  = SEGMENTSIZE;
    h->h_slack = SEGMENTSIZE * MAXLOADFCTR;

    /* inline values from context (for performance reasons only) */
    if (!act_ctx_get(ctx, ACT_HASH_FUNC, &h->h_func))
        h->h_func = act_hash_fct(djbx33a);

    return h;
}

/* expand the hash table */
static void act_hash_lh_expand(act_ctx_t *ctx, act_hash_lh_t *h)
{
    unsigned int pmax0;
    unsigned int newaddr;
    segment_t *seg;
    element_t **pelold;
    element_t *el, *headofold, *headofnew, *next;
    unsigned int n;
    
    /* calculate next new address */
    pmax0   = h->h_pmax;
    newaddr = pmax0 + h->h_p;

    /* have to enlarge directory? */
    if (h->h_dirsize <= DIRINDEX(newaddr)) {
        n = h->h_dirsize * sizeof(segment_t *);
        h->h_dirsize *= 2;
        if ((h->h_dir = (segment_t **)act_mem_realloc_ctx(ctx,
                         h->h_dir, h->h_dirsize*sizeof(segment_t *))) == NULL)
             return;
        act_mem_set((char *)h->h_dir+n, 0, n);
    }
    
    /* have to create a new table segment? */
    if (SEGINDEX(newaddr) == 0) {
        if ((seg = (segment_t *)act_mem_alloc_ctx(ctx, sizeof(segment_t))) == NULL)
            return;
        act_mem_set(seg, 0, sizeof(segment_t));
        h->h_dir[DIRINDEX(newaddr)] = seg;
    }

    /* locate P-element */
    pelold = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)];

    /* adjust the state variables */
    if (++(h->h_p) >= h->h_pmax) {
        h->h_pmax = (h->h_pmax << 1); /* == h->h_pmax * 2 */
        h->h_p = 0;
    }
    h->h_slack += MAXLOADFCTR;
    
    /* relocate and split between P-element new element */
    headofold = NULL;
    headofnew = NULL;
    for (el = *pelold; el != NULL; el = next) {
        next = el->e_next;
        if (el->e_hash & pmax0) {
            el->e_next = headofnew;
            headofnew  = el;
        } else {
            el->e_next = headofold;
            headofold  = el;
        }
    }
    *pelold = headofold;
    h->h_dir[DIRINDEX(newaddr)]->s_element[SEGINDEX(newaddr)] = headofnew;

    return;
}

/* shrink hash table */
static void act_hash_lh_shrink(act_ctx_t *ctx, act_hash_lh_t *h)
{
    segment_t *lastseg;
    element_t **pel;
    unsigned int oldlast;
    unsigned int dirsize;
    void *dir;

    /* calculate old last element */
    oldlast = h->h_p + h->h_pmax - 1;

    /* we cannot shrink below one segment */
    if (oldlast == 0)
        return;

    /* adjust the state variables */
    if (h->h_p == 0) {
        h->h_pmax = (h->h_pmax >> 1); /* == h->h_pmax / 2 */;
        h->h_p = h->h_pmax - 1;
    } else
        h->h_p--;
    h->h_slack -= MAXLOADFCTR;

    /* relocate the lost old last element to the end of the P-element */
    pel = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)];
    while (*pel != NULL)
        pel = &((*pel)->e_next);
    lastseg = h->h_dir[DIRINDEX(oldlast)];
    *pel = lastseg->s_element[SEGINDEX(oldlast)];
    lastseg->s_element[SEGINDEX(oldlast)] = NULL;

    /* if possible, deallocate the last segment */
    if (SEGINDEX(oldlast) == 0) {
        h->h_dir[DIRINDEX(oldlast)] = NULL;
        free(lastseg);
    }

    /* if possible, deallocate the end of the directory */
    if (h->h_dirsize > INITDIRSIZE && DIRINDEX(oldlast) < h->h_dirsize/2) {
        dirsize = (h->h_dirsize >> 1); /* == h->h_dirsize / 2 */
        if ((dir = (segment_t **)act_mem_realloc_ctx(ctx,
                   h->h_dir, dirsize*sizeof(segment_t *))) != NULL) {
            h->h_dirsize = dirsize;
            h->h_dir = dir;
        }
    }
    return;
}

/* insert element into hash table */
static int act_hash_lh_insert(act_ctx_t *ctx, act_hash_lh_t *h, void *keyptr, int keylen, 
                              void *datptr, int datlen, int override)
{
    unsigned int hash, addr;
    element_t *el, **pel;
    int bFound;
    void *vp;

    /* argument consistency check */
    if (h == NULL || keyptr == NULL || keylen <= 0)
        return_errno(FALSE, EINVAL);

    /* calculate hash address */
    hash = h->h_func(keyptr, keylen);
    addr = hash % h->h_pmax; /* unsplit region */
    if (addr < h->h_p)
        addr = hash % (h->h_pmax << 1); /* split region */

    /* locate hash element's collision list */
    pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];

    /* check whether element is already in the hash table */
    bFound = FALSE;
    for (el = *pel; el != NULL; el = el->e_next) {
        if (   el->e_hash == hash 
            && el_keylen(el) == keylen
            && act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
            bFound = TRUE;
            break;
        }
    }

    /* only override on request */
    if (bFound && !override)
        return FALSE;

    /* create a duplicate of key and data */
    if ((vp = act_mem_alloc_ctx(ctx, keylen+datlen)) == NULL)
        return FALSE;
    act_mem_move(vp, keyptr, keylen);
    act_mem_move((char *)vp+keylen, datptr, datlen);
    keyptr = vp;
    datptr = (char *)vp+keylen;

    if (bFound) {
        /* reuse existing element by freeing old contents */
        if (el->e_keyptr != NULL)
            act_mem_free_ctx(ctx, el->e_keyptr);
    }
    else {
        /* allocate new element and chain into list */
        if ((el = (element_t *)act_mem_alloc_ctx(ctx, sizeof(element_t))) == 0) {
            act_mem_free_ctx(ctx, vp);
            return FALSE;
        }
        while (*pel != NULL)
            pel = &((*pel)->e_next);
        el->e_next = *pel;
        *pel = el;
    }

    /* insert contents into element structure */
    el->e_keyptr = keyptr;
    el->e_datptr = datptr;
    el->e_endptr = (char *)datptr+datlen;
    el->e_hash   = hash;

    /* do we need to expand the table now? */
    if (--(h->h_slack) < 0)
        act_hash_lh_expand(ctx, h);

    return TRUE;
}

/* lookup an element in hash table */
static int act_hash_lh_lookup(act_ctx_t *ctx, act_hash_lh_t *h, 
                              void *keyptr, int keylen, void **datptr, int *datlen)
{
    unsigned int hash, addr;
    element_t *el, **pel;

    /* argument consistency check */
    if (h == NULL || keyptr == NULL || keylen <= 0)
        return_errno(FALSE, EINVAL);

    /* calculate hash address */
    hash = h->h_func(keyptr, keylen);
    addr = hash % h->h_pmax; /* unsplit region */
    if (addr < h->h_p)
        addr = hash % (h->h_pmax << 1); /* split region */

    /* locate hash element collision list */
    pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];

    /* search for element in collision list */
    for (el = *pel; el != NULL; el = el->e_next) {
        if (   el->e_hash == hash 
            && el_keylen(el) == keylen
            && act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
            /* provide results */
            if (datptr != NULL)
                *datptr = el->e_datptr;
            if (datlen != NULL)
                *datlen = el_datlen(el);
            return TRUE;
        }
    }
    return FALSE;
}

/* delete an element in hash table */
static int act_hash_lh_delete(act_ctx_t *ctx, act_hash_lh_t *h, void *keyptr, int keylen)
{
    unsigned int hash, addr;
    element_t *el, *lel, **pel;
    int rv;

    /* argument consistency check */
    if (h == NULL || keyptr == NULL || keylen <= 0)
        return_errno(FALSE, EINVAL);

    /* calculate hash address */
    hash = h->h_func(keyptr, keylen);
    addr = hash % h->h_pmax; /* unsplit region */
    if (addr < h->h_p)
        addr = hash % (h->h_pmax << 1); /* split region */

    /* locate hash element collision chain */
    pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];

    /* search for element in collision chain */
    rv = FALSE;
    for (lel = NULL, el = *pel; el != NULL; lel = el, el = el->e_next) {
        if (   el->e_hash == hash 
            && el_keylen(el) == keylen
            && act_mem_cmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
            /* free key+data memory chunk */
            if (el->e_keyptr != NULL)
                act_mem_free_ctx(ctx, el->e_keyptr);
            /* remove element from collision chain */
            if (lel == NULL)
                *pel = el->e_next; 
            else
                lel->e_next = el->e_next;
            /* deallocate element structure */
            act_mem_free_ctx(ctx, el);
            rv = TRUE;
            break;
        }
    }

    /* do we need to shrink the table now? */
    if (++(h->h_slack) > ((h->h_pmax + h->h_p) * (MAXLOADFCTR-MINLOADFCTR)))
        act_hash_lh_shrink(ctx, h);

    return rv;
}

/* calculate total size of hash table */
static int act_hash_lh_size(act_ctx_t *ctx, act_hash_lh_t *h, long *pbytes, long *pelements)
{
    long bytes, elements;
    int i, j;
    element_t *el;

    /* argument consistency check */
    if (h == NULL)
        return_errno(FALSE, EINVAL);

    /* start with 0 bytes and elements */
    bytes    = 0;
    elements = 0;

    /* add bytes for top-level structure and directory */
    bytes += sizeof(act_hash_lh_t);
    bytes += h->h_dirsize * sizeof(segment_t *);

    /* add size for segments */
    for (i = 0; i < h->h_dirsize; i++) {
        if (h->h_dir[i] == NULL)
            continue;
        bytes += sizeof(segment_t);
        /* add size of elements */
        for (j = 0; j < SEGMENTSIZE; j++) {
            if (h->h_dir[i]->s_element[j] == NULL)
                continue;
            el = h->h_dir[i]->s_element[j];
            for (; el != NULL; el = el->e_next) {
                elements++;
                /* add size of key+data */
                bytes += sizeof(element_t);
                bytes += el_keylen(el);
                bytes += el_datlen(el);
            }
        }
    }

    /* provide results */
    if (pbytes != NULL)
        *pbytes = bytes;
    if (pelements != NULL)
        *pelements = elements;

    return TRUE;
}

/* destroy the whole hash table */
static int act_hash_lh_free(act_ctx_t *ctx, act_hash_lh_t *h)
{
    element_t *el, **pel;
    unsigned int i, j;

    /* argument consistency check */
    if (h == NULL)
        return_errno(FALSE, EINVAL);

    /* deallocate all segment's entries */
    for (i = 0; i < h->h_dirsize; i++) {
        if (h->h_dir[i] == NULL)
            continue;
        /* deallocate all collision chains */
        for (j = 0; j < SEGMENTSIZE; j++) {
            if (h->h_dir[i]->s_element[j] == NULL)
                continue;
            /* deallocate all elements in collision chain */
            pel = &h->h_dir[i]->s_element[j];
            for (el = *pel; el != NULL; el = el->e_next) {
                /* deallocate key+data memory chunk */
                if (el->e_keyptr != NULL)
                    act_mem_free_ctx(ctx, el->e_keyptr);
            }
        }
        act_mem_free_ctx(ctx, h->h_dir[i]);
    }

    /* deallocate hash table directory */
    act_mem_free_ctx(ctx, h->h_dir);

    /* deallocate hash table top-level structure */
    act_mem_free_ctx(ctx, h);

    return TRUE;
}

/* the ACT dispatch structure for the hashing API */
intern act_hash_method_t act_hash_lh = {
    ACT_HASH_METHOD_TAG,
    (act_hash_new_t)     act_hash_lh_new,
    (act_hash_insert_t)  act_hash_lh_insert,
    (act_hash_lookup_t)  act_hash_lh_lookup,
    (act_hash_delete_t)  act_hash_lh_delete,
    (act_hash_size_t)    act_hash_lh_size,
    (act_hash_free_t)    act_hash_lh_free
};


CVSTrac 2.0.1