OSSP: CVS Repository: ossp-pkg/lmtp2nntp/lmtp2nntp

ossp-pkg/lmtp2nntp/lmtp2nntp_lh.c

/* 
**  Act - Abstract Container Type Library
**  Copyright (c) 1999-2002 Ralf S. Engelschall <rse@engelschall.com>
**
**  This file is part of Act, a library for dealing with Abstract 
**  Container Types which can be found at http://www.ossp.org/pkg/act/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
*/

/*
**  This module implements a Dynamic Hash Table, based on WITOLD LITWIN
**  and PER-AKE LARSON's ``Linear Hashing'' algorithm (1980/1988),
**  implemented on top of a two-level virtual array with separate
**  collision chains as the backend data structure. Some ideas were
**  taken over from MIKAEL PETTERSON's Linear Hashing enhancements
**  (1993).
**   
**  Linear Hashing can be summarized as following:
**
**  o drawback of classical hashing is that the hash table is of
**    fixed size; if table is filled, the table has to be expanded;
**    unfortunately this requires re-hashing of all existing elements.
**
**  o Extendible Hashing solves this problem by allowing the table
**    to grow without complete re-hashing of elements every time;
**    unfortunately the table must double in size to grow - not a
**    problem for small tables, but a disadvantage for large ones.
**
**  o Linear Hashing allows the table to grow one bucket at a time:
**    table is extended when its load factor passes a critical value; buckets
**    are split "in sequence" starting at index 0 and ending at 2^n; two
**    hashing functions are always "active" - one for split buckets and one
**    for unsplit; when last bucket (the 2^n-th one) is split, the table has
**    been doubled in size, and a new cycle of splitting begins starting at
**    bucket 0.
**
**  The internal structure of the linear hashing table is illustrated
**  in the following figure:
**
**  -----BEGIN EMBEDDED OBJECT-----
**  Content-type: application/fig
**  Description: linear hashing structure layout
**  Version: eo/1.0
**  H4sIACfenzkCA62WTYvbMBCGz+tfIeixJOhb9rnQsrC3XntxE7ExdZ0ldhf233dG
**  sr6aNllRE2K90USPpNGMRx8+P34hYs+bp346zof+xTaf7LTYS/M4HU52bp7sAr8I
**  aRile0qbr8P0PNpmxxvGKSW84QQ/jOwYMURRQlFx+CtK9wMb1TwQBlYB3R1NLX6x
**  P7TQX0OkK5IbmguNNGdLagXDT/jyzoM5PCLYRC6HzRIJ/+BKZYIJVGhLCoyVXNhm
**  4EXFJV15SaG1Eu1Z5UPIsNSk0FDhZt6Gxaq4Aa86JDprUn8su725bIFdTOM68fgz
**  1foTLZRWNWgd0UYVqo0xklQVWvKAlkIVCoHemlQduo3oThXKAVtaKI9mKxqOLJwj
**  zc8RbbJ5QMGgn2i0EshfTGjIIskV8Y4xmfDRmUXizWnMvVm06/KZHloMSPcakVvs
**  gwvYh49vE9PJxCCgmzlMUHSPgdmEKJVWZTz970Q64rtSubDgm00kecBLVSo30VWs
**  7djNmfi7fIeK822oyVF6O2rmFb4htY3UNlHD60HQe+9LlzVGpdasRTC01yWJ/RsY
**  0m896SBczXS2pGR90XA56EpFplp1/WC0zgnU+c2XikyZUCoyVedfHdFGFSpeLDJV
**  hXbp6oCuVGTKhFKRqTp0G9GdKpQJpSJTIYx9KKdjZLU1I5bQKGR5Ldpgji6go/BX
**  m3iNkd4L+GAaotU7CMDuPqkZ5BoDjovg43Cxh+V8eftGKbseyfbK0DaM1OFWgzey
**  2T7/hHvx/PeB5ZTS1U6fOMv5ZTfaVzveGwgn28FAoXCtuPHDeRyHeThP5HDqh+k9
**  E7P19efi/od9+3jsl/4dEzNm0O04EkP9dbgsv/qRnPr5RJb++2gd4zf8ksj2KwwA
**  AA==
**  -----END EMBEDDED OBJECT-----
**
**  As you can see, it consists of four classes of memory chunks:
**  1. a top-level structure which acts as the primary handle
**  2. the virtual hash table consisting of a single (growing) 
**     directory and one or more (fixed size) directory segments
**  3. the collision chains consisting of element structures
**  4. the actual elements consisting of key+value structures
*/

#include <stdlib.h>
#include <string.h>

#include "lmtp2nntp_lh.h"

/* fixed size (number of pointers) of the directory and of each segment */
#define INITDIRSIZE  256 /* can be an arbitrary value */
#define SEGMENTSIZE  512 /* has to be a power of 2 for below arithmetic */

/* the borders for the hash table load */
#define MINLOADFCTR  1   /* should be between 0 and 1 */
#define MAXLOADFCTR  2   /* should be between 2 and 4 */

/* the per-element structure (keep as small as possible!) */
typedef struct element_st element_t;
struct element_st {
    element_t    *e_next;    /* pointer to next element in collision chain */
    unsigned long e_hash;    /* cached hash value of element (rehashing optimization) */
    void         *e_keyptr;  /* pointer to key (= also pointer to key+data memory chunk) */
    void         *e_datptr;  /* pointer to data in key+data memory chunk */
    void         *e_endptr;  /* pointer to end of key+data memory chunk */
};

/* the hash table segments */
typedef struct segment_st segment_t;
struct segment_st {
    element_t *s_element[SEGMENTSIZE]; /* array of pointers to elements */
}; 

/* the top-level hash table structure */
struct lh_st {
    unsigned int   h_p;         /* pointer to start of unsplit region */
    unsigned int   h_pmax;      /* pointer to end of unsplit region */
    int            h_slack;     /* grow/shrink indicator */
    unsigned       h_dirsize;   /* current size of directory */
    segment_t    **h_dir;       /* pointer to directory */
};

/* on-the-fly calculate index into directory and segment from virtual array index */
#define DIRINDEX(addr) (int)((addr) / SEGMENTSIZE)
#define SEGINDEX(addr) (int)((addr) % SEGMENTSIZE)

/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))

/*
 * BJDDJ Hash Function (Bob Jenkins, Dr. Dobbs Journal):
 * This is a very complex but also very good hash function, as proposed
 * in the March'97 issue of Dr. Dobbs Journal (DDJ) by Bob Jenkins (see
 * http://burtleburtle.net/bob/hash/doobs.html for online version). He
 * showed that this hash function has both very good distribution and
 * performance and our own hash function comparison confirmed this. The
 * only difference to the original function of B.J. here is that our
 * version doesn't provide the `level' (= previous hash) argument for
 * consistency reasons with the other hash functions (i.e. same function
 * signature). It can be definetely recommended as a good general
 * purpuse hash function.
 */
static long 
lh_hash(
    register const unsigned char *k,
    register size_t length)
{
    register long a,b,c,len;

    /* some abbreviations */
#define ub4 long
#define mix(a,b,c) { \
        a -= b; a -= c; a ^= (c>>13); \
        b -= c; b -= a; b ^= (a<< 8); \
        c -= a; c -= b; c ^= (b>>13); \
        a -= b; a -= c; a ^= (c>>12); \
        b -= c; b -= a; b ^= (a<<16); \
        c -= a; c -= b; c ^= (b>> 5); \
        a -= b; a -= c; a ^= (c>> 3); \
        b -= c; b -= a; b ^= (a<<10); \
        c -= a; c -= b; c ^= (b>>15); \
    }

    /* setup the internal state */
    len = length;
    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
    c = 0;

    /* handle most of the key */
    while (len >= 12) {
        a += (k[0] +((ub4)k[1]<<8) +((ub4)k[ 2]<<16) +((ub4)k[ 3]<<24));
        b += (k[4] +((ub4)k[5]<<8) +((ub4)k[ 6]<<16) +((ub4)k[ 7]<<24));
        c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16) +((ub4)k[11]<<24));
        mix(a,b,c);
        k += 12; len -= 12;
    }

    /* handle the last 11 bytes */
    c += length;
    switch(len) { 
        /* all the case statements fall through */
        case 11: c+=((ub4)k[10]<<24);
        case 10: c+=((ub4)k[ 9]<<16);
        case 9 : c+=((ub4)k[ 8]<< 8);
        /* the first byte of c is reserved for the length */
        case 8 : b+=((ub4)k[ 7]<<24);
        case 7 : b+=((ub4)k[ 6]<<16);
        case 6 : b+=((ub4)k[ 5]<< 8);
        case 5 : b+=k[4];
        case 4 : a+=((ub4)k[ 3]<<24);
        case 3 : a+=((ub4)k[ 2]<<16);
        case 2 : a+=((ub4)k[ 1]<< 8);
        case 1 : a+=k[0];
        /* case 0: nothing left to add */
    }
    mix(a,b,c);

#undef ub4
#undef mix

    /* report the result */
    return c;
}

/* create the hash table structure */
lh_t *lh_create(void)
{
    lh_t *h;

    /* allocate hash structure */
    if ((h = (lh_t *)malloc(sizeof(lh_t))) == NULL)
        return NULL;

    /* allocate and clear hash table directory */
    h->h_dirsize = INITDIRSIZE;
    if ((h->h_dir = (segment_t **)malloc(h->h_dirsize * sizeof(segment_t *))) == NULL) {
        free(h);
        return NULL;
    }
    memset(h->h_dir, 0, h->h_dirsize * sizeof(segment_t *));

    /* allocate and clear first segment of hash table array */
    if ((h->h_dir[0] = (segment_t *)malloc(sizeof(segment_t))) == NULL) {
        free(h->h_dir); 
        free(h);
        return NULL;
    }
    memset(h->h_dir[0], 0, sizeof(segment_t));

    /* initialize hash table control attributes */
    h->h_p     = 0;
    h->h_pmax  = SEGMENTSIZE;
    h->h_slack = SEGMENTSIZE * MAXLOADFCTR;

    return h;
}

/* expand the hash table */
static void lh_expand(lh_t *h)
{
    unsigned int pmax0;
    unsigned int newaddr;
    segment_t *seg;
    element_t **pelold;
    element_t *el, *headofold, *headofnew, *next;
    unsigned int n;
    
    /* calculate next new address */
    pmax0   = h->h_pmax;
    newaddr = pmax0 + h->h_p;

    /* have to enlarge directory? */
    if (h->h_dirsize <= DIRINDEX(newaddr)) {
        n = h->h_dirsize * sizeof(segment_t *);
        h->h_dirsize *= 2;
        if ((h->h_dir = (segment_t **)realloc(
                         h->h_dir, h->h_dirsize*sizeof(segment_t *))) == NULL)
             return;
        memset((char *)h->h_dir+n, 0, n);
    }
    
    /* have to create a new table segment? */
    if (SEGINDEX(newaddr) == 0) {
        if ((seg = (segment_t *)malloc(sizeof(segment_t))) == NULL)
            return;
        memset(seg, 0, sizeof(segment_t));
        h->h_dir[DIRINDEX(newaddr)] = seg;
    }

    /* locate P-element */
    pelold = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)];

    /* adjust the state variables */
    if (++(h->h_p) >= h->h_pmax) {
        h->h_pmax = (h->h_pmax << 1); /* == h->h_pmax * 2 */
        h->h_p = 0;
    }
    h->h_slack += MAXLOADFCTR;
    
    /* relocate and split between P-element new element */
    headofold = NULL;
    headofnew = NULL;
    for (el = *pelold; el != NULL; el = next) {
        next = el->e_next;
        if (el->e_hash & pmax0) {
            el->e_next = headofnew;
            headofnew  = el;
        } else {
            el->e_next = headofold;
            headofold  = el;
        }
    }
    *pelold = headofold;
    h->h_dir[DIRINDEX(newaddr)]->s_element[SEGINDEX(newaddr)] = headofnew;

    return;
}

/* shrink hash table */
static void lh_shrink(lh_t *h)
{
    segment_t *lastseg;
    element_t **pel;
    unsigned int oldlast;
    unsigned int dirsize;
    void *dir;

    /* calculate old last element */
    oldlast = h->h_p + h->h_pmax - 1;

    /* we cannot shrink below one segment */
    if (oldlast == 0)
        return;

    /* adjust the state variables */
    if (h->h_p == 0) {
        h->h_pmax = (h->h_pmax >> 1); /* == h->h_pmax / 2 */;
        h->h_p = h->h_pmax - 1;
    } else
        h->h_p--;
    h->h_slack -= MAXLOADFCTR;

    /* relocate the lost old last element to the end of the P-element */
    pel = &h->h_dir[DIRINDEX(h->h_p)]->s_element[SEGINDEX(h->h_p)];
    while (*pel != NULL)
        pel = &((*pel)->e_next);
    lastseg = h->h_dir[DIRINDEX(oldlast)];
    *pel = lastseg->s_element[SEGINDEX(oldlast)];
    lastseg->s_element[SEGINDEX(oldlast)] = NULL;

    /* if possible, deallocate the last segment */
    if (SEGINDEX(oldlast) == 0) {
        h->h_dir[DIRINDEX(oldlast)] = NULL;
        free(lastseg);
    }

    /* if possible, deallocate the end of the directory */
    if (h->h_dirsize > INITDIRSIZE && DIRINDEX(oldlast) < h->h_dirsize/2) {
        dirsize = (h->h_dirsize >> 1); /* == h->h_dirsize / 2 */
        if ((dir = (segment_t **)realloc(
                   h->h_dir, dirsize*sizeof(segment_t *))) != NULL) {
            h->h_dirsize = dirsize;
            h->h_dir = dir;
        }
    }
    return;
}

/* insert element into hash table */
int lh_insert(lh_t *h, const void *keyptr, int keylen, const void *datptr, int datlen, int override)
{
    unsigned int hash, addr;
    element_t *el, **pel;
    int bFound;
    void *vp;

    /* argument consistency check */
    if (h == NULL || keyptr == NULL || keylen <= 0)
        return FALSE;

    /* calculate hash address */
    hash = lh_hash(keyptr, keylen);
    addr = hash % h->h_pmax; /* unsplit region */
    if (addr < h->h_p)
        addr = hash % (h->h_pmax << 1); /* split region */

    /* locate hash element's collision list */
    pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];

    /* check whether element is already in the hash table */
    bFound = FALSE;
    for (el = *pel; el != NULL; el = el->e_next) {
        if (   el->e_hash == hash 
            && el_keylen(el) == keylen
            && memcmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
            bFound = TRUE;
            break;
        }
    }

    /* only override on request */
    if (bFound && !override)
        return FALSE;

    /* create a duplicate of key and data */
    if ((vp = malloc(keylen+datlen)) == NULL)
        return FALSE;
    memmove(vp, keyptr, keylen);
    memmove((char *)vp+keylen, datptr, datlen);
    keyptr = vp;
    datptr = (char *)vp+keylen;

    if (bFound) {
        /* reuse existing element by freeing old contents */
        if (el->e_keyptr != NULL)
            free(el->e_keyptr);
    }
    else {
        /* allocate new element and chain into list */
        if ((el = (element_t *)malloc(sizeof(element_t))) == 0) {
            free(vp);
            return FALSE;
        }
        while (*pel != NULL)
            pel = &((*pel)->e_next);
        el->e_next = *pel;
        *pel = el;
    }

    /* insert contents into element structure */
    el->e_keyptr = (void *)keyptr;
    el->e_datptr = (void *)datptr;
    el->e_endptr = (char *)datptr+datlen;
    el->e_hash   = hash;

    /* do we need to expand the table now? */
    if (--(h->h_slack) < 0)
        lh_expand(h);

    return TRUE;
}

/* lookup an element in hash table */
int lh_lookup(lh_t *h, const void *keyptr, int keylen, void **datptr, int *datlen)
{
    unsigned int hash, addr;
    element_t *el, **pel;

    /* argument consistency check */
    if (h == NULL || keyptr == NULL || keylen <= 0)
        return FALSE;

    /* calculate hash address */
    hash = lh_hash(keyptr, keylen);
    addr = hash % h->h_pmax; /* unsplit region */
    if (addr < h->h_p)
        addr = hash % (h->h_pmax << 1); /* split region */

    /* locate hash element collision list */
    pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];

    /* search for element in collision list */
    for (el = *pel; el != NULL; el = el->e_next) {
        if (   el->e_hash == hash 
            && el_keylen(el) == keylen
            && memcmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
            /* provide results */
            if (datptr != NULL)
                *datptr = el->e_datptr;
            if (datlen != NULL)
                *datlen = el_datlen(el);
            return TRUE;
        }
    }
    return FALSE;
}

/* delete an element in hash table */
int lh_delete(lh_t *h, const void *keyptr, int keylen)
{
    unsigned int hash, addr;
    element_t *el, *lel, **pel;
    int rv;

    /* argument consistency check */
    if (h == NULL || keyptr == NULL || keylen <= 0)
        return FALSE;

    /* calculate hash address */
    hash = lh_hash(keyptr, keylen);
    addr = hash % h->h_pmax; /* unsplit region */
    if (addr < h->h_p)
        addr = hash % (h->h_pmax << 1); /* split region */

    /* locate hash element collision chain */
    pel = &h->h_dir[DIRINDEX(addr)]->s_element[SEGINDEX(addr)];

    /* search for element in collision chain */
    rv = FALSE;
    for (lel = NULL, el = *pel; el != NULL; lel = el, el = el->e_next) {
        if (   el->e_hash == hash 
            && el_keylen(el) == keylen
            && memcmp(el->e_keyptr, keyptr, el_keylen(el)) == 0) {
            /* free key+data memory chunk */
            if (el->e_keyptr != NULL)
                free(el->e_keyptr);
            /* remove element from collision chain */
            if (lel == NULL)
                *pel = el->e_next; 
            else
                lel->e_next = el->e_next;
            /* deallocate element structure */
            free(el);
            rv = TRUE;
            break;
        }
    }

    /* do we need to shrink the table now? */
    if (++(h->h_slack) > ((h->h_pmax + h->h_p) * (MAXLOADFCTR-MINLOADFCTR)))
        lh_shrink(h);

    return rv;
}

/* apply a callback for all elements in the hash table */
int lh_apply(lh_t *h, lh_cb_t cb, void *ctx)
{
    element_t *el, **pel;
    unsigned int i, j;

    /* argument consistency check */
    if (h == NULL || cb == NULL)
        return FALSE;

    /* interate over all segment's entries */
    for (i = 0; i < h->h_dirsize; i++) {
        if (h->h_dir[i] == NULL)
            continue;
        /* interate over all collision chains */
        for (j = 0; j < SEGMENTSIZE; j++) {
            if (h->h_dir[i]->s_element[j] == NULL)
                continue;
            /* interate over all elements in collision chain */
            el = h->h_dir[i]->s_element[j];
            for (; el != NULL; el = el->e_next) {
                if (!cb(ctx, el->e_keyptr, el_keylen(el), el->e_datptr, el_datlen(el)))
                    return;
            }
        }
    }
    return TRUE;
}

/* destroy the whole hash table */
int lh_destroy(lh_t *h)
{
    element_t *el, **pel;
    unsigned int i, j;

    /* argument consistency check */
    if (h == NULL)
        return FALSE;

    /* deallocate all segment's entries */
    for (i = 0; i < h->h_dirsize; i++) {
        if (h->h_dir[i] == NULL)
            continue;
        /* deallocate all collision chains */
        for (j = 0; j < SEGMENTSIZE; j++) {
            if (h->h_dir[i]->s_element[j] == NULL)
                continue;
            /* deallocate all elements in collision chain */
            pel = &h->h_dir[i]->s_element[j];
            for (el = *pel; el != NULL; el = el->e_next) {
                /* deallocate key+data memory chunk */
                if (el->e_keyptr != NULL)
                    free(el->e_keyptr);
            }
        }
        free(h->h_dir[i]);
    }

    /* deallocate hash table directory */
    free(h->h_dir);

    /* deallocate hash table top-level structure */
    free(h);

    return TRUE;
}
OSSP CVS Repository