OSSP CVS Repository

ossp - Difference in ossp-pkg/act/act_hash_lh.c versions 1.6 and 1.7
Not logged in
[Honeypot]  [Browse]  [Home]  [Login]  [Reports
[Search]  [Ticket]  [Timeline
  [History

ossp-pkg/act/act_hash_lh.c 1.6 -> 1.7

--- act_hash_lh.c        2000/08/19 14:21:08     1.6
+++ act_hash_lh.c        2000/08/20 13:57:53     1.7
@@ -33,44 +33,69 @@
 **  collision chains as the backend data structure. Some ideas were
 **  taken over from MIKAEL PETTERSON's Linear Hashing enhancements
 **  (1993).
+**   
+**  Linear Hashing can be summarized as following:
 **
-**  The internal structure of the linear hashing table is illustrated in
-**  the following figure:
+**  o drawback of classical hashing is that the hash table is of
+**    fixed size; if table is filled, the table has to be expanded;
+**    unfortunately this requires re-hashing of all existing elements.
+**
+**  o Extendible Hashing solves this problem by allowing the table
+**    to grow without complete re-hashing of elements every time;
+**    unfortunately the table must double in size to grow - not a
+**    problem for small tables, but a disadvantage for large ones.
+**
+**  o Linear Hashing allows the table to grow one bucket at a time:
+**    table is extended when its load factor passes a critical value; buckets
+**    are split "in sequence" starting at index 0 and ending at 2^n; two
+**    hashing functions are always "active" - one for split buckets and one
+**    for unsplit; when last bucket (the 2^n-th one) is split, the table has
+**    been doubled in size, and a new cycle of splitting begins starting at
+**    bucket 0.
+**
+**  The internal structure of the linear hashing table is illustrated
+**  in the following figure:
 **
 **  -----BEGIN EMBEDDED OBJECT-----
 **  Content-type: application/fig
 **  Description: linear hashing structure layout
 **  Version: eo/1.0
-**  H4sIAKaXnjkCA5WRzwrCMAyHz+YpAp430nR/3FlQBrv5BGMWFWSI2/tjG9uuNy2l
-**  9Gt/5SMk+1N/Rl0yDON8XabxZeBo5tW8oZ+nu1lgMKu9IYIiKong8phvTwMFg2Ii
-**  ZGBkJFR2t1iTPQrL9qfD78WuGnZY2dTtjrYzZZ//L1TaG1WTgtQl2UZZXla+Gq5S
-**  ELlkG+V5D75MTZRA49rooghZVq2DLDRYwMkkipBnbYMsjEZAZG2dQJa14jDnMDoB
-**  J5MoQv681IHCnCJJeyXdyKVWrX+rtVOLsAtNFYpv8AGUMVDTPQMAAA==
+**  H4sIACfenzkCA62WTYvbMBCGz+tfIeixJOhb9rnQsrC3XntxE7ExdZ0ldhf233dG
+**  sr6aNllRE2K90USPpNGMRx8+P34hYs+bp346zof+xTaf7LTYS/M4HU52bp7sAr8I
+**  aRile0qbr8P0PNpmxxvGKSW84QQ/jOwYMURRQlFx+CtK9wMb1TwQBlYB3R1NLX6x
+**  P7TQX0OkK5IbmguNNGdLagXDT/jyzoM5PCLYRC6HzRIJ/+BKZYIJVGhLCoyVXNhm
+**  4EXFJV15SaG1Eu1Z5UPIsNSk0FDhZt6Gxaq4Aa86JDprUn8su725bIFdTOM68fgz
+**  1foTLZRWNWgd0UYVqo0xklQVWvKAlkIVCoHemlQduo3oThXKAVtaKI9mKxqOLJwj
+**  zc8RbbJ5QMGgn2i0EshfTGjIIskV8Y4xmfDRmUXizWnMvVm06/KZHloMSPcakVvs
+**  gwvYh49vE9PJxCCgmzlMUHSPgdmEKJVWZTz970Q64rtSubDgm00kecBLVSo30VWs
+**  7djNmfi7fIeK822oyVF6O2rmFb4htY3UNlHD60HQe+9LlzVGpdasRTC01yWJ/RsY
+**  0m896SBczXS2pGR90XA56EpFplp1/WC0zgnU+c2XikyZUCoyVedfHdFGFSpeLDJV
+**  hXbp6oCuVGTKhFKRqTp0G9GdKpQJpSJTIYx9KKdjZLU1I5bQKGR5Ldpgji6go/BX
+**  m3iNkd4L+GAaotU7CMDuPqkZ5BoDjovg43Cxh+V8eftGKbseyfbK0DaM1OFWgzey
+**  2T7/hHvx/PeB5ZTS1U6fOMv5ZTfaVzveGwgn28FAoXCtuPHDeRyHeThP5HDqh+k9
+**  E7P19efi/od9+3jsl/4dEzNm0O04EkP9dbgsv/qRnPr5RJb++2gd4zf8ksj2KwwA
+**  AA==
 **  -----END EMBEDDED OBJECT-----
 **
-**  As you can see, it consists of four levels of memory chunks:
-**  1. the top-level structure one sees in the API
-**  2. the hash table consisting of a single directory and one or
-**     more directory segments
-**  3. the collision chain consisting of element structures
-**  4. the actual elements consisting of key and value structures
+**  As you can see, it consists of four classes of memory chunks:
+**  1. a top-level structure which acts as the primary handle
+**  2. the virtual hash table consisting of a single (growing) 
+**     directory and one or more (fixed size) directory segments
+**  3. the collision chains consisting of element structures
+**  4. the actual elements consisting of key+value structures
 */
 
 #include "act_p.h"
 
 /* fixed size (number of pointers) of the directory and of each segment */
 #define INITDIRSIZE  256 /* can be an arbitrary value */
-#define SEGMENTSIZE  512 /* = 2^9, must be a power of 2 for below fast arithmetic */
-
-/* calculate index in directory and segment from virtual array index */
-#define DIRINDEX(addr)  ((addr) >> 9)      /* == ((addr) / SEGMENTSIZE) */
-#define SEGINDEX(addr)  ((addr) & (512-1)) /* == ((addr) % SEGMENTSIZE) */
+#define SEGMENTSIZE  512 /* has to be a power of 2 for below arithmetic */
 
-/* load of hash table (maximum should be between 2 and 4) */
-#define MINLOADFCTR  1
-#define MAXLOADFCTR  2
+/* the borders for the hash table load */
+#define MINLOADFCTR  1   /* should be between 0 and 1 */
+#define MAXLOADFCTR  2   /* should be between 2 and 4 */
 
-/* the per-element structure (keep this as small as possible!) */
+/* the per-element structure (keep as small as possible!) */
 typedef struct element_st element_t;
 struct element_st {
     element_t    *e_next;    /* pointer to next element in collision chain */
@@ -80,10 +105,6 @@
     void         *e_endptr;  /* pointer to end of key+data memory chunk */
 };
 
-/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
-#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
-#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))
-
 /* the hash table segments */
 typedef struct segment_st segment_t;
 struct segment_st {
@@ -102,6 +123,14 @@
     segment_t    **h_dir;       /* pointer to directory */
 };
 
+/* on-the-fly calculate index into directory and segment from virtual array index */
+#define DIRINDEX(addr) (int)((addr) / SEGMENTSIZE)
+#define SEGINDEX(addr) (int)((addr) % SEGMENTSIZE)
+
+/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
+#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
+#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))
+
 /* create the hash table structure */
 static act_hash_lh_t *
 act_hash_lh_new(
@@ -116,7 +145,8 @@
 
     /* allocate and clear hash table directory */
     h->h_dirsize = INITDIRSIZE;
-    if ((h->h_dir = (segment_t **)act_mem_alloc_ctx(ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) {
+    if ((h->h_dir = (segment_t **)act_mem_alloc_ctx(
+                    ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) {
         errno_safe(act_mem_free_ctx(ctx, h));
         return NULL;
     }
@@ -482,6 +512,90 @@
     return TRUE;
 }
 
+/* calculate total size of hash table */
+static int 
+act_hash_lh_status(
+    act_ctx_t *ctx, 
+    act_hash_lh_t *h, 
+    char *pstatus,
+    int nstatus)
+{
+    int bytes, elements;
+    int dirsize, segsize;
+    int segments;
+    int chains;
+    int chainlen_min, chainlen_max;
+    int chainlen_all, chainlen_avg;
+    double used;
+    int i, j, k;
+    element_t *el;
+
+    /* argument consistency check */
+    if (h == NULL)
+        return_errno(FALSE, EINVAL);
+
+    /* initial values */
+    bytes        = 0;
+    elements     = 0;
+    dirsize      = h->h_dirsize;
+    segsize      = SEGMENTSIZE;
+    segments     = 0;
+    chains       = 0;
+    chainlen_min = 0;
+    chainlen_max = 0;
+    chainlen_all = 0;
+    chainlen_avg = 0;
+    used         = 0;
+
+    /* add bytes for top-level structure and directory */
+    bytes += sizeof(act_hash_lh_t);
+    bytes += h->h_dirsize * sizeof(segment_t *);
+
+    /* add size for segments */
+    for (i = 0; i < h->h_dirsize; i++) {
+        if (h->h_dir[i] == NULL)
+            continue;
+        segments++;
+        bytes += sizeof(segment_t);
+        /* add size of elements */
+        for (j = 0; j < SEGMENTSIZE; j++) {
+            if (h->h_dir[i]->s_element[j] == NULL)
+                continue;
+            chains++;
+            el = h->h_dir[i]->s_element[j];
+            k = 0;
+            for (; el != NULL; el = el->e_next) {
+                elements++;
+                k++;
+                /* add size of key+data */
+                bytes += sizeof(element_t);
+                bytes += el_keylen(el);
+                bytes += el_datlen(el);
+            }
+            chainlen_all += k;
+            if (chainlen_min == 0 || chainlen_min > k)
+                chainlen_min = k;
+            if (chainlen_max == 0 || chainlen_max < k)
+                chainlen_max = k;
+        }
+    }
+
+    /* calculate total table usage */
+    used = ((double)chains / (double)(segments*SEGMENTSIZE)) * 100;
+    if (chains == 0)
+        chainlen_avg = 0;
+    else
+        chainlen_avg = chainlen_all / chains;
+
+    /* provide results */
+    act_snprintf(pstatus, nstatus, 
+                 "size=%d@%dB dir=1*%dB seg=%d*%dB chains=%d@%d/%d/%d usage=%0.1f",
+                 elements, bytes, dirsize, segments, segsize,
+                 chains, chainlen_min, chainlen_avg, chainlen_max, used);
+
+    return TRUE;
+}
+
 /* destroy the whole hash table */
 static int 
 act_hash_lh_free(
@@ -531,6 +645,7 @@
     (act_hash_lookup_t)  act_hash_lh_lookup,
     (act_hash_delete_t)  act_hash_lh_delete,
     (act_hash_size_t)    act_hash_lh_size,
+    (act_hash_status_t)  act_hash_lh_status,
     (act_hash_free_t)    act_hash_lh_free
 };
 

CVSTrac 2.0.1