--- act_hash_lh.c 2000/08/19 14:21:08 1.6
+++ act_hash_lh.c 2000/08/20 13:57:53 1.7
@@ -33,44 +33,69 @@
** collision chains as the backend data structure. Some ideas were
** taken over from MIKAEL PETTERSON's Linear Hashing enhancements
** (1993).
+**
+** Linear Hashing can be summarized as following:
**
-** The internal structure of the linear hashing table is illustrated in
-** the following figure:
+** o drawback of classical hashing is that the hash table is of
+** fixed size; if table is filled, the table has to be expanded;
+** unfortunately this requires re-hashing of all existing elements.
+**
+** o Extendible Hashing solves this problem by allowing the table
+** to grow without complete re-hashing of elements every time;
+** unfortunately the table must double in size to grow - not a
+** problem for small tables, but a disadvantage for large ones.
+**
+** o Linear Hashing allows the table to grow one bucket at a time:
+** table is extended when its load factor passes a critical value; buckets
+** are split "in sequence" starting at index 0 and ending at 2^n; two
+** hashing functions are always "active" - one for split buckets and one
+** for unsplit; when last bucket (the 2^n-th one) is split, the table has
+** been doubled in size, and a new cycle of splitting begins starting at
+** bucket 0.
+**
+** The internal structure of the linear hashing table is illustrated
+** in the following figure:
**
** -----BEGIN EMBEDDED OBJECT-----
** Content-type: application/fig
** Description: linear hashing structure layout
** Version: eo/1.0
-** H4sIAKaXnjkCA5WRzwrCMAyHz+YpAp430nR/3FlQBrv5BGMWFWSI2/tjG9uuNy2l
-** 9Gt/5SMk+1N/Rl0yDON8XabxZeBo5tW8oZ+nu1lgMKu9IYIiKong8phvTwMFg2Ii
-** ZGBkJFR2t1iTPQrL9qfD78WuGnZY2dTtjrYzZZ//L1TaG1WTgtQl2UZZXla+Gq5S
-** ELlkG+V5D75MTZRA49rooghZVq2DLDRYwMkkipBnbYMsjEZAZG2dQJa14jDnMDoB
-** J5MoQv681IHCnCJJeyXdyKVWrX+rtVOLsAtNFYpv8AGUMVDTPQMAAA==
+** H4sIACfenzkCA62WTYvbMBCGz+tfIeixJOhb9rnQsrC3XntxE7ExdZ0ldhf233dG
+** sr6aNllRE2K90USPpNGMRx8+P34hYs+bp346zof+xTaf7LTYS/M4HU52bp7sAr8I
+** aRile0qbr8P0PNpmxxvGKSW84QQ/jOwYMURRQlFx+CtK9wMb1TwQBlYB3R1NLX6x
+** P7TQX0OkK5IbmguNNGdLagXDT/jyzoM5PCLYRC6HzRIJ/+BKZYIJVGhLCoyVXNhm
+** 4EXFJV15SaG1Eu1Z5UPIsNSk0FDhZt6Gxaq4Aa86JDprUn8su725bIFdTOM68fgz
+** 1foTLZRWNWgd0UYVqo0xklQVWvKAlkIVCoHemlQduo3oThXKAVtaKI9mKxqOLJwj
+** zc8RbbJ5QMGgn2i0EshfTGjIIskV8Y4xmfDRmUXizWnMvVm06/KZHloMSPcakVvs
+** gwvYh49vE9PJxCCgmzlMUHSPgdmEKJVWZTz970Q64rtSubDgm00kecBLVSo30VWs
+** 7djNmfi7fIeK822oyVF6O2rmFb4htY3UNlHD60HQe+9LlzVGpdasRTC01yWJ/RsY
+** 0m896SBczXS2pGR90XA56EpFplp1/WC0zgnU+c2XikyZUCoyVedfHdFGFSpeLDJV
+** hXbp6oCuVGTKhFKRqTp0G9GdKpQJpSJTIYx9KKdjZLU1I5bQKGR5Ldpgji6go/BX
+** m3iNkd4L+GAaotU7CMDuPqkZ5BoDjovg43Cxh+V8eftGKbseyfbK0DaM1OFWgzey
+** 2T7/hHvx/PeB5ZTS1U6fOMv5ZTfaVzveGwgn28FAoXCtuPHDeRyHeThP5HDqh+k9
+** E7P19efi/od9+3jsl/4dEzNm0O04EkP9dbgsv/qRnPr5RJb++2gd4zf8ksj2KwwA
+** AA==
** -----END EMBEDDED OBJECT-----
**
-** As you can see, it consists of four levels of memory chunks:
-** 1. the top-level structure one sees in the API
-** 2. the hash table consisting of a single directory and one or
-** more directory segments
-** 3. the collision chain consisting of element structures
-** 4. the actual elements consisting of key and value structures
+** As you can see, it consists of four classes of memory chunks:
+** 1. a top-level structure which acts as the primary handle
+** 2. the virtual hash table consisting of a single (growing)
+** directory and one or more (fixed size) directory segments
+** 3. the collision chains consisting of element structures
+** 4. the actual elements consisting of key+value structures
*/
#include "act_p.h"
/* fixed size (number of pointers) of the directory and of each segment */
#define INITDIRSIZE 256 /* can be an arbitrary value */
-#define SEGMENTSIZE 512 /* = 2^9, must be a power of 2 for below fast arithmetic */
-
-/* calculate index in directory and segment from virtual array index */
-#define DIRINDEX(addr) ((addr) >> 9) /* == ((addr) / SEGMENTSIZE) */
-#define SEGINDEX(addr) ((addr) & (512-1)) /* == ((addr) % SEGMENTSIZE) */
+#define SEGMENTSIZE 512 /* has to be a power of 2 for below arithmetic */
-/* load of hash table (maximum should be between 2 and 4) */
-#define MINLOADFCTR 1
-#define MAXLOADFCTR 2
+/* the borders for the hash table load */
+#define MINLOADFCTR 1 /* should be between 0 and 1 */
+#define MAXLOADFCTR 2 /* should be between 2 and 4 */
-/* the per-element structure (keep this as small as possible!) */
+/* the per-element structure (keep as small as possible!) */
typedef struct element_st element_t;
struct element_st {
element_t *e_next; /* pointer to next element in collision chain */
@@ -80,10 +105,6 @@
void *e_endptr; /* pointer to end of key+data memory chunk */
};
-/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
-#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
-#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))
-
/* the hash table segments */
typedef struct segment_st segment_t;
struct segment_st {
@@ -102,6 +123,14 @@
segment_t **h_dir; /* pointer to directory */
};
+/* on-the-fly calculate index into directory and segment from virtual array index */
+#define DIRINDEX(addr) (int)((addr) / SEGMENTSIZE)
+#define SEGINDEX(addr) (int)((addr) % SEGMENTSIZE)
+
+/* on-the-fly calculate lengths of key and data to reduce memory in element_t */
+#define el_keylen(el) ((char *)((el)->e_datptr)-(char *)((el)->e_keyptr))
+#define el_datlen(el) ((char *)((el)->e_endptr)-(char *)((el)->e_datptr))
+
/* create the hash table structure */
static act_hash_lh_t *
act_hash_lh_new(
@@ -116,7 +145,8 @@
/* allocate and clear hash table directory */
h->h_dirsize = INITDIRSIZE;
- if ((h->h_dir = (segment_t **)act_mem_alloc_ctx(ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) {
+ if ((h->h_dir = (segment_t **)act_mem_alloc_ctx(
+ ctx, h->h_dirsize * sizeof(segment_t *))) == NULL) {
errno_safe(act_mem_free_ctx(ctx, h));
return NULL;
}
@@ -482,6 +512,90 @@
return TRUE;
}
+/* calculate total size of hash table */
+static int
+act_hash_lh_status(
+ act_ctx_t *ctx,
+ act_hash_lh_t *h,
+ char *pstatus,
+ int nstatus)
+{
+ int bytes, elements;
+ int dirsize, segsize;
+ int segments;
+ int chains;
+ int chainlen_min, chainlen_max;
+ int chainlen_all, chainlen_avg;
+ double used;
+ int i, j, k;
+ element_t *el;
+
+ /* argument consistency check */
+ if (h == NULL)
+ return_errno(FALSE, EINVAL);
+
+ /* initial values */
+ bytes = 0;
+ elements = 0;
+ dirsize = h->h_dirsize;
+ segsize = SEGMENTSIZE;
+ segments = 0;
+ chains = 0;
+ chainlen_min = 0;
+ chainlen_max = 0;
+ chainlen_all = 0;
+ chainlen_avg = 0;
+ used = 0;
+
+ /* add bytes for top-level structure and directory */
+ bytes += sizeof(act_hash_lh_t);
+ bytes += h->h_dirsize * sizeof(segment_t *);
+
+ /* add size for segments */
+ for (i = 0; i < h->h_dirsize; i++) {
+ if (h->h_dir[i] == NULL)
+ continue;
+ segments++;
+ bytes += sizeof(segment_t);
+ /* add size of elements */
+ for (j = 0; j < SEGMENTSIZE; j++) {
+ if (h->h_dir[i]->s_element[j] == NULL)
+ continue;
+ chains++;
+ el = h->h_dir[i]->s_element[j];
+ k = 0;
+ for (; el != NULL; el = el->e_next) {
+ elements++;
+ k++;
+ /* add size of key+data */
+ bytes += sizeof(element_t);
+ bytes += el_keylen(el);
+ bytes += el_datlen(el);
+ }
+ chainlen_all += k;
+ if (chainlen_min == 0 || chainlen_min > k)
+ chainlen_min = k;
+ if (chainlen_max == 0 || chainlen_max < k)
+ chainlen_max = k;
+ }
+ }
+
+ /* calculate total table usage */
+ used = ((double)chains / (double)(segments*SEGMENTSIZE)) * 100;
+ if (chains == 0)
+ chainlen_avg = 0;
+ else
+ chainlen_avg = chainlen_all / chains;
+
+ /* provide results */
+ act_snprintf(pstatus, nstatus,
+ "size=%d@%dB dir=1*%dB seg=%d*%dB chains=%d@%d/%d/%d usage=%0.1f",
+ elements, bytes, dirsize, segments, segsize,
+ chains, chainlen_min, chainlen_avg, chainlen_max, used);
+
+ return TRUE;
+}
+
/* destroy the whole hash table */
static int
act_hash_lh_free(
@@ -531,6 +645,7 @@
(act_hash_lookup_t) act_hash_lh_lookup,
(act_hash_delete_t) act_hash_lh_delete,
(act_hash_size_t) act_hash_lh_size,
+ (act_hash_status_t) act_hash_lh_status,
(act_hash_free_t) act_hash_lh_free
};
|