00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "config.h"
00023 #include "system.h"
00024 #include "symtab.h"
00025
00026
00027
00028
00029
00030
00031
00032
00033 static unsigned int calc_hash (const unsigned char *, size_t);
00034 static void ht_expand (hash_table *);
00035 static double approx_sqrt (double);
00036
00037
00038
00039 static unsigned int
00040 calc_hash (const unsigned char *str, size_t len)
00041 {
00042 size_t n = len;
00043 unsigned int r = 0;
00044
00045 while (n--)
00046 r = HT_HASHSTEP (r, *str++);
00047
00048 return HT_HASHFINISH (r, len);
00049 }
00050
00051
00052
00053 hash_table *
00054 ht_create (unsigned int order)
00055 {
00056 unsigned int nslots = 1 << order;
00057 hash_table *table;
00058
00059 table = xcalloc (1, sizeof (hash_table));
00060
00061
00062 _obstack_begin (&table->stack, 0, 0,
00063 (void *(*) (long)) xmalloc,
00064 (void (*) (void *)) free);
00065
00066 obstack_alignment_mask (&table->stack) = 0;
00067
00068 table->entries = xcalloc (nslots, sizeof (hashnode));
00069 table->entries_owned = true;
00070 table->nslots = nslots;
00071 return table;
00072 }
00073
00074
00075
00076 void
00077 ht_destroy (hash_table *table)
00078 {
00079 obstack_free (&table->stack, NULL);
00080 if (table->entries_owned)
00081 free (table->entries);
00082 free (table);
00083 }
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093 hashnode
00094 ht_lookup (hash_table *table, const unsigned char *str, size_t len,
00095 enum ht_lookup_option insert)
00096 {
00097 return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
00098 insert);
00099 }
00100
00101 hashnode
00102 ht_lookup_with_hash (hash_table *table, const unsigned char *str,
00103 size_t len, unsigned int hash,
00104 enum ht_lookup_option insert)
00105 {
00106 unsigned int hash2;
00107 unsigned int index;
00108 size_t sizemask;
00109 hashnode node;
00110
00111 sizemask = table->nslots - 1;
00112 index = hash & sizemask;
00113 table->searches++;
00114
00115 node = table->entries[index];
00116
00117 if (node != NULL)
00118 {
00119 if (node->hash_value == hash
00120 && HT_LEN (node) == (unsigned int) len
00121 && !memcmp (HT_STR (node), str, len))
00122 {
00123 if (insert == HT_ALLOCED)
00124
00125
00126 obstack_free (&table->stack, (void *) str);
00127 return node;
00128 }
00129
00130
00131
00132 hash2 = ((hash * 17) & sizemask) | 1;
00133
00134 for (;;)
00135 {
00136 table->collisions++;
00137 index = (index + hash2) & sizemask;
00138 node = table->entries[index];
00139 if (node == NULL)
00140 break;
00141
00142 if (node->hash_value == hash
00143 && HT_LEN (node) == (unsigned int) len
00144 && !memcmp (HT_STR (node), str, len))
00145 {
00146 if (insert == HT_ALLOCED)
00147
00148
00149 obstack_free (&table->stack, (void *) str);
00150 return node;
00151 }
00152 }
00153 }
00154
00155 if (insert == HT_NO_INSERT)
00156 return NULL;
00157
00158 node = (*table->alloc_node) (table);
00159 table->entries[index] = node;
00160
00161 HT_LEN (node) = (unsigned int) len;
00162 node->hash_value = hash;
00163 if (insert == HT_ALLOC)
00164 HT_STR (node) = obstack_copy0 (&table->stack, str, len);
00165 else
00166 HT_STR (node) = str;
00167
00168 if (++table->nelements * 4 >= table->nslots * 3)
00169
00170 ht_expand (table);
00171
00172 return node;
00173 }
00174
00175
00176
00177 static void
00178 ht_expand (hash_table *table)
00179 {
00180 hashnode *nentries, *p, *limit;
00181 unsigned int size, sizemask;
00182
00183 size = table->nslots * 2;
00184 nentries = xcalloc (size, sizeof (hashnode));
00185 sizemask = size - 1;
00186
00187 p = table->entries;
00188 limit = p + table->nslots;
00189 do
00190 if (*p)
00191 {
00192 unsigned int index, hash, hash2;
00193
00194 hash = (*p)->hash_value;
00195 index = hash & sizemask;
00196
00197 if (nentries[index])
00198 {
00199 hash2 = ((hash * 17) & sizemask) | 1;
00200 do
00201 {
00202 index = (index + hash2) & sizemask;
00203 }
00204 while (nentries[index]);
00205 }
00206 nentries[index] = *p;
00207 }
00208 while (++p < limit);
00209
00210 if (table->entries_owned)
00211 free (table->entries);
00212 table->entries_owned = true;
00213 table->entries = nentries;
00214 table->nslots = size;
00215 }
00216
00217
00218
00219 void
00220 ht_forall (hash_table *table, ht_cb cb, const void *v)
00221 {
00222 hashnode *p, *limit;
00223
00224 p = table->entries;
00225 limit = p + table->nslots;
00226 do
00227 if (*p)
00228 {
00229 if ((*cb) (table->pfile, *p, v) == 0)
00230 break;
00231 }
00232 while (++p < limit);
00233 }
00234
00235
00236 void
00237 ht_load (hash_table *ht, hashnode *entries,
00238 unsigned int nslots, unsigned int nelements,
00239 bool own)
00240 {
00241 if (ht->entries_owned)
00242 free (ht->entries);
00243 ht->entries = entries;
00244 ht->nslots = nslots;
00245 ht->nelements = nelements;
00246 ht->entries_owned = own;
00247 }
00248
00249
00250
00251 void
00252 ht_dump_statistics (hash_table *table)
00253 {
00254 size_t nelts, nids, overhead, headers;
00255 size_t total_bytes, longest;
00256 double sum_of_squares, exp_len, exp_len2, exp2_len;
00257 hashnode *p, *limit;
00258
00259 #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
00260 ? (x) \
00261 : ((x) < 1024*1024*10 \
00262 ? (x) / 1024 \
00263 : (x) / (1024*1024))))
00264 #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
00265
00266 total_bytes = longest = sum_of_squares = nids = 0;
00267 p = table->entries;
00268 limit = p + table->nslots;
00269 do
00270 if (*p)
00271 {
00272 size_t n = HT_LEN (*p);
00273
00274 total_bytes += n;
00275 sum_of_squares += (double) n * n;
00276 if (n > longest)
00277 longest = n;
00278 nids++;
00279 }
00280 while (++p < limit);
00281
00282 nelts = table->nelements;
00283 overhead = obstack_memory_used (&table->stack) - total_bytes;
00284 headers = table->nslots * sizeof (hashnode);
00285
00286 fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
00287 (unsigned long) nelts);
00288 fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
00289 (unsigned long) nids, nids * 100.0 / nelts);
00290 fprintf (stderr, "slots\t\t%lu\n",
00291 (unsigned long) table->nslots);
00292 fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
00293 SCALE (total_bytes), LABEL (total_bytes),
00294 SCALE (overhead), LABEL (overhead));
00295 fprintf (stderr, "table size\t%lu%c\n",
00296 SCALE (headers), LABEL (headers));
00297
00298 exp_len = (double)total_bytes / (double)nelts;
00299 exp2_len = exp_len * exp_len;
00300 exp_len2 = (double) sum_of_squares / (double) nelts;
00301
00302 fprintf (stderr, "coll/search\t%.4f\n",
00303 (double) table->collisions / (double) table->searches);
00304 fprintf (stderr, "ins/search\t%.4f\n",
00305 (double) nelts / (double) table->searches);
00306 fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
00307 exp_len, approx_sqrt (exp_len2 - exp2_len));
00308 fprintf (stderr, "longest entry\t%lu\n",
00309 (unsigned long) longest);
00310 #undef SCALE
00311 #undef LABEL
00312 }
00313
00314
00315
00316 static double
00317 approx_sqrt (double x)
00318 {
00319 double s, d;
00320
00321 if (x < 0)
00322 abort ();
00323 if (x == 0)
00324 return 0;
00325
00326 s = x;
00327 do
00328 {
00329 d = (s * s - x) / (2 * s);
00330 s -= d;
00331 }
00332 while (d > .0001);
00333 return s;
00334 }