ARB
adhash.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adhash.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "gb_data.h"
12 #include "gb_tune.h"
13 #include "gb_hashindex.h"
14 
15 #include <arb_strbuf.h>
16 #include <arb_sort.h>
17 
18 #include <climits>
19 #include <cfloat>
20 #include <cctype>
21 
22 
24  char *key;
25  long val;
27 };
28 struct GB_HASH {
29  size_t size; // size of hashtable
30  size_t nelem; // number of elements inserted
32  gbs_hash_entry **entries; // the hash table (has 'size' entries)
33 
34  void (*freefun)(long val); // function to free hash values (see GBS_create_dynaval_hash)
35 
36 };
37 
38 struct numhash_entry {
39  long key;
40  long val;
42 };
43 
44 struct GB_NUMHASH {
45  long size; // size of hashtable
46  size_t nelem; // number of elements inserted
48 };
49 
50 // prime numbers
51 
52 #define KNOWN_PRIMES 279
53 static size_t sorted_primes[KNOWN_PRIMES] = {
54  3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 47, 53, 59, 67, 71, 79, 89, 97, 103, 109, 127, 137, 149, 157, 167, 179, 191, 211,
55  223, 239, 257, 271, 293, 311, 331, 349, 373, 397, 419, 443, 467, 499, 541, 571, 607, 641, 677, 719, 757, 797, 839, 887, 937,
56  991, 1049, 1109, 1171, 1237, 1303, 1373, 1447, 1531, 1613, 1699, 1789, 1889, 1993, 2099, 2213, 2333, 2459, 2591, 2729, 2879,
57  3037, 3203, 3373, 3557, 3761, 3967, 4177, 4397, 4637, 4889, 5147, 5419, 5711, 6029, 6353, 6689, 7043, 7417, 7817, 8231, 8669,
58  9127, 9613, 10133, 10667, 11239, 11831, 12457, 13121, 13829, 14557, 15329, 16139, 16993, 17891, 18839, 19841, 20887, 21991, 23159,
59  24379, 25667, 27031, 28463, 29983, 31567, 33247, 35023, 36871, 38821, 40867, 43019, 45289, 47681, 50207, 52859, 55661, 58601,
60  61687, 64937, 68371, 71971, 75767, 79757, 83969, 88397, 93053, 97961, 103123, 108553, 114269, 120293, 126631, 133303, 140321,
61  147709, 155501, 163697, 172313, 181387, 190979, 201031, 211619, 222773, 234499, 246889, 259907, 273601, 288007, 303187, 319147,
62  335953, 353641, 372263, 391861, 412487, 434201, 457057, 481123, 506449, 533111, 561173, 590713, 621821, 654553, 689021, 725293,
63  763471, 803659, 845969, 890501, 937373, 986717, 1038671, 1093357, 1150909, 1211489, 1275269, 1342403, 1413077, 1487459, 1565747,
64  1648181, 1734937, 1826257, 1922383, 2023577, 2130101, 2242213, 2360243, 2484473, 2615243, 2752889, 2897789, 3050321, 3210871,
65  3379877, 3557773, 3745051, 3942209, 4149703, 4368113, 4598063, 4840103, 5094853, 5363011, 5645279, 5942399, 6255157, 6584377,
66  6930929, 7295719, 7679713, 8083919, 8509433, 8957309, 9428759, 9925021, 10447391, 10997279, 11576087, 12185359, 12826699, 13501819,
67  14212447, 14960471, 15747869, 16576727, 17449207, 18367597, 19334317, 20351927, 21423107, 22550639, 23737523, 24986867, 26301967,
68  27686291, 29143493, 30677363, 32291971, 33991597, 35780639, 37663841, 39646153, 41732809, 43929307, 46241389, 48675167, 51237019,
69  53933713, 56772371, 59760391, 62905681, 66216511, 69701591, 73370107, 77231711, 81296543, 85575313, 90079313, 94820347, 99810899
70 };
71 
72 // define CALC_PRIMES only to expand the above table
73 #if defined(DEBUG)
74 // #define CALC_PRIMES
75 #endif // DEBUG
76 
77 #ifdef CALC_PRIMES
78 
79 #define CALC_PRIMES_UP_TO 100000000U
80 #define PRIME_UNDENSITY 20U // the higher, the less primes are stored
81 
82 #warning "please don't define CALC_PRIMES permanently"
83 
84 static unsigned char bit_val[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
85 
86 static int bit_value(const unsigned char *eratosthenes, long num) {
87  // 'num' is odd and lowest 'num' is 3
88  long bit_num = ((num-1) >> 1)-1; // 3->0 5->1 7->2 etc.
89  long byte_num = bit_num >> 3; // div 8
90  char byte = eratosthenes[byte_num];
91 
92  gb_assert(bit_num >= 0);
93  gb_assert((num&1) == 1); // has to odd
94 
95  bit_num = bit_num & 7;
96 
97  return (byte & bit_val[bit_num]) ? 1 : 0;
98 }
99 static void set_bit_value(unsigned char *eratosthenes, long num, int val) {
100  // 'num' is odd and lowest 'num' is 3; val is 0 or 1
101  long bit_num = ((num-1) >> 1)-1; // 3->0 5->1 7->2 etc.
102  long byte_num = bit_num >> 3; // div 8
103  char byte = eratosthenes[byte_num];
104 
105  gb_assert(bit_num >= 0);
106  gb_assert((num&1) == 1); // has to odd
107 
108  bit_num = bit_num & 7;
109 
110  if (val) {
111  byte |= bit_val[bit_num];
112  }
113  else {
114  byte &= (0xff - bit_val[bit_num]);
115  }
116  eratosthenes[byte_num] = byte;
117 }
118 
119 static void calculate_primes_upto() {
120  {
121  size_t bits_needed = CALC_PRIMES_UP_TO/2+1; // only need bits for odd numbers
122  size_t bytes_needed = (bits_needed/8)+1;
123  unsigned char *eratosthenes = ARB_calloc<unsigned char>(bytes_needed); // bit = 1 means "is not a prime"
124  size_t prime_count = 0;
125  size_t num;
126 
127  printf("eratosthenes' size = %zu\n", bytes_needed);
128  GBK_dump_backtrace(stderr, "calculate_primes_upto");
129 
130  for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) {
131  if (bit_value(eratosthenes, num) == 0) { // is a prime number
132  size_t num2;
133  prime_count++;
134  for (num2 = num*2; num2 <= CALC_PRIMES_UP_TO; num2 += num) { // with all multiples
135  if ((num2&1) == 1) { // skip even numbers
136  set_bit_value(eratosthenes, num2, 1);
137  }
138  }
139  }
140  // otherwise it is no prime and all multiples are already set to 1
141  }
142 
143  // thin out prime numbers (we don't need all of them)
144  {
145  size_t prime_count2 = 0;
146  size_t last_prime = 1;
147  size_t printed = 0;
148 
149  for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) {
150  if (bit_value(eratosthenes, num) == 0) { // is a prime number
151  size_t diff = num-last_prime;
152  if ((diff*PRIME_UNDENSITY)<num) {
153  set_bit_value(eratosthenes, num, 1); // delete unneeded prime
154  }
155  else {
156  prime_count2++; // count needed primes
157  last_prime = num;
158  }
159  }
160  }
161 
162  printf("\nUsing %zu prime numbers up to %zu:\n\n", prime_count2, CALC_PRIMES_UP_TO);
163  printf("#define KNOWN_PRIMES %zu\n", prime_count2);
164  printf("static size_t sorted_primes[KNOWN_PRIMES] = {\n ");
165  printed = 4;
166 
167  for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) {
168  if (bit_value(eratosthenes, num) == 0) { // is a prime number
169  if (printed>128) {
170  printf("\n ");
171  printed = 4;
172  }
173 
174  if (num>INT_MAX) {
175  printed += printf("%zuU, ", num);
176  }
177  else {
178  printed += printf("%zu, ", num);
179  }
180  }
181  }
182  printf("\n};\n\n");
183  }
184 
185  free(eratosthenes);
186  }
187  fflush(stdout);
188  exit(1);
189 }
190 
191 #endif // CALC_PRIMES
192 
193 size_t gbs_get_a_prime(size_t above_or_equal_this) {
194  // return a prime number above_or_equal_this
195  // NOTE: it is not necessarily the next prime number, because we don't calculate all prime numbers!
196 
197 #if defined(CALC_PRIMES)
198  calculate_primes_upto();
199 #endif // CALC_PRIMES
200 
201  if (sorted_primes[KNOWN_PRIMES-1] >= above_or_equal_this) {
202  int l = 0, h = KNOWN_PRIMES-1;
203 
204  while (l < h) {
205  int m = (l+h)/2;
206 #if defined(DEBUG) && 0
207  printf("l=%-3i m=%-3i h=%-3i above_or_equal_this=%li sorted_primes[%i]=%li sorted_primes[%i]=%li sorted_primes[%i]=%li\n",
208  l, m, h, above_or_equal_this, l, sorted_primes[l], m, sorted_primes[m], h, sorted_primes[h]);
209 #endif // DEBUG
210  gb_assert(l <= m);
211  gb_assert(m <= h);
212  if (sorted_primes[m] > above_or_equal_this) {
213  h = m-1;
214  }
215  else {
216  if (sorted_primes[m] < above_or_equal_this) {
217  l = m+1;
218  }
219  else {
220  h = l = m;
221  }
222  }
223  }
224 
225  if (sorted_primes[l] < above_or_equal_this) {
226  l++; // take next
228  }
229 
230  gb_assert(sorted_primes[l] >= above_or_equal_this);
231  gb_assert(l == 0 || sorted_primes[l-1] < above_or_equal_this);
232 
233  return sorted_primes[l];
234  }
235 
236  fprintf(stderr, "Warning: gbs_get_a_prime failed for value %zu (performance bleed)\n", above_or_equal_this);
237  gb_assert(0); // add more primes to sorted_primes[]
238 
239  return above_or_equal_this; // NEED_NO_COV
240 }
241 
242 // -----------------------------------------------
243 // Some Hash Procedures for [string,long]
244 
245 inline size_t hash_size(size_t estimated_elements) {
246  size_t min_hash_size = 2*estimated_elements; // -> fill rate ~ 50% -> collisions unlikely
247  size_t next_prime = gbs_get_a_prime(min_hash_size); // use next prime number
248 
249  return next_prime;
250 }
251 
252 
253 GB_HASH *GBS_create_hash(long estimated_elements, GB_CASE case_sens) {
259  long size = hash_size(estimated_elements);
260  GB_HASH *hs = ARB_calloc<GB_HASH>(1);
261 
262  hs->size = size;
263  hs->nelem = 0;
264  hs->case_sens = case_sens;
265  ARB_calloc(hs->entries, size);
266  hs->freefun = NULp;
267 
268  return hs;
269 }
270 
271 GB_HASH *GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void (*freefun)(long)) {
273  GB_HASH *hs = GBS_create_hash(estimated_elements, case_sens);
274  hs->freefun = freefun;
275  return hs;
276 }
277 
278 void GBS_dynaval_free(long val) {
279  free((void*)val);
280 }
281 
282 #if defined(DEBUG)
283 inline void dump_access(const char *title, const GB_HASH *hs, double mean_access) {
284  fprintf(stderr,
285  "%s: size=%zu elements=%zu mean_access=%.2f hash-speed=%.1f%%\n",
286  title, hs->size, hs->nelem, mean_access, 100.0/mean_access);
287 }
288 
289 static double hash_mean_access_costs(const GB_HASH *hs) {
290  /* returns the mean access costs of the hash [1.0 .. inf[
291  * 1.0 is optimal
292  * 2.0 means: hash speed is 50% (1/2.0)
293  */
294  double mean_access = 1.0;
295 
296  if (hs->nelem) {
297  int strcmps_needed = 0;
298  size_t pos;
299 
300  for (pos = 0; pos<hs->size; pos++) {
301  int strcmps = 1;
302  gbs_hash_entry *e;
303 
304  for (e = hs->entries[pos]; e; e = e->next) {
305  strcmps_needed += strcmps++;
306  }
307  }
308 
309  mean_access = (double)strcmps_needed/hs->nelem;
310  }
311  return mean_access;
312 }
313 #endif // DEBUG
314 
315 
316 void GBS_optimize_hash(const GB_HASH *hs) {
317  if (hs->nelem > hs->size) { // hash is overfilled (Note: even 50% fillrate is slow)
318  size_t new_size = gbs_get_a_prime(hs->nelem*3);
319 
320 #if defined(DEBUG)
321  dump_access("Optimizing filled hash", hs, hash_mean_access_costs(hs));
322 #endif // DEBUG
323 
324  if (new_size>hs->size) { // avoid overflow
325  gbs_hash_entry **new_entries; ARB_calloc(new_entries, new_size);
326 
327  for (size_t pos = 0; pos<hs->size; ++pos) {
328  gbs_hash_entry *e;
329  gbs_hash_entry *next;
330 
331  for (e = hs->entries[pos]; e; e = next) {
332  long new_idx;
333  next = e->next;
334 
335  GB_CALC_HASH_INDEX(e->key, new_idx, new_size, hs->case_sens);
336 
337  e->next = new_entries[new_idx];
338  new_entries[new_idx] = e;
339  }
340  }
341 
342  free(hs->entries);
343 
344  {
345  GB_HASH *hs_mutable = const_cast<GB_HASH*>(hs);
346  hs_mutable->size = new_size;
347  hs_mutable->entries = new_entries;
348  }
349  }
350 #if defined(DEBUG)
351  dump_access("Optimized hash ", hs, hash_mean_access_costs(hs));
352 #endif // DEBUG
353 
354  }
355 }
356 
357 static void gbs_hash_to_strstruct(const char *key, long val, void *cd_out) {
358  const char *p;
359  int c;
360  GBS_strstruct *out = (GBS_strstruct*)cd_out;
361 
362  for (p = key; (c=*p); p++) {
363  GBS_chrcat(out, c);
364  if (c==':') GBS_chrcat(out, c);
365  }
366  GBS_chrcat(out, ':');
367  GBS_intcat(out, val);
368  GBS_chrcat(out, ' ');
369 }
370 
372  GBS_strstruct *out = GBS_stropen(1024);
374  return GBS_strclose(out);
375 }
376 
377 
378 static gbs_hash_entry *find_hash_entry(const GB_HASH *hs, const char *key, size_t *index) {
379  gbs_hash_entry *e;
380  if (hs->case_sens == GB_IGNORE_CASE) {
381  GB_CALC_HASH_INDEX_CASE_IGNORED(key, *index, hs->size);
382  for (e=hs->entries[*index]; e; e=e->next) {
383  if (!strcasecmp(e->key, key)) return e;
384  }
385  }
386  else {
387  GB_CALC_HASH_INDEX_CASE_SENSITIVE(key, *index, hs->size);
388  for (e=hs->entries[*index]; e; e=e->next) {
389  if (!strcmp(e->key, key)) return e;
390  }
391  }
392  return NULp;
393 }
394 
395 long GBS_read_hash(const GB_HASH *hs, const char *key) {
396  size_t i;
397  gbs_hash_entry *e = find_hash_entry(hs, key, &i);
398 
399  return e ? e->val : 0;
400 }
401 
402 static void delete_from_list(GB_HASH *hs, size_t i, gbs_hash_entry *e) {
403  // delete the hash entry 'e' from list at index 'i'
404  hs->nelem--;
405  if (hs->entries[i] == e) {
406  hs->entries[i] = e->next;
407  }
408  else {
409  gbs_hash_entry *ee;
410  for (ee = hs->entries[i]; ee->next != e; ee = ee->next) ;
411  if (ee->next == e) {
412  ee->next = e->next;
413  }
414  else {
415  GB_internal_error("Database may be corrupt, hash tables error"); // NEED_NO_COV
416  }
417  }
418  free(e->key);
419  if (hs->freefun) hs->freefun(e->val);
421 }
422 
423 static long write_hash(GB_HASH *hs, char *key, bool copyKey, long val) {
424  /* returns the old value (or 0 if key had no entry)
425  * if 'copyKey' == false, 'key' will be freed (now or later) and may be invalid!
426  * if 'copyKey' == true, 'key' will not be touched in any way!
427  */
428 
429  size_t i;
430  gbs_hash_entry *e = find_hash_entry(hs, key, &i);
431  long oldval = 0;
432 
433  if (e) {
434  oldval = e->val;
435 
436  if (!val) delete_from_list(hs, i, e); // (val == 0 is not stored, cause 0 is the default value)
437  else e->val = val;
438 
439  if (!copyKey) free(key); // already had an entry -> delete unused mem
440  }
441  else if (val != 0) { // don't store 0
442  // create new hash entry
444  e->next = hs->entries[i];
445  e->key = copyKey ? ARB_strdup(key) : key;
446  e->val = val;
447 
448  hs->entries[i] = e;
449  hs->nelem++;
450  }
451  else {
452  if (!copyKey) free(key); // don't need an entry -> delete unused mem
453  }
454  return oldval;
455 }
456 
457 long GBS_write_hash(GB_HASH *hs, const char *key, long val) {
458  // returns the old value (or 0 if key had no entry)
459  return write_hash(hs, (char*)key, true, val);
460 }
461 
462 long GBS_write_hash_no_strdup(GB_HASH *hs, char *key, long val) {
463  /* same as GBS_write_hash, but does no strdup. 'key' is freed later in GBS_free_hash,
464  * so the user has to 'malloc' the string and give control to the hash.
465  * Note: after calling this function 'key' may be invalid!
466  */
467  return write_hash(hs, key, false, val);
468 }
469 
470 long GBS_incr_hash(GB_HASH *hs, const char *key) {
471  // returns new value
472  size_t i;
473  gbs_hash_entry *e = find_hash_entry(hs, key, &i);
474  long result;
475 
476  if (e) {
477  result = ++e->val;
478  if (!result) delete_from_list(hs, i, e);
479  }
480  else {
482  e->next = hs->entries[i];
483  e->key = ARB_strdup(key);
484  e->val = result = 1;
485 
486  hs->entries[i] = e;
487  hs->nelem++;
488  }
489  return result;
490 }
491 
492 #if defined(DEVEL_RALF)
493 // #define DUMP_HASH_ENTRIES
494 #endif // DEVEL_RALF
495 
496 static void GBS_erase_hash(GB_HASH *hs) {
497  size_t hsize = hs->size;
498 
499 #if defined(DUMP_HASH_ENTRIES)
500  for (size_t i = 0; i < hsize; i++) {
501  printf("hash[%zu] =", i);
502  for (gbs_hash_entry *e = hs->entries[i]; e; e = e->next) {
503  printf(" '%s'", e->key);
504  }
505  printf("\n");
506  }
507 #endif // DUMP_HASH_ENTRIES
508 
509  // check hash size
510  if (hsize >= 10) { // ignore small hashes
511 #if defined(DEBUG)
512  double mean_access = hash_mean_access_costs(hs);
513  if (mean_access > 1.5) { // every 2nd access is a collision - increase hash size?
514  dump_access("hash-size-warning", hs, mean_access);
515 #if defined(DEVEL_RALF) && !defined(UNIT_TESTS)
516  gb_assert(mean_access<2.0); // hash with 50% speed or less
517 #endif // DEVEL_RALF
518  }
519 #else
520  if (hs->nelem >= (2*hsize)) {
521  GB_warningf("Performance leak - very slow hash detected (elems=%zu, size=%zu)\n", hs->nelem, hs->size);
522  GBK_dump_backtrace(stderr, "detected performance leak");
523  }
524 #endif // DEBUG
525  }
526 
527  for (size_t i = 0; i < hsize; i++) {
528  for (gbs_hash_entry *e = hs->entries[i]; e; ) {
529  free(e->key);
530  if (hs->freefun) hs->freefun(e->val);
531 
532  gbs_hash_entry *next = e->next;
534  e = next;
535  }
536  hs->entries[i] = NULp;
537  }
538  hs->nelem = 0;
539 }
540 
542  gb_assert(hs);
543  GBS_erase_hash(hs);
544  free(hs->entries);
545  free(hs);
546 }
547 
548 void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data) {
549  size_t hsize = hs->size;
550  for (size_t i=0; i<hsize; i++) {
551  for (gbs_hash_entry *e = hs->entries[i]; e; ) {
552  gbs_hash_entry *next = e->next;
553  if (e->val) {
554  e->val = func(e->key, e->val, client_data);
555  if (!e->val) delete_from_list(hs, i, e);
556  }
557  e = next;
558  }
559  }
560 }
561 
562 void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data) {
563  size_t hsize = hs->size;
564  for (size_t i=0; i<hsize; i++) {
565  for (gbs_hash_entry *e = hs->entries[i]; e; ) {
566  gbs_hash_entry *next = e->next;
567  if (e->val) func(e->key, e->val, client_data);
568  e = next;
569  }
570  }
571 }
572 
573 size_t GBS_hash_elements(const GB_HASH *hs) {
574  return hs->nelem;
575 }
576 
577 const char *GBS_hash_next_element_that(const GB_HASH *hs, const char *last_key, bool (*condition)(const char *key, long val, void *cd), void *cd) {
578  /* Returns the key of the next element after 'last_key' matching 'condition' (i.e. where condition returns true).
579  * If 'last_key' is NULp, the first matching element is returned.
580  * Returns NULp if no (more) elements match the 'condition'.
581  */
582 
583  size_t size = hs->size;
584  size_t i = 0;
585  gbs_hash_entry *e = NULp;
586 
587  if (last_key) {
588  e = find_hash_entry(hs, last_key, &i);
589  if (!e) return NULp;
590 
591  e = e->next; // use next entry after 'last_key'
592  if (!e) i++;
593  }
594 
595  for (; i<size && !e; ++i) e = hs->entries[i]; // search first/next entry
596 
597  while (e) {
598  if ((*condition)(e->key, e->val, cd)) break;
599  e = e->next;
600  if (!e) {
601  for (i++; i<size && !e; ++i) e = hs->entries[i];
602  }
603  }
604 
605  return e ? e->key : NULp;
606 }
607 
608 static int wrap_hashCompare4gb_sort(const void *v0, const void *v1, void *sorter) {
609  const gbs_hash_entry *e0 = (const gbs_hash_entry*)v0;
610  const gbs_hash_entry *e1 = (const gbs_hash_entry*)v1;
611 
612  return ((gbs_hash_compare_function)sorter)(e0->key, e0->val, e1->key, e1->val);
613 }
614 
615 static size_t get_sorted_hash_values(const GB_HASH *hs, gbs_hash_compare_function sorter, gbs_hash_entry**& mtab) {
616  size_t hsize = hs->size;
617  ARB_calloc(mtab, hs->nelem);
618 
619  size_t values = 0;
620  for (size_t i = 0; i < hsize; i++) {
621  for (gbs_hash_entry *e = hs->entries[i]; e; e = e->next) {
622  if (e->val) {
623  mtab[values++] = e;
624  }
625  }
626  }
627 
628  GB_sort((void**)mtab, 0, values, wrap_hashCompare4gb_sort, (void*)sorter);
629  return values;
630 }
631 
633  gbs_hash_entry **mtab;
634  size_t values = get_sorted_hash_values(hs, sorter, mtab);
635 
636  for (size_t i = 0; i < values; i++) {
637  long new_val = func(mtab[i]->key, mtab[i]->val, client_data);
638  if (new_val != mtab[i]->val) GBS_write_hash(hs, mtab[i]->key, new_val);
639  }
640 
641  free(mtab);
642 }
643 
645  gbs_hash_entry **mtab;
646  size_t values = get_sorted_hash_values(hs, sorter, mtab);
647 
648  for (size_t i = 0; i < values; i++) {
649  func(mtab[i]->key, mtab[i]->val, client_data);
650  }
651 
652  free(mtab);
653 }
654 
655 
656 int GBS_HCF_sortedByKey(const char *k0, long /*v0*/, const char *k1, long /*v1*/) {
657  return strcmp(k0, k1);
658 }
659 
660 // ---------------------------------------------
661 // Some Hash Procedures for [long,long]
662 
663 inline long gbs_numhash_index(long key, long size) {
664  long x;
665  x = (key * (long long)97)%size; // make one multiplier a (long long) to avoid
666  if (x<0) x += size; // int overflow and abort if compiled with -ftrapv
667  return x;
668 }
669 
670 
671 GB_NUMHASH *GBS_create_numhash(size_t estimated_elements) {
672  size_t size = hash_size(estimated_elements);
673  GB_NUMHASH *hs = ARB_calloc<GB_NUMHASH>(1);
674 
675  hs->size = size;
676  hs->nelem = 0;
677  ARB_calloc(hs->entries, size);
678 
679  return hs;
680 }
681 
682 long GBS_read_numhash(GB_NUMHASH *hs, long key) {
683  size_t i = gbs_numhash_index(key, hs->size);
684  for (numhash_entry *e = hs->entries[i]; e; e = e->next) {
685  if (e->key==key) return e->val;
686  }
687  return 0;
688 }
689 
690 long GBS_write_numhash(GB_NUMHASH *hs, long key, long val) {
691  size_t i = gbs_numhash_index(key, hs->size);
692  long oldval = 0;
693 
694  if (val == 0) { // erase
695  numhash_entry **nextPtr = &(hs->entries[i]);
696 
697  for (numhash_entry *e = hs->entries[i]; e; e = e->next) {
698  if (e->key == key) {
699  *nextPtr = e->next; // unlink entry
700  gbm_free_mem(e, sizeof(*e), GBM_HASH_INDEX);
701  hs->nelem--;
702  return 0;
703  }
704  nextPtr = &(e->next);
705  }
706  }
707  else {
708  for (numhash_entry *e=hs->entries[i]; e; e=e->next) {
709  if (e->key==key) {
710  oldval = e->val; gb_assert(oldval);
711  e->val = val;
712  break;
713  }
714  }
715 
716  if (!oldval) {
718 
719  e->next = hs->entries[i];
720  e->key = key;
721  e->val = val;
722 
723  hs->nelem++;
724  hs->entries[i] = e;
725  }
726  }
727  return oldval;
728 }
729 
730 static void GBS_erase_numhash(GB_NUMHASH *hs) {
731  size_t hsize = hs->size;
732 
733  for (size_t i=0; i<hsize; i++) {
734  for (numhash_entry *e = hs->entries[i]; e; ) {
735  numhash_entry *next = e->next;
736 
737  gbm_free_mem(e, sizeof(*e), GBM_HASH_INDEX);
738  e = next;
739  }
740  }
741 
742  hs->nelem = 0;
743 }
744 
746  GBS_erase_numhash(hs);
747  free(hs->entries);
748  free(hs);
749 }
750 
751 // --------------------------------------------------------------------------------
752 
753 #ifdef UNIT_TESTS
754 
755 #include <test_unit.h>
756 
757 // determine hash quality
758 
759 struct gbs_hash_statistic_summary {
760  long count; // how many stats
761  long min_size, max_size, sum_size;
762  long min_nelem, max_nelem, sum_nelem;
763  long min_collisions, max_collisions, sum_collisions;
764  double min_fill_ratio, max_fill_ratio, sum_fill_ratio;
765  double min_hash_quality, max_hash_quality, sum_hash_quality;
766 
767  void init() {
768  count = 0;
769  min_size = min_nelem = min_collisions = LONG_MAX;
770  max_size = max_nelem = max_collisions = LONG_MIN;
771  min_fill_ratio = min_hash_quality = DBL_MAX;
772  max_fill_ratio = max_hash_quality = DBL_MIN;
773 
774  sum_size = sum_nelem = sum_collisions = 0;
775  sum_fill_ratio = sum_hash_quality = 0.0;
776  }
777 };
778 
779 class hash_statistic_manager : virtual Noncopyable {
780  GB_HASH *stat_hash;
781 public:
782  hash_statistic_manager() : stat_hash(NULp) { }
783  ~hash_statistic_manager() {
784  if (stat_hash) GBS_free_hash(stat_hash);
785  }
786 
787  gbs_hash_statistic_summary *get_stat_summary(const char *id) {
788  if (!stat_hash) stat_hash = GBS_create_dynaval_hash(10, GB_MIND_CASE, GBS_dynaval_free);
789 
790  long found = GBS_read_hash(stat_hash, id);
791  if (!found) {
792  gbs_hash_statistic_summary *stat; ARB_calloc(stat, 1);
793  stat->init();
794  found = (long)stat;
795  GBS_write_hash(stat_hash, id, found);
796  }
797 
798  return (gbs_hash_statistic_summary*)found;
799  }
800 };
801 
802 static void addto_hash_statistic_summary(gbs_hash_statistic_summary *stat, long size, long nelem, long collisions, double fill_ratio, double hash_quality) {
803  stat->count++;
804 
805  if (stat->min_size > size) stat->min_size = size;
806  if (stat->max_size < size) stat->max_size = size;
807 
808  if (stat->min_nelem > nelem) stat->min_nelem = nelem;
809  if (stat->max_nelem < nelem) stat->max_nelem = nelem;
810 
811  if (stat->min_collisions > collisions) stat->min_collisions = collisions;
812  if (stat->max_collisions < collisions) stat->max_collisions = collisions;
813 
814  if (stat->min_fill_ratio > fill_ratio) stat->min_fill_ratio = fill_ratio;
815  if (stat->max_fill_ratio < fill_ratio) stat->max_fill_ratio = fill_ratio;
816 
817  if (stat->min_hash_quality > hash_quality) stat->min_hash_quality = hash_quality;
818  if (stat->max_hash_quality < hash_quality) stat->max_hash_quality = hash_quality;
819 
820  stat->sum_size += size;
821  stat->sum_nelem += nelem;
822  stat->sum_collisions += collisions;
823  stat->sum_fill_ratio += fill_ratio;
824  stat->sum_hash_quality += hash_quality;
825 }
826 
827 static hash_statistic_manager hash_stat_man;
828 
829 static void test_clear_hash_statistic_summary(const char *id) {
830  hash_stat_man.get_stat_summary(id)->init();
831 }
832 
833 static void test_print_hash_statistic_summary(const char *id) {
834  gbs_hash_statistic_summary *stat = hash_stat_man.get_stat_summary(id);
835  long count = stat->count;
836  printf("Statistic summary for %li hashes of type '%s':\n", count, id);
837  printf("- size: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_size, stat->max_size, (double)stat->sum_size/count);
838  printf("- nelem: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_nelem, stat->max_nelem, (double)stat->sum_nelem/count);
839  printf("- fill_ratio: min = %5.1f%% ; max = %5.1f%% ; mean = %5.1f%%\n", stat->min_fill_ratio*100.0, stat->max_fill_ratio*100.0, (double)stat->sum_fill_ratio/count*100.0);
840  printf("- collisions: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_collisions, stat->max_collisions, (double)stat->sum_collisions/count);
841  printf("- hash_quality: min = %5.1f%% ; max = %5.1f%% ; mean = %5.1f%%\n", stat->min_hash_quality*100.0, stat->max_hash_quality*100.0, (double)stat->sum_hash_quality/count*100.0);
842 }
843 
844 static void test_calc_hash_statistic(const GB_HASH *hs, const char *id, int print) {
845  long queues = 0;
846  long collisions;
847  double fill_ratio = (double)hs->nelem/hs->size;
848  double hash_quality;
849 
850  for (size_t i = 0; i < hs->size; i++) {
851  if (hs->entries[i]) queues++;
852  }
853  collisions = hs->nelem - queues;
854 
855  hash_quality = (double)queues/hs->nelem; // no collisions means 100% quality
856 
857  if (print != 0) {
858  printf("Statistic for hash '%s':\n", id);
859  printf("- size = %zu\n", hs->size);
860  printf("- elements = %zu (fill ratio = %4.1f%%)\n", hs->nelem, fill_ratio*100.0);
861  printf("- collisions = %li (hash quality = %4.1f%%)\n", collisions, hash_quality*100.0);
862  }
863 
864  addto_hash_statistic_summary(hash_stat_man.get_stat_summary(id), hs->size, hs->nelem, collisions, fill_ratio, hash_quality);
865 }
866 
867 static long test_numhash_count_elems(GB_NUMHASH *hs) {
868  return hs->nelem;
869 }
870 
871 static void insert_into_hash(const char *key, long val, void *cl_toHash) {
872  GB_HASH *toHash = (GB_HASH*)cl_toHash;
873  GBS_write_hash(toHash, key, val);
874 }
875 static void erase_from_hash(const char *key, long val, void *cl_fromHash) {
876  GB_HASH *fromHash = (GB_HASH*)cl_fromHash;
877  long val2 = GBS_read_hash(fromHash, key);
878 
879  if (val2 == val) {
880  GBS_write_hash(fromHash, key, 0);
881  }
882  else {
883  printf("value mismatch in hashes_are_equal(): key='%s' val: %li != %li\n", key, val2, val); // NEED_NO_COV
884  }
885 }
886 
887 static bool hashes_are_equal(GB_HASH *h1, GB_HASH *h2) {
888  size_t count1 = GBS_hash_elements(h1);
889  size_t count2 = GBS_hash_elements(h2);
890 
891  bool equal = (count1 == count2);
892  if (equal) {
894 
895  GBS_hash_do_const_loop(h1, insert_into_hash, copy);
896  GBS_hash_do_const_loop(h2, erase_from_hash, copy);
897 
898  equal = (GBS_hash_elements(copy) == 0);
899  GBS_free_hash(copy);
900  }
901  return equal;
902 }
903 
904 struct TestData : virtual Noncopyable {
905  GB_HASH *mind;
906  GB_HASH *ignore;
907  GB_NUMHASH *num;
908 
909  TestData() {
910  mind = GBS_create_hash(100, GB_MIND_CASE);
911  ignore = GBS_create_hash(100, GB_IGNORE_CASE);
912  num = GBS_create_numhash(100);
913  }
914  ~TestData() {
915  GBS_free_numhash(num);
916  GBS_free_hash(ignore);
917  GBS_free_hash(mind);
918  }
919 
920  void reset() {
921  GBS_erase_hash(mind);
922  GBS_erase_hash(ignore);
923  GBS_erase_numhash(num);
924  }
925 
926  GB_HASH *get_hash(bool case_sens) {
927  return case_sens ? mind : ignore;
928  }
929 };
930 
931 static TestData TEST;
932 
933 static size_t test_hash_count_value(GB_HASH *hs, long val) {
934  size_t hsize = hs->size;
935  size_t count = 0;
936 
937  gb_assert(val != 0); // counting zero values makes no sense (cause these are not stored in the hash)
938 
939  for (size_t i = 0; i<hsize; ++i) {
940  for (gbs_hash_entry *e=hs->entries[i]; e; e=e->next) {
941  if (e->val == val) {
942  ++count;
943  }
944  }
945  }
946 
947  return count;
948 }
949 
950 void TEST_GBS_write_hash() {
951  TEST.reset();
952 
953  for (int case_sens = 0; case_sens <= 1; ++case_sens) {
954  GB_HASH *hash = TEST.get_hash(case_sens);
955 
956  GBS_write_hash(hash, "foo", 1);
958  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 1);
959 
960  GBS_write_hash(hash, "foo", 2);
962  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 2);
963 
964  GBS_write_hash(hash, "foo", 0);
966  TEST_EXPECT_ZERO(GBS_read_hash(hash, "foo"));
967 
968  GBS_write_hash(hash, "foo", 1);
969  GBS_write_hash(hash, "FOO", 2);
970  GBS_write_hash(hash, "BAR", 1);
971  GBS_write_hash(hash, "bar", 2);
972 
973  if (case_sens) {
975 
976  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 1);
977  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "FOO"), 2);
978  TEST_EXPECT_ZERO(GBS_read_hash(hash, "Foo"));
979 
980  TEST_EXPECT_EQUAL(test_hash_count_value(hash, 1), 2);
981  TEST_EXPECT_EQUAL(test_hash_count_value(hash, 2), 2);
982  }
983  else {
985 
986  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 2);
987  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "FOO"), 2);
988  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "Foo"), 2);
989 
990  TEST_EXPECT_ZERO(test_hash_count_value(hash, 1));
991  TEST_EXPECT_EQUAL(test_hash_count_value(hash, 2), 2);
992  }
993 
994  if (case_sens) {
995  TEST_EXPECT_ZERO(GBS_read_hash(hash, "foobar"));
996  GBS_write_hash_no_strdup(hash, ARB_strdup("foobar"), 0);
997  TEST_EXPECT_ZERO(GBS_read_hash(hash, "foobar"));
998  GBS_write_hash_no_strdup(hash, ARB_strdup("foobar"), 3);
999  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foobar"), 3);
1000  GBS_write_hash_no_strdup(hash, ARB_strdup("foobar"), 0);
1001  TEST_EXPECT_ZERO(GBS_read_hash(hash, "foobar"));
1002  }
1003  }
1004 }
1005 
1006 void TEST_GBS_incr_hash() {
1007  TEST.reset();
1008 
1009  for (int case_sens = 0; case_sens <= 1; ++case_sens) {
1010  GB_HASH *hash = TEST.get_hash(case_sens);
1011 
1012  GBS_incr_hash(hash, "foo");
1013  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 1);
1014 
1015  GBS_incr_hash(hash, "foo");
1016  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), 2);
1017 
1018  GBS_incr_hash(hash, "FOO");
1019  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "foo"), case_sens ? 2 : 3);
1020  TEST_EXPECT_EQUAL(GBS_read_hash(hash, "FOO"), case_sens ? 1 : 3);
1021  }
1022 }
1023 
1024 static void test_string_2_hashtab(GB_HASH *hash, char *data) {
1025  // modifies data
1026  char *p, *d, *dp;
1027  int c;
1028  char *nextp;
1029  char *str;
1030  int strlen;
1031  long val;
1032 
1033  for (p = data; p; p = nextp) {
1034  strlen = 0;
1035  for (dp = p; (c = *dp); dp++) {
1036  if (c==':') {
1037  if (dp[1] == ':') dp++;
1038  else break;
1039  }
1040  strlen++;
1041  }
1042  if (*dp) {
1043  nextp = strchr(dp, ' ');
1044  if (nextp) nextp++;
1045  }
1046  else break;
1047 
1048  str = ARB_calloc<char>(strlen+1);
1049  for (dp = p, d = str; (c = *dp); dp++) {
1050  if (c==':') {
1051  if (dp[1] == ':') {
1052  *(d++) = c;
1053  dp++;
1054  }
1055  else break;
1056  }
1057  else {
1058  *(d++) = c;
1059  }
1060  }
1061  val = atoi(dp+1);
1062  GBS_write_hash_no_strdup(hash, str, val);
1063  }
1064 }
1065 
1066 void TEST_GBS_hashtab_2_string() {
1067  TEST.reset();
1068 
1069  for (int case_sens = 0; case_sens <= 1; ++case_sens) {
1070  GB_HASH *hash = TEST.get_hash(case_sens);
1071 
1072  GBS_write_hash(hash, "foo", 1);
1073  GBS_write_hash(hash, "bar", 2);
1074  GBS_write_hash(hash, "FOO", 3);
1075  GBS_write_hash(hash, "BAR", 4);
1076  GBS_write_hash(hash, "foo:bar", 3);
1077  GBS_write_hash(hash, "FOO:bar", 4);
1078  }
1079  for (int case_sens = 0; case_sens <= 1; ++case_sens) {
1080  GB_HASH *hash = TEST.get_hash(case_sens);
1081 
1082  char *as_string = GBS_hashtab_2_string(hash);
1083  TEST_REJECT_NULL(as_string);
1084 
1085  GB_HASH *hash2 = GBS_create_hash(1000, case_sens ? GB_MIND_CASE : GB_IGNORE_CASE);
1086  test_string_2_hashtab(hash2, as_string);
1087  TEST_EXPECT(hashes_are_equal(hash, hash2));
1088  TEST_EXPECT(hashes_are_equal(hash, hash2));
1089 
1090  free(as_string);
1091  GBS_free_hash(hash2);
1092  }
1093 
1094  {
1095  GB_HASH *hash = TEST.get_hash(true);
1096  char *as_string = GBS_hashtab_2_string(hash);
1097 
1098  GB_HASH *hash2 = GBS_create_hash(21, GB_MIND_CASE);
1099  GBS_hash_do_const_sorted_loop(hash, insert_into_hash, GBS_HCF_sortedByKey, hash2);
1100 
1101  GB_HASH *hash3 = GBS_create_hash(100, GB_MIND_CASE);
1102  GBS_hash_do_const_sorted_loop(hash, insert_into_hash, GBS_HCF_sortedByKey, hash3);
1103 
1104  char *as_string2 = GBS_hashtab_2_string(hash2);
1105  char *as_string3 = GBS_hashtab_2_string(hash3);
1106 
1107  TEST_EXPECT_EQUAL__BROKEN(as_string, as_string2, "FOO::bar:4 BAR:4 bar:2 foo:1 FOO:3 foo::bar:3 ");
1108  TEST_EXPECT_EQUAL (as_string, as_string3);
1109 
1110  GBS_free_hash(hash3);
1111  GBS_free_hash(hash2);
1112 
1113  free(as_string3);
1114  free(as_string2);
1115  free(as_string);
1116  }
1117 }
1118 
1119 inline long key2val(long key, int pass) {
1120  long val;
1121  switch (pass) {
1122  case 1:
1123  val = key/3;
1124  break;
1125  case 2:
1126  val = key*17461;
1127  break;
1128  default :
1129  val = LONG_MIN;
1130  TEST_EXPECT(0); // NEED_NO_COV
1131  break;
1132  }
1133  return val;
1134 }
1135 
1136 void TEST_numhash() {
1137  GB_NUMHASH *numhash = GBS_create_numhash(10);
1138  GB_NUMHASH *numhash2 = GBS_create_numhash(10);
1139 
1140  const long LOW = -200;
1141  const long HIGH = 200;
1142  const long STEP = 17;
1143 
1144  long added = 0;
1145  for (int pass = 1; pass <= 2; ++pass) {
1146  added = 0;
1147  for (long key = LOW; key <= HIGH; key += STEP) {
1148  long val = key2val(key, pass);
1149  GBS_write_numhash(numhash, key, val);
1150  added++;
1151  }
1152 
1153  TEST_EXPECT_EQUAL(test_numhash_count_elems(numhash), added);
1154 
1155  for (long key = LOW; key <= HIGH; key += STEP) {
1156  TEST_EXPECT_EQUAL(key2val(key, pass), GBS_read_numhash(numhash, key));
1157  }
1158  }
1159 
1160  TEST_EXPECT_ZERO(GBS_read_numhash(numhash, -4711)); // not-existing entry
1161 
1162  // erase by overwrite:
1163  for (long key = LOW; key <= HIGH; key += STEP) {
1164  GBS_write_numhash(numhash2, key, GBS_read_numhash(numhash, key)); // copy numhash->numhash2
1165  GBS_write_numhash(numhash, key, (long)NULp);
1166  }
1167  TEST_EXPECT_EQUAL(test_numhash_count_elems(numhash2), added);
1168  TEST_EXPECT_ZERO(test_numhash_count_elems(numhash));
1169 
1170  GBS_free_numhash(numhash2); // free filled hash
1171  GBS_free_numhash(numhash); // free empty hash
1172 }
1173 
1174 
1175 static int freeCounter;
1176 static void freeDynamicHashElem(long cl_ptr) {
1177  GBS_dynaval_free(cl_ptr);
1178  freeCounter++;
1179 }
1180 
1181 void TEST_GBS_dynaval_hash() {
1182  const int SIZE = 10;
1183  const int ELEMS = 30;
1184 
1185  GB_HASH *dynahash = GBS_create_dynaval_hash(SIZE, GB_MIND_CASE, freeDynamicHashElem);
1186 
1187  for (int pass = 1; pass <= 2; ++pass) {
1188  freeCounter = 0;
1189 
1190  for (int i = 0; i<ELEMS; ++i) {
1191  char *val = GBS_global_string_copy("value %i", i);
1192  char *oldval = (char*)GBS_write_hash(dynahash, GBS_global_string("key %i", i), (long)val);
1193  free(oldval);
1194  }
1195 
1196  TEST_EXPECT_ZERO(freeCounter); // overwriting values shall not automatically free them
1197  }
1198 
1199  freeCounter = 0;
1200  GBS_free_hash(dynahash);
1201  TEST_EXPECT_EQUAL(freeCounter, ELEMS);
1202 }
1203 
1204 void TEST_GBS_optimize_hash_and_stats() {
1205  const int SIZE = 10;
1206  const int FILL = 70;
1207 
1208  test_clear_hash_statistic_summary("test");
1209  for (int pass = 1; pass <= 3; ++pass) {
1210  GB_HASH *hash = GBS_create_hash(SIZE, GB_MIND_CASE);
1211 
1212  for (int i = 1; i <= FILL; ++i) {
1213  const char *key = GBS_global_string("%i", i);
1214  GBS_write_hash(hash, key, i);
1215  }
1216  TEST_EXPECT(hash->nelem > hash->size); // ensure hash is overfilled!
1217 
1218  switch (pass) {
1219  case 1: // nothing, only free overfilled hash below
1220  break;
1221  case 2: // test overwrite overfilled hash
1222  for (int i = 1; i <= FILL; ++i) {
1223  const char *key = GBS_global_string("%i", i);
1224 
1225  TEST_EXPECT_EQUAL(GBS_read_hash(hash, key), i);
1226  GBS_write_hash(hash, key, 0);
1227  TEST_EXPECT_ZERO(GBS_read_hash(hash, key));
1228  }
1229  break;
1230  case 3: // test optimize
1231  GBS_optimize_hash(hash);
1232  TEST_EXPECT_LESS_EQUAL(hash->nelem, hash->size);
1233  break;
1234  default :
1235  TEST_EXPECT(0); // NEED_NO_COV
1236  break;
1237  }
1238 
1239  test_calc_hash_statistic(hash, "test", 1);
1240  GBS_free_hash(hash);
1241  }
1242 
1243  test_print_hash_statistic_summary("test");
1244 }
1245 
1246 static bool has_value(const char *, long val, void *cd) { return val == (long)cd; }
1247 static bool has_value_greater(const char *, long val, void *cd) { return val > (long)cd; }
1248 
1249 void TEST_GBS_hash_next_element_that() {
1250  TEST.reset();
1251 
1252  for (int case_sens = 0; case_sens <= 1; ++case_sens) {
1253  GB_HASH *hash = TEST.get_hash(case_sens);
1254 
1255  GBS_write_hash(hash, "foo", 0);
1256  GBS_write_hash(hash, "bar", 1);
1257  GBS_write_hash(hash, "foobar", 2);
1258  GBS_write_hash(hash, "barfoo", 3);
1259 
1260 #define READ_REVERSE(value) GBS_hash_next_element_that(hash, NULp, has_value, (void*)value)
1261 #define ASSERT_READ_REVERSE_RETURNS(value, expected) TEST_EXPECT_EQUAL((const char *)expected, READ_REVERSE(value));
1262 
1263  ASSERT_READ_REVERSE_RETURNS(/*0*/ NULp, NULp); // test search for element '0' (cast to 'void*' leads to warning)
1264  ASSERT_READ_REVERSE_RETURNS(1, "bar");
1265  ASSERT_READ_REVERSE_RETURNS(2, "foobar");
1266  ASSERT_READ_REVERSE_RETURNS(3, "barfoo");
1267  ASSERT_READ_REVERSE_RETURNS(4, NULp);
1268 
1269  const char *key = NULp;
1270  long sum = 0;
1271 
1272  for (int iter = 1; iter <= 3; ++iter) {
1273  key = GBS_hash_next_element_that(hash, key, has_value_greater, (void*)1);
1274  if (iter == 3) TEST_REJECT(key);
1275  else {
1276  TEST_REJECT_NULL(key);
1277  sum += GBS_read_hash(hash, key);
1278  }
1279  }
1280  TEST_EXPECT_EQUAL(sum, 5); // sum of all values > 1
1281  }
1282 }
1283 
1284 const size_t MAX_PRIME = sorted_primes[KNOWN_PRIMES-1];
1285 
1286 #if !defined(ASSERTION_USED) || defined(ENABLE_CRASH_TESTS)
1287 static size_t get_overflown_prime() { return gbs_get_a_prime(MAX_PRIME+1); }
1288 #endif
1289 #if defined(ASSERTION_USED) && defined(ENABLE_CRASH_TESTS)
1290 static void detect_prime_overflow() { get_overflown_prime(); }
1291 #endif // ASSERTION_USED
1292 
1293 void TEST_hash_specials__crashtest() {
1294  const size_t SOME_PRIME = 434201;
1295  TEST_EXPECT_EQUAL(gbs_get_a_prime(SOME_PRIME), SOME_PRIME);
1296  TEST_EXPECT_EQUAL(gbs_get_a_prime(MAX_PRIME), MAX_PRIME);
1297 
1298 #if defined(ASSERTION_USED)
1299  TEST_EXPECT_CODE_ASSERTION_FAILS(detect_prime_overflow);
1300 #else
1301  TEST_EXPECT_EQUAL(get_overflown_prime(), MAX_PRIME+1);
1302 #endif // ASSERTION_USED
1303 }
1304 
1305 #endif // UNIT_TESTS
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
static long write_hash(GB_HASH *hs, char *key, bool copyKey, long val)
Definition: adhash.cxx:423
string result
void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data)
Definition: adhash.cxx:562
long val
Definition: adhash.cxx:25
long gbs_numhash_index(long key, long size)
Definition: adhash.cxx:663
GB_CASE case_sens
Definition: adhash.cxx:31
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
size_t hash_size(size_t estimated_elements)
Definition: adhash.cxx:245
long val
Definition: adhash.cxx:40
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
static size_t sorted_primes[KNOWN_PRIMES]
Definition: adhash.cxx:53
long
Definition: AW_awar.cxx:154
int(* gbs_hash_compare_function)(const char *key0, long val0, const char *key1, long val1)
Definition: arbdb.h:132
#define KNOWN_PRIMES
Definition: adhash.cxx:52
static void gbs_hash_to_strstruct(const char *key, long val, void *cd_out)
Definition: adhash.cxx:357
void GBS_intcat(GBS_strstruct *strstr, long val)
Definition: arb_strbuf.cxx:127
long GBS_read_numhash(GB_NUMHASH *hs, long key)
Definition: adhash.cxx:682
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
GB_NUMHASH * GBS_create_numhash(size_t estimated_elements)
Definition: adhash.cxx:671
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
const char * title
Definition: readseq.c:22
void GBS_hash_do_const_sorted_loop(const GB_HASH *hs, gb_hash_const_loop_type func, gbs_hash_compare_function sorter, void *client_data)
Definition: adhash.cxx:644
#define TEST_EXPECT_LESS_EQUAL(val, ref)
Definition: test_unit.h:1295
long(* gb_hash_loop_type)(const char *key, long val, void *client_data)
Definition: arbdb.h:130
long GBS_write_numhash(GB_NUMHASH *hs, long key, long val)
Definition: adhash.cxx:690
static void delete_from_list(GB_HASH *hs, size_t i, gbs_hash_entry *e)
Definition: adhash.cxx:402
static void GBS_erase_numhash(GB_NUMHASH *hs)
Definition: adhash.cxx:730
numhash_entry * next
Definition: adhash.cxx:41
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
#define TEST_EXPECT(cond)
Definition: test_unit.h:1313
fflush(stdout)
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:490
Definition: adhash.cxx:38
Definition: adhash.cxx:23
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
static int wrap_hashCompare4gb_sort(const void *v0, const void *v1, void *sorter)
Definition: adhash.cxx:608
#define TEST_EXPECT_EQUAL__BROKEN(expr, want, got)
Definition: test_unit.h:1284
void GBS_optimize_hash(const GB_HASH *hs)
Definition: adhash.cxx:316
#define TEST_REJECT(cond)
Definition: test_unit.h:1315
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1310
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
size_t nelem
Definition: adhash.cxx:46
char * key
Definition: adhash.cxx:24
#define SIZE
Definition: date.cxx:7
static gbs_hash_entry * find_hash_entry(const GB_HASH *hs, const char *key, size_t *index)
Definition: adhash.cxx:378
char * str
Definition: defines.h:20
void(* freefun)(long val)
Definition: adhash.cxx:34
#define GB_CALC_HASH_INDEX_CASE_IGNORED(string, index, size)
Definition: gb_hashindex.h:36
GB_write_int const char GB_write_autoconv_string WRITE_SKELETON(write_pointer, GBDATA *,"%p", GB_write_pointer) char *AW_awa if)(!gb_var) return strdup("")
Definition: AW_awar.cxx:166
void * gbm_get_mem(size_t size, long index)
Definition: gb_memory.h:130
gbs_hash_entry ** entries
Definition: adhash.cxx:32
const char * GBS_hash_next_element_that(const GB_HASH *hs, const char *last_key, bool(*condition)(const char *key, long val, void *cd), void *cd)
Definition: adhash.cxx:577
long key
Definition: adhash.cxx:39
numhash_entry ** entries
Definition: adhash.cxx:47
GB_CASE
Definition: arb_core.h:30
void(* gb_hash_const_loop_type)(const char *key, long val, void *client_data)
Definition: arbdb.h:131
#define TEST_EXPECT_ZERO(cond)
Definition: test_unit.h:1074
gbs_hash_entry * next
Definition: adhash.cxx:26
void GBS_chrcat(GBS_strstruct *strstr, char ch)
Definition: arb_strbuf.cxx:119
while(1)
static void copy(double **i, double **j)
Definition: trnsprob.cxx:32
void GB_internal_error(const char *message)
Definition: arb_msg.cxx:435
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
void GBS_hash_do_sorted_loop(GB_HASH *hs, gb_hash_loop_type func, gbs_hash_compare_function sorter, void *client_data)
Definition: adhash.cxx:632
#define gb_assert(cond)
Definition: arbdbt.h:11
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
#define TEST_EXPECT_CODE_ASSERTION_FAILS(cb)
Definition: test_unit.h:1241
static size_t get_sorted_hash_values(const GB_HASH *hs, gbs_hash_compare_function sorter, gbs_hash_entry **&mtab)
Definition: adhash.cxx:615
void GBK_dump_backtrace(FILE *out, const char *message)
Definition: arb_msg.cxx:428
size_t nelem
Definition: adhash.cxx:30
size_t GBS_hash_elements(const GB_HASH *hs)
Definition: adhash.cxx:573
char * GBS_hashtab_2_string(const GB_HASH *hash)
Definition: adhash.cxx:371
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
long GBS_incr_hash(GB_HASH *hs, const char *key)
Definition: adhash.cxx:470
long GBS_write_hash_no_strdup(GB_HASH *hs, char *key, long val)
Definition: adhash.cxx:462
static ARB_init_perl_interface init
Definition: ARB_ext.c:101
#define NULp
Definition: cxxforward.h:97
#define GB_CALC_HASH_INDEX(string, index, size, caseSens)
Definition: gb_hashindex.h:70
#define GB_CALC_HASH_INDEX_CASE_SENSITIVE(string, index, size)
Definition: gb_hashindex.h:26
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
static void GBS_erase_hash(GB_HASH *hs)
Definition: adhash.cxx:496
void GBS_free_numhash(GB_NUMHASH *hs)
Definition: adhash.cxx:745
#define TEST()
Definition: admalloc.cxx:296
void gbm_free_mem(void *block, size_t size, long index)
Definition: gb_memory.h:131
long size
Definition: adhash.cxx:45
size_t size
Definition: adhash.cxx:29
size_t gbs_get_a_prime(size_t above_or_equal_this)
Definition: adhash.cxx:193
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
int GBS_HCF_sortedByKey(const char *k0, long, const char *k1, long)
Definition: adhash.cxx:656
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void print(const T &t)
Definition: test_unit.h:348
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
Definition: adhash.cxx:548