52 #define KNOWN_PRIMES 279
54 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 47, 53, 59, 67, 71, 79, 89, 97, 103, 109, 127, 137, 149, 157, 167, 179, 191, 211,
55 223, 239, 257, 271, 293, 311, 331, 349, 373, 397, 419, 443, 467, 499, 541, 571, 607, 641, 677, 719, 757, 797, 839, 887, 937,
56 991, 1049, 1109, 1171, 1237, 1303, 1373, 1447, 1531, 1613, 1699, 1789, 1889, 1993, 2099, 2213, 2333, 2459, 2591, 2729, 2879,
57 3037, 3203, 3373, 3557, 3761, 3967, 4177, 4397, 4637, 4889, 5147, 5419, 5711, 6029, 6353, 6689, 7043, 7417, 7817, 8231, 8669,
58 9127, 9613, 10133, 10667, 11239, 11831, 12457, 13121, 13829, 14557, 15329, 16139, 16993, 17891, 18839, 19841, 20887, 21991, 23159,
59 24379, 25667, 27031, 28463, 29983, 31567, 33247, 35023, 36871, 38821, 40867, 43019, 45289, 47681, 50207, 52859, 55661, 58601,
60 61687, 64937, 68371, 71971, 75767, 79757, 83969, 88397, 93053, 97961, 103123, 108553, 114269, 120293, 126631, 133303, 140321,
61 147709, 155501, 163697, 172313, 181387, 190979, 201031, 211619, 222773, 234499, 246889, 259907, 273601, 288007, 303187, 319147,
62 335953, 353641, 372263, 391861, 412487, 434201, 457057, 481123, 506449, 533111, 561173, 590713, 621821, 654553, 689021, 725293,
63 763471, 803659, 845969, 890501, 937373, 986717, 1038671, 1093357, 1150909, 1211489, 1275269, 1342403, 1413077, 1487459, 1565747,
64 1648181, 1734937, 1826257, 1922383, 2023577, 2130101, 2242213, 2360243, 2484473, 2615243, 2752889, 2897789, 3050321, 3210871,
65 3379877, 3557773, 3745051, 3942209, 4149703, 4368113, 4598063, 4840103, 5094853, 5363011, 5645279, 5942399, 6255157, 6584377,
66 6930929, 7295719, 7679713, 8083919, 8509433, 8957309, 9428759, 9925021, 10447391, 10997279, 11576087, 12185359, 12826699, 13501819,
67 14212447, 14960471, 15747869, 16576727, 17449207, 18367597, 19334317, 20351927, 21423107, 22550639, 23737523, 24986867, 26301967,
68 27686291, 29143493, 30677363, 32291971, 33991597, 35780639, 37663841, 39646153, 41732809, 43929307, 46241389, 48675167, 51237019,
69 53933713, 56772371, 59760391, 62905681, 66216511, 69701591, 73370107, 77231711, 81296543, 85575313, 90079313, 94820347, 99810899
79 #define CALC_PRIMES_UP_TO 100000000U
80 #define PRIME_UNDENSITY 20U // the higher, the less primes are stored
82 #warning "please don't define CALC_PRIMES permanently"
84 static unsigned char bit_val[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
86 static int bit_value(
const unsigned char *eratosthenes,
long num) {
88 long bit_num = ((num-1) >> 1)-1;
89 long byte_num = bit_num >> 3;
90 char byte = eratosthenes[byte_num];
95 bit_num = bit_num & 7;
97 return (byte & bit_val[bit_num]) ? 1 : 0;
99 static void set_bit_value(
unsigned char *eratosthenes,
long num,
int val) {
101 long bit_num = ((num-1) >> 1)-1;
102 long byte_num = bit_num >> 3;
103 char byte = eratosthenes[byte_num];
108 bit_num = bit_num & 7;
111 byte |= bit_val[bit_num];
114 byte &= (0xff - bit_val[bit_num]);
116 eratosthenes[byte_num] = byte;
119 static void calculate_primes_upto() {
121 size_t bits_needed = CALC_PRIMES_UP_TO/2+1;
122 size_t bytes_needed = (bits_needed/8)+1;
123 unsigned char *eratosthenes = ARB_calloc<unsigned char>(bytes_needed);
124 size_t prime_count = 0;
127 printf(
"eratosthenes' size = %zu\n", bytes_needed);
130 for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) {
131 if (bit_value(eratosthenes, num) == 0) {
134 for (num2 = num*2; num2 <= CALC_PRIMES_UP_TO; num2 += num) {
136 set_bit_value(eratosthenes, num2, 1);
145 size_t prime_count2 = 0;
146 size_t last_prime = 1;
149 for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) {
150 if (bit_value(eratosthenes, num) == 0) {
151 size_t diff = num-last_prime;
152 if ((diff*PRIME_UNDENSITY)<num) {
153 set_bit_value(eratosthenes, num, 1);
162 printf(
"\nUsing %zu prime numbers up to %zu:\n\n", prime_count2, CALC_PRIMES_UP_TO);
163 printf(
"#define KNOWN_PRIMES %zu\n", prime_count2);
164 printf(
"static size_t sorted_primes[KNOWN_PRIMES] = {\n ");
167 for (num = 3; num <= CALC_PRIMES_UP_TO; num += 2) {
168 if (bit_value(eratosthenes, num) == 0) {
175 printed += printf(
"%zuU, ", num);
178 printed += printf(
"%zu, ", num);
191 #endif // CALC_PRIMES
197 #if defined(CALC_PRIMES)
198 calculate_primes_upto();
199 #endif // CALC_PRIMES
201 if (sorted_primes[
KNOWN_PRIMES-1] >= above_or_equal_this) {
206 #if defined(DEBUG) && 0
207 printf(
"l=%-3i m=%-3i h=%-3i above_or_equal_this=%li sorted_primes[%i]=%li sorted_primes[%i]=%li sorted_primes[%i]=%li\n",
208 l, m, h, above_or_equal_this, l, sorted_primes[l], m, sorted_primes[m], h, sorted_primes[h]);
212 if (sorted_primes[m] > above_or_equal_this) {
216 if (sorted_primes[m] < above_or_equal_this) {
225 if (sorted_primes[l] < above_or_equal_this) {
230 gb_assert(sorted_primes[l] >= above_or_equal_this);
231 gb_assert(l == 0 || sorted_primes[l-1] < above_or_equal_this);
233 return sorted_primes[l];
236 fprintf(stderr,
"Warning: gbs_get_a_prime failed for value %zu (performance bleed)\n", above_or_equal_this);
239 return above_or_equal_this;
246 size_t min_hash_size = 2*estimated_elements;
259 long size =
hash_size(estimated_elements);
260 GB_HASH *hs = ARB_calloc<GB_HASH>(1);
283 inline void dump_access(
const char *
title,
const GB_HASH *hs,
double mean_access) {
285 "%s: size=%zu elements=%zu mean_access=%.2f hash-speed=%.1f%%\n",
286 title, hs->
size, hs->
nelem, mean_access, 100.0/mean_access);
289 static double hash_mean_access_costs(
const GB_HASH *hs) {
294 double mean_access = 1.0;
297 int strcmps_needed = 0;
300 for (pos = 0; pos<hs->
size; pos++) {
305 strcmps_needed += strcmps++;
309 mean_access = (double)strcmps_needed/hs->
nelem;
321 dump_access(
"Optimizing filled hash", hs, hash_mean_access_costs(hs));
324 if (new_size>hs->
size) {
327 for (
size_t pos = 0; pos<hs->
size; ++pos) {
331 for (e = hs->
entries[pos]; e; e = next) {
337 e->
next = new_entries[new_idx];
338 new_entries[new_idx] = e;
346 hs_mutable->
size = new_size;
347 hs_mutable->
entries = new_entries;
351 dump_access(
"Optimized hash ", hs, hash_mean_access_costs(hs));
360 for (
size_t i = 0; key[i]; ++i) {
361 out.
nput(key[i], (key[i] ==
':')+1);
380 if (!strcasecmp(e->
key, key))
return e;
386 if (!strcmp(e->
key, key))
return e;
396 return e ? e->
val : 0;
436 if (!copyKey) free(key);
449 if (!copyKey) free(key);
489 #if defined(DEVEL_RALF)
494 size_t hsize = hs->
size;
496 #if defined(DUMP_HASH_ENTRIES)
497 for (
size_t i = 0; i < hsize; i++) {
498 printf(
"hash[%zu] =", i);
500 printf(
" '%s'", e->key);
504 #endif // DUMP_HASH_ENTRIES
509 double mean_access = hash_mean_access_costs(hs);
510 if (mean_access > 1.5) {
511 dump_access(
"hash-size-warning", hs, mean_access);
512 #if defined(DEVEL_RALF) && !defined(UNIT_TESTS)
517 if (hs->
nelem >= (2*hsize)) {
518 GB_warningf(
"Performance leak - very slow hash detected (elems=%zu, size=%zu)\n", hs->
nelem, hs->
size);
524 for (
size_t i = 0; i < hsize; i++) {
546 size_t hsize = hs->
size;
547 for (
size_t i=0; i<hsize; i++) {
551 e->
val = func(e->key, e->val, client_data);
560 size_t hsize = hs->
size;
561 for (
size_t i=0; i<hsize; i++) {
564 if (e->val) func(e->key, e->val, client_data);
580 size_t size = hs->
size;
592 for (; i<size && !e; ++i) e = hs->
entries[i];
595 if ((*condition)(e->
key, e->
val, cd))
break;
598 for (i++; i<size && !e; ++i) e = hs->
entries[i];
613 size_t hsize = hs->
size;
617 for (
size_t i = 0; i < hsize; i++) {
633 for (
size_t i = 0; i < values; i++) {
634 long new_val = func(mtab[i]->key, mtab[i]->val, client_data);
635 if (new_val != mtab[i]->val)
GBS_write_hash(hs, mtab[i]->key, new_val);
645 for (
size_t i = 0; i < values; i++) {
646 func(mtab[i]->key, mtab[i]->val, client_data);
654 return strcmp(k0, k1);
662 x = (key * (
long long)97)%size;
669 size_t size =
hash_size(estimated_elements);
682 if (e->key==key)
return e->val;
701 nextPtr = &(e->next);
728 size_t hsize = hs->
size;
730 for (
size_t i=0; i<hsize; i++) {
756 struct gbs_hash_statistic_summary {
758 long min_size, max_size, sum_size;
759 long min_nelem, max_nelem, sum_nelem;
760 long min_collisions, max_collisions, sum_collisions;
761 double min_fill_ratio, max_fill_ratio, sum_fill_ratio;
762 double min_hash_quality, max_hash_quality, sum_hash_quality;
766 min_size = min_nelem = min_collisions = LONG_MAX;
767 max_size = max_nelem = max_collisions = LONG_MIN;
768 min_fill_ratio = min_hash_quality = DBL_MAX;
769 max_fill_ratio = max_hash_quality = DBL_MIN;
771 sum_size = sum_nelem = sum_collisions = 0;
772 sum_fill_ratio = sum_hash_quality = 0.0;
776 class hash_statistic_manager :
virtual Noncopyable {
779 hash_statistic_manager() : stat_hash(
NULp) { }
780 ~hash_statistic_manager() {
784 gbs_hash_statistic_summary *get_stat_summary(
const char *
id) {
789 gbs_hash_statistic_summary *stat;
ARB_calloc(stat, 1);
795 return (gbs_hash_statistic_summary*)found;
799 static void addto_hash_statistic_summary(gbs_hash_statistic_summary *stat,
long size,
long nelem,
long collisions,
double fill_ratio,
double hash_quality) {
802 if (stat->min_size > size) stat->min_size = size;
803 if (stat->max_size < size) stat->max_size = size;
805 if (stat->min_nelem > nelem) stat->min_nelem = nelem;
806 if (stat->max_nelem < nelem) stat->max_nelem = nelem;
808 if (stat->min_collisions > collisions) stat->min_collisions = collisions;
809 if (stat->max_collisions < collisions) stat->max_collisions = collisions;
811 if (stat->min_fill_ratio > fill_ratio) stat->min_fill_ratio = fill_ratio;
812 if (stat->max_fill_ratio < fill_ratio) stat->max_fill_ratio = fill_ratio;
814 if (stat->min_hash_quality > hash_quality) stat->min_hash_quality = hash_quality;
815 if (stat->max_hash_quality < hash_quality) stat->max_hash_quality = hash_quality;
817 stat->sum_size += size;
818 stat->sum_nelem += nelem;
819 stat->sum_collisions += collisions;
820 stat->sum_fill_ratio += fill_ratio;
821 stat->sum_hash_quality += hash_quality;
824 static hash_statistic_manager hash_stat_man;
826 static void test_clear_hash_statistic_summary(
const char *
id) {
827 hash_stat_man.get_stat_summary(
id)->init();
830 static void test_print_hash_statistic_summary(
const char *
id) {
831 gbs_hash_statistic_summary *stat = hash_stat_man.get_stat_summary(
id);
832 long count = stat->count;
833 printf(
"Statistic summary for %li hashes of type '%s':\n", count,
id);
834 printf(
"- size: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_size, stat->max_size, (
double)stat->sum_size/count);
835 printf(
"- nelem: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_nelem, stat->max_nelem, (
double)stat->sum_nelem/count);
836 printf(
"- fill_ratio: min = %5.1f%% ; max = %5.1f%% ; mean = %5.1f%%\n", stat->min_fill_ratio*100.0, stat->max_fill_ratio*100.0, (
double)stat->sum_fill_ratio/count*100.0);
837 printf(
"- collisions: min = %6li ; max = %6li ; mean = %6.1f\n", stat->min_collisions, stat->max_collisions, (
double)stat->sum_collisions/count);
838 printf(
"- hash_quality: min = %5.1f%% ; max = %5.1f%% ; mean = %5.1f%%\n", stat->min_hash_quality*100.0, stat->max_hash_quality*100.0, (
double)stat->sum_hash_quality/count*100.0);
841 static void test_calc_hash_statistic(
const GB_HASH *hs,
const char *
id,
int print) {
844 double fill_ratio = (double)hs->
nelem/hs->
size;
847 for (
size_t i = 0; i < hs->
size; i++) {
850 collisions = hs->
nelem - queues;
852 hash_quality = (double)queues/hs->
nelem;
855 printf(
"Statistic for hash '%s':\n",
id);
856 printf(
"- size = %zu\n", hs->
size);
857 printf(
"- elements = %zu (fill ratio = %4.1f%%)\n", hs->
nelem, fill_ratio*100.0);
858 printf(
"- collisions = %li (hash quality = %4.1f%%)\n", collisions, hash_quality*100.0);
861 addto_hash_statistic_summary(hash_stat_man.get_stat_summary(
id), hs->
size, hs->
nelem, collisions, fill_ratio, hash_quality);
864 static long test_numhash_count_elems(
GB_NUMHASH *hs) {
868 static void insert_into_hash(
const char *key,
long val,
void *cl_toHash) {
872 static void erase_from_hash(
const char *key,
long val,
void *cl_fromHash) {
880 printf(
"value mismatch in hashes_are_equal(): key='%s' val: %li != %li\n", key, val2, val);
888 bool equal = (count1 == count2);
923 GB_HASH *get_hash(
bool case_sens) {
924 return case_sens ? mind : ignore;
928 static TestData
TEST;
930 static size_t test_hash_count_value(
GB_HASH *hs,
long val) {
931 size_t hsize = hs->
size;
936 for (
size_t i = 0; i<hsize; ++i) {
947 void TEST_GBS_write_hash() {
950 for (
int case_sens = 0; case_sens <= 1; ++case_sens) {
1003 void TEST_GBS_incr_hash() {
1006 for (
int case_sens = 0; case_sens <= 1; ++case_sens) {
1007 GB_HASH *hash = TEST.get_hash(case_sens);
1021 static void test_string_2_hashtab(
GB_HASH *hash,
char *data) {
1030 for (p = data; p; p = nextp) {
1032 for (dp = p; (c = *dp); dp++) {
1034 if (dp[1] ==
':') dp++;
1040 nextp = strchr(dp,
' ');
1045 str = ARB_calloc<char>(strlen+1);
1046 for (dp = p, d = str; (c = *dp); dp++) {
1063 void TEST_GBS_hashtab_2_string() {
1066 for (
int case_sens = 0; case_sens <= 1; ++case_sens) {
1067 GB_HASH *hash = TEST.get_hash(case_sens);
1076 for (
int case_sens = 0; case_sens <= 1; ++case_sens) {
1077 GB_HASH *hash = TEST.get_hash(case_sens);
1083 test_string_2_hashtab(hash2, as_string);
1092 GB_HASH *hash = TEST.get_hash(
true);
1116 inline long key2val(
long key,
int pass) {
1133 void TEST_numhash() {
1137 const long LOW = -200;
1138 const long HIGH = 200;
1139 const long STEP = 17;
1142 for (
int pass = 1; pass <= 2; ++pass) {
1144 for (
long key = LOW; key <= HIGH; key += STEP) {
1145 long val = key2val(key, pass);
1152 for (
long key = LOW; key <= HIGH; key += STEP) {
1160 for (
long key = LOW; key <= HIGH; key += STEP) {
1172 static int freeCounter;
1173 static void freeDynamicHashElem(
long cl_ptr) {
1178 void TEST_GBS_dynaval_hash() {
1179 const int SIZE = 10;
1180 const int ELEMS = 30;
1184 for (
int pass = 1; pass <= 2; ++pass) {
1187 for (
int i = 0; i<ELEMS; ++i) {
1201 void TEST_GBS_optimize_hash_and_stats() {
1202 const int SIZE = 10;
1203 const int FILL = 70;
1205 test_clear_hash_statistic_summary(
"test");
1206 for (
int pass = 1; pass <= 3; ++pass) {
1209 for (
int i = 1; i <= FILL; ++i) {
1219 for (
int i = 1; i <= FILL; ++i) {
1236 test_calc_hash_statistic(hash,
"test", 1);
1240 test_print_hash_statistic_summary(
"test");
1243 static bool has_value(
const char *,
long val,
void *cd) {
return val == (
long)cd; }
1244 static bool has_value_greater(
const char *,
long val,
void *cd) {
return val > (
long)cd; }
1246 void TEST_GBS_hash_next_element_that() {
1249 for (
int case_sens = 0; case_sens <= 1; ++case_sens) {
1250 GB_HASH *hash = TEST.get_hash(case_sens);
1257 #define READ_REVERSE(value) GBS_hash_next_element_that(hash, NULp, has_value, (void*)value)
1258 #define ASSERT_READ_REVERSE_RETURNS(value, expected) TEST_EXPECT_EQUAL((const char *)expected, READ_REVERSE(value));
1260 ASSERT_READ_REVERSE_RETURNS(
NULp,
NULp);
1261 ASSERT_READ_REVERSE_RETURNS(1,
"bar");
1262 ASSERT_READ_REVERSE_RETURNS(2,
"foobar");
1263 ASSERT_READ_REVERSE_RETURNS(3,
"barfoo");
1264 ASSERT_READ_REVERSE_RETURNS(4,
NULp);
1266 const char *key =
NULp;
1269 for (
int iter = 1; iter <= 3; ++iter) {
1281 const size_t MAX_PRIME = sorted_primes[
KNOWN_PRIMES-1];
1283 #if !defined(ASSERTION_USED) || defined(ENABLE_CRASH_TESTS)
1284 static size_t get_overflown_prime() {
return gbs_get_a_prime(MAX_PRIME+1); }
1286 #if defined(ASSERTION_USED) && defined(ENABLE_CRASH_TESTS)
1287 static void detect_prime_overflow() { get_overflown_prime(); }
1288 #endif // ASSERTION_USED
1290 void TEST_hash_specials__crashtest() {
1291 const size_t SOME_PRIME = 434201;
1295 #if defined(ASSERTION_USED)
1299 #endif // ASSERTION_USED
1302 #endif // UNIT_TESTS
void GBS_dynaval_free(long val)
static long write_hash(GB_HASH *hs, char *key, bool copyKey, long val)
void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data)
long gbs_numhash_index(long key, long size)
void GBS_free_hash(GB_HASH *hs)
size_t hash_size(size_t estimated_elements)
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
static size_t sorted_primes[KNOWN_PRIMES]
int(* gbs_hash_compare_function)(const char *key0, long val0, const char *key1, long val1)
static void gbs_hash_to_strstruct(const char *key, long val, void *cd_out)
long GBS_read_numhash(GB_NUMHASH *hs, long key)
char * ARB_strdup(const char *str)
GB_NUMHASH * GBS_create_numhash(size_t estimated_elements)
const char * GBS_global_string(const char *templat,...)
void GBS_hash_do_const_sorted_loop(const GB_HASH *hs, gb_hash_const_loop_type func, gbs_hash_compare_function sorter, void *client_data)
#define TEST_EXPECT_LESS_EQUAL(val, ref)
void nput(char c, size_t count)
long(* gb_hash_loop_type)(const char *key, long val, void *client_data)
long GBS_write_numhash(GB_NUMHASH *hs, long key, long val)
static void delete_from_list(GB_HASH *hs, size_t i, gbs_hash_entry *e)
static void GBS_erase_numhash(GB_NUMHASH *hs)
static int diff(int v1, int v2, int v3, int v4, int st, int en)
#define TEST_EXPECT(cond)
void GB_warningf(const char *templat,...)
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
static int wrap_hashCompare4gb_sort(const void *v0, const void *v1, void *sorter)
#define TEST_EXPECT_EQUAL__BROKEN(expr, want, got)
void GBS_optimize_hash(const GB_HASH *hs)
#define TEST_REJECT(cond)
#define TEST_REJECT_NULL(n)
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
static gbs_hash_entry * find_hash_entry(const GB_HASH *hs, const char *key, size_t *index)
void(* freefun)(long val)
#define GB_CALC_HASH_INDEX_CASE_IGNORED(string, index, size)
GB_write_int const char GB_write_autoconv_string WRITE_SKELETON(write_pointer, GBDATA *,"%p", GB_write_pointer) char *AW_awa if)(!gb_var) return strdup("")
void * gbm_get_mem(size_t size, long index)
gbs_hash_entry ** entries
const char * GBS_hash_next_element_that(const GB_HASH *hs, const char *last_key, bool(*condition)(const char *key, long val, void *cd), void *cd)
void(* gb_hash_const_loop_type)(const char *key, long val, void *client_data)
#define TEST_EXPECT_ZERO(cond)
static void copy(double **i, double **j)
void GB_internal_error(const char *message)
TYPE * ARB_calloc(size_t nelem)
void GBS_hash_do_sorted_loop(GB_HASH *hs, gb_hash_loop_type func, gbs_hash_compare_function sorter, void *client_data)
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
#define TEST_EXPECT_CODE_ASSERTION_FAILS(cb)
static size_t get_sorted_hash_values(const GB_HASH *hs, gbs_hash_compare_function sorter, gbs_hash_entry **&mtab)
void GBK_dump_backtrace(FILE *out, const char *message)
size_t GBS_hash_elements(const GB_HASH *hs)
char * GBS_hashtab_2_string(const GB_HASH *hash)
long GBS_incr_hash(GB_HASH *hs, const char *key)
long GBS_write_hash_no_strdup(GB_HASH *hs, char *key, long val)
static ARB_init_perl_interface init
#define GB_CALC_HASH_INDEX(string, index, size, caseSens)
#define GB_CALC_HASH_INDEX_CASE_SENSITIVE(string, index, size)
long GBS_read_hash(const GB_HASH *hs, const char *key)
static void GBS_erase_hash(GB_HASH *hs)
void GBS_free_numhash(GB_NUMHASH *hs)
void gbm_free_mem(void *block, size_t size, long index)
size_t gbs_get_a_prime(size_t above_or_equal_this)
#define TEST_EXPECT_EQUAL(expr, want)
int GBS_HCF_sortedByKey(const char *k0, long, const char *k1, long)
char * GBS_global_string_copy(const char *templat,...)
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)