ARB
adcache.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adcache.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "gb_storage.h"
12 #include "gb_main.h"
13 #include "gb_tune.h"
14 
19  char *data;
20  long clock;
21  size_t sizeof_data;
22 
23 #if defined(GEN_CACHE_STATS)
24  long reused;
25  char *dbpath;
26 #endif // GEN_CACHE_STATS
27 };
28 
30  gb_cache_entry& entry = cache.entries[index];
31  return
32  (entry.prev || cache.newest_entry == index) &&
33  (entry.next || cache.oldest_entry == index);
34 }
35 
37  gb_assert(entry_is_linked(cache, index));
38 
39  gb_cache_entry& entry = cache.entries[index];
40 
41  gb_cache_idx p = entry.prev;
42  gb_cache_idx n = entry.next;
43 
44  if (index == cache.newest_entry) cache.newest_entry = n;
45  if (index == cache.oldest_entry) cache.oldest_entry = p;
46 
47  cache.entries[n].prev = p;
48  cache.entries[p].next = n;
49 
50  entry.prev = entry.next = 0;
51 
52  return entry;
53 }
54 
56  gb_assert(!entry_is_linked(cache, index));
57 
58  gb_cache_entry& entry = cache.entries[index];
59 
60  entry.prev = entry.next = 0;
61 
62  if (!cache.newest_entry) { // first entry
63  gb_assert(!cache.oldest_entry);
64  cache.newest_entry = cache.oldest_entry = index;
65  }
66  else if (entry.sizeof_data >= cache.big_data_min_size) {
67  // Do NOT put big entries to top - instead put them to bottom!
68  // This is done to avoid that reading a big entry flushes the complete cache.
69  entry.prev = cache.oldest_entry;
70  cache.entries[entry.prev].next = index;
71  cache.oldest_entry = index;
72  }
73  else {
74  entry.next = cache.newest_entry;
75  cache.entries[entry.next].prev = index;
76  cache.newest_entry = index;
77  }
78 }
79 
81  gb_assert(!entry_is_linked(cache, index));
82 
83  gb_cache_entry& entry = cache.entries[index];
84 
85  freenull(entry.data);
86  cache.sum_data_size -= entry.sizeof_data;
87  gb_assert(entry.gbe->cache_index == index); // oops - cache error
88  entry.gbe->cache_index = 0;
89 
90  // insert deleted entry in free list
91  entry.next = cache.firstfree_entry;
92  cache.firstfree_entry = index;
93 
94 #if defined(GEN_CACHE_STATS)
95  if (entry.reused) {
96  GBS_incr_hash(cache.reused, entry.dbpath);
97  GBS_write_hash(cache.reuse_sum, entry.dbpath, GBS_read_hash(cache.reuse_sum, entry.dbpath)+entry.reused);
98  }
99  else {
100  GBS_incr_hash(cache.not_reused, entry.dbpath);
101  }
102  freenull(entry.dbpath);
103 #endif // GEN_CACHE_STATS
104 }
105 
108  firstfree_entry(1),
109  newest_entry(0),
110  oldest_entry(0),
111  sum_data_size(0),
112  max_data_size(GB_TOTAL_CACHE_SIZE),
113  big_data_min_size(max_data_size / 4)
114 {
115  for (gb_cache_idx i=0; i<GB_MAX_CACHED_ENTRIES-1; i++) {
116  entries[i].next = i+1;
117  }
118 
119 #if defined(GEN_CACHE_STATS)
120  not_reused = GBS_create_hash(1000, GB_MIND_CASE);
121  reused = GBS_create_hash(1000, GB_MIND_CASE);
122  reuse_sum = GBS_create_hash(1000, GB_MIND_CASE);
123 #endif // GEN_CACHE_STATS
124 }
125 
126 #if defined(GEN_CACHE_STATS)
127 static void sum_hash_values(const char */*key*/, long val, void *client_data) {
128  size_t *sum = (size_t*)client_data;
129  *sum += val;
130 }
131 static void list_hash_entries(const char *key, long val, void *client_data) {
132  if (client_data) {
133  GB_HASH *reuse_sum_hash = (GB_HASH*)client_data;
134  long reuse_sum = GBS_read_hash(reuse_sum_hash, key);
135 
136  printf("%s %li (%5.2f)\n", key, val, (double)reuse_sum/val);
137  }
138  else {
139  printf("%s %li\n", key, val);
140  }
141 }
142 #endif // GEN_CACHE_STATS
143 
145  if (entries) {
146  gb_assert(newest_entry == 0); // cache has to be flushed before!
147  gb_assert(sum_data_size == 0);
148  freenull(entries);
149 
150 #if defined(GEN_CACHE_STATS)
151  size_t NotReUsed = 0;
152  size_t ReUsed = 0;
153  size_t ReUseSum = 0;
154 
155  GBS_hash_do_const_loop(reuse_sum, sum_hash_values, &ReUseSum);
156  GBS_hash_do_const_loop(not_reused, sum_hash_values, &NotReUsed);
157  GBS_hash_do_const_loop(reused, sum_hash_values, &ReUsed);
158 
159  size_t overall = NotReUsed+ReUsed;
160 
161  printf("Cache stats:\n"
162  "Overall entries: %zu\n"
163  "Reused entries: %zu (%5.2f%%)\n"
164  "Mean reuse count: %5.2f\n",
165  overall,
166  ReUsed, (double)ReUsed/overall*100.0,
167  (double)ReUseSum/ReUsed);
168 
169  printf("Not reused:\n"); GBS_hash_do_const_sorted_loop(not_reused, list_hash_entries, GBS_HCF_sortedByKey, NULp);
170  printf("Reused:\n"); GBS_hash_do_const_sorted_loop(reused, list_hash_entries, GBS_HCF_sortedByKey, reuse_sum);
171 
172  GBS_free_hash(not_reused);
173  GBS_free_hash(reused);
174  GBS_free_hash(reuse_sum);
175 #endif // GEN_CACHE_STATS
176  }
177 }
178 
179 char *gb_read_cache(GBENTRY *gbe) {
180  char *cached_data = NULp;
181  gb_cache_idx index = gbe->cache_index;
182 
183  if (index) {
184  gb_cache& cache = GB_MAIN(gbe)->cache;
185  gb_cache_entry& entry = unlink_cache_entry(cache, index);
186  gb_assert(entry.gbe == gbe);
187 
188  // check validity
189  if (gbe->update_date() > entry.clock) {
190  flush_cache_entry(cache, index);
191  }
192  else {
193  link_cache_entry_to_top(cache, index);
194  cached_data = entry.data;
195 #if defined(GEN_CACHE_STATS)
196  entry.reused++;
197 #endif // GEN_CACHE_STATS
198  }
199  }
200 
201  return cached_data;
202 }
203 
204 void gb_free_cache(GB_MAIN_TYPE *Main, GBENTRY *gbe) {
205  gb_cache_idx index = gbe->cache_index;
206 
207  if (index) {
208  gb_cache& cache = Main->cache;
209  unlink_cache_entry(cache, index);
210  flush_cache_entry(cache, index);
211  }
212 }
213 
214 static void gb_uncache(GBCONTAINER *gbc);
215 void gb_uncache(GBENTRY *gbe) { gb_free_cache(GB_MAIN(gbe), gbe); }
216 
217 inline void gb_uncache(GBDATA *gbd) {
218  if (gbd->is_container()) gb_uncache(gbd->as_container());
219  else gb_uncache(gbd->as_entry());
220 }
221 static void gb_uncache(GBCONTAINER *gbc) {
222  for (GBDATA *gb_child = GB_child(gbc); gb_child; gb_child = GB_nextChild(gb_child)) {
223  gb_uncache(gb_child);
224  }
225 }
226 void GB_flush_cache(GBDATA *gbd) {
227  // flushes cache of DB-entry or -subtree
228  gb_uncache(gbd);
229 }
230 
231 static char *cache_free_some_memory(gb_cache& cache, size_t needed_mem) {
232  // free up cache entries until
233  // - at least 'needed_mem' bytes are available and
234  // - at least one free cache entry exists
235  // (if one of the free'd entries has exactly 'needed_mem' bytes size,
236  // it will be returned and can be re-used or has to be freed)
237 
238  long avail_mem = (long)cache.max_data_size - (long)cache.sum_data_size; // may be negative!
239  long need_to_free = needed_mem-avail_mem;
240  char *data = NULp;
241 
242  // ignore really big requests (such cache entries will
243  // be appended to end of cache list and flushed quickly)
244  if (need_to_free>(long)cache.sum_data_size) need_to_free = 0;
245 
246  while ((!cache.firstfree_entry || need_to_free>0) && cache.oldest_entry) {
247  gb_cache_idx index = cache.oldest_entry;
248  gb_assert(index);
249 
250  gb_cache_entry& entry = unlink_cache_entry(cache, index);
251 
252  need_to_free -= entry.sizeof_data;
253  if (entry.sizeof_data == needed_mem) reassign(data, entry.data);
254  flush_cache_entry(cache, index);
255  }
256 
257  return data;
258 }
259 
260 char *gb_alloc_cache_index(GBENTRY *gbe, size_t size) {
261  gb_assert(gbe->cache_index == 0);
262 
263  gb_cache& cache = GB_MAIN(gbe)->cache;
264  char *data = cache_free_some_memory(cache, size);
265  gb_cache_idx index = cache.firstfree_entry;
266 
267  gb_assert(index);
268  gb_cache_entry& entry = cache.entries[index];
269 
270  cache.firstfree_entry = entry.next; // remove free element from free-list
271  entry.next = 0;
272 
273  // create data
274  if (!data) ARB_alloc(data, size);
275 
276  entry.sizeof_data = size;
277  entry.data = data;
278  entry.gbe = gbe;
279  entry.clock = gbe->update_date();
280 
281 #if defined(GEN_CACHE_STATS)
282  entry.reused = 0;
283  entry.dbpath = ARB_strdup(GB_get_db_path(gbe));
284 #endif // GEN_CACHE_STATS
285 
286  gbe->cache_index = index;
287 
288  link_cache_entry_to_top(cache, index);
289  cache.sum_data_size += size;
290 
291  return data;
292 }
293 
294 char *GB_set_cache_size(GBDATA *gbd, size_t size) {
295  gb_cache& cache = GB_MAIN(gbd)->cache;
296 
297  cache.max_data_size = size;
298  cache.big_data_min_size = cache.max_data_size / 4;
299  return NULp;
300 }
301 
static char * cache_free_some_memory(gb_cache &cache, size_t needed_mem)
Definition: adcache.cxx:231
size_t sizeof_data
Definition: adcache.cxx:21
void GBS_hash_do_const_sorted_loop(const GB_HASH *hs, gb_hash_const_loop_type func, gbs_hash_compare_function sorter, void *client_data)
Definition: adhash.cxx:641
long update_date() const
Definition: gb_data.h:193
char * GB_set_cache_size(GBDATA *gbd, size_t size)
Definition: adcache.cxx:294
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
uint16_t gb_cache_idx
Definition: gb_main.h:57
long GBS_incr_hash(GB_HASH *hs, const char *key)
Definition: adhash.cxx:467
GB_MAIN_TYPE * GB_MAIN(GBDATA *gbd)
Definition: gb_data.h:291
long clock
Definition: adcache.cxx:20
long
Definition: AW_awar.cxx:152
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
size_t big_data_min_size
Definition: gb_main.h:69
char * gb_read_cache(GBENTRY *gbe)
Definition: adcache.cxx:179
gb_cache_idx prev
Definition: adcache.cxx:17
Definition: adcache.cxx:15
gb_cache_idx firstfree_entry
Definition: gb_main.h:63
gb_cache_idx oldest_entry
Definition: gb_main.h:65
void flush_cache_entry(gb_cache &cache, gb_cache_idx index)
Definition: adcache.cxx:80
GBENTRY * gbe
Definition: adcache.cxx:16
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
gb_cache_entry * entries
Definition: gb_main.h:61
const int GB_TOTAL_CACHE_SIZE
Definition: adtune.cxx:20
void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data)
Definition: adhash.cxx:559
size_t max_data_size
Definition: gb_main.h:68
char * gb_alloc_cache_index(GBENTRY *gbe, size_t size)
Definition: adcache.cxx:260
~gb_cache()
Definition: adcache.cxx:144
bool is_container() const
Definition: gb_data.h:147
size_t sum_data_size
Definition: gb_main.h:67
gb_cache()
Definition: adcache.cxx:106
void gb_free_cache(GB_MAIN_TYPE *Main, GBENTRY *gbe)
Definition: adcache.cxx:204
static void gb_uncache(GBCONTAINER *gbc)
Definition: adcache.cxx:221
GBCONTAINER * as_container() const
Definition: gb_data.h:155
int cache_index
Definition: gb_data.h:206
void GB_flush_cache(GBDATA *gbd)
Definition: adcache.cxx:226
bool entry_is_linked(gb_cache &cache, gb_cache_idx index)
Definition: adcache.cxx:29
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
#define gb_assert(cond)
Definition: arbdbt.h:11
gb_cache_idx next
Definition: adcache.cxx:18
gb_cache_entry & unlink_cache_entry(gb_cache &cache, gb_cache_idx index)
Definition: adcache.cxx:36
const char * GB_get_db_path(GBDATA *gbd)
Definition: adTest.cxx:14
int GBS_HCF_sortedByKey(const char *k0, long dummy_1x, const char *k1, long dummy_2x)
Definition: adhash.cxx:653
GBENTRY * as_entry() const
Definition: gb_data.h:150
#define NULp
Definition: cxxforward.h:116
char * data
Definition: adcache.cxx:19
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
gb_cache_idx newest_entry
Definition: gb_main.h:64
const int GB_MAX_CACHED_ENTRIES
Definition: adtune.cxx:21
gb_cache cache
Definition: gb_main.h:128
void link_cache_entry_to_top(gb_cache &cache, gb_cache_idx index)
Definition: adcache.cxx:55
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
Definition: cache.h:31