ARB
PT_debug.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : PT_debug.cxx //
4 // Purpose : debugging code //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in March 2011 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ============================================================= //
11 
12 #include "probe.h"
13 #include "probe_tree.h"
14 #include "pt_prototypes.h"
15 
16 #include <arb_misc.h>
17 #include <arb_file.h>
18 
19 #if defined(CALCULATE_STATS_ON_QUERY)
20 
21 #define DEBUG_MAX_CHAIN_SIZE 10000000
22 #define DEBUG_MAX_CHAIN_NAME_DIST 99999
23 #define DEBUG_TREE_DEPTH (PT_POS_TREE_HEIGHT+1)
24 
25 struct PT_statistic {
26  static bool show_for_index(int i, int imax) {
27  // return true; // uncomment to show all
28  if (i == imax) return true; // always show last index
29  if (i<20) return true;
30  if (i<100) return (i%10) == 9;
31  if (i<1000) return (i%100) == 99;
32  if (i<10000) return (i%1000) == 999;
33  if (i<100000) return (i%10000) == 9999;
34  if (i<1000000) return (i%100000) == 99999;
35  if (i<10000000) return (i%1000000) == 999999;
36  pt_assert(0);
37  return true;
38  }
39 
40  // nodes
43 
45 
46  // leafs
49 
50  // chains
53 
56 
57  size_t chaincount;
58 
60 
61  struct chain_head_mem {
62  size_t flag;
63  size_t apos;
64  size_t size;
65  } chm;
66 
67  PT_statistic() { memset(this, 0, sizeof(*this)); }
68 
69  void analyse(POS_TREE2 *pt, int height) {
71  switch (pt->get_type()) {
72  case PT2_NODE: {
73  int basecnt = 0;
74  for (int i=PT_QU; i<PT_BASES; i++) {
75  POS_TREE2 *pt_help = PT_read_son(pt, (PT_base)i);
76  if (pt_help) {
77  basecnt++;
78  analyse(pt_help, height+1);
79  }
80  }
81 
82  nodes[height]++;
83  nodes_mem[height] += PT_node_size(pt);
84 
85  splits[height][basecnt]++;
86  break;
87  }
88  case PT2_LEAF:
89  tips[height]++;
90  tips_mem[height] += PT_leaf_size(pt);
91  break;
92 
93  case PT2_CHAIN: {
94  size_t size = 1;
95  ChainIteratorStage2 iter(pt);
96 
97  {
98  chm.flag++;
99  const char *ptr = ((char*)pt)+1;
100  const char *next = ptr;
101 
102  next = next + (pt->flags&1 ? 4 : 2); chm.apos += (next-ptr); ptr = next;
103  PT_read_compact_nat(next); chm.size += (next-ptr); ptr = next;
104  }
105 
106  int lastName = iter.at().get_name();
107  while (++iter) {
108  {
109  int thisName = iter.at().get_name();
110  pt_assert(thisName >= lastName);
111 
112  int nameDist = thisName-lastName;
113  if (nameDist>DEBUG_MAX_CHAIN_NAME_DIST) nameDist = DEBUG_MAX_CHAIN_NAME_DIST;
114  ++chain_name_dist[nameDist];
115  lastName = thisName;
116  }
117  ++size;
118  }
119 
121 
122  size_t mem = iter.memptr()-(char*)pt;
123 
124  chains_of_size[size]++;
125  chains_of_size_mem[size] += mem;
126  chains_at_depth[height]++;
127  chains_at_depth_mem[height] += mem;
128 
129  chaincount++;
130  break;
131  }
132  }
133  }
134 
135  void printCountAndMem(size_t count, size_t mem) {
136  printf("%10zu mem:%7s mean:%5.2f b", count, GBS_readable_size(mem, "b"), mem/double(count));
137  }
138  void printCountPercentAndMem(size_t count, size_t all, size_t mem, size_t allmem) {
139  printf("%10zu (=%5.2f%%) mem:%7s (=%5.2f%%) mean:%5.2f b",
140  count, count*100.0/all,
141  GBS_readable_size(mem, "b"), mem*100.0/allmem,
142  mem/double(count));
143  }
144 
145  void LF() { putchar('\n'); }
146 
147  void dump(size_t indexsize) {
148  size_t sum = 0;
149  size_t mem = 0;
150  size_t allmem = 0;
151 
152  for (int i=0; i<DEBUG_TREE_DEPTH; i++) {
153  size_t k = nodes[i];
154  if (k) {
155  sum += k;
156  mem += nodes_mem[i];
157  printf("nodes at depth %2i:", i); printCountAndMem(k, nodes_mem[i]);
158  for (int j=0; j<PT_BASES; j++) {
159  k = splits[i][j];
160  printf(" [%i]=%-10zu", j, k);
161  }
162  LF();
163  }
164  }
165  fputs("nodes (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize);
166  allmem += mem;
167 
168  mem = sum = 0;
169  for (int i=0; i<DEBUG_TREE_DEPTH; i++) {
170  size_t k = tips[i];
171  if (k) {
172  sum += k;
173  mem += tips_mem[i];
174  printf("tips at depth %2i:", i); printCountAndMem(k, tips_mem[i]); LF();
175  }
176  }
177  fputs("tips (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize);
178  allmem += mem;
179 
180  mem = sum = 0;
181  for (int i=0; i<DEBUG_TREE_DEPTH; i++) {
182  size_t k = chains_at_depth[i];
183  if (k) {
184  sum += k;
185  mem += chains_at_depth_mem[i];
186  printf("chains at depth %2i:", i); printCountAndMem(k, chains_at_depth_mem[i]); LF();
187  }
188  }
189  fputs("chains (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize);
190  allmem += mem;
191 
192 #if defined(ASSERTION_USED)
193  size_t chain_mem1 = mem;
194 #endif
195 
196  size_t chain_sum = 0;
197 
198  mem = sum = 0;
199  for (int i=0; i <= DEBUG_MAX_CHAIN_SIZE; i++) {
200  size_t k = chains_of_size[i];
201  if (k) {
202  size_t e = i*k;
203 
204  chain_sum += k;
205  sum += e;
206  mem += chains_of_size_mem[i];
207  }
208  }
209  {
210  int first_skipped = 0;
211 
212  size_t k_sum = 0;
213  size_t e_sum = 0;
214  size_t m_sum = 0;
215 
216  for (int i=0; i <= DEBUG_MAX_CHAIN_SIZE; i++) {
217  bool show = show_for_index(i, DEBUG_MAX_CHAIN_SIZE);
218  if (!show && !first_skipped) {
219  first_skipped = i;
220  pt_assert(first_skipped); // 0 has to be shown always
221  }
222 
223  size_t k = chains_of_size[i];
224  if (k) {
225  size_t e = i*k;
226 
227  k_sum += k;
228  e_sum += e;
229  m_sum += chains_of_size_mem[i];
230  }
231 
232  if (show) {
233  if (k_sum) {
234  if (first_skipped) printf("chain of size %7i-%7i", first_skipped, i);
235  else printf("chain of size %15i", i);
236 
237  printf(" occur %10zu (%5.2f%%) entries:", k_sum, k_sum*100.0/chain_sum);
238  printCountPercentAndMem(e_sum, sum, m_sum, mem);
239  LF();
240  }
241 
242  first_skipped = 0;
243 
244  k_sum = 0;
245  e_sum = 0;
246  m_sum = 0;
247  }
248  }
249  }
250  fputs("ch.entries (all): ", stdout); printCountAndMem(sum, mem); printf(" (%5.2f%%)\n", mem*100.0/indexsize);
251  // Note: chains were already added to allmem above
252  pt_assert(chain_mem1 == mem); // both chain-examinations should result in same mem size
253 
254  {
255  size_t followup_chain_entries = 0;
256  for (int i = 0; i <= DEBUG_MAX_CHAIN_NAME_DIST; ++i) { // LOOP_VECTORIZED
257  followup_chain_entries += chain_name_dist[i];
258  }
259 
260  double pcsum = 0.0;
261  int first_skipped = 0;
262  size_t k_sum = 0;
263 
264  for (int i = 0; i <= DEBUG_MAX_CHAIN_NAME_DIST; ++i) {
266  if (!show && !first_skipped) {
267  first_skipped = i;
268  pt_assert(first_skipped); // 0 has to be shown always
269  }
270 
271  k_sum += chain_name_dist[i];
272  if (show) {
273  if (k_sum) {
274  if (first_skipped) printf("chain-entry-name-dist of %5i-%5i", first_skipped, i);
275  else printf("chain-entry-name-dist of %11i", i);
276 
277  double pc = k_sum*100.0/followup_chain_entries;
278  pcsum += pc;
279 
280  printf(" occurs %10zu (=%5.2f%%; sum=%5.2f%%)\n", k_sum, pc, pcsum);
281  }
282 
283  first_skipped = 0;
284  k_sum = 0;
285  }
286  }
287  printf("overall followup-chain-entries: %zu\n", followup_chain_entries);
288  }
289 
290  {
291  size_t allhead = chm.flag+chm.apos+chm.size;
292  printf("Chain header summary:\n"
293  " flag %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(chm.flag, "b"), chm.flag *100.0/mem, 1.0);
294  printf(" apos %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(chm.apos, "b"), chm.apos *100.0/mem, double(chm.apos) /chm.flag);
295  printf(" size %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(chm.size, "b"), chm.size *100.0/mem, double(chm.size) /chm.flag);
296  printf(" all %s (=%5.2f%%) mean:%5.2f b\n", GBS_readable_size(allhead, "b"), allhead *100.0/mem, double(allhead) /chm.flag);
297  }
298 
299  size_t known_other_mem =
300  1 + // dummy byte at start of file (avoids that address of a node is zero)
301  4 + // PT_SERVER_MAGIC
302  1 + // PT_SERVER_VERSION
303  2 + // info block size
304  (psg.big_db ? 8 : 0) + // big pointer to root node (if final int is 0xffffffff)
305  4; // final int
306 
307  allmem += known_other_mem;
308 
309  printf("overall mem:%7s\n", GBS_readable_size(allmem, "b"));
310  printf("indexsize: %7s\n", GBS_readable_size(indexsize, "b"));
311 
312  if (indexsize>allmem) {
313  printf("(file-mem):%7s\n", GBS_readable_size(indexsize-allmem, "b"));
314  }
315  else if (indexsize<allmem) {
316  printf("(mem-file):%7s\n", GBS_readable_size(allmem-indexsize, "b"));
317  }
318  else {
319  puts("indexsize exactly matches counted memory");
320  }
321  fflush_all();
322  }
323 };
324 #endif
325 
326 void PT_dump_tree_statistics(const char *indexfilename) {
327 #if defined(CALCULATE_STATS_ON_QUERY)
328  // show various debug information about the tree
329  PT_statistic *stat = new PT_statistic;
330  stat->analyse(psg.TREE_ROOT2(), 0);
331 
332  size_t filesize = GB_size_of_file(indexfilename);
333  stat->dump(filesize);
334 
335  delete stat;
336 #endif
337 }
338 
339 // --------------------------------------------------------------------------------
340 
341 class PT_dump_loc {
342  const char *prefix;
343  FILE *out;
344 public:
345  PT_dump_loc(const char *Prefix, FILE *Out) : prefix(Prefix), out(Out) {}
346 
347  int operator()(const DataLoc& loc) {
348  fprintf(out, "%s %i=%s@%i>%i\n", prefix, loc.get_name(), loc.get_pid().get_shortname(), loc.get_abs_pos(), loc.get_rel_pos());
349  return 0;
350  }
351  int operator()(const AbsLoc& loc) {
352  fprintf(out, "%s %i=%s@%i\n", prefix, loc.get_name(), loc.get_pid().get_shortname(), loc.get_abs_pos());
353  return 0;
354  }
355 };
356 
357 template <typename PT> inline void dump_POS_TREE_special(PT *pt, const char *prefix, FILE *out);
358 template <> inline void dump_POS_TREE_special(POS_TREE1 *pt, const char *prefix, FILE *out) {
359  if (pt->is_saved()) {
360  fprintf(out, "{x} %s [saved]\n", prefix);
361  }
362  else {
363  pt_assert(pt->get_type() == PT1_UNDEF);
364  fprintf(out, "{?} %s [undefined]\n", prefix);
365  }
366 }
367 template <> inline void dump_POS_TREE_special(POS_TREE2 *, const char *, FILE *) {}
368 
369 template <typename PT> void PT_dump_POS_TREE_recursive(PT *pt, const char *prefix, FILE *out) {
370  if (pt->is_node()) {
371  for (int b = PT_QU; b<PT_BASES; b++) {
372  PT *son = PT_read_son<PT>(pt, PT_base(b));
373  if (son) {
374  char *subPrefix = GBS_global_string_copy("%s%c", prefix, base_2_readable(b));
375  PT_dump_POS_TREE_recursive(son, subPrefix, out);
376  free(subPrefix);
377  }
378  }
379  }
380  else if (pt->is_leaf()) {
381  char *subPrefix = GBS_global_string_copy("{l} %s", prefix);
382  PT_dump_loc dump_leaf(subPrefix, out);
383  dump_leaf(DataLoc(pt));
384  free(subPrefix);
385  }
386  else if (pt->is_chain()) {
387  char *subPrefix = GBS_global_string_copy("{c} %s", prefix);
388  PT_dump_loc locDumper(subPrefix, out);
389  PT_forwhole_chain(pt, locDumper);
390  free(subPrefix);
391  }
392  else {
393  dump_POS_TREE_special(pt, prefix, out);
394  }
395 }
396 
397 // --------------------------------------------------------------------------------
398 
400  // Debug function for all stages
401 #if defined(DEBUG)
402  if (!node) fputs("<zero node>\n", out);
403 
404  fprintf(out, "node father %p\n", node->get_father());
405 
406  switch (node->get_type()) {
407  case PT1_LEAF: {
408  DataLoc loc(node);
409  fprintf(out, "leaf %i:%i,%i\n", loc.get_name(), loc.get_rel_pos(), loc.get_abs_pos());
410  break;
411  }
412  case PT1_NODE:
413  for (long i = 0; i < PT_BASES; i++) {
414  fprintf(out, "%6li:0x%p\n", i, PT_read_son(node, (PT_base)i));
415  }
416  break;
417  case PT1_CHAIN:
418  fputs("chain:\n", out);
419  PTD_chain_print chainPrinter;
420  PT_forwhole_chain(node, chainPrinter);
421  break;
422  case PT1_SAVED:
423  fputs("saved:\n", out);
424  break;
425  case PT1_UNDEF:
426  fputs("<invalid node type>\n", out);
427  break;
428  }
429  fflush_all();
430 #endif
431 }
432 
433 static void PT_dump_POS_TREE_to_file(const char *dumpfile) {
434  FILE *dump = fopen(dumpfile, "wt");
435  if (!dump) {
436  GBK_terminate(GB_IO_error("writing", dumpfile));
437  }
438  if (psg.get_stage() == STAGE1) {
440  }
441  else {
443  }
444  fclose(dump);
445 
446  fprintf(stderr, "Note: index has been dumped to '%s'\n", dumpfile);
447 }
448 
449 
450 int PT_index_dump(const PT_main *main) {
451  const char *outfile = main->dumpname;
452  PT_dump_POS_TREE_to_file(outfile);
453  return 0;
454 }
size_t tips_mem[DEBUG_TREE_DEPTH]
Definition: PT_debug.cxx:48
size_t PT_leaf_size(POS_TREE2 *node)
Definition: probe_tree.h:408
Definition: probe.h:122
size_t splits[DEBUG_TREE_DEPTH][PT_BASES]
Definition: PT_debug.cxx:44
size_t chains_at_depth[DEBUG_TREE_DEPTH]
Definition: PT_debug.cxx:51
size_t chaincount
Definition: PT_debug.cxx:57
Stage get_stage() const
Definition: probe.h:387
group_matcher all()
Definition: test_unit.h:1011
Definition: probe.h:83
void PT_dump_POS_TREE_recursive(PT *pt, const char *prefix, FILE *out)
Definition: PT_debug.cxx:369
static void PT_dump_POS_TREE_to_file(const char *dumpfile)
Definition: PT_debug.cxx:433
void fflush_all()
Definition: PT_tools.h:211
POS_TREE1 *& TREE_ROOT1()
Definition: probe.h:389
void analyse(POS_TREE2 *pt, int height)
Definition: PT_debug.cxx:69
struct PT_statistic::chain_head_mem chm
int PT_forwhole_chain(PT *node, T &func)
int get_name() const
Definition: probe_tree.h:434
int main(int argc, char **argv)
Definition: aisc.c:359
probe_struct_global psg
Definition: PT_main.cxx:36
GB_ERROR GB_IO_error(const char *action, const char *filename)
Definition: arb_msg.cxx:285
uchar flags
Definition: probe_tree.h:235
void dump(size_t indexsize)
Definition: PT_debug.cxx:147
#define DEBUG_MAX_CHAIN_SIZE
Definition: PT_debug.cxx:21
size_t tips[DEBUG_TREE_DEPTH]
Definition: PT_debug.cxx:47
int PT_index_dump(const PT_main *main)
Definition: PT_debug.cxx:450
static bool show_for_index(int i, int imax)
Definition: PT_debug.cxx:26
#define DEBUG_MAX_CHAIN_NAME_DIST
Definition: PT_debug.cxx:22
long GB_size_of_file(const char *path)
Definition: arb_file.cxx:28
uint_32 PT_read_compact_nat(const char *&fromMem)
Definition: PT_tools.h:196
const probe_input_data & get_pid() const
Definition: probe_tree.h:437
int get_abs_pos() const
Definition: probe_tree.h:435
TYPE get_type() const
Definition: probe_tree.h:226
size_t nodes[DEBUG_TREE_DEPTH]
Definition: PT_debug.cxx:41
void GBK_terminate(const char *error) __ATTR__NORETURN
Definition: arb_msg.cxx:509
int operator()(const DataLoc &loc)
Definition: PT_debug.cxx:347
void printCountAndMem(size_t count, size_t mem)
Definition: PT_debug.cxx:135
const char * GBS_readable_size(unsigned long long size, const char *unit_suffix)
Definition: arb_misc.cxx:23
size_t nodes_mem[DEBUG_TREE_DEPTH]
Definition: PT_debug.cxx:42
void printCountPercentAndMem(size_t count, size_t all, size_t mem, size_t allmem)
Definition: PT_debug.cxx:138
const char * get_shortname() const
Definition: probe.h:170
#define DEBUG_TREE_DEPTH
Definition: PT_debug.cxx:23
const AbsLoc & at() const
Definition: probe_tree.h:685
size_t chains_at_depth_mem[DEBUG_TREE_DEPTH]
Definition: PT_debug.cxx:52
TYPE get_type() const
Definition: probe_tree.h:246
bool is_saved() const
Definition: probe_tree.h:231
#define pt_assert(bed)
Definition: PT_tools.h:22
int operator()(const AbsLoc &loc)
Definition: PT_debug.cxx:351
PT * PT_read_son(PT *node, PT_base base)
CONSTEXPR_INLINE char base_2_readable(char base)
Definition: probe.h:98
fputs(TRACE_PREFIX, stderr)
PT_base
Definition: probe.h:82
void dump_POS_TREE_special(PT *pt, const char *prefix, FILE *out)
size_t chains_of_size_mem[DEBUG_MAX_CHAIN_SIZE+1]
Definition: PT_debug.cxx:55
PT_dump_loc(const char *Prefix, FILE *Out)
Definition: PT_debug.cxx:345
POS_TREE2 *& TREE_ROOT2()
Definition: probe.h:390
Definition: probe.h:89
int get_rel_pos() const
Definition: probe_tree.h:471
void PT_dump_tree_statistics(const char *indexfilename)
Definition: PT_debug.cxx:326
size_t PT_node_size(POS_TREE2 *node)
Definition: probe_tree.h:283
size_t chain_name_dist[DEBUG_MAX_CHAIN_NAME_DIST+1]
Definition: PT_debug.cxx:59
#define IF_DEBUG(x)
Definition: arb_assert.h:303
size_t chains_of_size[DEBUG_MAX_CHAIN_SIZE+1]
Definition: PT_debug.cxx:54
void PT_dump_POS_TREE(POS_TREE1 *IF_DEBUG(node), FILE *IF_DEBUG(out))
Definition: PT_debug.cxx:399
const char * memptr() const
Definition: probe_tree.h:688
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194