ARB
PT_buildtree.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : PT_buildtree.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "probe.h"
12 #include <PT_server_prototypes.h>
13 #include "probe_tree.h"
14 #include "pt_prototypes.h"
15 #include "PT_partition.h"
16 
17 #include <arb_defs.h>
18 #include <arb_file.h>
19 #include <arb_misc.h>
20 #include <arb_diff.h>
21 
22 #include <arb_progress.h>
23 
24 #include <unistd.h>
25 #include <ctype.h>
26 
27 // #define PTM_TRACE_MAX_MEM_USAGE
28 // #define PTM_TRACE_PARTITION_DETECTION
29 
30 // AISC_MKPT_PROMOTE: class DataLoc;
31 
32 static POS_TREE1 *build_pos_tree(POS_TREE1 *const root, const ReadableDataLoc& loc) {
33  POS_TREE1 *at = root;
34  int height = 0;
35 
36  while (at->is_node()) { // now we got an inner node
37  POS_TREE1 *pt_next = PT_read_son(at, loc[height]);
38  if (!pt_next) { // there is no son of that type -> simply add the new son to that path
39  POS_TREE1 *new_root = root;
40  POS_TREE1 *leaf;
41  {
42  bool atRoot = (at == root);
43 
44  leaf = PT_create_leaf(&at, loc[height], loc);
45  ++height;
46 
47  if (atRoot) new_root = at;
48  }
49 
50  while (height<PT_MIN_TREE_HEIGHT && loc[height-1] != PT_QU) {
51  at = PT_change_leaf_to_node(leaf);
52  leaf = PT_create_leaf(&at, loc[height], loc);
53  ++height;
54  }
55 
56  pt_assert(height >= PT_MIN_TREE_HEIGHT || loc[height-1] == PT_QU);
57  return new_root;
58  }
59  else { // go down the tree
60  at = pt_next;
61  height++;
62 
63  if (loc[height-1] == PT_QU) {
64  // end of sequence reached -> change node to chain and add
65  pt_assert(at->is_chain());
66  PT_add_to_chain(at, loc);
67  return root;
68  }
69  }
70  }
71 
72  // type == leaf or chain
73  if (at->is_chain()) { // old chain reached
74  PT_add_to_chain(at, loc);
75  return root;
76  }
77 
78  // change leaf to node and create two sons
79 
80  const ReadableDataLoc loc_ref(at);
81 
82  while (loc[height] == loc_ref[height]) { // creates nodes until sequences are different
83  pt_assert(!at->is_node());
84 
85  if (at->is_chain()) {
86  PT_add_to_chain(at, loc);
87  return root;
88  }
89  if (height >= PT_POS_TREE_HEIGHT) {
90  if (at->is_leaf()) at = PT_leaf_to_chain(at);
91  pt_assert(at->is_chain());
92  PT_add_to_chain(at, loc);
93  return root;
94  }
95 
96  pt_assert(at->is_leaf());
97 
98  at = PT_change_leaf_to_node(at); // change tip to node and append two new leafs
99  at = PT_create_leaf(&at, loc[height], loc_ref); // dummy leaf just to create a new node; may become a chain
100 
101  height++;
102 
103  if (loc[height-1] == PT_QU) {
104  pt_assert(loc_ref[height-1] == PT_QU); // end of both sequences
105  pt_assert(at->is_chain());
106 
107  PT_add_to_chain(at, loc); // and add node
108  return root;
109  }
110  pt_assert(loc_ref[height-1] != PT_QU);
111  }
112 
113  pt_assert(loc[height] != loc_ref[height]);
114 
115  if (height >= PT_POS_TREE_HEIGHT) {
116  if (at->is_leaf()) at = PT_leaf_to_chain(at);
117  PT_add_to_chain(at, loc);
118  return root;
119  }
120  if (at->is_chain()) {
121  // not covered by test - but looks similar to case in top-loop
122  PT_add_to_chain(at, loc);
123  }
124  else {
125  at = PT_change_leaf_to_node(at); // delete leaf
126  PT_create_leaf(&at, loc[height], loc); // two new leafs
127  PT_create_leaf(&at, loc_ref[height], loc_ref);
128  }
129  return root;
130 }
131 
132 
133 inline void get_abs_align_pos(char *seq, int &pos) {
134  // get the absolute alignment position
135  int q_exists = 0;
136  if (pos > 3) {
137  pos-=3;
138  while (pos > 0) {
139  uint32_t c = *((uint32_t*)(seq+pos));
140  if (c == 0x2E2E2E2E) {
141  q_exists = 1;
142  pos-=4;
143  continue;
144  }
145  if (c == 0x2D2D2D2D) {
146  pos-=4;
147  continue;
148  }
149  break;
150  }
151  pos+=3;
152  }
153  while (pos) {
154  unsigned char c = seq[pos];
155  if (c == '.') {
156  q_exists = 1;
157  pos--;
158  continue;
159  }
160  if (c == '-') {
161  pos--;
162  continue;
163  }
164  break;
165  }
166  pos+=q_exists;
167 }
168 
169 static bool all_sons_saved(POS_TREE1 *node);
171  POS_TREE1::TYPE type = node->get_type();
172  return (type == PT1_NODE) ? !all_sons_saved(node) : (type != PT1_SAVED);
173 }
175  pt_assert(node->is_node());
176 
177  for (int i = PT_QU; i < PT_BASES; i++) {
178  POS_TREE1 *son = PT_read_son(node, (PT_base)i);
179  if (son) {
180  if (has_unsaved_sons(son)) return false;
181  }
182  }
183  return true;
184 }
185 
186 static long write_subtree(FILE *out, POS_TREE1 *node, long pos, long *node_pos, ARB_ERROR& error) {
188  node->clear_fathers();
189  return PTD_write_leafs_to_disk(out, node, pos, node_pos, error);
190 }
191 
192 static long save_lower_subtree(FILE *out, POS_TREE1 *node, long pos, int height, ARB_ERROR& error) {
193  if (height >= PT_MIN_TREE_HEIGHT) { // in lower part of tree
194  long dummy;
195  pos = write_subtree(out, node, pos, &dummy, error);
196  }
197  else {
198  switch (node->get_type()) {
199  case PT1_NODE:
200  for (int i = PT_QU; i<PT_BASES; ++i) {
201  POS_TREE1 *son = PT_read_son(node, PT_base(i));
202  if (son) pos = save_lower_subtree(out, son, pos, height+1, error);
203  }
204  break;
205 
206  case PT1_CHAIN: {
207  long dummy;
208  pos = write_subtree(out, node, pos, &dummy, error);
209  break;
210  }
211  case PT1_LEAF: pt_assert(0); break; // leafs shall not occur above PT_MIN_TREE_HEIGHT
212  case PT1_SAVED: break; // ok - saved by previous call
213  case PT1_UNDEF: pt_assert(0); break;
214  }
215  }
216  return pos;
217 }
218 
219 static long save_upper_tree(FILE *out, POS_TREE1 *node, long pos, long& node_pos, ARB_ERROR& error) {
220  pos = write_subtree(out, node, pos, &node_pos, error);
221  return pos;
222 }
223 
224 inline void check_tree_was_saved(POS_TREE1 *node, const char *whatTree, bool completely, ARB_ERROR& error) {
225  if (!error) {
226  bool saved = completely ? node->is_saved() : !has_unsaved_sons(node);
227  if (!saved) {
228 #if defined(DEBUG)
229  fprintf(stderr, "%s was not completely saved:\n", whatTree);
230  PT_dump_POS_TREE_recursive(node, " ", stderr);
231 #endif
232  error = GBS_global_string("%s was not saved completely", whatTree);
233  }
234  }
235 }
236 
237 long PTD_save_lower_tree(FILE *out, POS_TREE1 *node, long pos, ARB_ERROR& error) {
238  pos = save_lower_subtree(out, node, pos, 0, error);
239  check_tree_was_saved(node, "lower tree", false, error);
240  return pos;
241 }
242 
243 long PTD_save_upper_tree(FILE *out, POS_TREE1*& node, long pos, long& node_pos, ARB_ERROR& error) {
244  pt_assert(!has_unsaved_sons(node)); // forgot to call PTD_save_lower_tree?
245  pos = save_upper_tree(out, node, pos, node_pos, error);
246  check_tree_was_saved(node, "tree", true, error);
247  PTD_delete_saved_node(node);
248  return pos;
249 }
250 
251 #if defined(PTM_TRACE_MAX_MEM_USAGE)
252 static void dump_memusage() {
253  fflush(stderr);
254  printf("\n------------------------------ dump_memusage:\n"); fflush(stdout);
255 
256  malloc_stats();
257 
258  pid_t pid = getpid();
259  char *cmd = GBS_global_string_copy("pmap -d %i | grep -v lib", pid);
260  GB_ERROR error = GBK_system(cmd);
261  if (error) {
262  printf("Warning: %s\n", error);
263  }
264  free(cmd);
265  printf("------------------------------ dump_memusage [end]\n");
266  fflush_all();
267 }
268 #endif
269 
271  int passes;
272  size_t memuse;
273  int depth;
274 
275  const char *passname() const {
276  switch (depth) {
277  case 0: return "pass";
278  case 1: return "Level-I-passes";
279  case 2: return "Level-II-passes";
280  case 3: return "Level-III-passes";
281  case 4: return "Level-IV-passes";
282  default : pt_assert(0); break;
283  }
284  return NULp; // unreached
285  }
286 
287 public:
288  PartitionSpec() : passes(0), memuse(0), depth(0) {}
289  PartitionSpec(int passes_, size_t memuse_, int depth_) : passes(passes_), memuse(memuse_), depth(depth_) {}
290 
291  bool willUseMoreThan(size_t max_kb_usable) const { return memuse > max_kb_usable; }
292  bool isBetterThan(const PartitionSpec& other, size_t max_kb_usable) const {
293  if (!passes) return false;
294  if (!other.passes) return true;
295 
296  bool swaps = willUseMoreThan(max_kb_usable);
297  bool other_swaps = other.willUseMoreThan(max_kb_usable);
298 
299  int cmp = int(other_swaps)-int(swaps); // not to swap is better
300  if (cmp == 0) {
301  if (swaps) { // optimize for memory
302  cmp = other.memuse-memuse; // less memuse is better (@@@ true only if probe->pass-calculation is cheap)
303  if (cmp == 0) {
304  cmp = other.passes-passes; // less passes are better
305  if (cmp == 0) {
306  cmp = other.depth-depth;
307  }
308  }
309  }
310  else { // optimize for number of passes
311  cmp = other.passes-passes; // less passes are better
312  if (cmp == 0) {
313  cmp = other.depth-depth; // less depth is better
314  if (cmp == 0) {
315  cmp = other.memuse-memuse; // less memuse is better (@@@ true only if probe->pass-calculation is cheap)
316  }
317  }
318  }
319  }
320  return cmp>0;
321  }
322 
323  void print_info(FILE *out, size_t max_kb_usable) const {
324  fprintf(out,
325  "Estimated memory usage for %i %s: %s%s\n",
326  passes,
327  passname(),
328  GBS_readable_size(memuse*1024, "b"),
329  memuse>max_kb_usable ? " (would swap)" : "");
330  }
331 
332  Partition partition() const { return Partition(depth, passes); }
333 };
334 
335 static Partition decide_passes_to_use(size_t overallBases, size_t max_kb_usable, int forced_passes) {
336  // if 'forced_passes' == 0 -> decide number of passes such that estimated memuse is hard up for 'max_kb_usable'
337  // if 'forced_passes' > 0 -> ignore available memory (for DEBUGGING memory estimation)
338 
339  fflush_all();
340 
341  PartitionSpec best;
342 
343  for (int depth = 0; depth <= PT_MAX_PARTITION_DEPTH; ++depth) {
344  PrefixProbabilities prob(depth);
345 
346  int maxPasses = prob.get_prefix_count();
347  if (forced_passes) {
348  if (maxPasses >= forced_passes) {
349  PartitionSpec curr(forced_passes, max_kb_for_passes(prob, forced_passes, overallBases), depth);
350  if (curr.isBetterThan(best, max_kb_usable)) {
351  best = curr;
352 #if defined(PTM_TRACE_PARTITION_DETECTION)
353  best.print_info(stdout, max_kb_usable);
354 #endif
355  }
356  }
357  }
358  else {
359  for (int passes = 1; passes <= maxPasses; ++passes) {
360  PartitionSpec curr(passes, max_kb_for_passes(prob, passes, overallBases), depth);
361  if (curr.isBetterThan(best, max_kb_usable)) {
362  best = curr;
363 #if defined(PTM_TRACE_PARTITION_DETECTION)
364  best.print_info(stdout, max_kb_usable);
365 #endif
366  }
367  if (!curr.willUseMoreThan(max_kb_usable)) break;
368  }
369  }
370  }
371  fflush(stdout);
372 
373  best.print_info(stdout, max_kb_usable);
374 
375  if (best.willUseMoreThan(max_kb_usable)) {
376  const int allowed_passes = PrefixIterator(PT_QU, PT_T, PT_MAX_PARTITION_DEPTH).steps();
377 
378  fprintf(stderr,
379  "Warning: \n"
380  " You try to build a ptserver from a very big database!\n"
381  "\n"
382  " The memory installed on your machine would require to build the ptserver\n"
383  " in more than %i passes (the maximum allowed number of passes).\n"
384  "\n"
385  " As a result the build of this server may cause your machine to swap huge\n"
386  " amounts of memory and will possibly run for days, weeks or even months.\n"
387  "\n", allowed_passes);
388 
389  fflush(stderr);
390  }
391 
392  return best.partition();
393 }
394 
395 ARB_ERROR enter_stage_1_build_tree(PT_main * , const char *tname, ULONG ARM_size_kb) { // __ATTR__USERESULT
396  // initialize tree and call the build pos tree procedure
397 
399 
400  if (unlink(tname)) {
401  if (GB_size_of_file(tname) >= 0) {
402  error = GBS_global_string("Cannot remove %s", tname);
403  }
404  }
405 
406  if (!error) {
407  char *t2name = ARB_calloc<char>(strlen(tname) + 2);
408  sprintf(t2name, "%s%%", tname);
409 
410  FILE *out = fopen(t2name, "w");
411  if (!out) {
412  error = GBS_global_string("Cannot open %s", t2name);
413  }
414  else {
415  POS_TREE1 *pt = NULp;
416 
417  {
418  GB_ERROR sm_error = GB_set_mode_of_file(t2name, 0666);
419  if (sm_error) {
420  GB_warningf("%s\nOther users might get problems when they try to access this file.", sm_error);
421  }
422  }
423 
424  fputc(0, out); // disable zero father
425  long pos = 1;
426 
427  // now temp file exists -> trigger ptserver-selectionlist-update in all
428  // ARB applications by writing to log
429  GBS_add_ptserver_logentry(GBS_global_string("Calculating probe tree (%s)", tname));
430 
433 
434  pt = PT_create_leaf(NULp, PT_N, DataLoc(0, 0, 0)); // create main node
435  pt = PT_change_leaf_to_node(pt);
436  psg.stat.cut_offs = 0; // statistic information
438 
439  ULONG available_memory = GB_get_usable_memory() - ARM_size_kb - PTSERVER_BIN_MB*1024;
440  printf("Memory available for build: %s\n", GBS_readable_size(available_memory*1024, "b"));
441 
442  int forcedPasses = 0; // means "do not force"
443  {
444  const char *forced = GB_getenv("ARB_PTS_FORCE_PASSES");
445  if (forced) {
446  int f = atoi(forced);
447  if (f >= 1) {
448  forcedPasses = f;
449  printf("Warning: Forcing %i passes (by envvar ARB_PTS_FORCE_PASSES='%s')\n", forcedPasses, forced);
450  }
451  }
452  }
453 
454  Partition partition = decide_passes_to_use(psg.char_count, available_memory, forcedPasses);
455  {
456  size_t max_part = partition.estimate_max_probes_for_any_pass(psg.char_count);
457  printf("Overall bases: %s\n", GBS_readable_size(psg.char_count, "bp"));
458  printf("Max. partition size: %s (=%.1f%%)\n", GBS_readable_size(max_part, "bp"), max_part*100.0/psg.char_count);
459  }
460 
461  int passes = partition.number_of_passes();
462  arb_progress pass_progress(GBS_global_string("Build index in %i passes", passes), long(passes));
463  int currPass = 0;
464  do {
465  pt_assert(!partition.done());
466 
467  ++currPass;
468  arb_progress data_progress(GBS_global_string("pass %i/%i", currPass, passes), long(psg.data_count));
469 
470  for (int name = 0; name < psg.data_count; name++) {
471  const probe_input_data& pid = psg.data[name];
472 
473  SmartCharPtr seqPtr = pid.get_dataPtr();
474  const char *seq = &*seqPtr;
475 
476  pid.preload_rel2abs();
477  ReadableDataLoc insertLoc(name, 0, 0);
478  for (int rel = pid.get_size() - 1; rel >= 0; rel--) {
479  if (partition.contains(seq+rel)) {
480  insertLoc.set_position(pid.get_abspos(rel), rel);
481  pt = build_pos_tree(pt, insertLoc);
482  }
483  }
484  ++data_progress;
485  }
486 
487 #if defined(PTM_TRACE_MAX_MEM_USAGE)
488  dump_memusage();
489 #endif
490 
491  pos = PTD_save_lower_tree(out, pt, pos, error);
492  if (error) break;
493 
494 #ifdef PTM_DEBUG_NODES
495  PTD_debug_nodes();
496 #endif
497  }
498  while (partition.next());
499 
500  long last_obj = 0;
501  if (!error) {
502  pos = PTD_save_upper_tree(out, pt, pos, last_obj, error);
503  pt_assert(!pt);
504  }
505  if (!error) {
506  bool need64bit = false; // does created db need a 64bit ptserver ?
507 
508  pt_assert(last_obj);
509 #ifdef ARB_64
510  if (last_obj >= 0xffffffff) need64bit = true; // last_obj is bigger than int
511 #else
512  if (last_obj <= 0) { // overflow ?
513  GBK_terminate("Overflow - out of memory");
514  }
515 #endif
516 
517  // write information about database
518  long info_pos = pos;
519 
520  PTD_put_int(out, PT_SERVER_MAGIC); // marker to identify PT-Server file
521  PTD_put_byte(out, PT_SERVER_VERSION); // version of PT-Server file
522  pos += 4+1;
523 
524  // as last element of info block, write it's size (2byte)
525  long info_size = pos-info_pos;
526  PTD_put_short(out, info_size);
527  pos += 2;
528 
529  // save DB footer (which is the entry point on load)
530 
531  if (need64bit) { // last_obj is bigger than int
532 #ifdef ARB_64
533  PTD_put_longlong(out, last_obj); // write last_obj as long long (64 bit)
534  PTD_put_int(out, 0xffffffff); // write 0xffffffff at the end to signalize 64bit ptserver is needed
535 #else
536  pt_assert(0);
537 #endif
538  }
539  else {
540  PTD_put_int(out, last_obj); // last_obj fits into an int -> store it as usual (compatible to old unversioned format)
541  }
542  }
543  if (error) {
545  fclose(out);
546 
547  int res = GB_unlink(t2name);
548  if (res == -1) fputs(GB_await_error(), stderr);
549  }
550  else {
552  fclose(out);
553 
554  error = GB_move_file(t2name, tname);
555  if (!error) {
556  GB_ERROR sm_error = GB_set_mode_of_file(tname, 00666);
557  if (sm_error) GB_warning(sm_error);
558  }
559  }
560 
561  if (error) pass_progress.done();
562  }
563  free(t2name);
564  }
565 
566 #if defined(DEBUG)
567  {
568  char *related = ARB_strdup(tname);
569  char *starpos = strstr(related, ".arb.pt");
570 
571  pt_assert(starpos);
572  strcpy(starpos, ".*");
573 
574  fflush_all();
575 
576  char *listRelated = GBS_global_string_copy("ls -al %s", related);
577  GB_ERROR lserror = GBK_system(listRelated);
578 
579  fflush_all();
580 
581  if (lserror) fprintf(stderr, "Warning: %s\n", lserror);
582  free(listRelated);
583  free(related);
584  }
585 #endif
586 
587  return error;
588 }
589 
590 ARB_ERROR enter_stage_2_load_tree(PT_main *, const char *tname) { // __ATTR__USERESULT
591  // load tree from disk
593 
596 
597  {
598  long size = GB_size_of_file(tname);
599  if (size<0) {
600  error = GB_IO_error("stat", tname);
601  }
602  else {
603  printf("- mapping ptindex ('%s', %s) from disk\n", tname, GBS_readable_size(size, "b"));
604  FILE *in = fopen(tname, "r");
605  if (!in) {
606  error = GB_IO_error("read", tname);
607  }
608  else {
609  error = PTD_read_leafs_from_disk(tname, psg.TREE_ROOT2());
610  fclose(in);
611  }
612  }
613  }
614 
615  return error;
616 }
617 
618 // --------------------------------------------------------------------------------
619 
620 #ifdef UNIT_TESTS
621 #ifndef TEST_UNIT_H
622 #include <test_unit.h>
623 #include "PT_compress.h"
624 #endif
625 
626 void TEST_PrefixProbabilities() {
627  PrefixProbabilities prob0(0);
628  PrefixProbabilities prob1(1);
629  PrefixProbabilities prob2(2);
630 
631  const double EPS = 0.00001;
632 
633  TEST_EXPECT_SIMILAR(prob0.of(0), 1.0000, EPS); // all
634 
635  TEST_EXPECT_SIMILAR(prob1.of(0), 0.0014, EPS); // PT_QU
636  TEST_EXPECT_SIMILAR(prob1.of(1), 0.0003, EPS); // PT_N
637  TEST_EXPECT_SIMILAR(prob1.of(2), 0.2543, EPS);
638  TEST_EXPECT_SIMILAR(prob1.of(3), 0.2268, EPS);
639  TEST_EXPECT_SIMILAR(prob1.of(4), 0.3074, EPS);
640  TEST_EXPECT_SIMILAR(prob1.of(5), 0.2098, EPS);
641 
642  TEST_EXPECT_SIMILAR(prob2.of( 0), 0.00140, EPS); // PT_QU
643  TEST_EXPECT_SIMILAR(prob2.of( 1), 0.00000, EPS); // PT_N PT_QU
644  TEST_EXPECT_SIMILAR(prob2.of( 2), 0.00000, EPS); // PT_N PT_N
645  TEST_EXPECT_SIMILAR(prob2.of( 3), 0.00008, EPS); // PT_N PT_A
646  TEST_EXPECT_SIMILAR(prob2.of( 7), 0.00036, EPS); // PT_A PT_QU
647  TEST_EXPECT_SIMILAR(prob2.of( 9), 0.06467, EPS); // PT_A PT_A
648  TEST_EXPECT_SIMILAR(prob2.of(30), 0.04402, EPS); // PT_T PT_T
649 
650  TEST_EXPECT_SIMILAR(prob1.left_of(4), 0.4828, EPS);
651  TEST_EXPECT_SIMILAR(prob1.left_of(6), 1.0000, EPS); // all prefixes together
652 
653  TEST_EXPECT_SIMILAR(prob2.left_of(19), 0.4828, EPS);
654  TEST_EXPECT_SIMILAR(prob2.left_of(31), 1.0000, EPS); // all prefixes together
655 
656  TEST_EXPECT_EQUAL(prob0.find_index_near_leftsum(1.0), 1);
657 
658  TEST_EXPECT_EQUAL(prob1.find_index_near_leftsum(0.5), 4);
659  TEST_EXPECT_SIMILAR(prob1.left_of(4), 0.4828, EPS);
660  TEST_EXPECT_SIMILAR(prob1.left_of(5), 0.7902, EPS);
661 
662  TEST_EXPECT_EQUAL(prob2.find_index_near_leftsum(0.5), 21);
663  TEST_EXPECT_SIMILAR(prob2.left_of(21), 0.48332, EPS);
664  TEST_EXPECT_SIMILAR(prob2.left_of(22), 0.56149, EPS);
665 }
666 
667 static int count_passes(Partition& p) {
668  p.reset();
669  int count = 0;
670  while (!p.done()) {
671  p.next();
672  ++count;
673  }
674  p.reset();
675  return count;
676 }
677 
678 class Compressed {
679  size_t len;
680  PT_compressed compressed;
681 
682 public:
683  Compressed(const char *readable)
684  : len(strlen(readable)),
685  compressed(len)
686  {
687  compressed.createFrom(readable, len);
688  }
689  const char *seq() const { return compressed.get_seq(); }
690 };
691 
692 void TEST_MarkedPrefixes() {
693  MarkedPrefixes mp0(0);
694  MarkedPrefixes mp1(1);
695  MarkedPrefixes mp2(2);
696 
697  mp0.predecide();
698  TEST_EXPECT_EQUAL(mp0.isMarked(Compressed(".").seq()), false);
699  TEST_EXPECT_EQUAL(mp0.isMarked(Compressed("T").seq()), false);
700 
701  mp0.mark(0, 0);
702  mp0.predecide();
703  TEST_EXPECT_EQUAL(mp0.isMarked(Compressed(".").seq()), true);
704  TEST_EXPECT_EQUAL(mp0.isMarked(Compressed("T").seq()), true);
705 
706  mp1.mark(3, 5);
707  mp1.predecide();
708  TEST_EXPECT_EQUAL(mp1.isMarked(Compressed(".").seq()), false);
709  TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("N").seq()), false);
710  TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("A").seq()), false);
711  TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("C").seq()), true);
712  TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("G").seq()), true);
713  TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("T").seq()), true);
714 
715  mp2.mark(1, 7);
716  mp2.predecide();
717  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed(".").seq()), false);
718  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("N.").seq()), true);
719  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NN").seq()), true);
720  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NA").seq()), true);
721  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NC").seq()), true);
722  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NG").seq()), true);
723  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NT").seq()), true);
724  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("A.").seq()), true);
725  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AN").seq()), false);
726  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AC").seq()), false);
727  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AG").seq()), false);
728  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AT").seq()), false);
729  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("GG").seq()), false);
730  TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("TA").seq()), false);
731 }
732 
733 #if defined(ARB_64)
734 #define VAL_64_32_BITDEP(val64,val32) (val64)
735 #else // !defined(ARB_64)
736 #define VAL_64_32_BITDEP(val64,val32) (val32)
737 #endif
738 
739 __ATTR__REDUCED_OPTIMIZE void TEST_Partition() {
740  PrefixProbabilities p0(0);
741  PrefixProbabilities p1(1);
742  PrefixProbabilities p2(2);
743  PrefixProbabilities p3(3);
744  PrefixProbabilities p4(4);
745 
746  const int BASES_100k = 100000;
747 
748  {
749  Partition P01(p0, 1);
750  TEST_EXPECT_EQUAL(P01.estimate_probes_for_pass(1, BASES_100k), 100000);
751  TEST_EXPECT_EQUAL(P01.estimate_max_probes_for_any_pass(BASES_100k), 100000);
752  }
753 
754  {
755  // distributing memory to 6 passes on a level.1 Partitioner doesn't allow much choice:
756  Partition P16(p1, 6);
757  TEST_EXPECT_EQUAL(P16.number_of_passes(), 6);
758  TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(1, BASES_100k), 140);
759  TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(2, BASES_100k), 30);
760  TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(3, BASES_100k), 25430);
761  TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(4, BASES_100k), 22680);
762  TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(5, BASES_100k), 30740);
763  TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(6, BASES_100k), 20980);
764  TEST_EXPECT_EQUAL(P16.estimate_max_probes_for_any_pass(BASES_100k), 30740);
765  TEST_EXPECT_EQUAL(P16.estimate_max_kb_for_any_pass(BASES_100k), VAL_64_32_BITDEP(440583, 205015));
766  }
767 
768  {
769  // 3 passes
770  Partition P13(p1, 3);
771  TEST_EXPECT_EQUAL(P13.number_of_passes(), 3);
772  TEST_EXPECT_EQUAL(count_passes(P13), 3);
773 
774  TEST_EXPECT_EQUAL(P13.contains(Compressed(".").seq()), true);
775  TEST_EXPECT_EQUAL(P13.contains(Compressed("N").seq()), true);
776  TEST_EXPECT_EQUAL(P13.contains(Compressed("A").seq()), true);
777  TEST_EXPECT_EQUAL(P13.contains(Compressed("C").seq()), false);
778  TEST_EXPECT_EQUAL(P13.contains(Compressed("G").seq()), false);
779  TEST_EXPECT_EQUAL(P13.contains(Compressed("T").seq()), false);
780 
781  TEST_EXPECT_EQUAL(P13.next(), true);
782 
783  TEST_EXPECT_EQUAL(P13.contains(Compressed(".").seq()), false);
784  TEST_EXPECT_EQUAL(P13.contains(Compressed("N").seq()), false);
785  TEST_EXPECT_EQUAL(P13.contains(Compressed("A").seq()), false);
786  TEST_EXPECT_EQUAL(P13.contains(Compressed("C").seq()), true);
787  TEST_EXPECT_EQUAL(P13.contains(Compressed("G").seq()), true);
788  TEST_EXPECT_EQUAL(P13.contains(Compressed("T").seq()), false);
789 
790  TEST_EXPECT_EQUAL(P13.next(), true);
791 
792  TEST_EXPECT_EQUAL(P13.contains(Compressed(".").seq()), false);
793  TEST_EXPECT_EQUAL(P13.contains(Compressed("N").seq()), false);
794  TEST_EXPECT_EQUAL(P13.contains(Compressed("A").seq()), false);
795  TEST_EXPECT_EQUAL(P13.contains(Compressed("C").seq()), false);
796  TEST_EXPECT_EQUAL(P13.contains(Compressed("G").seq()), false);
797  TEST_EXPECT_EQUAL(P13.contains(Compressed("T").seq()), true);
798 
799  TEST_EXPECT_EQUAL(P13.next(), false);
800 
801  TEST_EXPECT_EQUAL(P13.estimate_probes_for_pass(1, BASES_100k), 25600);
802  TEST_EXPECT_EQUAL(P13.estimate_probes_for_pass(2, BASES_100k), 53420);
803  TEST_EXPECT_EQUAL(P13.estimate_probes_for_pass(3, BASES_100k), 20980);
804  TEST_EXPECT_EQUAL(P13.estimate_max_probes_for_any_pass(BASES_100k), 53420);
805  TEST_EXPECT_EQUAL(P13.estimate_max_kb_for_any_pass(BASES_100k), VAL_64_32_BITDEP(440687, 205101));
806  }
807 
808  {
809  // 2 passes
810  Partition P12(p1, 2);
811  TEST_EXPECT_EQUAL(P12.number_of_passes(), 2);
812  TEST_EXPECT_EQUAL(count_passes(P12), 2);
813 
814  TEST_EXPECT_EQUAL(P12.contains(Compressed(".").seq()), true);
815  TEST_EXPECT_EQUAL(P12.contains(Compressed("N").seq()), true);
816  TEST_EXPECT_EQUAL(P12.contains(Compressed("A").seq()), true);
817  TEST_EXPECT_EQUAL(P12.contains(Compressed("C").seq()), true);
818  TEST_EXPECT_EQUAL(P12.contains(Compressed("G").seq()), false);
819  TEST_EXPECT_EQUAL(P12.contains(Compressed("T").seq()), false);
820 
821  TEST_EXPECT_EQUAL(P12.next(), true);
822 
823  TEST_EXPECT_EQUAL(P12.contains(Compressed(".").seq()), false);
824  TEST_EXPECT_EQUAL(P12.contains(Compressed("N").seq()), false);
825  TEST_EXPECT_EQUAL(P12.contains(Compressed("A").seq()), false);
826  TEST_EXPECT_EQUAL(P12.contains(Compressed("C").seq()), false);
827  TEST_EXPECT_EQUAL(P12.contains(Compressed("G").seq()), true);
828  TEST_EXPECT_EQUAL(P12.contains(Compressed("T").seq()), true);
829 
830  TEST_EXPECT_EQUAL(P12.next(), false);
831 
832  TEST_EXPECT_EQUAL(P12.estimate_probes_for_pass(1, BASES_100k), 48280);
833  TEST_EXPECT_EQUAL(P12.estimate_probes_for_pass(2, BASES_100k), 51720);
834  TEST_EXPECT_EQUAL(P12.estimate_max_probes_for_any_pass(BASES_100k), 51720);
835  TEST_EXPECT_EQUAL(P12.estimate_max_kb_for_any_pass(BASES_100k), VAL_64_32_BITDEP(440679, 205095));
836  }
837 
838  TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 1, BASES_100k), 100000);
839  TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 2, BASES_100k), 51720);
840  TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 3, BASES_100k), 53420);
841  TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 4, BASES_100k), 30740);
842  TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 5, BASES_100k), 30740);
843  TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 6, BASES_100k), 30740);
844 
845  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 1, BASES_100k), 100000);
846  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 2, BASES_100k), 51668);
847  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 3, BASES_100k), 36879);
848  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 4, BASES_100k), 27429);
849  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 5, BASES_100k), 26571);
850  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 6, BASES_100k), 21270);
851  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 7, BASES_100k), 18958);
852  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 8, BASES_100k), 16578);
853  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 9, BASES_100k), 15899);
854  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 10, BASES_100k), 14789);
855  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 15, BASES_100k), 11730);
856  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 20, BASES_100k), 9449);
857  TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 30, BASES_100k), 9449);
858 
859  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 1, BASES_100k), 100000);
860  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 2, BASES_100k), 50333);
861  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 3, BASES_100k), 33890);
862  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 4, BASES_100k), 25853);
863  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 5, BASES_100k), 20906);
864  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 6, BASES_100k), 17668);
865  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 7, BASES_100k), 15099);
866  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 8, BASES_100k), 13854);
867  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 9, BASES_100k), 12259);
868  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 10, BASES_100k), 11073);
869  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 15, BASES_100k), 8168);
870  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 20, BASES_100k), 6401);
871  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 30, BASES_100k), 4737);
872  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 40, BASES_100k), 4176);
873  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 50, BASES_100k), 2983);
874  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 100, BASES_100k), 2905);
875  TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 150, BASES_100k), 2905);
876 
877  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 1, BASES_100k), 100000);
878  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 2, BASES_100k), 50084);
879  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 3, BASES_100k), 33425);
880  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 4, BASES_100k), 25072);
881  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 5, BASES_100k), 20145);
882  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 6, BASES_100k), 16837);
883  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 7, BASES_100k), 14528);
884  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 8, BASES_100k), 12606);
885  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 9, BASES_100k), 11319);
886  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 10, BASES_100k), 10158);
887  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 15, BASES_100k), 6887);
888  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 20, BASES_100k), 5315);
889  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 30, BASES_100k), 3547);
890  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 40, BASES_100k), 2805);
891  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 50, BASES_100k), 2336);
892  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 100, BASES_100k), 1397);
893  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 150, BASES_100k), 1243);
894  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 200, BASES_100k), 954);
895  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 300, BASES_100k), 893);
896  TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 600, BASES_100k), 893);
897 }
898 
899 static arb_test::match_expectation decides_on_passes(ULONG bp, size_t avail_mem_kb, int expected_passes, int expected_depth, size_t expected_passsize, size_t expected_memuse, bool expect_to_swap) {
900  size_t ARM_size_kb = bp/1800; // just guess .ARM size
901 
902  avail_mem_kb -= ARM_size_kb + PTSERVER_BIN_MB*1024;
903 
904  Partition part = decide_passes_to_use(bp, avail_mem_kb, 0);
905  int decided_passes = part.number_of_passes();
906  int decided_depth = part.split_depth();
907  size_t decided_passsize = part.estimate_max_probes_for_any_pass(bp);
908  size_t decided_memuse = part.estimate_max_kb_for_any_pass(bp);
909  bool decided_to_swap = decided_memuse>avail_mem_kb;
910 
911  using namespace arb_test;
912  expectation_group expected;
913  expected.add(that(decided_passes).is_equal_to(expected_passes));
914  expected.add(that(decided_depth).is_equal_to(expected_depth));
915  expected.add(that(decided_passsize).is_equal_to(expected_passsize));
916  expected.add(that(decided_memuse).is_equal_to(expected_memuse));
917  expected.add(that(decided_to_swap).is_equal_to(expect_to_swap));
918  return all().ofgroup(expected);
919 }
920 
921 #define TEST_DECIDES_PASSES(bp,memkb,expected_passes,expected_depth,expected_passsize,expected_memuse,expect_to_swap) \
922  TEST_EXPECTATION(decides_on_passes(bp, memkb, expected_passes, expected_depth, expected_passsize, expected_memuse, expect_to_swap))
923 
924 #define TEST_DECIDES_PASSES__BROKEN(bp,memkb,expected_passes,expected_depth,expected_passsize,expected_memuse,expect_to_swap) \
925  TEST_EXPECTATION__BROKEN(decides_on_passes(bp, memkb, expected_passes, expected_depth, expected_passsize, expected_memuse, expect_to_swap))
926 
927 void TEST_SLOW_decide_passes_to_use() {
928  const ULONG MB = 1024; // kb
929  const ULONG GB = 1024*MB; // kb
930 
931  const ULONG BP_SILVA_108_REF = 891481251ul;
932  const ULONG BP_SILVA_108_PARC = BP_SILVA_108_REF * (2492653/618442.0); // rough estimation by number of species
933  const ULONG BP_SILVA_108_40K = 56223289ul;
934  const ULONG BP_SILVA_108_12K = 17622233ul;
935 
936  const ULONG MINI_PC = 2 *GB;
937  const ULONG SMALL_PC = 4 *GB;
938 #if defined(ARB_64)
939  const ULONG SMALL_SERVER = 12 *GB; // "bilbo"
940  const ULONG MEDIUM_SERVER = 20 *GB; // "boarisch"
941  const ULONG BIG_SERVER = 64 *GB;
942  const ULONG HUGE_SERVER = 128 *GB;
943 
944  const ULONG MEM1 = ULONG(45.7*GB+0.5);
945  const ULONG MEM2 = ULONG(18.7*GB+0.5);
946  const ULONG MEM3 = ULONG(11.23*GB+0.5);
947  const ULONG MEM4 = ULONG(8*GB+0.5);
948 #endif
949  const ULONG MEM5 = ULONG(4*GB+0.5);
950 
951  const ULONG LMEM1 = 3072*MB;
952  const ULONG LMEM2 = 2560*MB;
953  const ULONG LMEM3 = 2048*MB;
954  const ULONG LMEM4 = 1536*MB;
955  const ULONG LMEM5 = 1024*MB;
956  const ULONG LMEM6 = 768*MB;
957  const ULONG LMEM7 = 512*MB;
958 
959  const int SWAPS = 1;
960 
961 #if defined(ARB_64)
962  // ---------------- database --------- machine -- passes depth ---- probes ----- memuse - swap?
963 
964  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM1, 1, 0, 3593147643UL, 21318473, 0);
965  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM2, 2, 1, 1858375961, 13356142, 0);
966  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM3, 4, 2, 985573522, 9350115, 0);
967  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM4, 11, 4, 333053234, 6355149, 0);
968 
969  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM1, 1, 0, 891481251, 5620314, 0);
970  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM2, 1, 0, 891481251, 5620314, 0);
971  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM3, 1, 0, 891481251, 5620314, 0);
972  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM4, 1, 0, 891481251, 5620314, 0);
973  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM5, 2, 1, 461074103, 3644812, 0);
974  TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM1, 4, 3, 230472727, 2586388, 0);
975  TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM2, 8, 3, 123505999, 2095427, 0);
976  TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM3, 111, 4, 11443798, 1581079, 0);
977 
978  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM1, 1, 0, 56223289, 767008, 0);
979  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM2, 1, 0, 56223289, 767008, 0);
980  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM3, 1, 0, 56223289, 767008, 0);
981  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM4, 1, 0, 56223289, 767008, 0);
982  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM5, 1, 0, 56223289, 767008, 0);
983  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM6, 2, 1, 29078685, 642419, 0);
984  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM7, 194, 4, 502032, 511256, SWAPS);
985 
986  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MINI_PC, 194, 4, 32084148, 4973748, SWAPS);
987  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MINI_PC, 111, 4, 11443798, 1581079, 0);
988  TEST_DECIDES_PASSES(BP_SILVA_108_40K, MINI_PC, 1, 0, 56223289, 767008, 0);
989  TEST_DECIDES_PASSES(BP_SILVA_108_12K, MINI_PC, 1, 0, 17622233, 542715, 0);
990 
991  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, SMALL_PC, 194, 4, 32084148, 4973748, SWAPS);
992  TEST_DECIDES_PASSES(BP_SILVA_108_REF, SMALL_PC, 2, 1, 461074103, 3644812, 0);
993  TEST_DECIDES_PASSES(BP_SILVA_108_40K, SMALL_PC, 1, 0, 56223289, 767008, 0);
994  TEST_DECIDES_PASSES(BP_SILVA_108_12K, SMALL_PC, 1, 0, 17622233, 542715, 0);
995 
996  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, SMALL_SERVER, 3, 3, 1217700425, 10415541, 0);
997  TEST_DECIDES_PASSES(BP_SILVA_108_REF, SMALL_SERVER, 1, 0, 891481251, 5620314, 0);
998  TEST_DECIDES_PASSES(BP_SILVA_108_40K, SMALL_SERVER, 1, 0, 56223289, 767008, 0);
999  TEST_DECIDES_PASSES(BP_SILVA_108_12K, SMALL_SERVER, 1, 0, 17622233, 542715, 0);
1000 
1001  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEDIUM_SERVER, 2, 1, 1858375961, 13356142, 0);
1002  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEDIUM_SERVER, 1, 0, 891481251, 5620314, 0);
1003  TEST_DECIDES_PASSES(BP_SILVA_108_40K, MEDIUM_SERVER, 1, 0, 56223289, 767008, 0);
1004  TEST_DECIDES_PASSES(BP_SILVA_108_12K, MEDIUM_SERVER, 1, 0, 17622233, 542715, 0);
1005 
1006  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, BIG_SERVER, 1, 0, 3593147643UL, 21318473, 0);
1007  TEST_DECIDES_PASSES(BP_SILVA_108_REF, BIG_SERVER, 1, 0, 891481251, 5620314, 0);
1008  TEST_DECIDES_PASSES(BP_SILVA_108_40K, BIG_SERVER, 1, 0, 56223289, 767008, 0);
1009  TEST_DECIDES_PASSES(BP_SILVA_108_12K, BIG_SERVER, 1, 0, 17622233, 542715, 0);
1010 
1011  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, HUGE_SERVER, 1, 0, 3593147643UL, 21318473, 0);
1012  TEST_DECIDES_PASSES(BP_SILVA_108_REF, HUGE_SERVER, 1, 0, 891481251, 5620314, 0);
1013  TEST_DECIDES_PASSES(BP_SILVA_108_40K, HUGE_SERVER, 1, 0, 56223289, 767008, 0);
1014  TEST_DECIDES_PASSES(BP_SILVA_108_12K, HUGE_SERVER, 1, 0, 17622233, 542715, 0);
1015 
1016 #else // !defined(ARB_64) => only test for situations with at most 4Gb
1017  // ---------------- database --------- machine -- passes depth ---- probes ----- memuse - swap?
1018 
1019  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM5, 2, 1, 461074103, 2831431, 0);
1020  TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM1, 3, 2, 328766946, 2327527, 0);
1021  TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM2, 4, 2, 244526639, 2006690, 0);
1022  TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM3, 7, 4, 129515581, 1568659, 0);
1023 
1024  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM1, 1, 0, 56223289, 473837, 0);
1025  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM2, 1, 0, 56223289, 473837, 0);
1026  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM3, 1, 0, 56223289, 473837, 0);
1027  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM4, 1, 0, 56223289, 473837, 0);
1028  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM5, 1, 0, 56223289, 473837, 0);
1029  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM6, 1, 0, 56223289, 473837, 0);
1030  TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM7, 2, 1, 29078685, 370454, 0);
1031 
1032  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MINI_PC, 194, 4, 32084148, 3835929, SWAPS);
1033  TEST_DECIDES_PASSES(BP_SILVA_108_REF, MINI_PC, 7, 4, 129515581, 1568659, 0);
1034  TEST_DECIDES_PASSES(BP_SILVA_108_40K, MINI_PC, 1, 0, 56223289, 473837, 0);
1035  TEST_DECIDES_PASSES(BP_SILVA_108_12K, MINI_PC, 1, 0, 17622233, 289125, 0);
1036 
1037  TEST_DECIDES_PASSES(BP_SILVA_108_PARC, SMALL_PC, 194, 4, 32084148, 3835929, SWAPS);
1038  TEST_DECIDES_PASSES(BP_SILVA_108_REF, SMALL_PC, 2, 1, 461074103, 2831431, 0);
1039  TEST_DECIDES_PASSES(BP_SILVA_108_40K, SMALL_PC, 1, 0, 56223289, 473837, 0);
1040  TEST_DECIDES_PASSES(BP_SILVA_108_12K, SMALL_PC, 1, 0, 17622233, 289125, 0);
1041 
1042 #endif
1043 }
1044 
1045 void NOTEST_SLOW_maybe_build_tree() {
1046  // does only test sth if DB is present.
1047 
1048  char dbarg[] = "-D" "extra_pt_src.arb";
1049  char *testDB = dbarg+2;
1050  const char *resultPT = "extra_pt_src.arb.pt";
1051  const char *expectedPT = "extra_pt_src.arb_expected.pt";
1052  bool exists = GB_is_regularfile(testDB);
1053 
1054  if (exists) {
1055  char pname[] = "fake_pt_server";
1056  char barg[] = "-build";
1057  char *argv[] = {
1058  pname,
1059  barg,
1060  dbarg,
1061  };
1062 
1063  // build
1064  int res = ARB_main(ARRAY_ELEMS(argv), argv);
1066 
1067 // #define TEST_AUTO_UPDATE
1068 #if defined(TEST_AUTO_UPDATE)
1069  TEST_COPY_FILE(resultPT, expectedPT);
1070 #else // !defined(TEST_AUTO_UPDATE)
1071  TEST_EXPECT_FILES_EQUAL(resultPT, expectedPT);
1072 #endif
1073  }
1074 }
1075 
1076 #endif // UNIT_TESTS
1077 
1078 // --------------------------------------------------------------------------------
struct probe_input_data * data
Definition: probe.h:356
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2492
long PTD_save_upper_tree(FILE *out, POS_TREE1 *&node, long pos, long &node_pos, ARB_ERROR &error)
GB_ERROR GBK_system(const char *system_command)
Definition: arb_msg.cxx:519
const char * GB_ERROR
Definition: arb_core.h:25
void PT_add_to_chain(POS_TREE1 *node, const DataLoc &loc)
Definition: probe.h:122
#define PT_SERVER_VERSION
Definition: probe.h:28
GB_TYPES type
GB_ERROR GB_commit_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2515
void GB_warning(const char *message)
Definition: arb_msg.cxx:484
group_matcher all()
Definition: test_unit.h:1000
Definition: probe.h:83
SmartCharPtr get_dataPtr() const
Definition: probe.h:165
void PT_dump_POS_TREE_recursive(PT *pt, const char *prefix, FILE *out)
Definition: PT_debug.cxx:369
#define TEST_EXPECT_SIMILAR(expr, want, epsilon)
Definition: test_unit.h:1287
void fflush_all()
Definition: PT_tools.h:211
void reset()
Definition: PT_partition.h:436
Definition: probe.h:84
void PTD_put_int(FILE *out, ULONG i)
void PTD_delete_saved_node(POS_TREE1 *&node)
#define PT_MIN_TREE_HEIGHT
Definition: probe.h:57
size_t get_abspos(size_t rel_pos) const
Definition: probe.h:249
probe_struct_global psg
Definition: PT_main.cxx:36
Partition partition() const
void set_position(int abs_pos, int rel_pos)
Definition: probe_tree.h:473
GB_ERROR GB_IO_error(const char *action, const char *filename)
Definition: arb_msg.cxx:293
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
unsigned long ULONG
Definition: probe.h:50
void PTD_put_longlong(FILE *out, ULONG i)
GBDATA * gb_main
Definition: probe.h:351
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
int get_size() const
Definition: probe.h:211
ARB_ERROR PTD_read_leafs_from_disk(const char *fname, POS_TREE2 *&root_ptr)
size_t steps() const
static Partition decide_passes_to_use(size_t overallBases, size_t max_kb_usable, int forced_passes)
ARB_ERROR enter_stage_1_build_tree(PT_main *, const char *tname, ULONG ARM_size_kb)
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
probe_statistic_struct stat
Definition: probe.h:379
int number_of_passes() const
Definition: PT_partition.h:425
long GB_size_of_file(const char *path)
Definition: arb_file.cxx:28
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
bool willUseMoreThan(size_t max_kb_usable) const
FILE * seq
Definition: rns.c:46
#define PT_POS_TREE_HEIGHT
Definition: probe.h:56
size_t estimate_max_kb_for_any_pass(size_t overall_base_count) const
Definition: PT_partition.h:455
const char * get_seq() const
Definition: PT_compress.h:183
bool isBetterThan(const PartitionSpec &other, size_t max_kb_usable) const
void createFrom(const unsigned char *const seq, const size_t length)
Definition: PT_compress.h:120
#define PTSERVER_BIN_MB
Definition: PT_partition.h:48
void PTD_put_byte(FILE *out, ULONG i)
void enter_stage(Stage stage_)
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
fflush(stdout)
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:490
TYPE get_type() const
Definition: probe_tree.h:226
#define PT_MAX_PARTITION_DEPTH
Definition: probe.h:54
long PTD_write_leafs_to_disk(FILE *out, POS_TREE1 *const node, long pos, long *node_pos, ARB_ERROR &error)
size_t max_probes_for_passes(const PrefixProbabilities &prob, int passes_wanted, size_t overall_base_count)
Definition: PT_partition.h:464
Definition: probe.h:122
void GBK_terminate(const char *error) __ATTR__NORETURN
Definition: arb_msg.cxx:463
#define pt_assert_stage(s)
Definition: probe.h:41
PartitionSpec(int passes_, size_t memuse_, int depth_)
bool is_chain() const
Definition: probe_tree.h:230
size_t estimate_max_probes_for_any_pass(size_t overall_base_count) const
Definition: PT_partition.h:445
GB_ERROR GB_move_file(const char *oldpath, const char *newpath)
Definition: arb_file.cxx:284
const char * GBS_readable_size(unsigned long long size, const char *unit_suffix)
Definition: arb_misc.cxx:23
void PT_init_cache_sizes(Stage stage)
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2503
fputc('\n', stderr)
expectation_group & add(const expectation &e)
Definition: test_unit.h:801
void check_tree_was_saved(POS_TREE1 *node, const char *whatTree, bool completely, ARB_ERROR &error)
#define that(thing)
Definition: test_unit.h:1032
#define cmp(h1, h2)
Definition: admap.cxx:50
void print_info(FILE *out, size_t max_kb_usable) const
bool is_saved() const
Definition: probe_tree.h:231
Definition: probe.h:88
#define pt_assert(bed)
Definition: PT_tools.h:22
bool is_leaf() const
Definition: probe_tree.h:229
POS_TREE1 * PT_leaf_to_chain(POS_TREE1 *node)
#define PT_SERVER_MAGIC
Definition: probe.h:27
POS_TREE1 * PT_create_leaf(POS_TREE1 **pfather, PT_base base, const DataLoc &loc)
static POS_TREE1 * build_pos_tree(POS_TREE1 *const root, const ReadableDataLoc &loc)
int ARB_main(int argc, char *argv[])
Definition: mkptypes.cxx:1545
PT * PT_read_son(PT *node, PT_base base)
GB_ERROR GB_set_mode_of_file(const char *path, long mode)
Definition: arb_file.cxx:231
void get_abs_align_pos(char *seq, int &pos)
#define is_equal_to(val)
Definition: test_unit.h:1014
static long save_lower_subtree(FILE *out, POS_TREE1 *node, long pos, int height, ARB_ERROR &error)
#define __ATTR__REDUCED_OPTIMIZE
Definition: test_unit.h:83
#define EPS
Definition: awt_canvas.hxx:291
GB_CSTR GB_getenv(const char *env)
Definition: adsocket.cxx:656
#define MB
Definition: adtune.cxx:14
static bool all_sons_saved(POS_TREE1 *node)
fputs(TRACE_PREFIX, stderr)
GB_ULONG GB_get_usable_memory(void)
Definition: adsocket.cxx:812
PT_base
Definition: probe.h:82
void preload_rel2abs() const
Definition: probe.h:229
str readable(const copy< T > &v)
Definition: test_unit.h:334
bool next()
Definition: PT_partition.h:429
static long write_subtree(FILE *out, POS_TREE1 *node, long pos, long *node_pos, ARB_ERROR &error)
#define TEST_EXPECT_FILES_EQUAL(f1, f2)
Definition: test_unit.h:1393
bool done() const
Definition: PT_partition.h:428
void PTD_debug_nodes(void)
POS_TREE2 *& TREE_ROOT2()
Definition: probe.h:390
Definition: probe.h:89
int get_prefix_count() const
Definition: PT_partition.h:119
#define NULp
Definition: cxxforward.h:97
bool GB_is_regularfile(const char *path)
Definition: arb_file.cxx:76
bool contains(const char *probe) const
Definition: PT_partition.h:459
bool is_node() const
Definition: probe_tree.h:228
void GBS_add_ptserver_logentry(const char *entry)
Definition: adtcp.cxx:435
static long save_upper_tree(FILE *out, POS_TREE1 *node, long pos, long &node_pos, ARB_ERROR &error)
ARB_ERROR enter_stage_2_load_tree(PT_main *, const char *tname)
void clear_fathers()
size_t max_kb_for_passes(const PrefixProbabilities &prob, int passes_wanted, size_t overall_base_count)
Definition: PT_partition.h:467
int split_depth() const
Definition: PT_partition.h:426
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
void PTD_put_short(FILE *out, ULONG i)
bool has_unsaved_sons(POS_TREE1 *node)
long PTD_save_lower_tree(FILE *out, POS_TREE1 *node, long pos, ARB_ERROR &error)
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
POS_TREE1 * PT_change_leaf_to_node(POS_TREE1 *node)
PT1_TYPE
Definition: probe_tree.h:185