ARB
AP_tree_edge.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : AP_tree_edge.cxx //
4 // Purpose : //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in Summer 1995 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 #include "ap_tree_nlen.hxx"
13 #include "ap_main.hxx"
14 
15 #include <AP_filter.hxx>
16 #include <arb_progress.h>
17 
18 #include <cmath>
19 #include <iomanip>
20 
21 using namespace std;
22 
23 long AP_tree_edge::timeStamp = 0;
24 
25 AP_tree_edge::AP_tree_edge(AP_tree_nlen *node1, AP_tree_nlen *node2)
26  : next_in_chain(NULp),
27  used(0),
28  age(timeStamp++),
29  kl_visited(false)
30 {
31  node[0] = NULp; // => !is_linked()
32  relink(node1, node2); // link the nodes (initializes 'node' and 'index')
33 }
34 
36  if (is_linked()) unlink();
37 }
38 
39 static void buildSonEdges(AP_tree_nlen *node) {
45  if (!node->is_leaf()) {
46  buildSonEdges(node->get_leftson());
47  buildSonEdges(node->get_rightson());
48 
49  // to ensure the nodes contain the correct distance to the border
50  // we MUST build all son edges before creating the father edge
51 
52  new AP_tree_edge(node, node->get_leftson());
53  new AP_tree_edge(node, node->get_rightson());
54  }
55 }
56 
57 void AP_tree_edge::initialize(AP_tree_nlen *tree) {
61  while (tree->get_father()) tree = tree->get_father(); // go up to root
62  buildSonEdges(tree->get_leftson()); // link left subtree
63  buildSonEdges(tree->get_rightson()); // link right subtree
64 
65  // to ensure the nodes contain the correct distance to the border
66  // we MUST build all son edges before creating the root edge
67 
68  new AP_tree_edge(tree->get_leftson(), tree->get_rightson()); // link brothers
69 }
70 
71 void AP_tree_edge::destroy(AP_tree_nlen *tree) {
73  AP_tree_edge *edge = tree->nextEdge(NULp);
74  if (!edge) {
75  ap_assert(tree->is_root_node());
76  edge = tree->get_leftson()->edgeTo(tree->get_rightson());
77  }
78  ap_assert(edge); // got no edges?
79 
80  EdgeChain chain(edge, ANY_EDGE, false);
81  while (chain) {
82  AP_tree_edge *curr = *chain;
83  ++chain;
84  delete curr;
85  }
86 }
87 
89  ap_assert(this!=NULp);
90  ap_assert(is_linked());
91 
92  node[0]->edge[index[0]] = NULp;
93  node[1]->edge[index[1]] = NULp;
94 
95  node[0] = NULp;
96  node[1] = NULp;
97 
98  return this;
99 }
100 
101 void AP_tree_edge::relink(AP_tree_nlen *node1, AP_tree_nlen *node2) {
102  ap_assert(!is_linked());
103 
104  node[0] = node1;
105  node[1] = node2;
106 
107  node1->edge[index[0] = node1->unusedEdgeIndex()] = this;
108  node2->edge[index[1] = node2->unusedEdgeIndex()] = this;
109 
110  node1->index[index[0]] = 0;
111  node2->index[index[1]] = 1;
112 }
113 
114 size_t AP_tree_edge::buildChainInternal(EdgeSpec whichEdges, bool depthFirst, const AP_tree_nlen *skip, AP_tree_edge **&prevNextPtr) {
115  size_t added = 0;
116 
117  ap_assert(prevNextPtr);
118  ap_assert(!*prevNextPtr);
119 
120  bool descend = true;
121  bool use = true;
122 
123  if (use && (whichEdges&SKIP_UNMARKED_EDGES)) {
124  use = descend = has_marked(); // Note: root edge is chained if ANY son of root has marked children
125  }
126  if (use && (whichEdges&SKIP_FOLDED_EDGES)) {
127  // do not chain edges leading to root of group
128  // (doing an NNI there will swap branches across group-borders)
129  use = !next_to_folded_group();
130  }
131  if (use && (whichEdges&(SKIP_LEAF_EDGES|SKIP_INNER_EDGES))) {
132  use = !(whichEdges&(is_leaf_edge() ? SKIP_LEAF_EDGES : SKIP_INNER_EDGES));
133  }
134 
135  if (use && !depthFirst) {
136  *prevNextPtr = this;
137  next_in_chain = NULp;
138  prevNextPtr = &next_in_chain;
139  added++;
140  }
141  if (descend) {
142  for (int n=0; n<2; n++) {
143  if (node[n]!=skip && !node[n]->is_leaf()) {
144  for (int e=0; e<3; e++) {
145  AP_tree_edge * Edge = node[n]->edge[e];
146  if (Edge != this) {
147  descend = true;
148  if (descend && (whichEdges&SKIP_UNMARKED_EDGES)) descend = Edge->has_marked();
149  if (descend && (whichEdges&SKIP_FOLDED_EDGES)) descend = !Edge->next_to_folded_group();
150  if (descend) {
151  added += Edge->buildChainInternal(whichEdges, depthFirst, node[n], prevNextPtr);
152  }
153  }
154  }
155  }
156  }
157  }
158  if (use && depthFirst) {
159  ap_assert(!*prevNextPtr);
160 
161  *prevNextPtr = this;
162  next_in_chain = NULp;
163  prevNextPtr = &next_in_chain;
164  added++;
165  }
166 
167  return added;
168 }
169 
170 bool EdgeChain::exists = false;
171 
172 EdgeChain::EdgeChain(AP_tree_edge *startEgde, EdgeSpec whichEdges, bool depthFirst, const AP_tree_nlen *skip, bool includeStart)
173  : start(NULp),
174  curr(NULp)
175 {
184 #if defined(DEVEL_RALF)
185 # if defined(ASSERTION_USED) || defined(UNIT_TESTS)
186  if (whichEdges & SKIP_UNMARKED_EDGES) {
187  AP_tree_nlen *son = startEgde->sonNode();
188  bool flags_valid = son->has_correct_mark_flags();
189  if (flags_valid && startEgde->is_root_edge()) {
190  flags_valid = startEgde->otherNode(son)->has_correct_mark_flags();
191  }
192  if (!flags_valid) {
193  GBK_terminate("detected invalid flags while building chain");
194  }
195  }
196 # endif
197 #endif
198 
199  ap_assert(!exists); // only one existing chain is allowed!
200  exists = true;
201 
202  AP_tree_edge **prev = &start;
203 
204  len = startEgde->buildChainInternal(whichEdges, depthFirst, skip, prev);
205  if (!includeStart) {
206  if (depthFirst) {
207  // startEgde is last of chain (if included)
208  if (prev == &startEgde->next_in_chain) {
209  if (start == startEgde) { // special case: startEgde is the only edge in chain
210  ap_assert(len == 1);
211  start = NULp;
212  }
213  else {
214  // NULp all edge-link pointing to startEgde (may belong to current or older chain)
215  for (int n = 0; n<=1; ++n) {
216  AP_tree_edge *e1 = startEgde->node[n]->nextEdge(startEgde);
217  if (e1->next_in_chain == startEgde) e1->next_in_chain = NULp;
218  AP_tree_edge *e2 = startEgde->node[n]->nextEdge(e1);
219  if (e2->next_in_chain == startEgde) e2->next_in_chain = NULp;
220  }
221  }
222  --len;
223 #if defined(ASSERTION_USED)
224  {
225  size_t count = 0;
226  curr = start;
227  while (*this) {
228  ap_assert(**this != startEgde);
229  ++count;
230  ++*this;
231  }
232  ap_assert(len == count);
233  }
234 #endif
235  }
236  }
237  else {
238  // startEgde is first of chain (if included)
239  if (start == startEgde) {
240  start = start->next_in_chain;
241  --len;
242  }
243  }
244  }
245  curr = start;
246 
247  ap_assert(correlated(len, start));
248 }
249 
250 class MutationsPerSite : virtual Noncopyable {
251  char *Data;
252  size_t length;
253 
254 public:
255  MutationsPerSite(size_t len) :
256  Data(ARB_calloc<char>(len*3)),
257  length(len)
258  {}
260  free(Data);
261  }
262 
263  char *data(int block) {
264  ap_assert(block >= 0 && block<3);
265  return Data+block*length;
266  }
267  const char *data(int block) const {
268  return const_cast<MutationsPerSite*>(this)->data(block);
269  }
270 };
271 
272 static double ap_calc_bootstrap_remark_sub(int seq_len, const char *old, const char *ne) {
273  int sum[3] = { 0, 0, 0 };
274  for (int i=0; i<seq_len; i++) {
275  int diff = ne[i] - old[i];
276  if (diff > 1 || diff < -1) {
277 #if defined(DEBUG)
278  fprintf(stderr, "diff by nni at one position not in [-1,1]: %i:%i - %i", diff, old[i], ne[i]);
279 #endif // DEBUG
280  continue;
281  }
282  sum[diff+1] ++;
283  }
284 
285  double prob = 0;
286  {
287  int asum = 0;
288  for (int i=0; i<3; i++) asum += sum[i];
289 
290  double freq[3];
291  double log_freq[3];
292  for (int i=0; i<3; i++) {
293  freq[i] = sum[i] / double(asum); // relative frequencies of -1, 0, 1
294  if (sum[i] >0) {
295  log_freq[i] = log(freq[i]);
296  }
297  else {
298  log_freq[i] = -1e100; // minus infinit
299  }
300  }
301 
302  int max = seq_len; // bootstrap can select seq_len ones maximum
303  double log_fak_seq_len = GB_log_fak(seq_len);
304  double log_eps = log(1e-11);
305 
306  // loop over all delta_mutations, begin in the middle
307  for (int tsum_add = 1; tsum_add >= -1; tsum_add -= 2) {
308  int tsum = sum[2]-sum[0];
309  if (tsum <= 0) tsum = 1;
310  for (; tsum < max && tsum > 0; tsum += tsum_add) { // sum of mutations in bootstrap sample, loop over all possibilities
311  if (tsum_add < 0 && tsum == sum[2]-sum[0]) continue; // don't double count tsum
312 
313 
314 
315  int max_minus = max; // we need tsum + n_minus ones, we cannot have more than max_minux minus, reduce also
316  for (int minus_add = 1; minus_add>=-1; minus_add-=2) {
317  int first_minus = 1;
318  for (int n_minus = sum[0]; n_minus<max_minus && n_minus>=0; first_minus = 0, n_minus+=minus_add) {
319  if (minus_add < 0 && first_minus) continue; // don't double count center
320  int n_zeros = seq_len - n_minus * 2 - tsum; // number of minus
321  int n_plus = tsum + n_minus; // number of plus ones (n_ones + n_minus + n_zeros = seq_len)
322 
323  double log_a =
324  n_minus * log_freq[0] +
325  n_zeros * log_freq[1] +
326  n_plus * log_freq[2] +
327  log_fak_seq_len - GB_log_fak(n_minus) - GB_log_fak(n_zeros) - GB_log_fak(n_plus);
328 
329  if (log_a < log_eps) {
330  if (first_minus && minus_add>0) goto end;
331  break; // cannot go with so many minus, try next
332  }
333  double a = exp(log_a);
334  prob += a;
335  }
336  }
337  }
338  end :;
339  }
340  }
341  return prob;
342 }
343 
344 static void ap_calc_bootstrap_remark(AP_tree_nlen *son_node, AP_BL_MODE mode, const MutationsPerSite& mps) {
345  if (!son_node->is_leaf()) {
346  size_t seq_len = son_node->get_seq()->get_sequence_length();
347  float one = ap_calc_bootstrap_remark_sub(seq_len, mps.data(0), mps.data(1));
348  float two = ap_calc_bootstrap_remark_sub(seq_len, mps.data(0), mps.data(2));
349 
351  one = one * two; // assume independent bootstrap values for both nnis
352  }
353  else {
354  if (two<one) one = two; // dependent bootstrap values, take minimum (safe)
355  }
356 
357  double bootstrap = one<1.0 ? 100.0 * one : 100.0;
358  son_node->set_bootstrap(bootstrap);
359  son_node->get_brother()->use_as_remark(son_node->get_remark_ptr());
360  }
361 }
362 
363 const GBT_LEN AP_UNDEF_BL = 10.5;
364 
365 inline void update_undefined_leaf_branchlength(AP_tree_nlen *leaf) {
366  if (leaf->is_leaf() &&
367  leaf->get_branchlength_unrooted() == AP_UNDEF_BL)
368  {
369  // calculate the branchlength for leafs
370  AP_FLOAT Seq_len = leaf->get_seq()->weighted_base_count();
371  if (Seq_len <= 1.0) Seq_len = 1.0;
372 
373  ap_assert(leaf->is_leaf());
374 
375  Mutations parsbest = rootNode()->costs();
376 
377  ap_main->remember();
378  leaf->REMOVE();
379  Mutations mutations = parsbest - rootNode()->costs(); // number of min. mutations caused by adding 'leaf' to tree
380  ap_main->revert();
381 
382  GBT_LEN blen = mutations/Seq_len; // scale with Seq_len (=> max branchlength == 1.0)
383  leaf->set_branchlength_unrooted(blen);
384  }
385 }
386 
387 void AP_tree_edge::set_inner_branch_length_and_calc_adj_leaf_lengths(AP_FLOAT bcosts) {
388  // 'bcosts' is the number of mutations assumed at this edge
389 
391 
392  AP_tree_nlen *son = sonNode();
393  ap_assert(son->at_root()); // otherwise length calculation is unstable!
394  AP_tree_nlen *otherSon = son->get_brother();
395 
396  ap_assert(son->get_seq()->hasSequence());
397  ap_assert(otherSon->get_seq()->hasSequence());
398 
399  AP_FLOAT seq_len =
400  (son ->get_seq()->weighted_base_count() +
401  otherSon->get_seq()->weighted_base_count()
402  ) * 0.5;
403 
404  if (seq_len < 0.1) seq_len = 0.1; // avoid that branchlengths gets 'inf' for sequences w/o data
405 
406  AP_FLOAT blen = bcosts / seq_len; // branchlength := costs per bp
407 
408  son->set_branchlength_unrooted(blen);
409 
410  // calculate adjacent leaf branchlengths early
411  // (calculating them at end of nni_rec, produces much more combines)
412 
413  update_undefined_leaf_branchlength(son->get_leftson());
414  update_undefined_leaf_branchlength(son->get_rightson());
415  update_undefined_leaf_branchlength(otherSon->get_leftson());
416  update_undefined_leaf_branchlength(otherSon->get_rightson());
417 }
418 
419 #if defined(ASSERTION_USED) || defined(UNIT_TESTS)
420 bool allBranchlengthsAreDefined(AP_tree_nlen *tree) {
421  if (tree->father) {
422  if (tree->get_branchlength_unrooted() == AP_UNDEF_BL) return false;
423  }
424  if (tree->is_leaf()) return true;
425  return
426  allBranchlengthsAreDefined(tree->get_leftson()) &&
427  allBranchlengthsAreDefined(tree->get_rightson());
428 }
429 #endif
430 
431 inline void undefine_branchlengths(AP_tree_nlen *node) {
432  // undefine branchlengths of node (triggers recalculation)
433  ap_main->push_node(node, STRUCTURE); // store branchlengths for revert
434  node->leftlen = AP_UNDEF_BL;
435  node->rightlen = AP_UNDEF_BL;
436 }
437 
438 Mutations AP_tree_edge::nni_rec(EdgeSpec whichEdges, AP_BL_MODE mode, AP_tree_nlen *skipNode, bool includeStartEdge) {
439  if (!rootNode()) return Mutations(0);
440  if (rootNode()->is_leaf()) return rootNode()->costs();
441 
442  AP_tree_edge *oldRootEdge = rootEdge();
443 
444  Mutations old_parsimony = rootNode()->costs();
445  Mutations new_parsimony = old_parsimony;
446 
448 
449  bool recalc_lengths = mode & AP_BL_BL_ONLY;
450  if (recalc_lengths) {
451  ap_assert(whichEdges == ANY_EDGE);
452  }
453  else { // skip leaf edges when not calculating lengths
454  whichEdges = EdgeSpec(whichEdges|SKIP_LEAF_EDGES);
455  }
456 
457  ap_assert(implicated(includeStartEdge, this == rootEdge())); // non-subtree-NNI shall always be called with rootEdge (afaik)
458 
459  EdgeChain chain(this, whichEdges, !recalc_lengths, skipNode, includeStartEdge);
460  arb_progress progress(chain.size());
461 
462  if (recalc_lengths) { // set all branchlengths to undef
464  while (chain) {
465  AP_tree_edge *edge = *chain; ++chain;
466  undefine_branchlengths(edge->node[0]);
467  undefine_branchlengths(edge->node[1]);
468  undefine_branchlengths(edge->node[0]->get_father());
469  }
470  rootNode()->leftlen = AP_UNDEF_BL *.5;
471  rootNode()->rightlen = AP_UNDEF_BL *.5;
472  }
473 
474  chain.restart();
475  while (chain && (recalc_lengths || !progress.aborted())) { // never abort while calculating branchlengths
476  AP_tree_edge *edge = *chain; ++chain;
477 
478  if (!edge->is_leaf_edge()) {
479  AP_tree_nlen *son = edge->sonNode();
480  son->set_root();
481  if (mode & AP_BL_BOOTSTRAP_LIMIT) {
482  MutationsPerSite mps(son->get_seq()->get_sequence_length());
483  new_parsimony = edge->nni_mutPerSite(new_parsimony, mode, &mps);
484  ap_calc_bootstrap_remark(son, mode, mps);
485  }
486  else {
487  new_parsimony = edge->nni_mutPerSite(new_parsimony, mode, NULp);
488  }
489  // ap_assert(rootNode()->costs() == new_parsimony); // does not fail (but changes number of combines performed in tests)
490  }
491  progress.inc();
492  }
493 
495 
496  oldRootEdge->set_root();
497  new_parsimony = rootNode()->costs();
498 
499  return new_parsimony;
500 }
501 
503  ap_assert(!is_leaf_edge()); // avoid useless calls
504 
505  AP_tree_nlen *root = rootNode();
506  Mutations parsbest = pars_one;
507  AP_tree_nlen *son = sonNode();
508 
509  { // ******** original tree
510  if ((mode & AP_BL_BOOTSTRAP_LIMIT)) {
511  root->costs();
512  son->unhash_sequence();
513  son->get_father()->unhash_sequence();
514  ap_assert(son->is_son_of_root());
515  AP_tree_nlen *brother = son->get_brother();
516  brother->unhash_sequence();
517 
518  ap_assert(mps);
519  pars_one = root->costs(mps->data(0));
520  }
521 #if defined(ASSERTION_USED)
522  else {
523  ap_assert(pars_one != 0.0);
524  }
525 #endif
526  }
527 
528  Mutations pars_two;
529  { // ********* first nni
530  ap_main->remember();
531  son->swap_assymetric(AP_LEFT);
532  pars_two = root->costs(mps ? mps->data(1) : NULp);
533 
534  if (pars_two <= parsbest) {
536  parsbest = pars_two;
537  }
538  else {
539  ap_main->revert();
540  }
541  }
542 
543  Mutations pars_three;
544  { // ********** second nni
545  ap_main->remember();
546  son->swap_assymetric(AP_RIGHT);
547  pars_three = root->costs(mps ? mps->data(2) : NULp);
548 
549  if (pars_three <= parsbest) {
551  parsbest = pars_three;
552  }
553  else {
554  ap_main->revert();
555  }
556  }
557 
558  if (mode & AP_BL_BL_ONLY) { // ************* calculate branch lengths **************
559  GBT_LEN bcosts = (pars_one + pars_two + pars_three) - (3.0 * parsbest);
560  if (bcosts <0) bcosts = 0;
561 
562  root->costs();
563  set_inner_branch_length_and_calc_adj_leaf_lengths(bcosts);
564  }
565 
566  return
567  mode & AP_BL_NNI_ONLY
568  ? parsbest // in this case, the best topology was accepted above
569  : pars_one; // and in this case it has been reverted
570 }
571 
572 ostream& operator<<(ostream& out, const AP_tree_edge *e) {
573  static int notTooDeep;
574 
575  out << ' ';
576 
577  if (notTooDeep || !e) {
578  out << ((void*)e);
579  }
580  else {
581  notTooDeep = 1;
582  out << "AP_tree_edge(" << ((void*)e)
583  << ", node[0]=" << e->node[0]
584  << ", node[1]=" << e->node[1]
585  << ")";
586  notTooDeep = 0; // cppcheck-suppress redundantAssignment
587  }
588 
589  return out << ' ';
590 }
591 
592 void AP_tree_edge::mixTree(int repeat, int percent, EdgeSpec whichEdges) {
593  EdgeChain chain(this, EdgeSpec(SKIP_LEAF_EDGES|whichEdges), false);
594  long edges = chain.size();
595 
596  arb_progress progress(repeat*edges);
597  while (repeat-- && !progress.aborted()) {
598  chain.restart();
599  while (chain) {
600  AP_tree_nlen *son = (*chain)->sonNode();
601  ap_assert(!son->is_leaf());
602  if (percent>=100 || GB_random(100)<percent) {
603  son->swap_assymetric(GB_random(2) ? AP_LEFT : AP_RIGHT);
604  }
605  ++chain;
606  ++progress;
607  }
608  }
609 }
610 
611 // --------------------------------------------------------------------------------
612 
613 #ifdef UNIT_TESTS
614 #include <arb_defs.h>
615 #include "pars_main.hxx"
616 #include <AP_seq_dna.hxx>
617 #ifndef TEST_UNIT_H
618 #include <test_unit.h>
619 #endif
620 #include <test_env.h>
621 
622 void TEST_edgeChain() {
623  PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb");
624  TEST_EXPECT_NO_ERROR(env.load_tree("tree_test"));
625 
626  AP_tree_edge *root = rootEdge();
627  AP_tree_nlen *rootN = root->sonNode()->get_father();
628 
629  ap_assert(!rootN->father);
630  AP_tree_nlen *leftSon = rootN->get_leftson();
631  AP_tree_nlen *rightSon = rootN->get_rightson();
632 
633  const size_t ALL_EDGES = 27;
634  const size_t LEAF_EDGES = 15;
635 
636  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true).size(), ALL_EDGES);
637  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(ANY_EDGE|SKIP_INNER_EDGES), true).size(), LEAF_EDGES); // 15 leafs
638  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(SKIP_FOLDED_EDGES|SKIP_INNER_EDGES), true).size(), LEAF_EDGES-4); // 4 leafs are inside folded group
639  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(ANY_EDGE|SKIP_LEAF_EDGES), true).size(), ALL_EDGES-LEAF_EDGES);
640 
641  // skip left/right subtree
642  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true, leftSon) .size(), 9); // right subtree plus rootEdge (=lower subtree)
643  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true, rightSon).size(), 19); // left subtree plus rootEdge (=upper subtree)
644 
645  const size_t MV_RIGHT = 8;
646  const size_t MV_LEFT = 6;
647  const size_t MARKED_VIS = MV_RIGHT + MV_LEFT - 1; // root-edge only once
648 
649  const EdgeSpec MARKED_VISIBLE_EDGES = EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_FOLDED_EDGES);
650 
651  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, leftSon) .size(), MV_RIGHT); // one leaf edge is unmarked
652  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, rightSon).size(), MV_LEFT);
653  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), MARKED_VIS);
654 
655  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(MARKED_VISIBLE_EDGES|SKIP_INNER_EDGES), true).size(), 6); // 6 marked leafs
656  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(MARKED_VISIBLE_EDGES|SKIP_LEAF_EDGES), true).size(), MARKED_VIS-6);
657 
658  const size_t V_RIGHT = 9;
659  const size_t V_LEFT = 12;
660  const size_t VISIBLE = V_RIGHT + V_LEFT -1; // root-edge only once
661 
662  TEST_EXPECT_EQUAL(EdgeChain(root, SKIP_FOLDED_EDGES, true) .size(), VISIBLE);
663  TEST_EXPECT_EQUAL(EdgeChain(root, SKIP_FOLDED_EDGES, true, leftSon) .size(), V_RIGHT);
664  TEST_EXPECT_EQUAL(EdgeChain(root, SKIP_FOLDED_EDGES, true, rightSon).size(), V_LEFT);
665 
666  // test subtree-EdgeChains
667  {
668  AP_tree_edge *subtreeEdge = rightSon->edgeTo(rightSon->get_leftson()); // subtree containing CorAquat, CurCitre, CorGluta and CelBiazo
669  AP_tree_nlen *stFather = subtreeEdge->notSonNode();
670 
671  // collecting subtree-edges (by skipping father of start-edge) includes the startEdge
672  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, ANY_EDGE, true, stFather).size(), 7);
673  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_LEAF_EDGES, true, stFather).size(), 3);
674  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_INNER_EDGES, true, stFather).size(), 4);
675 
676  // collecting subtree-edges w/o startEdge
677  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, ANY_EDGE, true, stFather, false).size(), 6);
678  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_LEAF_EDGES, true, stFather, false).size(), 2);
679  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_INNER_EDGES, true, stFather, false).size(), 4);
680  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, ANY_EDGE, false, stFather, false).size(), 6);
681  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_LEAF_EDGES, false, stFather, false).size(), 2);
682  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_INNER_EDGES, false, stFather, false).size(), 4);
683 
684  subtreeEdge = leftSon->edgeTo(leftSon->get_leftson()); // subtree containing group 'test', CloInnoc and CloBifer
685  stFather = subtreeEdge->notSonNode();
686 
687  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, ANY_EDGE, true, stFather, false).size(), 10);
688  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, MARKED_VISIBLE_EDGES, false, stFather, false).size(), 0);
689  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_FOLDED_EDGES, true, stFather, false).size(), 3);
690  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, ANY_EDGE, false, stFather, true).size (), 11);
691  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, MARKED_VISIBLE_EDGES, true, stFather, true).size (), 0);
692  TEST_EXPECT_EQUAL(EdgeChain(subtreeEdge, SKIP_FOLDED_EDGES, false, stFather, true).size (), 4);
693  }
694 
695  // test group-folding at sons of root
696  {
697  // fold left subtree
698  leftSon->gr.grouped = true;
699 
700  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true) .size(), ALL_EDGES); // all edges
701  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), MV_RIGHT-1); // skips left subtree AND rootedge
702  TEST_EXPECT_EQUAL(EdgeChain(root, SKIP_FOLDED_EDGES, true) .size(), V_RIGHT-1); // skips left subtree AND rootedge
703 
704  // fold bold subtrees
705  rightSon->gr.grouped = true;
706 
707  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true) .size(), ALL_EDGES); // all edges
708  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), 0); // root edge not included (is adjacent to group)
709  TEST_EXPECT_EQUAL(EdgeChain(root, SKIP_FOLDED_EDGES, true) .size(), 0); // root edge not included (is adjacent to group)
710 
711  // fold right subtree only
712  leftSon->gr.grouped = false;
713 
714  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true) .size(), ALL_EDGES); // all edges
715  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), MV_LEFT-1); // skips right subtree AND rootedge
716  TEST_EXPECT_EQUAL(EdgeChain(root, SKIP_FOLDED_EDGES, true) .size(), V_LEFT-1); // skips right subtree AND rootedge
717 
718  // restore previous folding
719  rightSon->gr.grouped = false;
720  }
721 
722 
723  // mark only two species: CorGluta (unfolded) + CloTyro2 (folded)
724  {
725  GB_transaction ta(env.gbmain());
726  GBT_restore_marked_species(env.gbmain(), "CloTyro2;CorGluta");
727  env.compute_tree(); // species marks affect node-chain
728  }
729 
730  TEST_EXPECT_EQUAL(EdgeChain(root, ANY_EDGE, true) .size(), ALL_EDGES);
731  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), 6);
732  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(MARKED_VISIBLE_EDGES|SKIP_INNER_EDGES), true) .size(), 1); // one visible marked leaf (the other is hidden)
733  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(MARKED_VISIBLE_EDGES|SKIP_LEAF_EDGES), true) .size(), 6-1);
734  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_INNER_EDGES), true) .size(), 2); // two marked leafs
735  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, rightSon).size(), 3);
736  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, leftSon) .size(), 4);
737 
738  // test trees with marks in ONE subtree (of root) only
739  {
740  GB_transaction ta(env.gbmain());
741  GBT_restore_marked_species(env.gbmain(), "CloTyro2");
742  env.compute_tree(); // species marks affect node-chain
743  }
744  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), 3);
745  TEST_EXPECT_EQUAL(EdgeChain(root, EdgeSpec(MARKED_VISIBLE_EDGES|SKIP_INNER_EDGES), true) .size(), 0); // the only marked leaf is folded
746  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, rightSon).size(), 3);
747  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, leftSon) .size(), 1);
748 
749  {
750  GB_transaction ta(env.gbmain());
751  GBT_restore_marked_species(env.gbmain(), "CorGluta");
752  env.compute_tree(); // species marks affect node-chain
753  }
754  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true) .size(), 4);
755  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, rightSon) .size(), 1); // only root-edge
756  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, false, rightSon, false).size(), 0); // skips start-edge
757  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, rightSon, false).size(), 0); // skips start-edge
758  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true, leftSon) .size(), 4);
759 
760  // unmark all
761  {
762  GB_transaction ta(env.gbmain());
763  GBT_mark_all(env.gbmain(), 0);
764  env.compute_tree(); // species marks affect node-chain
765  }
766  TEST_EXPECT_EQUAL(EdgeChain(root, MARKED_VISIBLE_EDGES, true).size(), 0);
767 }
768 
769 void TEST_tree_flags_needed_by_EdgeChain() {
770  // EdgeChain depends on correctly set marked flags in AP_tree
771  // (i.e. on gr.mark_sum)
772  //
773  // These flags get destroyed by tree operations
774  // -> chains created after tree modifications are wrong
775  // -> optimization operates on wrong part of the tree
776  // (esp. add+NNI and NNI/KL)
777 
778  PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb");
779  TEST_EXPECT_NO_ERROR(env.load_tree("tree_test"));
780 
781  TEST_EXPECT(rootNode()->has_correct_mark_flags());
782  TEST_EXPECT(rootNode()->has_valid_root_remarks());
783 
784  // mark only two species: CorGluta (unfolded) + CloTyro2 (folded)
785  {
786  GB_transaction ta(env.gbmain());
787  GBT_restore_marked_species(env.gbmain(), "CloTyro2;CorGluta");
788  env.compute_tree(); // species marks affect order of node-chain (used in nni_rec)
789  }
790 
791  TEST_EXPECT(rootNode()->has_correct_mark_flags());
792 
793  AP_tree_nlen *CorGluta = rootNode()->findLeafNamed("CorGluta"); // marked
794  AP_tree_nlen *CelBiazo = rootNode()->findLeafNamed("CelBiazo"); // not marked (marked parent, marked brother)
795  AP_tree_nlen *CurCitre = rootNode()->findLeafNamed("CurCitre"); // not marked (unmarked parent, unmarked brother)
796  AP_tree_nlen *CloTyro2 = rootNode()->findLeafNamed("CloTyro2"); // marked, inside folded group!
797  AP_tree_nlen *CloCarni = rootNode()->findLeafNamed("CloCarni"); // in the mid of unmarked subtree of 4
798 
799  AP_tree_nlen *CurCitre_father = CurCitre->get_father();
800  AP_tree_nlen *CurCitre_grandfather = CurCitre_father->get_father();
801 
802  // test moving root
803  {
804  env.push();
805 
806  CorGluta->set_root();
807  TEST_EXPECT(rootNode()->has_correct_mark_flags());
808 
809  CelBiazo->set_root();
810  TEST_EXPECT(rootNode()->has_correct_mark_flags());
811 
812  CloTyro2->set_root();
813  TEST_EXPECT(rootNode()->has_correct_mark_flags());
814 
815  // CurCitre and its brother form an unmarked subtree
816  CurCitre->set_root();
817  TEST_EXPECT(rootNode()->has_correct_mark_flags());
818 
819  CurCitre_father->set_root();
820  TEST_EXPECT(rootNode()->has_correct_mark_flags());
821 
822  CurCitre_grandfather->set_root(); // grandfather has 1 marked child
823  TEST_EXPECT(rootNode()->has_correct_mark_flags());
824 
825  env.pop();
826  TEST_EXPECT(rootNode()->has_correct_mark_flags());
827  }
828 
829  TEST_EXPECT(rootNode()->has_valid_root_remarks());
830 
831  // test moving nodes/subtrees
832  // wontfix; acceptable because only used while adding species -> see PARS_main.cxx@flags_broken_by_moveNextTo
833  {
834  env.push();
835 
836  // move marked node into unmarked subtree of 2 species:
837  CorGluta->moveNextTo(CurCitre, 0.5);
838  TEST_EXPECT__BROKEN(rootNode()->has_correct_mark_flags());
839 
840  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
841 
842  // move unmarked subtree of two species (brother is marked)
843  CurCitre_father->moveNextTo(CelBiazo, 0.5); // move to unmarked uncle of brother
844  TEST_EXPECT__BROKEN(rootNode()->has_correct_mark_flags());
845 
846  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
847 
848  // move same subtree into unmarked subtree
849  CurCitre_father->moveNextTo(CloCarni, 0.5);
850  TEST_EXPECT__BROKEN(rootNode()->has_correct_mark_flags());
851 
852  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
853 
854  // move unmarked -> unmarked (both parents are unmarked as well)
855  CurCitre->moveNextTo(CloCarni, 0.5);
856  TEST_EXPECT(rootNode()->has_correct_mark_flags()); // works (but moving CurCitre_father doesnt)
857 
858  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
859 
860  // move marked -> marked
861  CorGluta->moveNextTo(CloTyro2, 0.5);
862  TEST_EXPECT__BROKEN(rootNode()->has_correct_mark_flags()); // subtree losts the only marked species (should unmark up to root)
863 
864  // --------------------------------------------------------------------------------
865  // now mark flags are broken -> test whether revert restores them
866  ap_assert(!rootNode()->has_correct_mark_flags());
867  rootNode()->compute_tree(); // fixes the flags (i.e. changes hidded AND marked flags)
868 
869  env.pop(); TEST_EXPECT(rootNode()->has_correct_mark_flags()); // shows that flags are correctly restored
870 
871  rootNode()->compute_tree(); // fix flags again
872  TEST_EXPECT(rootNode()->has_correct_mark_flags());
873  }
874 
875  // test swap_assymetric
876  {
877  env.push();
878 
879  rootNode()->get_leftson()->swap_assymetric(AP_LEFT);
880  TEST_EXPECT(rootNode()->has_correct_mark_flags());
881 
882  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
883 
884  CorGluta->get_father()->swap_assymetric(AP_RIGHT);
885  TEST_EXPECT(rootNode()->has_correct_mark_flags());
886 
887  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
888 
889  CorGluta->get_father()->swap_assymetric(AP_LEFT);
890  TEST_EXPECT(rootNode()->has_correct_mark_flags()); // (maybe swaps two unmarked subtrees?!)
891 
892  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
893 
894  {
895  // swap inside folded group
896  AP_tree_nlen *CloTyro2_father = CloTyro2->get_father();
897 
898  CloTyro2_father->swap_assymetric(AP_LEFT);
899  TEST_EXPECT(rootNode()->has_correct_mark_flags());
900 
901  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
902 
903  AP_tree_nlen *CloTyro2_grandfather = CloTyro2_father->get_father();
904  ap_assert(CloTyro2_grandfather->gr.grouped); // this is the group-root
905 
906  CloTyro2_grandfather->swap_assymetric(AP_LEFT);
907  TEST_EXPECT(rootNode()->has_correct_mark_flags());
908 
909  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
910 
911  CloTyro2_grandfather->swap_assymetric(AP_RIGHT); // (i guess) this swaps CloTyrob <-> CloInnoc (both unmarked)
912  TEST_EXPECT(rootNode()->has_correct_mark_flags());
913  }
914 
915  env.pop(); env.push(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
916 
917  // swap in unmarked subtree
918  CloCarni->get_father()->swap_assymetric(AP_LEFT);
919  TEST_EXPECT(rootNode()->has_correct_mark_flags());
920 
921  env.pop(); TEST_EXPECT(rootNode()->has_correct_mark_flags());
922  }
923 
924  TEST_EXPECT(rootNode()->has_valid_root_remarks());
925 }
926 
927 void TEST_undefined_branchlength() {
928  PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb");
929  TEST_EXPECT_NO_ERROR(env.load_tree("tree_test"));
930 
931  AP_tree_nlen *root = env.root_node();
932  AP_tree_nlen *CorAquat = root->findLeafNamed("CorAquat");
933  AP_tree_nlen *inner = CorAquat->get_father()->get_father();
934 
935  AP_tree_nlen *sonOfRoot = root->get_leftson();
936  ap_assert(!sonOfRoot->is_leaf());
937 
938  TEST_EXPECT(root && CorAquat && inner);
939 
940  GBT_LEN length[] = {
941  0.0,
942  0.8,
943  AP_UNDEF_BL,
944  };
945  AP_tree_nlen *nodes[] = {
946  sonOfRoot,
947  CorAquat,
948  inner,
949  };
950 
951  for (size_t i = 0; i<ARRAY_ELEMS(length); ++i) {
952  GBT_LEN testLen = length[i];
953  for (size_t n = 0; n<ARRAY_ELEMS(nodes); ++n) {
954  TEST_ANNOTATE(GBS_global_string("length=%.2f node=%zu", testLen, n));
955 
956  AP_tree_nlen *node = nodes[n];
957  GBT_LEN oldLen = node->get_branchlength_unrooted();
958 
959  node->set_branchlength_unrooted(testLen);
960  TEST_EXPECT_EQUAL(node->get_branchlength_unrooted(), testLen);
961 
962  node->set_branchlength_unrooted(oldLen);
963  TEST_EXPECT(node->get_branchlength_unrooted() == oldLen);
964  }
965  }
966 }
967 
968 #endif // UNIT_TESTS
969 
970 // --------------------------------------------------------------------------------
bool allBranchlengthsAreDefined(AP_tree_nlen *tree)
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
GB_ERROR GBT_restore_marked_species(GBDATA *gb_main, const char *stored_marked)
Definition: aditem.cxx:446
AP_BL_MODE
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
AP_tree_edge(AP_tree_nlen *node1, AP_tree_nlen *node2)
void restart()
Mutations nni_rec(EdgeSpec whichEdges, AP_BL_MODE mode, AP_tree_nlen *skipNode, bool includeStartEdge)
AP_tree_nlen * sonNode() const
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
STL namespace.
void relink(AP_tree_nlen *node1, AP_tree_nlen *node2)
ARB_edge rootEdge(TreeRoot *root)
Definition: TreeNode.h:898
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
size_t size() const
double AP_FLOAT
Definition: AP_matrix.hxx:30
AP_tree_edge * unlink()
ostream & operator<<(ostream &out, const AP_tree_edge *e)
EdgeChain(AP_tree_edge *start_, EdgeSpec whichEdges, bool depthFirst, const AP_tree_nlen *skip=NULp, bool includeStart=true)
static HelixNrInfo * start
bool push_node(AP_tree_nlen *node, AP_STACK_MODE)
Definition: AP_main.cxx:252
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
static void buildSonEdges(AP_tree_nlen *node)
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
static void destroy(AP_tree_nlen *root)
bool aborted()
Definition: arb_progress.h:335
void GBK_terminate(const char *error) __ATTR__NORETURN
Definition: arb_msg.cxx:509
#define false
Definition: ureadseq.h:13
bool is_leaf_edge() const
void accept_if(bool cond)
long Mutations
Definition: AP_sequence.hxx:99
void update_undefined_leaf_branchlength(AP_tree_nlen *leaf)
void remember()
Definition: AP_main.cxx:50
int GB_random(int range)
Definition: admath.cxx:88
#define ap_assert(cond)
double GB_log_fak(int n)
Definition: admath.cxx:21
AP_tree_nlen * rootNode()
Definition: ap_main.hxx:54
static double ap_calc_bootstrap_remark_sub(int seq_len, const char *old, const char *ne)
bool next_to_folded_group() const
AP_main * ap_main
Definition: PARS_main.cxx:65
const char * data(int block) const
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
const GBT_LEN AP_UNDEF_BL
AP_tree_nlen * otherNode(const AP_tree_nlen *n) const
friend class AP_tree_nlen
bool is_root_edge() const
void revert()
Definition: AP_main.cxx:66
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
MutationsPerSite(size_t len)
static void initialize(AP_tree_nlen *root)
float GBT_LEN
Definition: arbdb_base.h:34
#define NULp
Definition: cxxforward.h:116
bool is_leaf() const
Definition: probe_tree.h:67
static ED4_block block
Definition: ED4_block.cxx:74
EdgeSpec
Definition: pars_awars.h:57
GB_transaction ta(gb_var)
void mixTree(int repeat, int percent, EdgeSpec whichEdges)
bool has_marked() const
size_t length
static void ap_calc_bootstrap_remark(AP_tree_nlen *son_node, AP_BL_MODE mode, const MutationsPerSite &mps)
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
void undefine_branchlengths(AP_tree_nlen *node)
Mutations nni_mutPerSite(Mutations pars_one, AP_BL_MODE mode, MutationsPerSite *mps)
char * data(int block)
#define TEST_EXPECT__BROKEN(cond)
Definition: test_unit.h:1329
AP_tree_nlen * notSonNode() const
#define max(a, b)
Definition: f2c.h:154