ARB
CT_rbtree.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_rbtree.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 // Reconstruct GBT-tree from Ntree
12 
13 #include "CT_ntree.hxx"
14 #include "CT_ctree.hxx"
15 #include <arb_sort.h>
16 
17 
18 struct RB_INFO {
21  int percent; // branch probability [0..100]
22 };
23 
24 static int GBT_TREE_order(const TreeNode *t1, const TreeNode *t2) {
25  // define a strict order on trees
26 
27  int cmp = t1->is_leaf() - t2->is_leaf(); // leafs first
28  if (!cmp) {
29  GBT_LEN l1 = t1->leftlen+t1->rightlen;
30  GBT_LEN l2 = t2->leftlen+t2->rightlen;
31 
32  cmp = double_cmp(l1, l2); // NOW REALLY insert smaller len first
33  // (change had no effect on test results)
34  if (!cmp) {
35  if (t1->is_leaf()) {
36  cmp = strcmp(t1->name, t2->name);
37  }
38  else {
39  int cll = GBT_TREE_order(t1->get_leftson(), t2->get_leftson());
40  int clr = GBT_TREE_order(t1->get_leftson(), t2->get_rightson());
41  int crl = GBT_TREE_order(t1->get_rightson(), t2->get_leftson());
42  int crr = GBT_TREE_order(t1->get_rightson(), t2->get_rightson());
43 
44  cmp = cll+clr+crl+crr;
45  if (!cmp) {
46  cmp = cll;
47  arb_assert(cmp); // order not strict enough
48  }
49  }
50  }
51  }
52  return cmp;
53 }
54 
55 static int RB_INFO_order(const void *v1, const void *v2, void *) {
56  // defines a strict order on RB_INFOs
57 
58  const RB_INFO *i1 = (const RB_INFO *)v1;
59  const RB_INFO *i2 = (const RB_INFO *)v2;
60 
61  int cmp = i1->percent - i2->percent; // insert more probable branches first
62 
63  if (!cmp) {
64  cmp = double_cmp(i1->len, i2->len); // NOW REALLY insert smaller len first
65  // (change slightly affected test results in "weak" tree-parts)
66  if (!cmp) {
67  cmp = GBT_TREE_order(i1->node, i2->node);
68  }
69  }
70  return cmp;
71 }
72 
73 RB_INFO *ConsensusTree::rbtree(const NT_NODE *tree, TreeRoot *root) {
74  // doing all the work for rb_gettree() :-)
75  // convert a Ntree into a GBT-Tree
76 
77  TreeNode *tnode = root->makeNode();
78  tnode->father = NULp;
79 
80  RB_INFO *info = ARB_calloc<RB_INFO>(1);
81  info->node = tnode; // return-information
82  info->percent = int(tree->part->get_weight()*100.0+.5);
83  info->len = tree->part->get_len();
84 
85  NSONS *nsonp = tree->son_list;
86  if (!nsonp) { // if node is leaf
87  int idx = tree->part->index();
88 
89  tnode->name = ARB_strdup(get_species_name(idx));
90  tnode->markAsLeaf();
91  }
92  else {
93  arb_assert(!tnode->is_leaf());
94  arb_assert(!tnode->get_remark());
95  if (info->percent < 100) {
96  tnode->set_bootstrap(info->percent);
97  }
98 
99  int multifurc = ntree_count_sons(tree);
100  RB_INFO *son_info[multifurc];
101 
102  {
103  int sidx = 0;
104  while (nsonp) {
105  son_info[sidx++] = rbtree(nsonp->node, root);
106  nsonp = nsonp->next;
107  }
108 
109  GB_sort((void**)son_info, 0, sidx, RB_INFO_order, NULp); // bring sons into strict order
110  }
111 
112  while (multifurc>1) {
113  int didx = 0;
114  for (int sidx1 = 0; sidx1<multifurc; sidx1 += 2) {
115  int sidx2 = sidx1+1;
116  if (sidx2<multifurc) {
117  TreeNode *mf;
118  RB_INFO *sinfo;
119 
120  if (multifurc > 2) {
121  mf = root->makeNode();
122  ARB_calloc(sinfo, 1);
123 
124  mf->father = NULp;
125 
126  sinfo->percent = 0; // branch never really occurs (artificial multifurcation in binary tree)
127  sinfo->len = 0.0;
128  sinfo->node = mf;
129  }
130  else { // last step
131  mf = tnode;
132  sinfo = info;
133  }
134 
135  mf->leftson = son_info[sidx1]->node;
136  mf->leftlen = son_info[sidx1]->len;
137 
138  mf->rightson = son_info[sidx2]->node;
139  mf->rightlen = son_info[sidx2]->len;
140 
141  mf->leftson->father = mf;
142  mf->rightson->father = mf;
143 
144  freenull(son_info[sidx1]);
145  freenull(son_info[sidx2]);
146 
147  son_info[didx++] = sinfo;
148  }
149  else {
150  son_info[didx++] = son_info[sidx1];
151  }
152  }
153  multifurc = didx;
154  }
155 
156  arb_assert(multifurc == 1);
157  arb_assert(son_info[0] == info);
158  arb_assert(!info->node->father);
159  }
160  return info;
161 }
162 
163 
164 SizeAwareTree *ConsensusTree::rb_gettree(const NT_NODE *tree) {
165  // reconstruct GBT Tree from Ntree. Ntree is not destructed afterwards!
166  RB_INFO *info = rbtree(tree, new SizeAwareRoot);
167  SizeAwareTree *satree = DOWNCAST(SizeAwareTree*, info->node);
168  satree->announce_tree_constructed();
169  ASSERT_VALID_TREE(satree);
170  free(info);
171  return satree;
172 }
void set_bootstrap(double bootstrap)
Definition: TreeNode.h:323
#define arb_assert(cond)
Definition: arb_assert.h:245
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
int ntree_count_sons(const NT_NODE *tree)
Definition: CT_ntree.cxx:75
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
static int RB_INFO_order(const void *v1, const void *v2, void *)
Definition: CT_rbtree.cxx:55
GBT_LEN leftlen
Definition: TreeNode.h:172
TreeNode * rightson
Definition: TreeNode.h:171
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
NT_NODE * node
Definition: CT_ntree.hxx:27
PART * part
Definition: CT_ntree.hxx:21
virtual TreeNode * makeNode() const =0
int index() const
Definition: CT_part.cxx:306
double get_weight() const
Definition: CT_part.hxx:179
TreeNode * node
Definition: CT_rbtree.cxx:20
TreeNode * father
Definition: TreeNode.h:171
NSONS * son_list
Definition: CT_ntree.hxx:22
#define cmp(h1, h2)
Definition: admap.cxx:50
TreeNode * leftson
Definition: TreeNode.h:171
GBT_LEN rightlen
Definition: TreeNode.h:172
bool is_leaf() const
Definition: TreeNode.h:211
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
char * name
Definition: TreeNode.h:174
float GBT_LEN
Definition: arbdb_base.h:34
#define NULp
Definition: cxxforward.h:116
GBT_LEN len
Definition: CT_rbtree.cxx:19
void markAsLeaf()
Definition: TreeNode.h:212
int percent
Definition: CT_rbtree.cxx:21
NSONS * next
Definition: CT_ntree.hxx:28
#define ASSERT_VALID_TREE(tree)
Definition: TreeNode.h:647
GBT_LEN get_len() const
Definition: CT_part.hxx:171
static int GBT_TREE_order(const TreeNode *t1, const TreeNode *t2)
Definition: CT_rbtree.cxx:24
const char * get_remark() const
Definition: TreeNode.h:307
static int info[maxsites+1]
CONSTEXPR_INLINE int double_cmp(const double d1, const double d2)
Definition: arbtools.h:185
const char * get_species_name(int idx) const
Definition: CT_common.hxx:184