ARB
CT_ctree.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_ctree.cxx //
4 // Purpose : consensus tree //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #include "CT_ctree.hxx"
12 #include "CT_hash.hxx"
13 #include "CT_ntree.hxx"
14 #include <arb_strbuf.h>
15 
17  names(names_),
18  Name_hash(NULp),
19  psize(NULp)
20 {
21  // names = leafnames (=species names)
22 
23  int leaf_count = names.size();
24  Name_hash = GBS_create_hash(leaf_count, GB_MIND_CASE);
25  for (int i=0; i< leaf_count; i++) {
26  GBS_write_hash(Name_hash, names[i], i+1);
27  }
28 
29  psize = new PartitionSize(leaf_count);
30  allSpecies = psize->create_root();
31 }
32 
34  delete allSpecies;
35  delete psize;
36 
37  if (Name_hash) {
38  GBS_free_hash(Name_hash);
39  Name_hash = NULp;
40  }
41 }
42 
43 // ---------------------------
44 // DeconstructedTree
45 
47  specSpace(specSpace_),
48  overall_weight(0)
49 {
50  registry = new PartRegistry;
51 }
53  delete registry;
54 }
55 void DeconstructedTree::start_sorted_retrieval() { registry->build_sorted_list(overall_weight); }
56 size_t DeconstructedTree::get_part_count() const { return registry->size(); }
57 PART *DeconstructedTree::get_part() { return registry->get_part(); }
58 const PART *DeconstructedTree::peek_part(int idx) const { return registry->peek_part(idx); }
59 
60 const PART *DeconstructedTree::part_with_origin(const TreeNode *node) const {
61  arb_assert(node);
62  size_t parts = get_part_count();
63  for (size_t i = 0; i<parts; ++i) {
64  const PART *p = peek_part(i);
65  if (p->get_origin() == node) {
66  return p;
67  }
68  }
69  return NULp;
70 }
71 const PART *DeconstructedTree::find_part(const TreeNode *node) const {
72  const PART *found = part_with_origin(node);
73  if (!found && node->is_son_of_root()) {
74  found = part_with_origin(node->get_brother());
75  }
76  return found;
77 }
78 
80  int best_dist = INT_MAX;
81  const PART *best_part = NULp;
82 
83  size_t parts = get_part_count();
84  for (size_t p = 0; p<parts; ++p) {
85  const PART *part = peek_part(p);
86  int dist = part->distance_to_tree_center();
87 
88  if (dist<best_dist) {
89  best_dist = dist;
90  best_part = part;
91  }
92  }
93 
94  arb_assert(best_part);
95  return best_part;
96 }
97 // -----------------------
98 // ConsensusTree
99 
101  SpeciesSpace(names_),
102  DeconstructedTree(static_cast<const SpeciesSpace&>(*this))
103 {
104 }
105 
106 GB_ERROR DeconstructedTree::deconstruct_weighted(const TreeNode *tree, const PART *wholeTree, int leafs, double weight, bool provideProgress, const PART *allSpecies, DeconstructionMode mode) {
107  // inserts a tree into the PartRegistry.
108  arb_assert(GBT_count_leafs(tree) == size_t(leafs));
109 
110  overall_weight += weight;
111 
112  if (wholeTree->get_members() != leafs) {
113  arb_assert(wholeTree->get_members() < leafs);
114  return "tree contains duplicated species";
115  }
116 
117  arb_progress *insertProgress = NULp;
118  if (provideProgress) {
119  long nodeCount = leafs_2_nodes(wholeTree->get_members(), ROOTED);
120  insertProgress = new arb_progress(nodeCount);
121  }
122 
123  bool contains_all_species = wholeTree->equals(allSpecies);
124  bool handle_partial_as_full = mode == DMODE_ROOTSYNC;
125 
126  if (contains_all_species || handle_partial_as_full) {
127  deconstruct_full_rootnode(tree, weight, insertProgress);
128  }
129  else {
130  deconstruct_partial_rootnode(tree, weight, wholeTree, insertProgress);
131  }
132 
133  GB_ERROR error = NULp;
134  if (provideProgress) {
135  error = insertProgress->error_if_aborted();
136 
137  delete insertProgress;
138  insertProgress = NULp;
139  }
140  arb_assert(!insertProgress);
141 
142  return error;
143 }
144 
145 GB_ERROR ConsensusTree::insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress) {
146  PART_Ptr wholeTree(create_tree_PART(tree, 0.0)); // weight does not matter here
147  return deconstruct_weighted(tree, &*wholeTree, leafs, weight, provideProgress, get_allSpecies(), DMODE_CONSENSUS_TREE);
148 }
149 
151  // Get new consensus-tree -> SizeAwareTree
152 
153  /* This function is little bit tricky:
154  the root-partition consist of 111....111 so it must have two sons
155  that represent the same partition son1 == ~son2 to do this we must split
156  the fist son-partition in two parts through logical calculation there
157  could only be one son! */
158 
159  arb_assert(!error);
161 
163 
164  const size_t steps = get_part_count()+2;
165  const size_t weighted_steps = triangular_number(steps);
166  arb_progress progress(weighted_steps);
167  size_t inserted = 0;
168  {
169 #if defined(DUMP_PART_INSERTION)
170  PART::start_pretty_printing(get_names());
171 #endif
172 
173  PART *p = get_part();
174  while (p && !progress.aborted()) {
175  insert_ntree(p);
176  progress.inc_by(++inserted);
177  p = get_part();
178  }
179 
180 #if defined(DUMP_PART_INSERTION)
181  PART::stop_pretty_printing();
182 #endif
183  }
184 
185  SizeAwareTree *result_tree = NULp;
186 
187  error = progress.error_if_aborted();
188  if (!error) {
189  const NT_NODE *n = ntree_get();
190 
191  arb_assert(ntree_count_sons(n) == 2);
192 
193 #if defined(NTREE_DEBUG_FUNCTIONS)
194  arb_assert(is_well_formed(n));
195 #endif
196 
197  result_tree = rb_gettree(n);
198 
199  progress.inc_by(++inserted);
200 
201  result_tree->get_tree_root()->find_innermost_edge().set_root();
202  result_tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM);
203 
204  progress.inc_by(++inserted);
205  }
206 
207  ntree_cleanup();
208  arb_assert(contradicted(result_tree, error));
209  return result_tree;
210 }
211 
212 
214  GBS_strstruct remark(1000);
215  {
216  char *build_info = GBS_global_string_copy("ARB consensus tree build from %zu trees:", get_tree_count());
217  char *dated = GBS_log_action_to(NULp, build_info, true);
218  remark.cat(dated);
219  free(dated);
220  free(build_info);
221  }
222 
223  size_t allCount = get_species_count();
224 
225  size_t maxnamelen = 0;
226  for (size_t t = 0; t<get_tree_count(); ++t) {
227  maxnamelen = std::max(maxnamelen, strlen(get_tree_info(t).name()));
228  }
229  for (size_t t = 0; t<get_tree_count(); ++t) {
230  const TreeInfo& tree = get_tree_info(t);
231  remark.cat(" - ");
232  remark.nprintf(maxnamelen, "%-*s", int(maxnamelen), tree.name());
233  if (tree.species_count()<allCount) {
234  double pc = tree.species_count() / double(allCount);
235  remark.nprintf(50, " (%.1f%%; %zu/%zu)", pc*100.0, tree.species_count(), allCount);
236  }
237  remark.put('\n');
238  }
239 
240  return remark.release();
241 }
242 
243 
244 
Definition: arbdbt.h:48
#define arb_assert(cond)
Definition: arb_assert.h:245
void ntree_cleanup()
Definition: CT_ntree.cxx:62
const PART * peek_part(int idx) const
Definition: CT_ctree.cxx:58
size_t GBT_count_leafs(const TreeNode *tree)
Definition: adtree.cxx:842
SizeAwareTree * get_consensus_tree(GB_ERROR &error)
Definition: CT_ctree.cxx:150
size_t size() const
Definition: arb_strarray.h:85
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
const char * name() const
Definition: CT_common.hxx:85
int get_members() const
Definition: CT_part.hxx:225
const NT_NODE * ntree_get()
Definition: CT_ntree.cxx:19
ConsensusTree(const CharPtrArray &names_)
Definition: CT_ctree.cxx:100
char * get_tree_remark() const
Definition: CT_ctree.cxx:213
size_t get_species_count() const
Definition: CT_common.hxx:127
int ntree_count_sons(const NT_NODE *tree)
Definition: CT_ntree.cxx:75
const PART * find_innermost_part() const
Definition: CT_ctree.cxx:79
char * release()
Definition: arb_strbuf.h:129
size_t species_count() const
Definition: CT_common.hxx:86
size_t get_tree_count() const
Definition: CT_common.hxx:107
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
void cat(const char *from)
Definition: arb_strbuf.h:199
void start_sorted_retrieval()
Definition: CT_ctree.cxx:55
CONSTEXPR_INLINE long triangular_number(const long N)
Definition: triangular.h:18
DeconstructedTree(const SpeciesSpace &specSpace_)
Definition: CT_ctree.cxx:46
GB_ERROR error_if_aborted()
Definition: arb_progress.h:323
static FullNameMap names
CONSTEXPR_INLINE int leafs_2_nodes(int leafs, TreeModel model)
Definition: arbdbt.h:53
const PART * peek_part(int idx) const
Definition: CT_hash.cxx:38
static int weight[maxsites+1]
const TreeInfo & get_tree_info(int idx) const
Definition: CT_common.hxx:110
__ATTR__USERESULT GB_ERROR insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress)
Definition: CT_ctree.cxx:145
bool is_son_of_root() const
Definition: TreeNode.h:255
PART * get_part()
Definition: CT_hash.cxx:24
void ntree_init(const PartitionSize *registry)
Definition: CT_ntree.cxx:55
SpeciesSpace(const CharPtrArray &names_)
Definition: CT_ctree.cxx:16
const PART * find_part(const TreeNode *node) const
Definition: CT_ctree.cxx:71
bool aborted()
Definition: arb_progress.h:335
void build_sorted_list(double overall_weight)
Definition: CT_hash.cxx:143
static void error(const char *msg)
Definition: mkptypes.cxx:96
const CharPtrArray & get_names() const
Definition: CT_common.hxx:185
PART * create_root() const
Definition: CT_part.cxx:124
__ATTR__USERESULT GB_ERROR deconstruct_weighted(const TreeNode *tree, const PART *wholeTree, int leafs, double weight, bool provideProgress, const PART *allSpecies, DeconstructionMode mode)
Definition: CT_ctree.cxx:106
const PART * get_allSpecies() const
Definition: CT_common.hxx:187
DeconstructionMode
Definition: CT_common.hxx:194
TreeNode * get_brother()
Definition: TreeNode.h:435
size_t get_part_count() const
Definition: CT_ctree.cxx:56
bool equals(const PART *other) const
Definition: CT_part.cxx:208
char * GBS_log_action_to(const char *comment, const char *action, bool stamp)
Definition: adstring.cxx:976
size_t size() const
Definition: CT_hash.hxx:59
Definition: CT_part.hxx:62
void nprintf(size_t maxlen, const char *templat,...) __ATTR__FORMAT_MEMBER(2)
Definition: arb_strbuf.cxx:29
const TreeNode * get_origin() const
Definition: CT_part.hxx:129
void insert_ntree(PART *&part)
Definition: CT_ntree.cxx:201
#define NULp
Definition: cxxforward.h:116
int distance_to_tree_center() const
Definition: CT_part.hxx:229
PART * get_part()
Definition: CT_ctree.cxx:57
PART * create_tree_PART(const TreeNode *tree, const double &weight) const
Definition: CT_dtree.cxx:174
void inc_by(PINT count)
Definition: arb_progress.h:361
const PartitionSize * get_PartitionSize() const
Definition: CT_common.hxx:186
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
void put(char c)
Definition: arb_strbuf.h:174
#define max(a, b)
Definition: f2c.h:154