ARB
CT_ctree.hxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_ctree.hxx //
4 // Purpose : interface of CONSENSUS_TREE library //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #ifndef CT_CTREE_HXX
12 #define CT_CTREE_HXX
13 
14 #ifndef ARBTOOLS_H
15 #include <arbtools.h>
16 #endif
17 #ifndef ARBDBT_H
18 #include <arbdbt.h>
19 #endif
20 #ifndef ARB_STRARRAY_H
21 #include <arb_strarray.h>
22 #endif
23 #ifndef _GLIBCXX_MAP
24 #include <map>
25 #endif
26 #ifndef _GLIBCXX_VECTOR
27 #include <vector>
28 #endif
29 #ifndef _GLIBCXX_STRING
30 #include <string>
31 #endif
32 #ifndef TREENODE_H
33 #include <TreeNode.h>
34 #endif
35 #ifndef ARB_PROGRESS_H
36 #include <arb_progress.h>
37 #endif
38 
39 class PART;
40 struct NT_NODE;
41 class PartitionSize;
42 class PartRegistry;
43 class RB_INFO;
44 
45 // -----------------------
46 // SizeAwareTree
47 
48 struct SizeAwareRoot : public TreeRoot {
49  inline SizeAwareRoot();
50  inline TreeNode *makeNode() const OVERRIDE;
51  inline void destroyNode(TreeNode *node) const OVERRIDE;
52 };
53 
54 class SizeAwareTree FINAL_TYPE : public TreeNode {
55  // simple size-aware tree
56  unsigned leaf_count;
57 protected:
59  friend class SizeAwareRoot; // allowed to call dtor
60 public:
61  SizeAwareTree(TreeRoot *root) : TreeNode(root) {}
62  unsigned get_leaf_count() const OVERRIDE {
63  return leaf_count;
64  }
66  if (is_leaf()) {
67  leaf_count = 1;
68  }
69  else {
70  get_leftson()->compute_tree();
71  get_rightson()->compute_tree();
72  leaf_count = get_leftson()->get_leaf_count()+get_rightson()->get_leaf_count();
73  }
74  }
75 };
76 
78 inline TreeNode *SizeAwareRoot::makeNode() const { return new SizeAwareTree(const_cast<SizeAwareRoot*>(this)); }
79 inline void SizeAwareRoot::destroyNode(TreeNode *node) const { delete DOWNCAST(SizeAwareTree*,node); }
80 
81 // -----------------------
82 // ConsensusTree
83 
84 class ConsensusTree : virtual Noncopyable {
85  double overall_weight;
86  GB_HASH *Name_hash;
87  PartitionSize *size;
88  PART *allSpecies;
89  PartRegistry *registry;
90  const CharPtrArray& names;
91 
92  arb_progress *insertProgress;
93 
94  PART *deconstruct_full_subtree(const TreeNode *tree, const GBT_LEN& len, const double& weight);
95  PART *deconstruct_partial_subtree(const TreeNode *tree, const GBT_LEN& len, const double& weight, const PART *partialTree);
96 
97  void deconstruct_full_rootnode(const TreeNode *tree, const double& weight);
98  void deconstruct_partial_rootnode(const TreeNode *tree, const double& weight, const PART *partialTree);
99 
100  int get_species_index(const char *name) const {
101  int idx = GBS_read_hash(Name_hash, name);
102  arb_assert(idx>0); // given 'name' is unknown
103  return idx-1;
104  }
105 
106  const char *get_species_name(int idx) const {
107  return names[idx];
108  }
109 
110  struct RB_INFO *rbtree(const NT_NODE *tree, TreeRoot *root);
111  SizeAwareTree *rb_gettree(const NT_NODE *tree);
112 
113  void add_tree_to_PART(const TreeNode *tree, PART& part) const;
114  PART *create_tree_PART(const TreeNode *tree, const double& weight) const;
115 
116  void inc_insert_progress() { if (insertProgress) ++(*insertProgress); }
117 
118 public:
119  ConsensusTree(const CharPtrArray& names_);
120  ~ConsensusTree();
121 
122  __ATTR__USERESULT GB_ERROR insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress);
123 
124  SizeAwareTree *get_consensus_tree(GB_ERROR& error);
125 };
126 
127 // ------------------------------
128 // ConsensusTreeBuilder
129 
130 class TreeInfo {
131  std::string Name;
132  size_t specCount;
133 
134 public:
135  explicit TreeInfo(const char *Name_, size_t specCount_)
136  : Name(Name_),
137  specCount(specCount_)
138  {}
139 
140  const char *name() const { return Name.c_str(); }
141  size_t species_count() const { return specCount; }
142 };
143 
145  // wrapper for ConsensusTree
146  // - automatically collects species occurring in added trees (has to be done by caller of ConsensusTree)
147  // - uses much more memory than using ConsensusTree directly, since it stores all added trees
148  //
149  // Not helpful for consensing thousands of trees like bootstrapping does
150 
151  typedef std::map<std::string, size_t> OccurCount;
152  typedef std::vector<SizeAwareTree*> Trees;
153  typedef std::vector<double> Weights;
154 
155  OccurCount species_occurred;
156  Trees trees;
157  Weights weights;
158 
159  std::vector<TreeInfo> tree_info;
160 
161 public:
163  for (size_t i = 0; i<trees.size(); ++i) {
164  destroy(trees[i]);
165  }
166  }
167 
168  void add(SizeAwareTree*& tree, const char *treename, double weight) {
169  tree->get_tree_root()->find_innermost_edge().set_root();
170 
171  // (currently) reordering trees before deconstructing no longer
172  // affects the resulting consense tree (as performed as unit tests).
173  //
174  // tree->reorder_tree(BIG_BRANCHES_TO_TOP);
175  // tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM);
176  // tree->reorder_tree(BIG_BRANCHES_TO_EDGE);
177 
178  trees.push_back(tree);
179  weights.push_back(weight);
180 
181  size_t name_count;
182  GB_CSTR *names = GBT_get_names_of_species_in_tree(tree, &name_count);
183 
184  for (size_t n = 0; n<name_count; ++n) {
185  const char *name = names[n];
186  if (species_occurred.find(name) == species_occurred.end()) {
187  species_occurred[name] = 1;
188  }
189  else {
190  species_occurred[name]++;
191  }
192  }
193 
194  tree_info.push_back(TreeInfo(treename, name_count));
195 
196  free(names);
197  tree = NULp;
198  }
199 
200  SizeAwareTree *get(size_t& different_species, GB_ERROR& error) {
201  arb_assert(!error);
202 
203  ConstStrArray species_names;
204  for (OccurCount::iterator s = species_occurred.begin(); s != species_occurred.end(); ++s) {
205  species_names.put(s->first.c_str());
206  }
207 
208  different_species = species_occurred.size();
209  ConsensusTree ctree(species_names);
210  {
211  arb_progress deconstruct("deconstructing", trees.size());
212 
213  for (size_t i = 0; i<trees.size() && !error; ++i) {
214  error = ctree.insert_tree_weighted(trees[i], tree_info[i].species_count(), weights[i], true);
215  ++deconstruct;
216  if (error) {
217  error = GBS_global_string("Failed to deconstruct '%s' (Reason: %s)", tree_info[i].name(), error);
218  }
219  else {
220  error = deconstruct.error_if_aborted();
221  }
222  }
223  if (error) deconstruct.done();
224  }
225 
226  if (error) return NULp;
227 
228 #if defined(DEBUG)
229  // if class ConsensusTree does depend on any local data,
230  // instanciating another instance will interfere:
231  ConsensusTree influence(species_names);
232 #endif
233 
234  arb_progress reconstruct("reconstructing");
235  return ctree.get_consensus_tree(error);
236  }
237 
238  char *get_tree_remark() const;
239  size_t species_count() const { return species_occurred.size(); }
240 };
241 
242 
243 #else
244 #error CT_ctree.hxx included twice
245 #endif // CT_CTREE_HXX
#define arb_assert(cond)
Definition: arb_assert.h:245
void compute_tree() OVERRIDE
Definition: CT_ctree.hxx:65
SizeAwareTree * get_consensus_tree(GB_ERROR &error)
Definition: CT_ctree.cxx:85
void put(const char *elem)
Definition: arb_strarray.h:199
return string(buffer, length)
const char * name() const
Definition: CT_ctree.hxx:140
ConsensusTree(const CharPtrArray &names_)
Definition: CT_ctree.cxx:16
char * get_tree_remark() const
Definition: CT_ctree.cxx:145
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
size_t species_count() const
Definition: CT_ctree.hxx:141
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
static FullNameMap names
static int weight[maxsites+1]
__ATTR__USERESULT GB_ERROR insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress)
Definition: CT_ctree.cxx:49
#define true
Definition: ureadseq.h:14
size_t species_count() const
Definition: CT_ctree.hxx:239
TreeNode * makeNode() const OVERRIDE
Definition: CT_ctree.hxx:78
void add(SizeAwareTree *&tree, const char *treename, double weight)
Definition: CT_ctree.hxx:168
static void error(const char *msg)
Definition: mkptypes.cxx:96
~SizeAwareTree() OVERRIDE
Definition: CT_ctree.hxx:58
void destroyNode(TreeNode *node) const OVERRIDE
Definition: CT_ctree.hxx:79
Definition: CT_part.hxx:62
bool is_leaf() const
Definition: TreeNode.h:171
xml element
#define OVERRIDE
Definition: cxxforward.h:93
#define __ATTR__USERESULT
Definition: attributes.h:58
float GBT_LEN
Definition: arbdb_base.h:34
#define NULp
Definition: cxxforward.h:97
GB_CSTR * GBT_get_names_of_species_in_tree(const TreeNode *tree, size_t *count)
Definition: adtree.cxx:1291
void destroy(TreeNode *that)
Definition: TreeNode.h:559
TreeInfo(const char *Name_, size_t specCount_)
Definition: CT_ctree.hxx:135
SizeAwareTree(TreeRoot *root)
Definition: CT_ctree.hxx:61
unsigned get_leaf_count() const OVERRIDE
Definition: CT_ctree.hxx:62
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
GB_write_int const char s
Definition: AW_awar.cxx:156