ARB
CT_common.hxx
Go to the documentation of this file.
1 // ========================================================= //
2 // //
3 // File : CT_common.hxx //
4 // Purpose : provides common code used by public headers //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in May 20 //
7 // http://www.arb-home.de/ //
8 // //
9 // ========================================================= //
10 
11 
12 
13 #ifndef CT_COMMON_HXX
14 #define CT_COMMON_HXX
15 
16 #ifndef TREENODE_H
17 #include <TreeNode.h>
18 #endif
19 #ifndef ARB_STRARRAY_H
20 #include <arb_strarray.h>
21 #endif
22 #ifndef ARB_PROGRESS_H
23 #include <arb_progress.h>
24 #endif
25 #ifndef _GLIBCXX_MAP
26 #include <map>
27 #endif
28 #ifndef _GLIBCXX_VECTOR
29 #include <vector>
30 #endif
31 #ifndef _GLIBCXX_STRING
32 #include <string>
33 #endif
34 
35 // -----------------------
36 // SizeAwareTree
37 
38 struct SizeAwareRoot : public TreeRoot {
39  inline SizeAwareRoot();
40  inline TreeNode *makeNode() const OVERRIDE;
41  inline void destroyNode(TreeNode *node) const OVERRIDE;
42 };
43 
44 class SizeAwareTree FINAL_TYPE : public TreeNode {
45  // simple size-aware tree
46  unsigned leaf_count;
47 protected:
49  friend class SizeAwareRoot; // allowed to call dtor
50 public:
51  SizeAwareTree(TreeRoot *root) : TreeNode(root) {}
52 
53  DEFINE_TREE_RELATIVES_ACCESSORS(SizeAwareTree);
54 
55  unsigned get_leaf_count() const OVERRIDE {
56  return leaf_count;
57  }
59  if (is_leaf()) {
60  leaf_count = 1;
61  }
62  else {
63  get_leftson()->compute_tree();
64  get_rightson()->compute_tree();
65  leaf_count = get_leftson()->get_leaf_count()+get_rightson()->get_leaf_count();
66  }
67  }
68 };
69 
71 inline TreeNode *SizeAwareRoot::makeNode() const { return new SizeAwareTree(const_cast<SizeAwareRoot*>(this)); }
72 inline void SizeAwareRoot::destroyNode(TreeNode *node) const { delete DOWNCAST(SizeAwareTree*,node); }
73 
74 
75 class TreeInfo {
76  std::string Name;
77  size_t specCount;
78 
79 public:
80  explicit TreeInfo(const char *Name_, size_t specCount_)
81  : Name(Name_),
82  specCount(specCount_)
83  {}
84 
85  const char *name() const { return Name.c_str(); }
86  size_t species_count() const { return specCount; }
87 };
88 
90  // owns multiple loaded trees.
91  // maintains overall set of species occurring in these trees.
92 
93  typedef std::map<std::string, size_t> OccurCount;
94  OccurCount species_occurred;
95 
96  typedef std::vector<SizeAwareTree*> Trees;
97  Trees trees;
98  std::vector<TreeInfo> tree_info;
99 
100 public:
102  for (size_t i = 0; i<trees.size(); ++i) {
103  destroy(trees[i]);
104  }
105  }
106 
107  size_t get_tree_count() const { arb_assert(trees.size() == tree_info.size()); return tree_info.size(); }
108  bool valid_tree_index(int idx) const { return idx>=0 && size_t(idx)<get_tree_count(); }
109 
110  const TreeInfo& get_tree_info(int idx) const { arb_assert(valid_tree_index(idx)); return tree_info[idx]; }
111  const SizeAwareTree *get_tree(int idx) const { arb_assert(valid_tree_index(idx)); return trees[idx]; }
112  SizeAwareTree *take_tree(int idx) {
114  arb_assert(trees[idx]);
115  SizeAwareTree *t = trees[idx];
116  trees[idx] = NULp;
117  return t;
118  }
119 
120  void get_species_names(ConstStrArray& species_names) const {
121  // fills names into 'species_names'
122  for (OccurCount::const_iterator s = species_occurred.begin(); s != species_occurred.end(); ++s) {
123  species_names.put(s->first.c_str());
124  }
125  }
126 
127  size_t get_species_count() const { return species_occurred.size(); }
128 
129  int add(SizeAwareTree*& tree, const char *treename) {
134  tree->compute_tree();
135 
136  trees.push_back(tree);
137  int added_at_idx = trees.size()-1;
138 
139  size_t name_count;
140  GB_CSTR *names = GBT_get_names_of_species_in_tree(tree, &name_count);
141 
142  for (size_t n = 0; n<name_count; ++n) {
143  const char *name = names[n];
144  if (species_occurred.find(name) == species_occurred.end()) {
145  species_occurred[name] = 1;
146  }
147  else {
148  species_occurred[name]++;
149  }
150  }
151 
152  tree_info.push_back(TreeInfo(treename, name_count));
153  arb_assert(tree_info.size() == trees.size());
154 
155  free(names);
156  tree = NULp;
157 
158  return added_at_idx;
159  }
160 
161 };
162 
163 class PART;
164 class PartitionSize;
165 
166 class SpeciesSpace : virtual Noncopyable {
167  const CharPtrArray& names;
168 
169  GB_HASH *Name_hash;
170  PartitionSize *psize;
171  PART *allSpecies;
172 
173  void add_tree_to_PART(const TreeNode *tree, PART& part) const;
174 
175 public:
176  SpeciesSpace(const CharPtrArray& names_);
177  ~SpeciesSpace();
178 
179  int get_species_index(const char *name) const {
180  int idx = GBS_read_hash(Name_hash, name);
181  arb_assert(idx>0); // given 'name' is unknown
182  return idx-1;
183  }
184  const char *get_species_name(int idx) const { return names[idx]; }
185  const CharPtrArray& get_names() const { return names; }
186  const PartitionSize *get_PartitionSize() const { return psize; }
187  const PART *get_allSpecies() const { return allSpecies; }
188 
189  PART *create_tree_PART(const TreeNode *tree, const double& weight) const;
190 };
191 
192 class PartRegistry;
193 
195  DMODE_CONSENSUS_TREE, // inserts PARTs from partial trees twice (once with outgroup on each PARTs side)
196  DMODE_ROOTSYNC, // inserts PARTs from partial trees once (values of outgroup member are irrelevant)
197 };
198 
200  // for ConsensusTree: use one DeconstructedTree for all trees
201  // for RootSynchronizer: use one DeconstructedTree for each tree
202 
203  const SpeciesSpace& specSpace; // union of all species of all involved trees
204  PartRegistry *registry; // contains deconstructed tree(s), i.e. its/their edges
205  double overall_weight;
206 
207  PART *deconstruct_full_subtree(const TreeNode *tree, const GBT_LEN& len, const double& weight, arb_progress *insertProgress);
208  PART *deconstruct_partial_subtree(const TreeNode *tree, const GBT_LEN& len, const double& weight, const PART *partialTree, arb_progress *insertProgress);
209 
210  void deconstruct_full_rootnode(const TreeNode *tree, const double& weight, arb_progress *insertProgress);
211  void deconstruct_partial_rootnode(const TreeNode *tree, const double& weight, const PART *partialTree, arb_progress *insertProgress);
212 
213  const PART *part_with_origin(const TreeNode *node) const;
214 
215 public:
216  DeconstructedTree(const SpeciesSpace& specSpace_);
218 
219  __ATTR__USERESULT GB_ERROR deconstruct_weighted(const TreeNode *tree, const PART *wholeTree, int leafs, double weight, bool provideProgress, const PART *allSpecies, DeconstructionMode mode);
220 
221  void start_sorted_retrieval();
222 
223  size_t get_part_count() const;
224  PART *get_part();
225 
226  const PART *peek_part(int idx) const;
227  const PART *find_part(const TreeNode *node) const;
228 
229  const PART *find_innermost_part() const;
230 };
231 
232 namespace PART_FWD { // provide access to some methods of PART
233  const TreeNode *get_origin(const PART *part);
234 
235  int get_members(const PART *part);
236  void destroy_part(PART* part);
237 
238  int calcDistance(const PART *e1, const PART *e2, const PART *t1, const PART *t2);
239 };
240 
241 inline bool represents_existing_edge(const PART *p) {
242  // This function is not safe!
243  // Adding a PART twice into PartRegistry will erase the origin (see addWeightAndLength).
244  return PART_FWD::get_origin(p);
245 }
246 
247 typedef SmartCustomPtr(PART, PART_FWD::destroy_part) PART_Ptr;
248 
249 class TreeParts {
250  const SpeciesSpace& specSpace;
251  const TreeContainer& trees;
252 
253  mutable std::vector<PART_Ptr> whole_tree_parts; // cache
254 
255 public:
256  TreeParts(const SpeciesSpace& specSpace_, const TreeContainer& trees_) :
257  specSpace(specSpace_),
258  trees(trees_)
259  {
260  whole_tree_parts.resize(trees.get_tree_count());
261  }
262 
263  const PART *get_tree_PART(int idx) const {
264  arb_assert(trees.valid_tree_index(idx));
265 
266  PART_Ptr& wt = whole_tree_parts[idx];
267  if (wt.isNull()) {
268  wt = specSpace.create_tree_PART(trees.get_tree(idx), 0.0); // weight does/should not matter for wholeTree PARTs
269  }
270  arb_assert(wt.isSet());
271  return wt.content();
272  }
273 };
274 
275 
276 #else
277 #error CT_common.hxx included twice
278 #endif // CT_COMMON_HXX
279 
#define arb_assert(cond)
Definition: arb_assert.h:245
void compute_tree() OVERRIDE
Definition: CT_common.hxx:58
const PART * peek_part(int idx) const
Definition: CT_ctree.cxx:58
void put(const char *elem)
Definition: arb_strarray.h:188
return string(buffer, length)
const char * name() const
Definition: CT_common.hxx:85
const TreeNode * get_origin(const PART *part)
Definition: CT_part.cxx:523
const SizeAwareTree * get_tree(int idx) const
Definition: CT_common.hxx:111
#define DEFINE_TREE_RELATIVES_ACCESSORS(TreeType)
Definition: TreeNode.h:613
size_t get_species_count() const
Definition: CT_common.hxx:127
int get_species_index(const char *name) const
Definition: CT_common.hxx:179
const PART * find_innermost_part() const
Definition: CT_ctree.cxx:79
size_t species_count() const
Definition: CT_common.hxx:86
size_t get_tree_count() const
Definition: CT_common.hxx:107
int add(SizeAwareTree *&tree, const char *treename)
Definition: CT_common.hxx:129
void start_sorted_retrieval()
Definition: CT_ctree.cxx:55
int get_members(const PART *part)
Definition: CT_part.cxx:527
DeconstructedTree(const SpeciesSpace &specSpace_)
Definition: CT_ctree.cxx:46
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
static FullNameMap names
int calcDistance(const PART *e1, const PART *e2, const PART *t1, const PART *t2)
Definition: CT_part.cxx:510
static int weight[maxsites+1]
const TreeInfo & get_tree_info(int idx) const
Definition: CT_common.hxx:110
SpeciesSpace(const CharPtrArray &names_)
Definition: CT_ctree.cxx:16
const PART * find_part(const TreeNode *node) const
Definition: CT_ctree.cxx:71
#define true
Definition: ureadseq.h:14
TreeNode * makeNode() const OVERRIDE
Definition: CT_common.hxx:71
const CharPtrArray & get_names() const
Definition: CT_common.hxx:185
__ATTR__USERESULT GB_ERROR deconstruct_weighted(const TreeNode *tree, const PART *wholeTree, int leafs, double weight, bool provideProgress, const PART *allSpecies, DeconstructionMode mode)
Definition: CT_ctree.cxx:106
const PART * get_allSpecies() const
Definition: CT_common.hxx:187
DeconstructionMode
Definition: CT_common.hxx:194
~SizeAwareTree() OVERRIDE
Definition: CT_common.hxx:48
size_t get_part_count() const
Definition: CT_ctree.cxx:56
SizeAwareTree * take_tree(int idx)
Definition: CT_common.hxx:112
void destroyNode(TreeNode *node) const OVERRIDE
Definition: CT_common.hxx:72
void get_species_names(ConstStrArray &species_names) const
Definition: CT_common.hxx:120
Definition: CT_part.hxx:62
bool is_leaf() const
Definition: TreeNode.h:211
xml element
bool represents_existing_edge(const PART *p)
Definition: CT_common.hxx:241
const PART * get_tree_PART(int idx) const
Definition: CT_common.hxx:263
bool valid_tree_index(int idx) const
Definition: CT_common.hxx:108
#define OVERRIDE
Definition: cxxforward.h:112
#define __ATTR__USERESULT
Definition: attributes.h:58
float GBT_LEN
Definition: arbdb_base.h:34
#define NULp
Definition: cxxforward.h:116
GB_CSTR * GBT_get_names_of_species_in_tree(const TreeNode *tree, size_t *count)
Definition: adtree.cxx:1350
PART * get_part()
Definition: CT_ctree.cxx:57
PART * create_tree_PART(const TreeNode *tree, const double &weight) const
Definition: CT_dtree.cxx:174
void destroy(TreeNode *that)
Definition: TreeNode.h:600
const PartitionSize * get_PartitionSize() const
Definition: CT_common.hxx:186
TreeInfo(const char *Name_, size_t specCount_)
Definition: CT_common.hxx:80
TreeParts(const SpeciesSpace &specSpace_, const TreeContainer &trees_)
Definition: CT_common.hxx:256
const char * get_species_name(int idx) const
Definition: CT_common.hxx:184
SizeAwareTree(TreeRoot *root)
Definition: CT_common.hxx:51
void destroy_part(PART *part)
Definition: CT_part.cxx:531
typedef SmartCustomPtr(PART, PART_FWD::destroy_part) PART_Ptr
unsigned get_leaf_count() const OVERRIDE
Definition: CT_common.hxx:55
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
GB_write_int const char s
Definition: AW_awar.cxx:154