ARB
CT_ctree.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_ctree.cxx //
4 // Purpose : consensus tree //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #include "CT_ctree.hxx"
12 #include "CT_hash.hxx"
13 #include "CT_ntree.hxx"
14 #include <arb_strbuf.h>
15 
17  : overall_weight(0),
18  Name_hash(NULp),
19  size(NULp),
20  names(names_),
21  insertProgress(NULp)
22 {
23  // names = leafnames (=species names)
24 
25  int leaf_count = names.size();
26  Name_hash = GBS_create_hash(leaf_count, GB_MIND_CASE);
27  for (int i=0; i< leaf_count; i++) {
28  GBS_write_hash(Name_hash, names[i], i+1);
29  }
30 
31  size = new PartitionSize(leaf_count);
32  allSpecies = size->create_root();
33  registry = new PartRegistry;
34 }
35 
37  arb_assert(!insertProgress);
38 
39  delete registry;
40  delete allSpecies;
41  delete size;
42 
43  if (Name_hash) {
44  GBS_free_hash(Name_hash);
45  Name_hash = NULp;
46  }
47 }
48 
49 GB_ERROR ConsensusTree::insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress) {
50  // Insert a GBT-tree in the Hash-Table
51  // The GBT-tree is destructed afterwards!
52  arb_assert(GBT_count_leafs(tree) == size_t(leafs));
53 
54  overall_weight += weight;
55 
56  PART *wholeTree = create_tree_PART(tree, weight);
57  if (wholeTree->get_members() != leafs) {
58  arb_assert(wholeTree->get_members() < leafs);
59  return "tree contains duplicated species";
60  }
61 
62  if (provideProgress) {
63  long nodeCount = leafs_2_nodes(wholeTree->get_members(), ROOTED);
64  insertProgress = new arb_progress(nodeCount);
65  }
66 
67  bool contains_all_species = wholeTree->equals(allSpecies);
68 
69  if (contains_all_species) {
70  deconstruct_full_rootnode(tree, weight);
71  }
72  else {
73  deconstruct_partial_rootnode(tree, weight, wholeTree);
74  }
75 
76  if (provideProgress) {
77  delete insertProgress;
78  insertProgress = NULp;
79  }
80 
81  delete wholeTree;
82  return NULp;
83 }
84 
86  // Get new consensus-tree -> SizeAwareTree
87 
88  /* This function is little bit tricky:
89  the root-partition consist of 111....111 so it must have two sons
90  that represent the same partition son1 == ~son2 to do this we must split
91  the fist son-partition in two parts through logical calculation there
92  could only be one son! */
93 
94  arb_assert(!error);
95  ntree_init(size);
96 
97  registry->build_sorted_list(overall_weight);
98 
99  arb_progress progress(registry->size()+2);
100  {
101 #if defined(DUMP_PART_INSERTION)
102  PART::start_pretty_printing(names);
103 #endif
104 
105  PART *p = registry->get_part();
106  while (p && !progress.aborted()) {
107  insert_ntree(p);
108  ++progress;
109  p = registry->get_part();
110  }
111 
112 #if defined(DUMP_PART_INSERTION)
113  PART::stop_pretty_printing();
114 #endif
115  }
116 
117  SizeAwareTree *result_tree = NULp;
118 
119  error = progress.error_if_aborted();
120  if (!error) {
121  const NT_NODE *n = ntree_get();
122 
123  arb_assert(ntree_count_sons(n) == 2);
124 
125 #if defined(NTREE_DEBUG_FUNCTIONS)
126  arb_assert(is_well_formed(n));
127 #endif
128 
129  result_tree = rb_gettree(n);
130 
131  ++progress;
132 
133  result_tree->get_tree_root()->find_innermost_edge().set_root();
134  result_tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM);
135 
136  ++progress;
137  }
138 
139  ntree_cleanup();
140  arb_assert(contradicted(result_tree, error));
141  return result_tree;
142 }
143 
144 
146  GBS_strstruct remark(1000);
147  {
148  char *build_info = GBS_global_string_copy("ARB consensus tree build from %zu trees:", tree_info.size());
149  char *dated = GBS_log_action_to(NULp, build_info, true);
150  remark.cat(dated);
151  free(dated);
152  free(build_info);
153  }
154 
155  size_t allCount = species_count();
156 
157  size_t maxnamelen = 0;
158  for (size_t t = 0; t<tree_info.size(); ++t) {
159  maxnamelen = std::max(maxnamelen, strlen(tree_info[t].name()));
160  }
161  for (size_t t = 0; t<tree_info.size(); ++t) {
162  const TreeInfo& tree = tree_info[t];
163  remark.cat(" - ");
164  remark.nprintf(maxnamelen, "%-*s", int(maxnamelen), tree.name());
165  if (tree.species_count()<allCount) {
166  double pc = tree.species_count() / double(allCount);
167  remark.nprintf(50, " (%.1f%%; %zu/%zu)", pc*100.0, tree.species_count(), allCount);
168  }
169  remark.put('\n');
170  }
171 
172  return remark.release();
173 }
Definition: arbdbt.h:48
#define arb_assert(cond)
Definition: arb_assert.h:245
void ntree_cleanup()
Definition: CT_ntree.cxx:62
size_t GBT_count_leafs(const TreeNode *tree)
Definition: adtree.cxx:796
SizeAwareTree * get_consensus_tree(GB_ERROR &error)
Definition: CT_ctree.cxx:85
size_t size() const
Definition: arb_strarray.h:85
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
const char * name() const
Definition: CT_ctree.hxx:140
int get_members() const
Definition: CT_part.hxx:211
const NT_NODE * ntree_get()
Definition: CT_ntree.cxx:19
ConsensusTree(const CharPtrArray &names_)
Definition: CT_ctree.cxx:16
char * get_tree_remark() const
Definition: CT_ctree.cxx:145
int ntree_count_sons(const NT_NODE *tree)
Definition: CT_ntree.cxx:75
char * release()
Definition: arb_strbuf.h:80
size_t species_count() const
Definition: CT_ctree.hxx:141
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
void cat(const char *from)
Definition: arb_strbuf.h:156
static FullNameMap names
CONSTEXPR_INLINE int leafs_2_nodes(int leafs, TreeModel model)
Definition: arbdbt.h:53
static int weight[maxsites+1]
__ATTR__USERESULT GB_ERROR insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress)
Definition: CT_ctree.cxx:49
PART * get_part()
Definition: CT_hash.cxx:24
void ntree_init(const PartitionSize *registry)
Definition: CT_ntree.cxx:55
void build_sorted_list(double overall_weight)
Definition: CT_hash.cxx:134
size_t species_count() const
Definition: CT_ctree.hxx:239
static void error(const char *msg)
Definition: mkptypes.cxx:96
PART * create_root() const
Definition: CT_part.cxx:125
bool equals(const PART *other) const
Definition: CT_part.cxx:209
char * GBS_log_action_to(const char *comment, const char *action, bool stamp)
Definition: adstring.cxx:990
size_t size() const
Definition: CT_hash.hxx:58
Definition: CT_part.hxx:62
void nprintf(size_t maxlen, const char *templat,...) __ATTR__FORMAT_MEMBER(2)
Definition: arb_strbuf.cxx:29
void insert_ntree(PART *&part)
Definition: CT_ntree.cxx:201
#define NULp
Definition: cxxforward.h:97
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
void put(char c)
Definition: arb_strbuf.h:138
#define max(a, b)
Definition: f2c.h:154