ARB
CT_hash.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_hash.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #include "CT_hash.hxx"
12 #include "CT_ntree.hxx"
13 #include "CT_ctree.hxx"
14 #include <cmath>
15 
16 #include <arb_sort.h>
17 
19  for (PartSet::iterator p = parts.begin(); p != parts.end(); ++p) delete *p;
20  for (PartSet::iterator p = artificial_parts.begin(); p != artificial_parts.end(); ++p) delete *p;
21  for (PartVector::iterator p = sorted.begin(); p != sorted.end(); ++p) delete *p;
22 }
23 
29  arb_assert(retrieval_phase()); // call build_sorted_list() before retrieving
30 
31  PART *p = NULp;
32  if (retrieved<sorted.size()) {
33  std::swap(p, sorted[retrieved++]);
34  }
35  return p;
36 }
37 
38 const PART *PartRegistry::peek_part(int idx) const {
39  arb_assert(idx>=0 && size_t(idx)<size());
40  arb_assert(retrieval_phase()); // call build_sorted_list() before retrieving
41 
42  return sorted[idx];
43 }
44 
45 
48 
49  arb_assert(registration_phase());
50  part->standardize();
51 
52  PartSet::iterator found = parts.find(part);
53 
54  if (found != parts.end()) {
55  (*found)->addWeightAndLength(part);
56  delete part;
57  }
58  else {
59  part->set_origin(node);
60  parts.insert(part);
61  }
62  part = NULp;
63 }
64 
66  arb_assert(registration_phase());
67  part->standardize();
68 
69  PartSet::iterator found = artificial_parts.find(part);
70  if (found != artificial_parts.end()) {
71  (*found)->addWeightAndLength(part);
72  delete part;
73  }
74  else {
75  artificial_parts.insert(part);
76  }
77  part = NULp;
78 }
79 
80 void PartRegistry::put_part_from_partial_tree(PART*& part, const PART *partialTree, const TreeNode *node) {
82  //
83  // Example:
84  //
85  // 'part': A---B (A=part, B=invs)
86  // set of missing species: C
87  //
88  // A
89  // /
90  // assumed tree: C---+
91  // \ .
92  // B
93  //
94  // inserted partitions: A---CB
95  // AC---B
96  //
97  // Both partitions are inserted here. Since they mutually exclude each other,
98  // only one of them can and will be inserted into the result tree.
99  //
100  // The algorithm assumes that all species missing in the partialTree,
101  // reside on the bigger side of each branch of the partialTree.
102  //
103  // Therefore the partition representing that state is added with normal weight.
104  // The opposite ("missing on smaller side of branch") is added with reduced
105  // probability.
106  //
107  // Note: The PART 'C---AB' might exists at EACH branch in the deconstructed tree
108  // and is inserted once globally (in deconstruct_partial_rootnode).
109 
110  arb_assert(registration_phase());
111 
112  PART *invs = part->clone();
113  invs->invertInSuperset(partialTree);
114 
115  int diff = part->get_members() - invs->get_members();
116  if (diff != 0) {
117  if (diff<0) std::swap(part, invs);
118  // now part contains bigger partition
119 
120  // Note: adding 'part' means "add unknown species to smaller side of the partition"
121  int edges_in_part = leafs_2_edges(part->get_members()+1, ROOTED);
122  int edges_in_invs = leafs_2_edges(invs->get_members()+1, ROOTED);
123 
124  double part_prob = double(edges_in_invs)/edges_in_part; // probability for one unkown species to reside inside smaller set
125  arb_assert(part_prob>0.0 && part_prob<1.0);
126 
127  int unknownCount = partialTree->get_nonmembers();
128  double all_in_part_prob = pow(part_prob, unknownCount); // probability ALL unkown species reside inside smaller set
129 
130  part->set_faked_weight(all_in_part_prob); // make "unknown in smaller" unlikely
131  // -> reduces chance to be picked and bootstrap IF picked
132  }
133  // else, if both partitions have equal size -> simply add both
134 
135  put_part_from_complete_tree(part, node); // inserts 'A---CB' (i.e. missing species added to invs, which is smaller)
136  put_part_from_complete_tree(invs, node); // inserts 'B---CA' (i.e. missing species added to part)
137 }
138 
139 inline bool insertionOrder_less(const PART *p1, const PART *p2) {
140  return p1->insertionOrder_cmp(p2)<0;
141 }
142 
143 void PartRegistry::build_sorted_list(double overall_weight) {
145 
146  arb_assert(!parts.empty()); // nothing has been added
147  arb_assert(registration_phase());
148 
149  merge_artificial_parts();
150 
151  copy(parts.begin(), parts.end(), std::back_insert_iterator<PartVector>(sorted));
152  sort(sorted.begin(), sorted.end(), insertionOrder_less);
153 
154  parts.clear();
155 
156  for (PartVector::iterator pi = sorted.begin(); pi != sorted.end(); ++pi) {
157  (*pi)->takeMean(overall_weight);
158  }
159 
160  arb_assert(retrieval_phase());
161 }
162 
163 void PartRegistry::merge_artificial_parts() {
175  for (PartSet::iterator arti = artificial_parts.begin(); arti != artificial_parts.end(); ++arti) {
176  if (parts.find(*arti) == parts.end()) {
177  // partition was not added by any known edge -> add plain artificial edge
178  (*arti)->set_faked_weight(0.0); // never occurs (implicates length=1.0; see get_len())
179  parts.insert(*arti);
180  }
181  else {
182  delete *arti;
183  }
184  }
185  artificial_parts.clear();
186 }
187 
Definition: arbdbt.h:48
#define arb_assert(cond)
Definition: arb_assert.h:245
int get_members() const
Definition: CT_part.hxx:225
void standardize()
Definition: CT_part.cxx:292
void put_part_from_partial_tree(PART *&part, const PART *partialTree, const TreeNode *node)
Definition: CT_hash.cxx:80
PART * clone() const
Definition: CT_part.hxx:118
void invertInSuperset(const PART *superset)
Definition: CT_part.cxx:168
void set_origin(const TreeNode *node_)
Definition: CT_part.hxx:124
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
int get_nonmembers() const
Definition: CT_part.hxx:226
const PART * peek_part(int idx) const
Definition: CT_hash.cxx:38
PART * get_part()
Definition: CT_hash.cxx:24
void build_sorted_list(double overall_weight)
Definition: CT_hash.cxx:143
CONSTEXPR_INLINE int leafs_2_edges(int leafs, TreeModel model)
Definition: arbdbt.h:61
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
Definition: ad_io_inline.h:19
bool insertionOrder_less(const PART *p1, const PART *p2)
Definition: CT_hash.cxx:139
size_t size() const
Definition: CT_hash.hxx:59
Definition: CT_part.hxx:62
void set_faked_weight(double w)
Definition: CT_part.hxx:195
void put_artificial_part(PART *&part)
Definition: CT_hash.cxx:65
static void copy(double **i, double **j)
Definition: trnsprob.cxx:32
int insertionOrder_cmp(const PART *other) const
Definition: CT_part.cxx:332
void put_part_from_complete_tree(PART *&part, const TreeNode *node)
Definition: CT_hash.cxx:46
#define NULp
Definition: cxxforward.h:116