ARB
CT_hash.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_hash.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #include "CT_hash.hxx"
12 #include "CT_ntree.hxx"
13 #include "CT_ctree.hxx"
14 #include <cmath>
15 
16 #include <arb_sort.h>
17 
19  for (PartSet::iterator p = parts.begin(); p != parts.end(); ++p) delete *p;
20  for (PartSet::iterator p = artificial_parts.begin(); p != artificial_parts.end(); ++p) delete *p;
21  for (PartVector::iterator p = sorted.begin(); p != sorted.end(); ++p) delete *p;
22 }
23 
29  arb_assert(retrieval_phase()); // call build_sorted_list() before retrieving
30 
31  PART *p = NULp;
32  if (retrieved<sorted.size()) {
33  std::swap(p, sorted[retrieved++]);
34  }
35  return p;
36 }
37 
40 
41  arb_assert(registration_phase());
42  part->standardize();
43 
44  PartSet::iterator found = parts.find(part);
45 
46  if (found != parts.end()) {
47  (*found)->addWeightAndLength(part);
48  delete part;
49  }
50  else {
51  parts.insert(part);
52  }
53  part = NULp;
54 }
55 
57  arb_assert(registration_phase());
58  part->standardize();
59 
60  PartSet::iterator found = artificial_parts.find(part);
61  if (found != artificial_parts.end()) {
62  (*found)->addWeightAndLength(part);
63  delete part;
64  }
65  else {
66  artificial_parts.insert(part);
67  }
68  part = NULp;
69 }
70 
71 void PartRegistry::put_part_from_partial_tree(PART*& part, const PART *partialTree) {
73  //
74  // Example:
75  //
76  // 'part': A---B (A=part, B=invs)
77  // set of missing species: C
78  //
79  // A
80  // /
81  // assumed tree: C---+
82  // \ .
83  // B
84  //
85  // inserted partitions: A---CB
86  // AC---B
87  //
88  // Both partitions are inserted here. Since they mutually exclude each other,
89  // only one of them can and will be inserted into the result tree.
90  //
91  // The algorithm assumes that all species missing in the partialTree,
92  // reside on the bigger side of each branch of the partialTree.
93  //
94  // Therefore the partition representing that state is added with normal weight.
95  // The opposite ("missing on smaller side of branch") is added with reduced
96  // probability.
97  //
98  // Note: The PART 'C---AB' might exists at EACH branch in the deconstructed tree
99  // and is inserted once globally (in deconstruct_partial_rootnode).
100 
101  arb_assert(registration_phase());
102 
103  PART *invs = part->clone();
104  invs->invertInSuperset(partialTree);
105 
106  int diff = part->get_members() - invs->get_members();
107  if (diff != 0) {
108  if (diff<0) std::swap(part, invs);
109  // now part contains bigger partition
110 
111  // Note: adding 'part' means "add unknown species to smaller side of the partition"
112  int edges_in_part = leafs_2_edges(part->get_members()+1, ROOTED);
113  int edges_in_invs = leafs_2_edges(invs->get_members()+1, ROOTED);
114 
115  double part_prob = double(edges_in_invs)/edges_in_part; // probability for one unkown species to reside inside smaller set
116  arb_assert(part_prob>0.0 && part_prob<1.0);
117 
118  int unknownCount = partialTree->get_nonmembers();
119  double all_in_part_prob = pow(part_prob, unknownCount); // probability ALL unkown species reside inside smaller set
120 
121  part->set_faked_weight(all_in_part_prob); // make "unknown in smaller" unlikely
122  // -> reduces chance to be picked and bootstrap IF picked
123  }
124  // else, if both partitions have equal size -> simply add both
125 
126  put_part_from_complete_tree(part); // inserts 'A---CB' (i.e. missing species added to invs, which is smaller)
127  put_part_from_complete_tree(invs); // inserts 'B---CA' (i.e. missing species added to part)
128 }
129 
130 inline bool insertionOrder_less(const PART *p1, const PART *p2) {
131  return p1->insertionOrder_cmp(p2)<0;
132 }
133 
134 void PartRegistry::build_sorted_list(double overall_weight) {
136 
137  arb_assert(!parts.empty()); // nothing has been added
138  arb_assert(registration_phase());
139 
140  merge_artificial_parts();
141 
142  copy(parts.begin(), parts.end(), std::back_insert_iterator<PartVector>(sorted));
143  sort(sorted.begin(), sorted.end(), insertionOrder_less);
144 
145  parts.clear();
146 
147  for (PartVector::iterator pi = sorted.begin(); pi != sorted.end(); ++pi) {
148  (*pi)->takeMean(overall_weight);
149  }
150 
151  arb_assert(retrieval_phase());
152 }
153 
154 void PartRegistry::merge_artificial_parts() {
166  for (PartSet::iterator arti = artificial_parts.begin(); arti != artificial_parts.end(); ++arti) {
167  if (parts.find(*arti) == parts.end()) {
168  // partition was not added by any known edge -> add plain artificial edge
169  (*arti)->set_faked_weight(0.0); // never occurs (implicates length=1.0; see get_len())
170  parts.insert(*arti);
171  }
172  else {
173  delete *arti;
174  }
175  }
176  artificial_parts.clear();
177 }
178 
Definition: arbdbt.h:48
#define arb_assert(cond)
Definition: arb_assert.h:245
int get_members() const
Definition: CT_part.hxx:211
void standardize()
Definition: CT_part.cxx:265
PART * clone() const
Definition: CT_part.hxx:114
void invertInSuperset(const PART *superset)
Definition: CT_part.cxx:169
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
int get_nonmembers() const
Definition: CT_part.hxx:212
PART * get_part()
Definition: CT_hash.cxx:24
void build_sorted_list(double overall_weight)
Definition: CT_hash.cxx:134
CONSTEXPR_INLINE int leafs_2_edges(int leafs, TreeModel model)
Definition: arbdbt.h:61
void put_part_from_partial_tree(PART *&part, const PART *partialTree)
Definition: CT_hash.cxx:71
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
Definition: ad_io_inline.h:19
bool insertionOrder_less(const PART *p1, const PART *p2)
Definition: CT_hash.cxx:130
Definition: CT_part.hxx:62
void set_faked_weight(double w)
Definition: CT_part.hxx:181
void put_artificial_part(PART *&part)
Definition: CT_hash.cxx:56
static void copy(double **i, double **j)
Definition: trnsprob.cxx:32
void put_part_from_complete_tree(PART *&part)
Definition: CT_hash.cxx:38
int insertionOrder_cmp(const PART *other) const
Definition: CT_part.cxx:305
#define NULp
Definition: cxxforward.h:97