ARB
CT_part.hxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_part.hxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #ifndef CT_PART_HXX
12 #define CT_PART_HXX
13 
14 #ifndef ARBTOOLS_H
15 #include <arbtools.h>
16 #endif
17 #ifndef ARBDB_BASE_H
18 #include <arbdb_base.h>
19 #endif
20 #ifndef CT_DEF_HXX
21 #include "CT_def.hxx"
22 #endif
23 #ifndef ARB_ASSERT_H
24 #include <arb_assert.h>
25 #endif
26 #ifndef ARB_MEM_H
27 #include <arb_mem.h>
28 #endif
29 
30 
31 typedef unsigned int PELEM;
32 class PART;
33 
34 #if defined(ASSERTION_USED)
35 CONSTEXPR_INLINE bool diff_smaller_epsilon(double d, double eps) {
36  return abs(d)<eps;
37 }
38 CONSTEXPR_INLINE bool is_similar(double d1, double d2, double eps) {
39  return diff_smaller_epsilon(d1-d2, eps);
40 }
41 #endif
42 
44  PELEM cutmask; // this mask is used to zero unused bits from the last long (in PART::p)
45  int longs; // number of longs per part
46  int bits; // number of bits per part ( = overall number of species)
47 
48  mutable size_t id; // unique id (depending on part creation order, which depends on the added trees)
49 
50 public:
51  PartitionSize(int len);
52 
53  int get_longs() const { return longs; }
54  int get_bits() const { return bits; }
55  size_t get_unique_id() const { id++; return id; }
56  PELEM get_cutmask() const { return cutmask; }
57 
58  PART *create_root() const;
59  PELEM *alloc_mem() const { return ARB_calloc<PELEM>(get_longs()); }
60 };
61 
62 class PART {
63  const PartitionSize *info;
64 
65  PELEM *p;
66  GBT_LEN len; // length between two nodes (weighted by weight)
67  double weight; // sum of weights
68  size_t id;
69  int members;
70 
71  const TreeNode *node; // =original node PART was created from; maybe NULp; erased when PARTs get combined
72 
73  PART(const PART& other) :
74  info(other.info),
75  p(info->alloc_mem()),
76  len(other.len),
77  weight(other.weight),
78  id(info->get_unique_id()),
79  members(other.members),
80  node(other.node)
81  {
82  const int longs = get_longs();
83  for (int i = 0; i<longs; ++i) { // IRRELEVANT_LOOP
84  p[i] = other.p[i];
85  }
87  }
88  DECLARE_ASSIGNMENT_OPERATOR(PART);
89 
90  int count_members() const;
91 
92  void set_weight(double pc) {
93  double lenScale = pc/weight;
94  weight = pc;
95  len *= lenScale;
96  }
97 
98  static int byte_pos(int pos) { return pos / sizeof(PELEM) / 8; }
99  static PELEM bit_mask(int pos) { return 1 << (pos % (sizeof(PELEM)*8)); }
100 
101 public:
102 
103  PART(const PartitionSize* size_info, double weight_) :
104  info(size_info),
105  p(info->alloc_mem()),
106  len(0),
107  weight(weight_),
108  id(info->get_unique_id()),
109  members(0),
110  node(NULp)
111  {
112  arb_assert(is_valid());
113  }
114  ~PART() {
115  free(p);
116  }
117 
118  PART *clone() const { return new PART(*this); }
119 
120  int get_longs() const { return info->get_longs(); }
121  int get_maxsize() const { return info->get_bits(); }
122  PELEM get_cutmask() const { return info->get_cutmask(); }
123 
124  void set_origin(const TreeNode *node_) {
125  arb_assert(!node);
126  arb_assert(node_);
127  node = node_;
128  }
129  const TreeNode *get_origin() const { return node; }
130 
131  bool is_valid() const {
132  PELEM last = p[get_longs()-1];
133 
134  bool only_valid_bits = (last&get_cutmask()) == last;
135  bool valid_member_count = members >= 0 && members <= get_maxsize();
136  bool valid_weight = weight >= 0.0;
137 
138  return only_valid_bits && valid_member_count && valid_weight;
139  }
140 
141  void setbit(int pos) {
144  arb_assert(is_valid());
145 
146  int idx = byte_pos(pos);
147  PELEM bit = bit_mask(pos);
148 
149  if (!(p[idx]&bit)) {
150  p[idx] |= bit;
151  members++;
152  }
153 
154  arb_assert(is_valid());
155  }
156  bool bit_is_set(int pos) const {
159  arb_assert(is_valid());
160 
161  int idx = byte_pos(pos);
162  PELEM bit = bit_mask(pos);
163 
164  return p[idx] & bit;
165  }
166 
168  arb_assert(is_valid());
169  len = length*weight;
170  }
171  GBT_LEN get_len() const {
172  if (weight == 0.0) {
173  // if weight is 0.0 = > branch never occurred!
174  return 1.0; // return worst distance between these nodes (which are disjunct trees)
175  }
176  return len/weight;
177  }
178 
179  double get_weight() const { return weight; }
180 
181  void addWeightAndLength(const PART *other) {
182  weight += other->weight;
183  len += other->len;
184 
185  // combining two PARTs destroys possibility to deduct PART->TreeNode
186  node = NULp; // so we forget the originating TreeNode now
187  }
188 
189  void takeMean(double overall_weight) {
190  set_weight(get_weight() / overall_weight);
191  arb_assert(is_valid());
192  arb_assert(weight <= 1.0);
193  }
194 
195  void set_faked_weight(double w) { set_weight(w); }
196 
197  void invert();
198  void invertInSuperset(const PART *superset);
199 
200  bool is_standardized() const;
201  void standardize();
202 
203  void add_members_from(const PART *source);
204 
205  bool is_subset_of(const PART *other) const {
206  const int longs = get_longs();
207  for (int i=0; i<longs; i++) {
208  if ((p[i] & other->p[i]) != p[i]) {
209  return false;
210  }
211  }
212  return true;
213  }
214  bool is_real_son_of(const PART *father) const { return is_subset_of(father) && differs(father); }
215 
216  bool overlaps_with(const PART *other) const;
217  bool disjunct_from(const PART *other) const { return !overlaps_with(other); }
218 
219  bool equals(const PART *other) const;
220  bool differs(const PART *other) const { return !equals(other); }
221 
222  int index() const;
223  unsigned key() const;
224 
225  int get_members() const { return members; }
226  int get_nonmembers() const { return get_maxsize() - get_members(); }
227 
228  bool is_leaf_edge() const { int size = members; return size == 1 || size == (get_maxsize()-1); }
229  int distance_to_tree_center() const { return abs(get_maxsize()/2 - members); }
230 
231  int insertionOrder_cmp(const PART *other) const;
232  int topological_cmp(const PART *other) const;
233 
234  int distanceTo(const PART *other, const PART *this_superset, const PART *other_superset) const;
235 
236 #if defined(NTREE_DEBUG_FUNCTIONS)
237  void print() const;
238  static void start_pretty_printing(const class CharPtrArray& names_);
239  static void stop_pretty_printing();
240 #endif
241 };
242 
243 #else
244 #error CT_part.hxx included twice
245 #endif // CT_PART_HXX
#define arb_assert(cond)
Definition: arb_assert.h:245
void set_len(GBT_LEN length)
Definition: CT_part.hxx:167
int topological_cmp(const PART *other) const
Definition: CT_part.cxx:365
bool differs(const PART *other) const
Definition: CT_part.hxx:220
PartitionSize(int len)
Definition: CT_part.cxx:35
int get_members() const
Definition: CT_part.hxx:225
PELEM get_cutmask() const
Definition: CT_part.hxx:56
bool is_leaf_edge() const
Definition: CT_part.hxx:228
unsigned int PELEM
Definition: CT_part.hxx:31
int get_maxsize() const
Definition: CT_part.hxx:121
void standardize()
Definition: CT_part.cxx:292
CONSTEXPR_INLINE bool is_similar(double d1, double d2, double eps)
Definition: CT_part.hxx:38
bool is_valid() const
Definition: CT_part.hxx:131
PART * clone() const
Definition: CT_part.hxx:118
void invertInSuperset(const PART *superset)
Definition: CT_part.cxx:168
POS_TREE1 * father
Definition: probe_tree.h:39
void set_origin(const TreeNode *node_)
Definition: CT_part.hxx:124
void takeMean(double overall_weight)
Definition: CT_part.hxx:189
int get_nonmembers() const
Definition: CT_part.hxx:226
void addWeightAndLength(const PART *other)
Definition: CT_part.hxx:181
int index() const
Definition: CT_part.cxx:306
double get_weight() const
Definition: CT_part.hxx:179
int get_longs() const
Definition: CT_part.hxx:120
unsigned key() const
Definition: CT_part.cxx:222
PELEM * alloc_mem() const
Definition: CT_part.hxx:59
bool is_standardized() const
Definition: CT_part.cxx:280
PELEM get_cutmask() const
Definition: CT_part.hxx:122
PART * create_root() const
Definition: CT_part.cxx:124
#define CONSTEXPR_INLINE
Definition: cxxforward.h:111
bool bit_is_set(int pos) const
Definition: CT_part.hxx:156
~PART()
Definition: CT_part.hxx:114
bool disjunct_from(const PART *other) const
Definition: CT_part.hxx:217
bool equals(const PART *other) const
Definition: CT_part.cxx:208
void add_members_from(const PART *source)
Definition: CT_part.cxx:185
void invert()
Definition: CT_part.cxx:149
int distanceTo(const PART *other, const PART *this_superset, const PART *other_superset) const
Definition: CT_part.cxx:397
Definition: CT_part.hxx:62
void set_faked_weight(double w)
Definition: CT_part.hxx:195
int insertionOrder_cmp(const PART *other) const
Definition: CT_part.cxx:332
PART(const PartitionSize *size_info, double weight_)
Definition: CT_part.hxx:103
bool is_real_son_of(const PART *father) const
Definition: CT_part.hxx:214
const TreeNode * get_origin() const
Definition: CT_part.hxx:129
#define abs(x)
Definition: f2c.h:151
CONSTEXPR_INLINE bool diff_smaller_epsilon(double d, double eps)
Definition: CT_part.hxx:35
size_t get_unique_id() const
Definition: CT_part.hxx:55
float GBT_LEN
Definition: arbdb_base.h:34
#define NULp
Definition: cxxforward.h:116
bool is_subset_of(const PART *other) const
Definition: CT_part.hxx:205
int distance_to_tree_center() const
Definition: CT_part.hxx:229
bool overlaps_with(const PART *other) const
Definition: CT_part.cxx:135
int get_bits() const
Definition: CT_part.hxx:54
void setbit(int pos)
Definition: CT_part.hxx:141
GBT_LEN get_len() const
Definition: CT_part.hxx:171
size_t length
int get_longs() const
Definition: CT_part.hxx:53
void print(const T &t)
Definition: test_unit.h:359