ARB
CT_part.hxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_part.hxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #ifndef CT_PART_HXX
12 #define CT_PART_HXX
13 
14 #ifndef ARBTOOLS_H
15 #include <arbtools.h>
16 #endif
17 #ifndef ARBDB_BASE_H
18 #include <arbdb_base.h>
19 #endif
20 #ifndef CT_DEF_HXX
21 #include "CT_def.hxx"
22 #endif
23 #ifndef ARB_ASSERT_H
24 #include <arb_assert.h>
25 #endif
26 #ifndef ARB_MEM_H
27 #include <arb_mem.h>
28 #endif
29 
30 
31 typedef unsigned int PELEM;
32 class PART;
33 
34 #if defined(ASSERTION_USED)
35 CONSTEXPR_INLINE bool diff_smaller_epsilon(double d, double eps) {
36  return abs(d)<eps;
37 }
38 CONSTEXPR_INLINE bool is_similar(double d1, double d2, double eps) {
39  return diff_smaller_epsilon(d1-d2, eps);
40 }
41 #endif
42 
44  PELEM cutmask; // this mask is used to zero unused bits from the last long (in PART::p)
45  int longs; // number of longs per part
46  int bits; // number of bits per part ( = overall number of species)
47 
48  mutable size_t id; // unique id (depending on part creation order, which depends on the added trees)
49 
50 public:
51  PartitionSize(int len);
52 
53  int get_longs() const { return longs; }
54  int get_bits() const { return bits; }
55  size_t get_unique_id() const { id++; return id; }
56  PELEM get_cutmask() const { return cutmask; }
57 
58  PART *create_root() const;
59  PELEM *alloc_mem() const { return ARB_calloc<PELEM>(get_longs()); }
60 };
61 
62 class PART {
63  const PartitionSize *info;
64 
65  PELEM *p;
66  GBT_LEN len; // length between two nodes (weighted by weight)
67  double weight; // sum of weights
68  size_t id;
69  int members;
70 
71  PART(const PART& other)
72  : info(other.info),
73  p(info->alloc_mem()),
74  len(other.len),
75  weight(other.weight),
76  id(info->get_unique_id()),
77  members(other.members)
78  {
79  int longs = get_longs();
80  for (int i = 0; i<longs; ++i) { // IRRELEVANT_LOOP
81  p[i] = other.p[i];
82  }
84  }
85  DECLARE_ASSIGNMENT_OPERATOR(PART);
86 
87  int count_members() const;
88 
89  void set_weight(double pc) {
90  double lenScale = pc/weight;
91  weight = pc;
92  len *= lenScale;
93  }
94 
95  static int byte_pos(int pos) { return pos / sizeof(PELEM) / 8; }
96  static PELEM bit_mask(int pos) { return 1 << (pos % (sizeof(PELEM)*8)); }
97 
98 public:
99 
100  PART(const PartitionSize* size_info, double weight_)
101  : info(size_info),
102  p(info->alloc_mem()),
103  len(0),
104  weight(weight_),
105  id(info->get_unique_id()),
106  members(0)
107  {
108  arb_assert(is_valid());
109  }
110  ~PART() {
111  free(p);
112  }
113 
114  PART *clone() const { return new PART(*this); }
115 
116  int get_longs() const { return info->get_longs(); }
117  int get_maxsize() const { return info->get_bits(); }
118  PELEM get_cutmask() const { return info->get_cutmask(); }
119 
120  bool is_valid() const {
121  PELEM last = p[get_longs()-1];
122 
123  bool only_valid_bits = (last&get_cutmask()) == last;
124  bool valid_member_count = members >= 0 && members <= get_maxsize();
125  bool valid_weight = weight >= 0.0;
126 
127  return only_valid_bits && valid_member_count && valid_weight;
128  }
129 
130  void setbit(int pos) {
133  arb_assert(is_valid());
134 
135  int idx = byte_pos(pos);
136  PELEM bit = bit_mask(pos);
137 
138  if (!(p[idx]&bit)) {
139  p[idx] |= bit;
140  members++;
141  }
142 
143  arb_assert(is_valid());
144  }
145  bool bit_is_set(int pos) const {
148  arb_assert(is_valid());
149 
150  int idx = byte_pos(pos);
151  PELEM bit = bit_mask(pos);
152 
153  return p[idx] & bit;
154  }
155 
157  arb_assert(is_valid());
158  len = length*weight;
159  }
160  GBT_LEN get_len() const {
161  if (weight == 0.0) {
162  // if weight is 0.0 = > branch never occurred!
163  return 1.0; // return worst distance between these nodes (which are disjunct trees)
164  }
165  return len/weight;
166  }
167 
168  double get_weight() const { return weight; }
169 
170  void addWeightAndLength(const PART *other) {
171  weight += other->weight;
172  len += other->len;
173  }
174 
175  void takeMean(double overall_weight) {
176  set_weight(get_weight() / overall_weight);
177  arb_assert(is_valid());
178  arb_assert(weight <= 1.0);
179  }
180 
181  void set_faked_weight(double w) { set_weight(w); }
182 
183  void invert();
184  void invertInSuperset(const PART *superset);
185 
186  bool is_standardized() const;
187  void standardize();
188 
189  void add_members_from(const PART *source);
190 
191  bool is_subset_of(const PART *other) const {
192  int longs = get_longs();
193  for (int i=0; i<longs; i++) {
194  if ((p[i] & other->p[i]) != p[i]) {
195  return false;
196  }
197  }
198  return true;
199  }
200  bool is_real_son_of(const PART *father) const { return is_subset_of(father) && differs(father); }
201 
202  bool overlaps_with(const PART *other) const;
203  bool disjunct_from(const PART *other) const { return !overlaps_with(other); }
204 
205  bool equals(const PART *other) const;
206  bool differs(const PART *other) const { return !equals(other); }
207 
208  int index() const;
209  unsigned key() const;
210 
211  int get_members() const { return members; }
212  int get_nonmembers() const { return get_maxsize() - get_members(); }
213 
214  bool is_leaf_edge() const { int size = members; return size == 1 || size == (get_maxsize()-1); }
215  int distance_to_tree_center() const { return abs(get_maxsize()/2 - members); }
216 
217  int insertionOrder_cmp(const PART *other) const;
218  int topological_cmp(const PART *other) const;
219 
220 #if defined(NTREE_DEBUG_FUNCTIONS)
221  void print() const;
222  static void start_pretty_printing(const class CharPtrArray& names_);
223  static void stop_pretty_printing();
224 #endif
225 };
226 
227 #else
228 #error CT_part.hxx included twice
229 #endif // CT_PART_HXX
#define arb_assert(cond)
Definition: arb_assert.h:245
void set_len(GBT_LEN length)
Definition: CT_part.hxx:156
int topological_cmp(const PART *other) const
Definition: CT_part.cxx:338
bool differs(const PART *other) const
Definition: CT_part.hxx:206
PartitionSize(int len)
Definition: CT_part.cxx:36
int get_members() const
Definition: CT_part.hxx:211
PELEM get_cutmask() const
Definition: CT_part.hxx:56
bool is_leaf_edge() const
Definition: CT_part.hxx:214
unsigned int PELEM
Definition: CT_part.hxx:31
int get_maxsize() const
Definition: CT_part.hxx:117
void standardize()
Definition: CT_part.cxx:265
CONSTEXPR_INLINE bool is_similar(double d1, double d2, double eps)
Definition: CT_part.hxx:38
bool is_valid() const
Definition: CT_part.hxx:120
PART * clone() const
Definition: CT_part.hxx:114
void invertInSuperset(const PART *superset)
Definition: CT_part.cxx:169
POS_TREE1 * father
Definition: probe_tree.h:39
void takeMean(double overall_weight)
Definition: CT_part.hxx:175
int get_nonmembers() const
Definition: CT_part.hxx:212
void addWeightAndLength(const PART *other)
Definition: CT_part.hxx:170
int index() const
Definition: CT_part.cxx:279
double get_weight() const
Definition: CT_part.hxx:168
int get_longs() const
Definition: CT_part.hxx:116
unsigned key() const
Definition: CT_part.cxx:223
PELEM * alloc_mem() const
Definition: CT_part.hxx:59
bool is_standardized() const
Definition: CT_part.cxx:253
PELEM get_cutmask() const
Definition: CT_part.hxx:118
PART * create_root() const
Definition: CT_part.cxx:125
#define CONSTEXPR_INLINE
Definition: cxxforward.h:92
bool bit_is_set(int pos) const
Definition: CT_part.hxx:145
~PART()
Definition: CT_part.hxx:110
bool disjunct_from(const PART *other) const
Definition: CT_part.hxx:203
bool equals(const PART *other) const
Definition: CT_part.cxx:209
void add_members_from(const PART *source)
Definition: CT_part.cxx:186
void invert()
Definition: CT_part.cxx:150
Definition: CT_part.hxx:62
void set_faked_weight(double w)
Definition: CT_part.hxx:181
int insertionOrder_cmp(const PART *other) const
Definition: CT_part.cxx:305
PART(const PartitionSize *size_info, double weight_)
Definition: CT_part.hxx:100
bool is_real_son_of(const PART *father) const
Definition: CT_part.hxx:200
#define abs(x)
Definition: f2c.h:151
CONSTEXPR_INLINE bool diff_smaller_epsilon(double d, double eps)
Definition: CT_part.hxx:35
size_t get_unique_id() const
Definition: CT_part.hxx:55
float GBT_LEN
Definition: arbdb_base.h:34
bool is_subset_of(const PART *other) const
Definition: CT_part.hxx:191
int distance_to_tree_center() const
Definition: CT_part.hxx:215
bool overlaps_with(const PART *other) const
Definition: CT_part.cxx:136
int get_bits() const
Definition: CT_part.hxx:54
void setbit(int pos)
Definition: CT_part.hxx:130
GBT_LEN get_len() const
Definition: CT_part.hxx:160
size_t length
int get_longs() const
Definition: CT_part.hxx:53
void print(const T &t)
Definition: test_unit.h:348