ARB
CT_part.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : CT_part.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 /* This module is designed to organize the data structure partitions
12  partitions represent the edges of a tree */
13 // the partitions are implemented as an array of longs
14 // Each leaf in a GBT-Tree is represented as one Bit in the Partition
15 
16 #include "CT_part.hxx"
17 #include <arbdbt.h>
18 #include <arb_strarray.h>
19 
20 #define BITS_PER_PELEM (sizeof(PELEM)*8)
21 
22 #if defined(DUMP_PART_INIT) || defined(UNIT_TESTS)
23 static const char *readable_cutmask(PELEM mask) {
24  static char readable[BITS_PER_PELEM+1];
25  memset(readable, '0', BITS_PER_PELEM);
26  readable[BITS_PER_PELEM] = 0;
27 
28  for (int b = BITS_PER_PELEM-1; b >= 0; --b) {
29  if (mask&1) readable[b] = '1';
30  mask = mask>>1;
31  }
32  return readable;
33 }
34 #endif
35 
37  : cutmask(0),
38  longs((((len + 7) / 8)+sizeof(PELEM)-1) / sizeof(PELEM)),
39  bits(len),
40  id(0)
41 {
48  int j = len % BITS_PER_PELEM;
49  if (!j) j += BITS_PER_PELEM;
50 
51  for (int i=0; i<j; i++) {
52  cutmask <<= 1;
53  cutmask |= 1;
54  }
55 
56 #if defined(DEBUG)
57  size_t possible = longs*BITS_PER_PELEM;
58  arb_assert((possible-bits)<BITS_PER_PELEM); // longs is too big (wasted space)
59 
60 #if defined(DUMP_PART_INIT)
61  printf("leafs=%i\n", len);
62  printf("cutmask='%s'\n", readable_cutmask(cutmask));
63  printf("longs=%i (can hold %zu bits)\n", longs, possible);
64  printf("bits=%i\n", bits);
65 #endif
66 #endif
67 }
68 
69 #if defined(NTREE_DEBUG_FUNCTIONS)
70 
71 static const CharPtrArray *namesPtr = NULp;
72 
73 void PART::start_pretty_printing(const CharPtrArray& names) { namesPtr = &names; }
74 void PART::stop_pretty_printing() { namesPtr = NULp; }
75 
76 void PART::print() const {
77  // ! Testfunction to print a part
78  int k = 0;
79  const int longs = get_longs();
80  const int plen = info->get_bits();
81 
82  if (namesPtr) {
83  const CharPtrArray& names = *namesPtr;
84  for (int part = 0; part<=1; ++part) {
85  // bool first = true;
86  for (int i=0; i<longs; i++) {
87  PELEM el = 1;
88  for (int j=0; k<plen && size_t(j)<sizeof(PELEM)*8; j++, k++) {
89  bool bitset = p[i] & el;
90  if (bitset == part) {
91  const char *name = names[k];
92 #if 1
93  fputc(name[0], stdout); // first char of name
94 #else
95  if (!first) fputc(',', stdout);
96  else first = false;
97  fputs(name, stdout); // full name
98 #endif
99  }
100  el <<= 1;
101  }
102  }
103  if (!part) {
104  fputs("---", stdout);
105  k = 0;
106  }
107  }
108  }
109  else {
110  for (int i=0; i<longs; i++) {
111  PELEM el = 1;
112  for (int j=0; k<plen && size_t(j)<sizeof(PELEM)*8; j++, k++) {
113  bool bitset = p[i] & el;
114  fputc('0'+bitset, stdout);
115  el <<= 1;
116  }
117  }
118  }
119 
120  printf(" len=%.5f prob=%5.1f%% w.len=%.5f leaf=%i dist2center=%i\n",
121  len, weight*100.0, get_len(), is_leaf_edge(), distance_to_tree_center());
122 }
123 #endif
124 
130  PART *p = new PART(this, 1.0);
131  p->invert();
132  arb_assert(p->is_valid());
133  return p;
134 }
135 
136 bool PART::overlaps_with(const PART *other) const {
140  arb_assert(is_valid());
141  arb_assert(other->is_valid());
142 
143  int longs = get_longs();
144  for (int i=0; i<longs; i++) {
145  if (p[i] & other->p[i]) return true;
146  }
147  return false;
148 }
149 
150 void PART::invert() {
152  //
153  // Each edge in a tree connects two subtrees.
154  // These subtrees are represented by inverse partitions
155 
156  arb_assert(is_valid());
157 
158  int longs = get_longs();
159  for (int i=0; i<longs; i++) { // LOOP_VECTORIZED
160  p[i] = ~p[i];
161  }
162  p[longs-1] &= get_cutmask();
163 
164  members = get_maxsize()-members;
165 
166  arb_assert(is_valid());
167 }
168 
169 void PART::invertInSuperset(const PART *superset) {
170  arb_assert(is_valid());
171  arb_assert(is_subset_of(superset));
172 
173  int longs = get_longs();
174  for (int i=0; i<longs; i++) { // LOOP_VECTORIZED
175  p[i] = p[i] ^ superset->p[i];
176  }
177  p[longs-1] &= get_cutmask();
178 
179  members = superset->get_members()-members;
180 
181  arb_assert(is_valid());
182  arb_assert(is_subset_of(superset));
183 }
184 
185 
186 void PART::add_members_from(const PART *source) {
188  arb_assert(source->is_valid());
189  arb_assert(is_valid());
190 
191  bool distinct = disjunct_from(source);
192 
193  int longs = get_longs();
194  for (int i=0; i<longs; i++) { // LOOP_VECTORIZED
195  p[i] |= source->p[i];
196  }
197 
198  if (distinct) {
199  members += source->members;
200  }
201  else {
202  members = count_members();
203  }
204 
205  arb_assert(is_valid());
206 }
207 
208 
209 bool PART::equals(const PART *other) const {
212  arb_assert(is_valid());
213  arb_assert(other->is_valid());
214 
215  int longs = get_longs();
216  for (int i=0; i<longs; i++) {
217  if (p[i] != other->p[i]) return false;
218  }
219  return true;
220 }
221 
222 
223 unsigned PART::key() const {
225  arb_assert(is_valid());
226 
227  PELEM ph = 0;
228  int longs = get_longs();
229  for (int i=0; i<longs; i++) { // LOOP_VECTORIZED
230  ph ^= p[i];
231  }
232 
233  return ph;
234 }
235 
236 int PART::count_members() const {
238  int leafs = 0;
239  int longs = get_longs();
240  for (int i = 0; i<longs; ++i) {
241  PELEM e = p[i];
242 
243  if (i == (longs-1)) e = e&get_cutmask();
244 
245  for (size_t b = 0; b<(sizeof(e)*8); ++b) {
246  if (e&1) leafs++;
247  e = e>>1;
248  }
249  }
250  return leafs;
251 }
252 
253 bool PART::is_standardized() const { // @@@ inline
258  // may be any criteria which differs between PART and its inverse
259  // if you change the criteria, generated trees will change
260  // (because branch-insertion-order is affected)
261 
262  return bit_is_set(0);
263 }
264 
271  arb_assert(is_valid());
272  if (!is_standardized()) {
273  invert();
275  }
276  arb_assert(is_valid());
277 }
278 
279 int PART::index() const {
287  arb_assert(is_valid());
289 
290  int pos = 0;
291  int longs = get_longs();
292  for (int i=0; i<longs; i++) {
293  PELEM p_temp = p[i];
294  pos = i * sizeof(PELEM) * 8;
295  if (p_temp) {
296  for (; p_temp; p_temp >>= 1, pos++) {
297  ;
298  }
299  break;
300  }
301  }
302  return pos-1;
303 }
304 
305 int PART::insertionOrder_cmp(const PART *other) const {
306  // defines order in which edges will be inserted into the consensus tree
307 
308  if (this == other) return 0;
309 
310  int cmp = is_leaf_edge() - other->is_leaf_edge();
311 
312  if (!cmp) {
313  cmp = -double_cmp(weight, other->weight); // insert bigger weight first
314  if (!cmp) {
315  int centerdist1 = distance_to_tree_center();
316  int centerdist2 = other->distance_to_tree_center();
317 
318  cmp = centerdist1-centerdist2; // insert central edges first
319 
320  if (!cmp) {
321  cmp = -double_cmp(get_len(), other->get_len()); // NOW REALLY insert bigger len first
322  // (change affected test results: increased in-tree-distance,
323  // but reduced parsimony value of result-trees)
324  if (!cmp) {
325  cmp = id - other->id; // strict by definition
326  arb_assert(cmp);
327  }
328  }
329  }
330  }
331 
332  return cmp;
333 }
334 inline int PELEM_cmp(const PELEM& p1, const PELEM& p2) {
335  return p1<p2 ? -1 : (p1>p2 ? 1 : 0);
336 }
337 
338 int PART::topological_cmp(const PART *other) const {
339  // define a strict order on topologies defined by edges
340 
341  if (this == other) return 0;
342 
344  arb_assert(other->is_standardized());
345 
346  int cmp = members - other->members;
347  if (!cmp) {
348  int longs = get_longs();
349  for (int i = 0; !cmp && i<longs; ++i) {
350  cmp = PELEM_cmp(p[i], other->p[i]);
351  }
352  }
353 
354  arb_assert(contradicted(cmp, equals(other)));
355 
356  return cmp;
357 }
358 
359 // --------------------------------------------------------------------------------
360 
361 #ifdef UNIT_TESTS
362 #ifndef TEST_UNIT_H
363 #include <test_unit.h>
364 #endif
365 
366 void TEST_PartRegistry() {
367  {
368  PartitionSize reg(0);
369  TEST_EXPECT_EQUAL(reg.get_bits(), 0);
370  TEST_EXPECT_EQUAL(reg.get_longs(), 0);
371  // cutmask doesnt matter
372  }
373 
374  {
375  PartitionSize reg(1);
376  TEST_EXPECT_EQUAL(reg.get_bits(), 1);
377  TEST_EXPECT_EQUAL(reg.get_longs(), 1);
378  TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "00000000000000000000000000000001");
379  }
380 
381  {
382  PartitionSize reg(31);
383  TEST_EXPECT_EQUAL(reg.get_bits(), 31);
384  TEST_EXPECT_EQUAL(reg.get_longs(), 1);
385  TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "01111111111111111111111111111111");
386  }
387 
388  {
389  PartitionSize reg(32);
390  TEST_EXPECT_EQUAL(reg.get_bits(), 32);
391  TEST_EXPECT_EQUAL(reg.get_longs(), 1);
392  TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "11111111111111111111111111111111");
393  }
394 
395  {
396  PartitionSize reg(33);
397  TEST_EXPECT_EQUAL(reg.get_bits(), 33);
398  TEST_EXPECT_EQUAL(reg.get_longs(), 2);
399  TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "00000000000000000000000000000001");
400  }
401 
402  {
403  PartitionSize reg(95);
404  TEST_EXPECT_EQUAL(reg.get_bits(), 95);
405  TEST_EXPECT_EQUAL(reg.get_longs(), 3);
406  TEST_EXPECT_EQUAL(readable_cutmask(reg.get_cutmask()), "01111111111111111111111111111111");
407  }
408 }
409 
410 #endif // UNIT_TESTS
411 
412 // --------------------------------------------------------------------------------
413 
414 
#define arb_assert(cond)
Definition: arb_assert.h:245
int topological_cmp(const PART *other) const
Definition: CT_part.cxx:338
const char * id
Definition: AliAdmin.cxx:17
int PELEM_cmp(const PELEM &p1, const PELEM &p2)
Definition: CT_part.cxx:334
#define BITS_PER_PELEM
Definition: CT_part.cxx:20
PartitionSize(int len)
Definition: CT_part.cxx:36
int get_members() const
Definition: CT_part.hxx:211
bool is_leaf_edge() const
Definition: CT_part.hxx:214
unsigned int PELEM
Definition: CT_part.hxx:31
int get_maxsize() const
Definition: CT_part.hxx:117
void standardize()
Definition: CT_part.cxx:265
bool is_valid() const
Definition: CT_part.hxx:120
void invertInSuperset(const PART *superset)
Definition: CT_part.cxx:169
static FullNameMap names
int index() const
Definition: CT_part.cxx:279
int get_longs() const
Definition: CT_part.hxx:116
unsigned key() const
Definition: CT_part.cxx:223
bool is_standardized() const
Definition: CT_part.cxx:253
fputc('\n', stderr)
PELEM get_cutmask() const
Definition: CT_part.hxx:118
PART * create_root() const
Definition: CT_part.cxx:125
bool bit_is_set(int pos) const
Definition: CT_part.hxx:145
#define cmp(h1, h2)
Definition: admap.cxx:50
bool disjunct_from(const PART *other) const
Definition: CT_part.hxx:203
bool equals(const PART *other) const
Definition: CT_part.cxx:209
void add_members_from(const PART *source)
Definition: CT_part.cxx:186
void invert()
Definition: CT_part.cxx:150
Definition: CT_part.hxx:62
fputs(TRACE_PREFIX, stderr)
str readable(const copy< T > &v)
Definition: test_unit.h:334
int insertionOrder_cmp(const PART *other) const
Definition: CT_part.cxx:305
#define NULp
Definition: cxxforward.h:97
bool is_subset_of(const PART *other) const
Definition: CT_part.hxx:191
int distance_to_tree_center() const
Definition: CT_part.hxx:215
bool overlaps_with(const PART *other) const
Definition: CT_part.cxx:136
int get_bits() const
Definition: CT_part.hxx:54
GBT_LEN get_len() const
Definition: CT_part.hxx:160
CONSTEXPR_INLINE int double_cmp(const double d1, const double d2)
Definition: arbtools.h:182
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
void print(const T &t)
Definition: test_unit.h:348