ARB
adseqcompr.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adseqcompr.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arb_progress.h>
12 #include <arb_file.h>
13 #include <arb_misc.h>
14 #include <arb_diff.h>
15 #include "ad_cb.h"
16 #include "gb_key.h"
17 #include "TreeNode.h"
18 
19 #include <climits>
20 
21 // --------------------------------------------------------------------------------
22 
23 #define MAX_SEQUENCE_PER_MASTER 50 // was 18 till May 2008
24 
25 #if defined(DEBUG)
26 // don't do optimize, only create tree and save to DB
27 // #define SAVE_COMPRESSION_TREE_TO_DB
28 #endif // DEBUG
29 
30 // --------------------------------------------------------------------------------
31 
32 struct CompressionRoot : public TreeRoot {
34  TreeNode *makeNode() const OVERRIDE;
35  void destroyNode(TreeNode *node) const OVERRIDE;
36 };
37 
38 class CompressionTree FINAL_TYPE : public TreeNode {
39 protected:
41  friend class CompressionRoot;
42 public:
43 
44  // members initialized by init_indices_and_count_sons
45  int index; // master(inner nodes) or sequence(leaf nodes) index
46  int sons; // sons with sequence or masters (in subtree)
47 
49 
50  unsigned get_leaf_count() const OVERRIDE {
51  gb_assert(0); // @@@ impl (see also GBT_count_leafs and AP_pos_var::getsize)
52  return 0;
53  }
55 
56  DEFINE_TREE_ACCESSORS(CompressionRoot, CompressionTree);
57 };
58 
60 TreeNode *CompressionRoot::makeNode() const { return new CompressionTree(const_cast<CompressionRoot*>(this)); }
61 void CompressionRoot::destroyNode(TreeNode *node) const { delete DOWNCAST(CompressionTree*,node); }
62 
63 struct Consensus {
64  int len;
65  char used[256];
66  unsigned char *con[256];
67 };
68 
69 struct Sequence {
71  int master;
72 };
73 
76  int master;
77 };
78 
79 // --------------------------------------------------------------------------------
80 
81 static Consensus *g_b_new_Consensus(long len) {
82  Consensus *gcon = ARB_calloc<Consensus>(1);
83  unsigned char *data = ARB_calloc<unsigned char>(256*len);
84 
85  gcon->len = len;
86 
87  for (int i=0; i<256; i++) { // IRRELEVANT_LOOP (gcc 9.x refuses to optimize)
88  gcon->con[i] = data + len*i;
89  }
90  return gcon;
91 }
92 
93 
94 static void g_b_delete_Consensus(Consensus *gcon) {
95  free(gcon->con[0]);
96  free(gcon);
97 }
98 
99 
100 static void g_b_Consensus_add(Consensus *gcon, unsigned char *seq, long seq_len) {
101  const int max_priority = 255/MAX_SEQUENCE_PER_MASTER; // No overflow possible
102  gb_assert(max_priority >= 1);
103 
104  if (seq_len > gcon->len) seq_len = gcon->len;
105 
106  // Search for runs
107  unsigned char *s = seq;
108  int last = 0;
109  int i;
110  int li;
111  int c;
112 
113  for (li = i = 0; i < seq_len; i++) {
114  c = *(s++);
115  if (c == last) {
116  continue;
117  }
118  else {
119  inc_hits :
120  int eq_count = i-li;
121  gcon->used[c] = 1;
122  unsigned char *p = gcon->con[last];
123  last = c;
124 
125  if (eq_count <= GB_RUNLENGTH_SIZE) {
126  c = max_priority;
127  while (li < i) p[li++] += c; // LOOP_VECTORIZED =1[>=8] =2 // 2x with 4.9.2; 2x since 5.x; again 1x with 10.3 + 9.5 + 9.1 + 8.5
128  }
129  else {
130  c = max_priority * (GB_RUNLENGTH_SIZE) / eq_count;
131  if (c) {
132  while (li < i) p[li++] += c; // LOOP_VECTORIZED =1[>=8] =2 // 2x with 4.9.2; 2x since 5.x; again 1x with 10.3 + 9.5 + 9.1 + 8.5
133  }
134  else {
135  while (li < i) p[li++] |= 1; // LOOP_VECTORIZED =1[>=8] =2 // 2x with 4.9.2; 2x since 5.x; again 1x with 10.3 + 9.5 + 9.1 + 8.5
136  }
137  }
138  }
139  }
140  if (li<seq_len) {
141  c = last;
142  i = seq_len;
143  goto inc_hits;
144  }
145 }
146 
148  int pos;
149 
150  unsigned char *s;
151 
152  unsigned char *max = ARB_calloc<unsigned char>(gcon->len);
153  char *seq = ARB_calloc<char>(gcon->len+1);
154 
155  memset(seq, '@', gcon->len);
156 
157  for (int c = 1; c<256; c++) { // Find maximum frequency of non run
158  if (!gcon->used[c]) continue;
159  s = gcon->con[c];
160  for (pos = 0; pos<gcon->len; pos++) {
161  if (*s > max[pos]) {
162  max[pos] = *s;
163  seq[pos] = c;
164  }
165  s++;
166  }
167  }
168  free(max);
169  return seq;
170 }
171 
172 
173 static int g_b_count_leafs(CompressionTree *node) {
174  if (node->is_leaf()) return 1;
175  node->gb_node = NULp;
176  return g_b_count_leafs(node->get_leftson()) + g_b_count_leafs(node->get_rightson());
177 }
178 
179 static void g_b_put_sequences_in_container(CompressionTree *ctree, Sequence *seqs, MasterSequence **masters, Consensus *gcon) {
180  if (ctree->is_leaf()) {
181  if (ctree->index >= 0) {
182  GB_CSTR data = GB_read_char_pntr(seqs[ctree->index].gb_seq);
183  long len = GB_read_string_count(seqs[ctree->index].gb_seq);
184  g_b_Consensus_add(gcon, (unsigned char *)data, len);
185  }
186  }
187  else if (ctree->index<0) {
188  g_b_put_sequences_in_container(ctree->get_leftson(), seqs, masters, gcon);
189  g_b_put_sequences_in_container(ctree->get_rightson(), seqs, masters, gcon);
190  }
191  else {
192  GB_CSTR data = GB_read_char_pntr(masters[ctree->index]->gb_mas);
193  long len = GB_read_string_count(masters[ctree->index]->gb_mas);
194  g_b_Consensus_add(gcon, (unsigned char *)data, len);
195  }
196 }
197 
198 static void g_b_create_master(CompressionTree *node, Sequence *seqs, MasterSequence **masters, int my_master, const char *ali_name, long seq_len, arb_progress& progress) {
199  if (node->is_leaf()) {
200  if (node->index >= 0) {
201  GBDATA *gb_data = GBT_find_sequence(node->gb_node, ali_name);
202 
203  seqs[node->index].gb_seq = gb_data->as_entry();
204  seqs[node->index].master = my_master;
205  }
206  }
207  else {
208  if (progress.aborted()) return;
209 
210  if (node->index>=0) {
211  masters[node->index]->master = my_master;
212  my_master = node->index;
213  }
214  g_b_create_master(node->get_leftson(), seqs, masters, my_master, ali_name, seq_len, progress);
215  g_b_create_master(node->get_rightson(), seqs, masters, my_master, ali_name, seq_len, progress);
216  if (node->index>=0 && !progress.aborted()) { // build me
217  char *data;
218  Consensus *gcon = g_b_new_Consensus(seq_len);
219 
220  g_b_put_sequences_in_container(node->get_leftson(), seqs, masters, gcon);
221  g_b_put_sequences_in_container(node->get_rightson(), seqs, masters, gcon);
222 
223  data = g_b_Consensus_get_sequence(gcon);
224 
225  GB_write_string(masters[node->index]->gb_mas, data);
226  GB_write_security_write(masters[node->index]->gb_mas, 7);
227 
228  g_b_delete_Consensus(gcon);
229  free(data);
230 
231  ++progress;
232  }
233  }
234 }
235 
236 // -------------------------------------
237 // distribute master sequences
238 
239 static void subtract_sons_from_tree(CompressionTree *node, int subtract) {
240  while (node) {
241  node->sons -= subtract;
242  node = node->get_father();
243  }
244 }
245 
246 static int set_masters_with_sons(CompressionTree *node, int wantedSons, long *mcount) {
247  if (!node->is_leaf()) {
248  if (node->sons == wantedSons) {
249  // insert new master
250  gb_assert(node->index == -1);
251  node->index = *mcount;
252  (*mcount)++;
253 
254  subtract_sons_from_tree(node->get_father(), node->sons-1);
255  node->sons = 1;
256  }
257  else if (node->sons>wantedSons) {
258  int lMax = set_masters_with_sons(node->get_leftson(), wantedSons, mcount);
259  int rMax = set_masters_with_sons(node->get_rightson(), wantedSons, mcount);
260 
261  int maxSons = lMax<rMax ? rMax : lMax;
262  if (node->sons <= MAX_SEQUENCE_PER_MASTER && node->sons>maxSons) {
263  maxSons = node->sons;
264  }
265  return maxSons;
266  }
267  }
268  return node->sons <= MAX_SEQUENCE_PER_MASTER ? node->sons : 0;
269 }
270 
271 static int maxCompressionSteps(CompressionTree *node) {
272  if (node->is_leaf()) {
273  return 0;
274  }
275 
276  int left = maxCompressionSteps(node->get_leftson());
277  int right = maxCompressionSteps(node->get_rightson());
278 
279 #if defined(SAVE_COMPRESSION_TREE_TO_DB)
280  freenull(node->name);
281  if (node->index2 != -1) {
282  node->name = GBS_global_string_copy("master_%03i", node->index2);
283  }
284 #endif // SAVE_COMPRESSION_TREE_TO_DB
285 
286  return (left>right ? left : right) + (node->index == -1 ? 0 : 1);
287 }
288 
289 static int init_indices_and_count_sons(CompressionTree *node, long *scount, const char *ali_name) {
290  if (node->is_leaf()) {
291  if (!node->gb_node || !GBT_find_sequence(node->gb_node, (char *)ali_name)) {
292  node->index = -1;
293  node->sons = 0;
294  }
295  else {
296  node->index = *scount;
297  node->sons = 1;
298  (*scount)++;
299  }
300  }
301  else {
302  node->index = -1;
303  node->sons =
304  init_indices_and_count_sons(node->get_leftson(), scount, ali_name) +
305  init_indices_and_count_sons(node->get_rightson(), scount, ali_name);
306  }
307  return node->sons;
308 }
309 
310 static void distribute_masters(CompressionTree *tree, long *mcount, int *max_masters) {
311  int wantedSons = MAX_SEQUENCE_PER_MASTER;
312  while (wantedSons >= 2) {
313  int maxSons = set_masters_with_sons(tree, wantedSons, mcount);
314  wantedSons = maxSons;
315  }
316  gb_assert(tree->sons == 1);
317 
318  gb_assert(tree->index != -1);
319  *max_masters = maxCompressionSteps(tree);
320 }
321 
322 // --------------------------------------------------------------------------------
323 
324 #define MAX_NUMBER 0x7fffffff
325 STATIC_ASSERT(MAX_NUMBER <= INT_MAX); // ensure 32-bit-version compatibility!
326 
327 inline int g_b_read_number2(const unsigned char*& s) {
328  int result;
329  unsigned c0 = *s++;
330  if (c0 & 0x80) {
331  unsigned c1 = *s++;
332  if (c0 & 0x40) {
333  unsigned c2 = *s++;
334  if (c0 & 0x20) {
335  unsigned c3 = *s++;
336  if (c0 & 0x10) {
337  unsigned c4 = *s++;
338  result = c4 | (c3<<8) | (c2<<16) | (c1<<24);
339  }
340  else {
341  result = c3 | (c2<<8) | (c1<<16) | ((c0 & 0x0f)<<24);
342  }
343  }
344  else {
345  result = c2 | (c1<<8) | ((c0 & 0x1f)<<16);
346  }
347  }
348  else {
349  result = c1 | ((c0 & 0x3f)<<8);
350  }
351  }
352  else {
353  result = c0;
354  }
355  gb_assert(result >= 0 && result <= MAX_NUMBER);
356  return result;
357 }
358 
359 inline void g_b_put_number2(int i, unsigned char*& s) {
360  gb_assert(i >= 0 && i <= MAX_NUMBER);
361 
362  if (i< 0x80) {
363  *s++ = i;
364  }
365  else {
366  int j;
367  if (i<0x4000) {
368  j = (i>>8) | 0x80; *s++ = j;
369  *s++ = i;
370  }
371  else if (i<0x200000) {
372  j = (i>>16) | 0xC0; *s++ = j;
373  j = (i>>8); *s++ = j;
374  *s++ = i;
375  }
376  else if (i<0x10000000) {
377  j = (i>>24) | 0xE0; *s++ = j;
378  j = (i>>16); *s++ = j;
379  j = (i>>8); *s++ = j;
380  *s++ = i;
381  }
382  else {
383  *s++ = 0xF0;
384  j = (i>>24); *s++ = j;
385  j = (i>>16); *s++ = j;
386  j = (i>>8); *s++ = j;
387  *s++ = i;
388  }
389  }
390 }
391 
392 // --------------------------------------------------------------------------------
393 
394 #ifdef UNIT_TESTS
395 #ifndef TEST_UNIT_H
396 #include <test_unit.h>
397 #endif
398 
399 static arb_test::match_expectation put_read_num_using_bytes(int num_written, int bytes_expected, unsigned char *buffer_expected = NULp) {
400  const int BUFSIZE = 6;
401  unsigned char buffer[BUFSIZE];
402 
403  using namespace arb_test;
404 
405  unsigned char INIT = 0xaa;
406  memset(buffer, INIT, BUFSIZE);
407 
408  expectation_group expected;
409 
410  {
411  unsigned char *bp = buffer;
412  g_b_put_number2(num_written, bp);
413 
414  size_t bytes_written = bp-buffer;
415  expected.add(that(bytes_written).is_equal_to(bytes_expected));
416 
417  if (buffer_expected) {
418  expected.add(that(arb_test::memory_is_equal(buffer, buffer_expected, bytes_expected)).is_equal_to(true));
419  }
420  }
421  {
422  const unsigned char *bp = buffer;
423 
424  int num_read = g_b_read_number2(bp);
425  expected.add(that(num_read).is_equal_to(num_written));
426 
427  size_t bytes_read = bp-buffer;
428  expected.add(that(bytes_read).is_equal_to(bytes_expected));
429  }
430 
431  expected.add(that(buffer[bytes_expected]).is_equal_to(INIT));
432 
433  return all().ofgroup(expected);
434 }
435 
436 #define TEST_PUT_READ_NUMBER(num,expect_bytes) TEST_EXPECTATION(put_read_num_using_bytes(num, expect_bytes))
437 #define TEST_PUT_READ_NUMBER__BROKEN(num,expect_bytes) TEST_EXPECTATION__BROKEN(put_read_num_using_bytes(num, expect_bytes))
438 
439 #define TEST_PUT_NUMBER_BINARY1(num, byte1) do { \
440  unsigned char buf[1]; \
441  buf[0] = byte1; \
442  TEST_EXPECTATION(put_read_num_using_bytes(num, 1, buf)); \
443  } while(0)
444 
445 #define TEST_PUT_NUMBER_BINARY2(num, byte1, byte2) do { \
446  unsigned char buf[2]; \
447  buf[0] = byte1; \
448  buf[1] = byte2; \
449  TEST_EXPECTATION(put_read_num_using_bytes(num, 2, buf)); \
450  } while(0)
451 
452 #define TEST_PUT_NUMBER_BINARY3(num, byte1, byte2, byte3) do { \
453  unsigned char buf[3]; \
454  buf[0] = byte1; \
455  buf[1] = byte2; \
456  buf[2] = byte3; \
457  TEST_EXPECTATION(put_read_num_using_bytes(num, 3, buf)); \
458  } while(0)
459 
460 #define TEST_PUT_NUMBER_BINARY4(num, byte1, byte2, byte3, byte4) do { \
461  unsigned char buf[4]; \
462  buf[0] = byte1; \
463  buf[1] = byte2; \
464  buf[2] = byte3; \
465  buf[3] = byte4; \
466  TEST_EXPECTATION(put_read_num_using_bytes(num, 4, buf)); \
467  } while(0)
468 
469 #define TEST_PUT_NUMBER_BINARY5(num, byte1, byte2, byte3, byte4, byte5) do { \
470  unsigned char buf[5]; \
471  buf[0] = byte1; \
472  buf[1] = byte2; \
473  buf[2] = byte3; \
474  buf[3] = byte4; \
475  buf[4] = byte5; \
476  TEST_EXPECTATION(put_read_num_using_bytes(num, 5, buf)); \
477  } while(0)
478 
479 void TEST_put_read_number() {
480  // test that put and read are compatible:
481  TEST_PUT_READ_NUMBER(0x0, 1);
482 
483  TEST_PUT_READ_NUMBER(0x7f, 1);
484  TEST_PUT_READ_NUMBER(0x80, 2);
485 
486  TEST_PUT_READ_NUMBER(0x3fff, 2);
487  TEST_PUT_READ_NUMBER(0x4000, 3);
488 
489  TEST_PUT_READ_NUMBER(0x1fffff, 3);
490  TEST_PUT_READ_NUMBER(0x200000, 4);
491 
492  TEST_PUT_READ_NUMBER(0xfffffff, 4);
493 
494  TEST_PUT_READ_NUMBER(0x10000000, 5);
495  TEST_PUT_READ_NUMBER(0x7fffffff, 5);
496 
497  // test binary compatibility:
498  // (code affects DB content, cannot be changed)
499 
500  TEST_PUT_NUMBER_BINARY1(0x0, 0x00);
501  TEST_PUT_NUMBER_BINARY1(0x7f, 0x7f);
502 
503  TEST_PUT_NUMBER_BINARY2(0x80, 0x80, 0x80);
504  TEST_PUT_NUMBER_BINARY2(0x81, 0x80, 0x81);
505  TEST_PUT_NUMBER_BINARY2(0x3fff, 0xbf, 0xff);
506 
507  TEST_PUT_NUMBER_BINARY3(0x4000, 0xc0, 0x40, 0x00);
508  TEST_PUT_NUMBER_BINARY3(0x1fffff, 0xdf, 0xff, 0xff);
509 
510  TEST_PUT_NUMBER_BINARY4(0x200000, 0xe0, 0x20, 0x00, 0x00);
511  TEST_PUT_NUMBER_BINARY4(0xfffffff, 0xef, 0xff, 0xff, 0xff);
512 
513  TEST_PUT_NUMBER_BINARY5(0x10000000, 0xf0, 0x10, 0x00, 0x00, 0x00);
514  TEST_PUT_NUMBER_BINARY5(0x7fffffff, 0xf0, 0x7f, 0xff, 0xff, 0xff);
515 }
516 
517 #endif // UNIT_TESTS
518 
519 // --------------------------------------------------------------------------------
520 
521 
522 static char *gb_compress_seq_by_master(const char *master, size_t master_len, int master_index,
523  GBQUARK q, const char *seq, size_t seq_len,
524  size_t *memsize, int old_flag) {
525  unsigned char *buffer;
526  int rest = 0;
527  unsigned char *d;
528  int i, cs, cm;
529  int last;
530  long len = seq_len;
531 
532  d = buffer = (unsigned char *)GB_give_other_buffer(seq, seq_len);
533 
534  if (seq_len > master_len) {
535  rest = seq_len - master_len;
536  len = master_len;
537  }
538 
539  last = -1000; // Convert Sequence relative to Master
540  for (i = len; i>0; i--) {
541  cm = *(master++);
542  cs = *(seq++);
543  if (cm==cs && cs != last) {
544  *(d++) = 0;
545  last = 1000;
546  }
547  else {
548  *(d++) = cs;
549  last = cs;
550  }
551  }
552  for (i = rest; i>0; i--) { // LOOP_VECTORIZED[!<492,!>=6<650] (works for 4.9.2 + 6.5.0)
553  *(d++) = *(seq++);
554  }
555 
556  { // Append run length compression method
557  unsigned char *buffer2;
558  unsigned char *dest2;
559  buffer2 = dest2 = (unsigned char *)GB_give_other_buffer((char *)buffer, seq_len+100);
560  *(dest2++) = GB_COMPRESSION_SEQUENCE | old_flag;
561 
562  g_b_put_number2(master_index, dest2); // Tags
563  g_b_put_number2(q, dest2);
564 
565  gb_compress_equal_bytes_2((char *)buffer, seq_len, memsize, (char *)dest2); // append runlength compressed sequences to tags
566 
567  *memsize = *memsize + (dest2-buffer2);
568  return (char *)buffer2;
569  }
570 }
571 
572 static char *gb_compress_sequence_by_master(GBDATA *gbd, const char *master, size_t master_len, int master_index,
573  GBQUARK q, const char *seq, size_t seq_len, size_t *memsize)
574 {
575  size_t size;
576  char *is = gb_compress_seq_by_master(master, master_len, master_index, q, seq, seq_len, &size, GB_COMPRESSION_LAST);
577  char *res = gb_compress_data(gbd, 0, is, size, memsize, ~(GB_COMPRESSION_DICTIONARY|GB_COMPRESSION_SORTBYTES|GB_COMPRESSION_RUNLENGTH), true);
578  return res;
579 }
580 
581 static GB_ERROR compress_sequence_tree(GBCONTAINER *gb_main, CompressionTree *tree, const char *ali_name) {
582  GB_ERROR error = NULp;
583  long ali_len = GBT_get_alignment_len(gb_main, ali_name);
584  int main_clock = GB_read_clock(gb_main);
585 
587 
588  if (ali_len<=0) {
589  warning = GBS_global_string("Skipping alignment '%s' (Reason: %s)", ali_name, GB_await_error());
590  GB_clear_error();
591  }
592  else {
593  int leafcount = g_b_count_leafs(tree);
594  if (!leafcount) {
595  error = "Tree is empty";
596  }
597  else {
598  arb_progress tree_progress("Compressing sequences", 4L);
599 
600  // Distribute masters in tree
601  long mastercount = 0;
602  int max_compSteps = 0; // in one branch
603  long seqcount = 0;
604 
605  init_indices_and_count_sons(tree, &seqcount, ali_name);
606  if (!seqcount) {
607  warning = GBS_global_string("Tree contains no sequences with data in '%s'\n"
608  "Skipping compression for this alignment",
609  ali_name);
610  }
611  else {
612  distribute_masters(tree, &mastercount, &max_compSteps);
613 
614 #if defined(SAVE_COMPRESSION_TREE_TO_DB)
615  {
616  error = GBT_link_tree(tree, gb_main, 0, NULp, NULp);
617  if (!error) error = GBT_write_tree(gb_main, 0, "tree_compression_new", tree);
618  GB_information("Only generated compression tree (do NOT save DB anymore)");
619  return error;
620  }
621 #endif // SAVE_COMPRESSION_TREE_TO_DB
622 
623  // detect degenerated trees
624  {
625  long min_masters = ((seqcount-1)/MAX_SEQUENCE_PER_MASTER)+1;
626  int min_compSteps = 1;
627  {
628  int m = min_masters;
629  while (m>1) {
630  m = ((m-1)/MAX_SEQUENCE_PER_MASTER)+1;
631  min_masters += m;
632  min_compSteps++;
633  }
634  }
635 
636  long acceptable_masters = (3*min_masters)/2; // accept 50% overhead
637  int acceptable_compSteps = 11*min_compSteps; // accept 1000% overhead
638 
639  if (mastercount>acceptable_masters || max_compSteps>acceptable_compSteps) {
640  GB_warningf("Tree is ill-suited for compression (cause of deep branches)\n"
641  " Used tree Optimal tree Overhead\n"
642  "Compression steps %5i %5i %4i%% (speed)\n"
643  "Master sequences %5li %5li %4li%% (size)\n"
644  "If you like to restart with a better tree,\n"
645  "press 'Abort' to stop compression",
646  max_compSteps, min_compSteps, (100*max_compSteps)/min_compSteps-100,
647  mastercount, min_masters, (100*mastercount)/min_masters-100);
648  }
649  }
650 
651  gb_assert(mastercount>0);
652  }
653 
654  if (!warning) {
655  GBCONTAINER *gb_master_ali = NULp;
656  GBDATA *old_gb_master_ali = NULp;
657  Sequence *seqs = NULp;
658  GB_MAIN_TYPE *Main = GB_MAIN(gb_main);
659  GBQUARK ali_quark = gb_find_or_create_quark(Main, ali_name);
660  unsigned long long sumorg = 0;
661  unsigned long long sumold = 0;
662  unsigned long long sumnew = 0;
663 
664  MasterSequence **masters; ARB_calloc(masters, leafcount);
665 
666  {
667  char *masterfoldername = GBS_global_string_copy("%s/@master_data/@%s", GB_SYSTEM_FOLDER, ali_name);
668  old_gb_master_ali = GBDATA::as_container(GB_search(gb_main, masterfoldername, GB_FIND));
669  free(masterfoldername);
670  }
671 
672  // create masters
673  if (!error) {
674  {
675  char *master_data_name = GBS_global_string_copy("%s/@master_data", GB_SYSTEM_FOLDER);
676  char *master_name = GBS_global_string_copy("@%s", ali_name);
677 
678  GBCONTAINER *gb_master_data = gb_search(gb_main, master_data_name, GB_CREATE_CONTAINER, 1)->as_container();
679 
680  // create a master container, the old is deleted as soon as all sequences are compressed by the new method
681  gb_master_ali = gb_create_container(gb_master_data, master_name);
682  GB_write_security_delete(gb_master_ali, 7);
683 
684  free(master_name);
685  free(master_data_name);
686  }
687  for (long si = 0; si<mastercount; si++) {
688  ARB_calloc(masters[si], 1);
689  masters[si]->gb_mas = gb_create(gb_master_ali, "@master", GB_STRING);
690  }
691  ARB_calloc(seqs, leafcount);
692 
693  if (!error) {
694  arb_progress progress("Building master sequences", mastercount);
695  g_b_create_master(tree, seqs, masters, -1, ali_name, ali_len, progress);
696 
697  error = progress.error_if_aborted();
698  }
699  }
700  tree_progress.inc_and_check_user_abort(error);
701 
702  // Compress sequences in tree
703  if (!error) {
704  arb_progress progress("Compressing sequences in tree", seqcount);
705 
706  for (long si=0; si<seqcount && !error; si++) {
707  int mi = seqs[si].master;
708  MasterSequence *master = masters[mi];
709  GBDATA *gbd = seqs[si].gb_seq;
710 
711  if (GB_read_clock(gbd) >= main_clock) {
712  GB_warning("A species seems to be more than once in the tree");
713  }
714  else {
715  char *seq = GB_read_string(gbd);
716  int seq_len = GB_read_string_count(gbd);
717  long sizen = GB_read_memuse(gbd);
718  char *seqm = GB_read_string(master->gb_mas);
719  int master_len = GB_read_string_count(master->gb_mas);
720  size_t sizes;
721  char *ss = gb_compress_sequence_by_master(gbd, seqm, master_len, mi, ali_quark, seq, seq_len, &sizes);
722 
723  gb_write_compressed_pntr(gbd->as_entry(), ss, sizes, seq_len);
724  sizes = GB_read_memuse(gbd); // check real usage
725 
726  sumnew += sizes;
727  sumold += sizen;
728  sumorg += seq_len;
729 
730  free(seqm);
731  free(seq);
732  }
733 
734  progress.inc_and_check_user_abort(error);
735  }
736  }
737  tree_progress.inc_and_check_user_abort(error);
738 
739  // Compress rest of sequences
740  if (!error) {
741  int pass; // pass 1 : count species to compress, pass 2 : compress species
742  long speciesNotInTree = 0;
743 
744  SmartPtr<arb_progress> progress;
745 
746  for (pass = 1; pass <= 2; ++pass) {
748  GBDATA *gb_species;
749 
750  long count = 0;
751 
752  for (gb_species = GBT_first_species_rel_species_data(gb_species_data);
753  gb_species;
754  gb_species = GBT_next_species(gb_species))
755  {
756  GBDATA *gbd = GBT_find_sequence(gb_species, ali_name);
757 
758  if (!gbd) continue;
759  if (GB_read_clock(gbd) >= main_clock) continue; // Compress only those which are not compressed by masters
760  count++;
761  if (pass == 2) {
762  char *data = GB_read_string(gbd);
763  long seq_len = GB_read_string_count(gbd);
764  long size = GB_read_memuse(gbd);
765 
766  GB_write_string(gbd, ""); // force recompression
767  GB_write_string(gbd, data);
768  free(data);
769 
770  sumold += size;
771 
772  size = GB_read_memuse(gbd);
773  sumnew += size;
774  sumorg += seq_len;
775 
776  progress->inc_and_check_user_abort(error);
777  }
778  }
779  if (pass == 1) {
780  speciesNotInTree = count;
781  if (speciesNotInTree>0) {
782  progress = new arb_progress("Compressing sequences NOT in tree", speciesNotInTree);
783  }
784  }
785  }
786 
787  if (!progress.isSet()) progress = new arb_progress;
788  arb_assert(progress.isSet()); // if no progress used here -> parent progress fails assertion
789  }
790  tree_progress.inc_and_check_user_abort(error);
791 
792  if (!error) {
793  arb_progress progress("Compressing master-sequences", mastercount);
794 
795  // Compress all masters
796  for (long si=0; si<mastercount; si++) {
797  int mi = masters[si]->master;
798 
799  if (mi>0) { // master available
800  GBDATA *gbd = masters[si]->gb_mas;
801 
802  gb_assert(mi>si); // we don't want a recursion, because we cannot uncompress sequence compressed masters, Main->gb_master_data is wrong
803 
804  if (gb_read_nr(gbd) != si) { // Check database
805  GB_internal_error("Sequence Compression: Master Index Conflict");
806  error = GB_export_error("Sequence Compression: Master Index Conflict");
807  break;
808  }
809 
810  {
811  MasterSequence *master = masters[mi];
812  char *seqm = GB_read_string(master->gb_mas);
813  int master_len = GB_read_string_count(master->gb_mas);
814  char *seq = GB_read_string(gbd);
815  int seq_len = GB_read_string_count(gbd);
816  size_t sizes;
817  char *ss = gb_compress_sequence_by_master(gbd, seqm, master_len, mi, ali_quark, seq, seq_len, &sizes);
818 
819  gb_write_compressed_pntr(gbd->as_entry(), ss, sizes, seq_len);
820  sumnew += sizes;
821 
822  free(seq);
823  free(seqm);
824  }
825 
826  progress.inc_and_check_user_abort(error);
827  }
828  else { // count size of top master
829  GBDATA *gbd = masters[si]->gb_mas;
830  sumnew += GB_read_memuse(gbd);
831 
832  progress.inc_and_check_user_abort(error);
833  }
834  }
835 
836  // count size of old master data
837  if (!error) {
838  GBDATA *gb_omaster;
839  for (gb_omaster = GB_entry(old_gb_master_ali, "@master");
840  gb_omaster;
841  gb_omaster = GB_nextEntry(gb_omaster))
842  {
843  long size = GB_read_memuse(gb_omaster);
844  sumold += size;
845  }
846  }
847 
848  if (!error) {
849  char *sizeOrg = ARB_strdup(GBS_readable_size(sumorg, "b"));
850  char *sizeOld = ARB_strdup(GBS_readable_size(sumold, "b"));
851  char *sizeNew = ARB_strdup(GBS_readable_size(sumnew, "b"));
852 
853  GB_warningf("Alignment '%s':\n"
854  " Uncompressed data: %7s\n"
855  " Old compressed data: %7s = %6.2f%%\n"
856  " New compressed data: %7s = %6.2f%%",
857  ali_name, sizeOrg,
858  sizeOld, (100.0*sumold)/sumorg,
859  sizeNew, (100.0*sumnew)/sumorg);
860 
861  free(sizeNew);
862  free(sizeOld);
863  free(sizeOrg);
864  }
865  }
866  tree_progress.inc_and_check_user_abort(error);
867 
868  if (!error) {
869  if (old_gb_master_ali) error = GB_delete(old_gb_master_ali);
870  Main->keys[ali_quark].gb_master_ali = gb_master_ali;
871  }
872 
873  // free data
874  free(seqs);
875  for (long si=0; si<mastercount; si++) free(masters[si]);
876  free(masters);
877  }
878  else {
879  tree_progress.done();
880  }
881  }
882  }
883 
884  if (warning) GB_information(warning);
885 
886  return error;
887 }
888 
889 GB_ERROR GBT_compress_sequence_tree2(GBDATA *gbd, const char *tree_name, const char *ali_name) { // goes to header: __ATTR__USERESULT // @@@ rename function
890  // Compress sequences, call only outside a transaction
891  GB_ERROR error = NULp;
892  GB_MAIN_TYPE *Main = GB_MAIN(gbd);
893 
894  if (Main->get_transaction_level() > 0) {
895  error = "Compress Sequences called while transaction running";
896  GB_internal_error(error);
897  }
898  else {
900  GB_UNDO_TYPE prev_undo_type = GB_get_requested_undo_type(gb_main);
901 
902  error = GB_request_undo_type(gb_main, GB_UNDO_KILL);
903  if (!error) {
904  GB_transaction ta(gb_main);
905  if (ta.ok()) {
906  GB_topSecurityLevel unsecured(gb_main);
907 
908  if (!tree_name || !strlen(tree_name)) {
909  tree_name = GBT_name_of_largest_tree(gb_main);
910  }
911 
912  {
913  CompressionTree *ctree = DOWNCAST(CompressionTree*, GBT_read_tree(gb_main, tree_name, new CompressionRoot));
914  if (!ctree) error = GB_await_error();
915  else {
916  error = GBT_link_tree(ctree, gb_main, false, NULp, NULp);
917  if (!error) error = compress_sequence_tree(gb_main, ctree, ali_name);
918  destroy(ctree);
919  }
920  }
921  if (!error) GB_disable_quicksave(gb_main, "Database optimized");
922  }
923  error = ta.close(error);
924  ASSERT_NO_ERROR(GB_request_undo_type(gb_main, prev_undo_type));
925  }
926 
927 #if defined(SAVE_COMPRESSION_TREE_TO_DB)
928  error = "fake error";
929 #endif // SAVE_COMPRESSION_TREE_TO_DB
930  }
931  return error;
932 }
933 
934 #ifdef DEBUG
935 
936 void GBT_compression_test(struct Unfixed_cb_parameter *, GBDATA *gb_main) {
938  char *ali_name = GBT_get_default_alignment(gb_main);
939  char *tree_name = ali_name ? GBT_read_string(gb_main, "focus/tree_name") : NULp;
940 
941  if (!ali_name || !tree_name) error = GB_await_error();
942 
943  error = GB_end_transaction(gb_main, error);
944 
945  if (!error) {
946  printf("Recompression data in alignment '%s' using tree '%s'\n", ali_name, tree_name);
947  error = GBT_compress_sequence_tree2(gb_main, tree_name, ali_name);
948  }
949 
950  if (error) GB_warning(error);
951  free(tree_name);
952  free(ali_name);
953 }
954 
955 #endif
956 
957 // ******************** Decompress Sequences ********************
958 
959 static char *g_b_uncompress_single_sequence_by_master(const char *s, const char *master, size_t size, size_t *new_size) {
960  const signed char *source = (signed char *)s;
961  char *dest;
962  const char *m = master;
963  unsigned int c;
964  int j;
965  int i;
966  char *buffer;
967 
968  dest = buffer = GB_give_other_buffer((char *)source, size);
969 
970  for (i=size; i;) {
971  j = *source++;
972  if (j>0) { // uncompressed data block
973  if (j>i) j=i;
974  i -= j;
975  for (; j; j--) {
976  c = *source++;
977  if (!c) c = *m;
978  *dest++ = c;
979  m++;
980  }
981  }
982  else { // equal bytes compressed
983  if (!j) break; // end symbol
984  if (j == -122) {
985  j = (*source++) & 0xff;
986  j |= (static_cast<unsigned char>(*source++) << 8) & 0xff00;
987  j = -j;
988  }
989  c = *source++;
990  i += j;
991  if (i<0) {
992  GB_internal_error("Internal Error: Missing end in data");
993  j += -i;
994  i = 0;
995  }
996  if (c==0) {
997  memcpy(dest, m, -j);
998  dest += -j;
999  m += -j;
1000  }
1001  else {
1002  memset(dest, c, -j);
1003  dest += -j;
1004  m += -j;
1005  }
1006  }
1007  }
1008  *dest++ = 0; // zero-terminate string
1009 
1010  *new_size = dest-buffer;
1011  gb_assert(size == *new_size); // buffer overflow
1012 
1013  return buffer;
1014 }
1015 
1016 char *gb_uncompress_by_sequence(GBDATA *gbd, const char *ss, size_t size, GB_ERROR *error, size_t *new_size) {
1017  char *dest = NULp;
1018 
1019  *error = NULp;
1020 
1022  if (!Main && GB_FATHER(gbd)) Main = GB_MAIN(gbd);
1023 
1024  if (!Main) {
1025  *error = "Can not uncompress this sequence (neither has father nor inside callback)";
1026  }
1027  else {
1028  GBDATA *gb_main = Main->gb_main();
1029  char *to_free = GB_check_out_buffer(ss); // Remove 'ss' from memory management, otherwise load_single_key_data() may destroy it
1030  int index;
1031  GBQUARK quark;
1032 
1033  {
1034  const unsigned char *s = (const unsigned char *)ss;
1035 
1036  index = g_b_read_number2(s);
1037  quark = g_b_read_number2(s);
1038 
1039  ss = (const char *)s;
1040  }
1041 
1042  if (!Main->keys[quark].gb_master_ali) {
1043  gb_load_single_key_data(gb_main, quark);
1044  }
1045 
1046  if (!Main->keys[quark].gb_master_ali) {
1047  *error = "Cannot uncompress this sequence: Cannot find a master sequence";
1048  }
1049  else {
1050  GBDATA *gb_master = gb_find_by_nr(Main->keys[quark].gb_master_ali, index);
1051  if (gb_master) {
1052  const char *master = GB_read_char_pntr(gb_master); // make sure that this is not a buffer !!!
1053 
1054  gb_assert((GB_read_string_count(gb_master)+1) == size); // size mismatch between master and slave
1055  dest = g_b_uncompress_single_sequence_by_master(ss, master, size, new_size);
1056  }
1057  else {
1058  *error = GB_await_error();
1059  }
1060  }
1061  free(to_free);
1062  }
1063 
1064  return dest;
1065 }
1066 
1067 // --------------------------------------------------------------------------------
1068 
1069 #ifdef UNIT_TESTS
1070 #ifndef TEST_UNIT_H
1071 #include <test_unit.h>
1072 #endif
1073 
1074 // #define TEST_AUTO_UPDATE // uncomment to auto-update expected result DB
1075 
1076 void TEST_SLOW_sequence_compression() {
1077  const char *source = "TEST_nuc.arb";
1078  const char *compressed = "TEST_nuc_seqcompr.arb";
1079  const char *expected = "TEST_nuc_seqcompr_exp.arb";
1080  const char *aliname = "ali_16s";
1081 
1082  GB_shell shell;
1083 
1084  const int SEQ2COMPARE = 7;
1085  char *seq_exp[SEQ2COMPARE];
1086 
1087  {
1088  GBDATA *gb_main;
1089  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(source, "rw"));
1090 
1091  {
1092  GB_transaction ta(gb_main);
1093  int count = 0;
1094 
1095  for (GBDATA *gb_species = GBT_first_species(gb_main);
1096  gb_species && count<SEQ2COMPARE;
1097  gb_species = GBT_next_species(gb_species), ++count)
1098  {
1099  GBDATA *gb_seq = GBT_find_sequence(gb_species, aliname);
1100  seq_exp[count] = GB_read_string(gb_seq);
1101  }
1102  }
1103 
1104  TEST_EXPECT_NO_ERROR(GBT_compress_sequence_tree2(gb_main, "tree_nuc", aliname));
1105  TEST_EXPECT_NO_ERROR(GB_save_as(gb_main, compressed, "b"));
1106  GB_close(gb_main);
1107  }
1108 #if defined(TEST_AUTO_UPDATE)
1109  TEST_COPY_FILE(compressed, expected);
1110 #endif
1111  TEST_EXPECT_FILES_EQUAL(compressed, expected);
1112 
1113  {
1114  GBDATA *gb_main;
1115  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(compressed, "rw"));
1116  {
1117  GB_transaction ta(gb_main);
1118  int count = 0;
1119 
1120  for (GBDATA *gb_species = GBT_first_species(gb_main);
1121  gb_species && count<SEQ2COMPARE;
1122  gb_species = GBT_next_species(gb_species), ++count)
1123  {
1124  GBDATA *gb_seq = GBT_find_sequence(gb_species, aliname);
1125  char *seq = GB_read_string(gb_seq);
1126 
1127  TEST_EXPECT_EQUAL(seq, seq_exp[count]);
1128 
1129  freenull(seq_exp[count]);
1130  free(seq);
1131  }
1132  }
1133  GB_close(gb_main);
1134  }
1135 
1137 }
1138 TEST_PUBLISH(TEST_SLOW_sequence_compression);
1139 
1140 #endif // UNIT_TESTS
1141 
1142 // --------------------------------------------------------------------------------
1143 
1144 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
static char * gb_compress_seq_by_master(const char *master, size_t master_len, int master_index, GBQUARK q, const char *seq, size_t seq_len, size_t *memsize, int old_flag)
Definition: adseqcompr.cxx:522
GB_MAIN_TYPE * gb_get_main_during_cb()
Definition: ad_cb.cxx:108
#define arb_assert(cond)
Definition: arb_assert.h:245
void compute_tree() OVERRIDE
Definition: adseqcompr.cxx:54
#define GB_SYSTEM_FOLDER
Definition: arbdb.h:27
string result
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
void GB_warning(const char *message)
Definition: arb_msg.cxx:530
void GBT_compression_test(struct Unfixed_cb_parameter *, GBDATA *gb_main)
group_matcher all()
Definition: test_unit.h:1011
static char * gb_compress_sequence_by_master(GBDATA *gbd, const char *master, size_t master_len, int master_index, GBQUARK q, const char *seq, size_t seq_len, size_t *memsize)
Definition: adseqcompr.cxx:572
GB_BUFFER gb_compress_data(GBDATA *gbd, int key, GB_CBUFFER source, size_t size, size_t *msize, GB_COMPRESSION_MASK max_compr, bool pre_compressed)
Definition: adcompr.cxx:923
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static Consensus * g_b_new_Consensus(long len)
Definition: adseqcompr.cxx:81
#define ASSERT_NO_ERROR(errorExpr)
Definition: arb_assert.h:360
GB_MAIN_TYPE * GB_MAIN(GBDATA *gbd)
Definition: gb_data.h:291
GBDATA * GB_nextEntry(GBDATA *entry)
Definition: adquery.cxx:339
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
static void g_b_Consensus_add(Consensus *gcon, unsigned char *seq, long seq_len)
Definition: adseqcompr.cxx:100
void GB_disable_quicksave(GBDATA *gbd, const char *reason)
Definition: arbdb.cxx:2647
static char * g_b_uncompress_single_sequence_by_master(const char *s, const char *master, size_t size, size_t *new_size)
Definition: adseqcompr.cxx:959
const char * GBT_name_of_largest_tree(GBDATA *gb_main)
Definition: adtree.cxx:1103
TreeNode * makeNode() const OVERRIDE
Definition: adseqcompr.cxx:60
GB_ERROR GBT_compress_sequence_tree2(GBDATA *gbd, const char *tree_name, const char *ali_name)
Definition: adseqcompr.cxx:889
bool ok() const
Definition: arbdb.h:162
GBCONTAINER * gb_create_container(GBCONTAINER *father, const char *key)
Definition: arbdb.cxx:1773
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:837
GBCONTAINER * gb_master_ali
Definition: gb_key.h:36
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
void warning(int warning_num, const char *warning_message)
Definition: util.cxx:61
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
char * GB_check_out_buffer(GB_CBUFFER buffer)
Definition: arbdb.cxx:350
GBQUARK gb_find_or_create_quark(GB_MAIN_TYPE *Main, const char *key)
Definition: arbdb.cxx:1668
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
char * gb_uncompress_by_sequence(GBDATA *gbd, const char *ss, size_t size, GB_ERROR *error, size_t *new_size)
static int init_indices_and_count_sons(CompressionTree *node, long *scount, const char *ali_name)
Definition: adseqcompr.cxx:289
GBCONTAINER * root_container
Definition: gb_main.h:120
GB_ERROR GBT_link_tree(TreeNode *tree, GBDATA *gb_main, bool show_status, int *zombies, int *duplicates)
Definition: adtree.cxx:953
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
FILE * seq
Definition: rns.c:46
static void subtract_sons_from_tree(CompressionTree *node, int subtract)
Definition: adseqcompr.cxx:239
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
GB_ERROR error_if_aborted()
Definition: arb_progress.h:323
GBENTRY * gb_create(GBCONTAINER *father, const char *key, GB_TYPES type)
Definition: arbdb.cxx:1763
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:121
GB_BUFFER GB_give_other_buffer(GB_CBUFFER buffer, long size)
Definition: arbdb.cxx:363
int master
Definition: adseqcompr.cxx:71
GB_UNDO_TYPE
Definition: arbdb.h:107
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:257
size_t GB_read_string_count(GBDATA *gbd)
Definition: arbdb.cxx:916
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
#define MAX_SEQUENCE_PER_MASTER
Definition: adseqcompr.cxx:23
void destroyNode(TreeNode *node) const OVERRIDE
Definition: adseqcompr.cxx:61
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:536
#define MAX_NUMBER
Definition: adseqcompr.cxx:324
bool isSet() const
test if SmartPtr is not NULp
Definition: smartptr.h:245
long GB_read_memuse(GBDATA *gbd)
Definition: arbdb.cxx:762
char used[256]
Definition: adseqcompr.cxx:65
GBDATA * gb_species_data
Definition: adname.cxx:33
static void distribute_masters(CompressionTree *tree, long *mcount, int *max_masters)
Definition: adseqcompr.cxx:310
static void g_b_create_master(CompressionTree *node, Sequence *seqs, MasterSequence **masters, int my_master, const char *ali_name, long seq_len, arb_progress &progress)
Definition: adseqcompr.cxx:198
Generic smart pointer.
Definition: smartptr.h:149
bool aborted()
Definition: arb_progress.h:335
#define true
Definition: ureadseq.h:14
GBENTRY * gb_mas
Definition: adseqcompr.cxx:75
void GB_clear_error()
Definition: arb_msg.cxx:354
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, TreeNode *tree)
Definition: adtree.cxx:523
const char * GBS_readable_size(unsigned long long size, const char *unit_suffix)
Definition: arb_misc.cxx:23
STATIC_ASSERT(MAX_NUMBER<=INT_MAX)
static void error(const char *msg)
Definition: mkptypes.cxx:96
static int maxCompressionSteps(CompressionTree *node)
Definition: adseqcompr.cxx:271
expectation_group & add(const expectation &e)
Definition: test_unit.h:812
GBDATA * gb_find_by_nr(GBCONTAINER *father, int index)
Definition: adquery.cxx:367
#define that(thing)
Definition: test_unit.h:1043
int g_b_read_number2(const unsigned char *&s)
Definition: adseqcompr.cxx:327
GBCONTAINER * as_container() const
Definition: gb_data.h:155
unsigned char * con[256]
Definition: adseqcompr.cxx:66
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
Definition: test_unit.h:1090
CONSTEXPR_INLINE GBCONTAINER * GB_FATHER(GBDATA *gbd)
Definition: gb_data.h:271
GB_ERROR GB_write_security_write(GBDATA *gbd, unsigned long level)
Definition: arbdb.cxx:1584
GBDATA * gb_main() const
Definition: gb_main.h:179
Definition: arbdb.h:86
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
#define is_equal_to(val)
Definition: test_unit.h:1025
~CompressionTree() OVERRIDE
Definition: adseqcompr.cxx:40
int gb_read_nr(GBDATA *gbd)
Definition: arbdb.cxx:900
int get_transaction_level() const
Definition: gb_main.h:177
void gb_compress_equal_bytes_2(const char *source, size_t size, size_t *msize, char *dest)
Definition: adcompr.cxx:392
static GB_ERROR compress_sequence_tree(GBCONTAINER *gb_main, CompressionTree *tree, const char *ali_name)
Definition: adseqcompr.cxx:581
static int g_b_count_leafs(CompressionTree *node)
Definition: adseqcompr.cxx:173
void GB_internal_error(const char *message)
Definition: arb_msg.cxx:481
static void g_b_put_sequences_in_container(CompressionTree *ctree, Sequence *seqs, MasterSequence **masters, Consensus *gcon)
Definition: adseqcompr.cxx:179
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
void g_b_put_number2(int i, unsigned char *&s)
Definition: adseqcompr.cxx:359
#define gb_assert(cond)
Definition: arbdbt.h:11
xml element
GB_ERROR close(GB_ERROR error)
Definition: arbdbpp.cxx:35
GB_UNDO_TYPE GB_get_requested_undo_type(GBDATA *gb_main)
Definition: adindex.cxx:749
static void g_b_delete_Consensus(Consensus *gcon)
Definition: adseqcompr.cxx:94
#define memory_is_equal(m1, m2, size)
Definition: test_unit.h:1435
#define GB_RUNLENGTH_SIZE
Definition: gb_tune.h:14
#define OVERRIDE
Definition: cxxforward.h:112
#define TEST_EXPECT_FILES_EQUAL(f1, f2)
Definition: test_unit.h:1422
GB_ERROR gb_write_compressed_pntr(GBENTRY *gbe, const char *s, long memsize, long stored_size)
Definition: arbdb.cxx:1317
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:909
static int set_masters_with_sons(CompressionTree *node, int wantedSons, long *mcount)
Definition: adseqcompr.cxx:246
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
GB_ERROR GB_write_security_delete(GBDATA *gbd, unsigned long level)
Definition: arbdb.cxx:1605
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
CompressionTree(CompressionRoot *croot)
Definition: adseqcompr.cxx:48
gb_Key * keys
Definition: gb_main.h:134
GBENTRY * as_entry() const
Definition: gb_data.h:150
long GB_read_clock(GBDATA *gbd)
Definition: arbdb.cxx:1714
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:116
void GB_information(const char *message)
Definition: arb_msg.cxx:548
static char * g_b_Consensus_get_sequence(Consensus *gcon)
Definition: adseqcompr.cxx:147
GBDATA * gb_search(GBCONTAINER *gbc, const char *key, GB_TYPES create, int internflag)
Definition: adquery.cxx:445
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
#define DEFINE_TREE_ACCESSORS(RootType, TreeType)
Definition: TreeNode.h:621
GB_ERROR GB_request_undo_type(GBDATA *gb_main, GB_UNDO_TYPE type) __ATTR__USERESULT_TODO
Definition: adindex.cxx:721
GB_transaction ta(gb_var)
void destroy(TreeNode *that)
Definition: TreeNode.h:600
#define BUFSIZE
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
void gb_load_single_key_data(GBDATA *gb_main, GBQUARK q)
Definition: adsystem.cxx:112
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GBENTRY * gb_seq
Definition: adseqcompr.cxx:70
const char * GB_CSTR
Definition: arbdb_base.h:25
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
unsigned get_leaf_count() const OVERRIDE
Definition: adseqcompr.cxx:50
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
li
Definition: AW_awar.cxx:152
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105
#define max(a, b)
Definition: f2c.h:154
GB_write_int const char s
Definition: AW_awar.cxx:154