ARB
chartable.cxx
Go to the documentation of this file.
1 // ================================================================= //
2 // //
3 // File : chartable.cxx //
4 // Purpose : count characters of multiple sequences //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ================================================================= //
10 
11 #include "chartable.h"
12 
13 #include <arb_msg.h>
14 #include <arb_mem.h>
15 #include <iupac.h>
16 #include <arb_defs.h>
17 #include <smartptr.h>
18 
19 #include <cctype>
20 
21 #define CONSENSUS_AWAR_SOURCE CAS_INTERNAL // not awar-constructable here
22 #include <consensus.h>
23 
24 using namespace chartable;
25 
26 // ------------------------
27 // SepBaseFreq
28 
29 #ifdef DEBUG
30 // # define COUNT_BASES_TABLE_SIZE
31 #endif
32 
33 #ifdef COUNT_BASES_TABLE_SIZE
34 static long bases_table_size = 0L;
35 #endif
36 
38  if (length) {
39  if (no_of_bases.shortTable) {
40  delete no_of_bases.shortTable;
41  no_of_bases.shortTable = NULp;
42 #ifdef COUNT_BASES_TABLE_SIZE
43  bases_table_size -= (no_of_entries+1)*table_entry_size;
44 #endif
45  }
46 
47  no_of_entries = length;
48  int new_size = (no_of_entries+1)*table_entry_size;
49  no_of_bases.shortTable = (unsigned char*)new char[new_size];
50  memset(no_of_bases.shortTable, 0, new_size);
51 #ifdef COUNT_BASES_TABLE_SIZE
52  bases_table_size += new_size;
53  printf("bases_table_size = %li\n", bases_table_size);
54 #endif
55  }
56 }
57 
58 void SepBaseFreq::expand_table_entry_size() { // converts short table into long table
59  ct_assert(table_entry_size==SHORT_TABLE_ELEM_SIZE);
60 
61  int *new_table = new int[no_of_entries+1];
62  int i;
63 
64  for (i=0; i<=no_of_entries; i++) { // LOOP_VECTORIZED[!<5.0]
65  new_table[i] = no_of_bases.shortTable[i];
66  }
67  delete [] no_of_bases.shortTable;
68 
69 #ifdef COUNT_BASES_TABLE_SIZE
70  bases_table_size += (no_of_entries+1)*(LONG_TABLE_ELEM_SIZE-SHORT_TABLE_ELEM_SIZE);
71  printf("bases_table_size = %li\n", bases_table_size);
72 #endif
73 
74  no_of_bases.longTable = new_table;
75  table_entry_size = LONG_TABLE_ELEM_SIZE;
76 }
77 
78 #define INVALID_TABLE_OPERATION() GBK_terminatef("SepBaseFreq: invalid operation at %i", __LINE__)
79 
80 void SepBaseFreq::add(const SepBaseFreq& other, int start, int end) {
81  ct_assert(no_of_entries==other.no_of_entries);
82  ct_assert(start>=0);
83  ct_assert(end<no_of_entries);
84  ct_assert(start<=end);
85 
86  int i;
87  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
88  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
89  for (i=start; i<=end; i++) {
90  set_elem_short(i, get_elem_short(i)+other.get_elem_short(i));
91  }
92  }
93  else {
94  INVALID_TABLE_OPERATION(); // cannot add long to short
95  }
96  }
97  else {
98  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
99  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
100  set_elem_long(i, get_elem_long(i)+other.get_elem_short(i));
101  }
102  }
103  else {
104  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
105  set_elem_long(i, get_elem_long(i)+other.get_elem_long(i));
106  }
107  }
108  }
109 }
110 void SepBaseFreq::sub(const SepBaseFreq& other, int start, int end) {
111  ct_assert(no_of_entries==other.no_of_entries);
112  ct_assert(start>=0);
113  ct_assert(end<no_of_entries);
114  ct_assert(start<=end);
115 
116  int i;
117  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
118  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
119  for (i=start; i<=end; i++) {
120  set_elem_short(i, get_elem_short(i)-other.get_elem_short(i));
121  }
122  }
123  else {
124  INVALID_TABLE_OPERATION(); // cannot sub long from short
125  }
126  }
127  else {
128  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
129  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
130  set_elem_long(i, get_elem_long(i)-other.get_elem_short(i));
131  }
132  }
133  else {
134  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
135  set_elem_long(i, get_elem_long(i)-other.get_elem_long(i));
136  }
137  }
138  }
139 }
140 void SepBaseFreq::sub_and_add(const SepBaseFreq& Sub, const SepBaseFreq& Add, PosRange range) {
141  ct_assert(no_of_entries==Sub.no_of_entries);
142  ct_assert(no_of_entries==Add.no_of_entries);
143 
144  int start = range.start();
145  int end = range.end();
146 
147  ct_assert(start>=0);
148  ct_assert(end<no_of_entries);
149  ct_assert(start<=end);
150 
151  int i;
152  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
153  if (Sub.table_entry_size==SHORT_TABLE_ELEM_SIZE &&
154  Add.table_entry_size==SHORT_TABLE_ELEM_SIZE)
155  {
156  for (i=start; i<=end; i++) {
157  set_elem_short(i, get_elem_short(i)-Sub.get_elem_short(i)+Add.get_elem_short(i));
158  }
159  }
160  else {
161  INVALID_TABLE_OPERATION(); // cannot add or sub long to/from short
162  }
163  }
164  else {
165  if (Sub.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
166  if (Add.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
167  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
168  set_elem_long(i, get_elem_long(i)-Sub.get_elem_short(i)+Add.get_elem_short(i));
169  }
170  }
171  else {
172  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
173  set_elem_long(i, get_elem_long(i)-Sub.get_elem_short(i)+Add.get_elem_long(i));
174  }
175  }
176  }
177  else {
178  if (Add.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
179  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
180  set_elem_long(i, get_elem_long(i)-Sub.get_elem_long(i)+Add.get_elem_short(i));
181  }
182  }
183  else {
184  for (i=start; i<=end; i++) { // LOOP_VECTORIZED[!>=620<7]
185  set_elem_long(i, get_elem_long(i)-Sub.get_elem_long(i)+Add.get_elem_long(i));
186  }
187  }
188  }
189  }
190 }
191 
192 int SepBaseFreq::firstDifference(const SepBaseFreq& other, int start, int end, int *firstDifferentPos) const {
193  int i;
194  int result = 0;
195 
196  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
197  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
198  for (i=start; i<=end; i++) {
199  if (get_elem_short(i)!=other.get_elem_short(i)) {
200  *firstDifferentPos = i;
201  result = 1;
202  break;
203  }
204  }
205  }
206  else {
207  for (i=start; i<=end; i++) {
208  if (get_elem_short(i)!=other.get_elem_long(i)) {
209  *firstDifferentPos = i;
210  result = 1;
211  break;
212  }
213  }
214  }
215  }
216  else {
217  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
218  for (i=start; i<=end; i++) {
219  if (get_elem_long(i)!=other.get_elem_short(i)) {
220  *firstDifferentPos = i;
221  result = 1;
222  break;
223  }
224  }
225  }
226  else {
227  for (i=start; i<=end; i++) {
228  if (get_elem_long(i)!=other.get_elem_long(i)) {
229  *firstDifferentPos = i;
230  result = 1;
231  break;
232  }
233  }
234  }
235  }
236 
237  return result;
238 }
239 int SepBaseFreq::lastDifference(const SepBaseFreq& other, int start, int end, int *lastDifferentPos) const {
240  int i;
241  int result = 0;
242 
243  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
244  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
245  for (i=end; i>=start; i--) {
246  if (get_elem_short(i)!=other.get_elem_short(i)) {
247  *lastDifferentPos = i;
248  result = 1;
249  break;
250  }
251  }
252  }
253  else {
254  for (i=end; i>=start; i--) {
255  if (get_elem_short(i)!=other.get_elem_long(i)) {
256  *lastDifferentPos = i;
257  result = 1;
258  break;
259  }
260  }
261  }
262  }
263  else {
264  if (other.table_entry_size==SHORT_TABLE_ELEM_SIZE) {
265  for (i=end; i>=start; i--) {
266  if (get_elem_long(i)!=other.get_elem_short(i)) {
267  *lastDifferentPos = i;
268  result = 1;
269  break;
270  }
271  }
272  }
273  else {
274  for (i=end; i>=start; i--) {
275  if (get_elem_long(i)!=other.get_elem_long(i)) {
276  *lastDifferentPos = i;
277  result = 1;
278  break;
279  }
280  }
281  }
282  }
283 
284  return result;
285 }
286 
287 
288 SepBaseFreq::SepBaseFreq(int maxseqlength)
289  : table_entry_size(SHORT_TABLE_ELEM_SIZE),
290  no_of_entries(0)
291 {
292  no_of_bases.shortTable = NULp;
293  if (maxseqlength) init(maxseqlength);
294 }
295 
297  delete [] no_of_bases.shortTable;
298 #ifdef COUNT_BASES_TABLE_SIZE
299  bases_table_size -= (no_of_entries+1)*table_entry_size;
300  printf("bases_table_size = %li\n", bases_table_size);
301 #endif
302 }
303 
304 void SepBaseFreq::change_table_length(int new_length, int default_entry) {
305  if (new_length!=no_of_entries) {
306  int min_length = new_length<no_of_entries ? new_length : no_of_entries;
307  int growth = new_length-no_of_entries;
308 
309  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
310  unsigned char *new_table = new unsigned char[new_length+1];
311 
312  ct_assert(default_entry>=0 && default_entry<256);
313 
314  memcpy(new_table, no_of_bases.shortTable, min_length*sizeof(*new_table));
315  new_table[new_length] = no_of_bases.shortTable[no_of_entries];
316  if (growth>0) {
317  for (int e=no_of_entries; e<new_length; ++e) new_table[e] = default_entry;
318  }
319 
320  delete [] no_of_bases.shortTable;
321  no_of_bases.shortTable = new_table;
322  }
323  else {
324  int *new_table = new int[new_length+1];
325 
326  memcpy(new_table, no_of_bases.longTable, min_length*sizeof(*new_table));
327  new_table[new_length] = no_of_bases.longTable[no_of_entries];
328  if (growth>0) {
329  for (int e=no_of_entries; e<new_length; ++e) { // LOOP_VECTORIZED
330  new_table[e] = default_entry;
331  }
332  }
333 
334  delete [] no_of_bases.longTable;
335  no_of_bases.longTable = new_table;
336  }
337 
338 #ifdef COUNT_BASES_TABLE_SIZE
339  bases_table_size += growth*table_entry_size;
340  printf("bases_table_size = %li\n", bases_table_size);
341 #endif
342  no_of_entries = new_length;
343  }
344 }
345 
346 #if defined(TEST_CHAR_TABLE_INTEGRITY) || defined(ASSERTION_USED)
347 
348 int SepBaseFreq::empty() const {
349  int i;
350 
351  if (table_entry_size==SHORT_TABLE_ELEM_SIZE) {
352  for (i=0; i<no_of_entries; i++) {
353  if (get_elem_short(i)) {
354  return 0;
355  }
356  }
357  }
358  else {
359  for (i=0; i<no_of_entries; i++) {
360  if (get_elem_long(i)) {
361  return 0;
362  }
363  }
364  }
365 
366  return 1;
367 }
368 #endif // ASSERTION_USED
369 
371  ExplicitRange range(r, size());
372 
373  long entries = range.size();
374  char *new_buf = ARB_alloc<char>(entries+1);
375 
376  build_consensus_string_to(new_buf, range, cbp);
377  new_buf[entries] = 0;
378 
379  return new_buf;
380 }
381 
382 #if defined(DEBUG)
383 // #define DEBUG_CONSENSUS
384 #endif
385 
386 void BaseFrequencies::build_consensus_string_to(char *consensus_string, ExplicitRange range, const ConsensusBuildParams& BK) const {
387  // 'consensus_string' has to be a buffer of size 'range.size()+1'
388  // Note : Always check that consensus behavior is identical to that used in CON_evaluatestatistic()
389 
390  ct_assert(consensus_string);
391  ct_assert(range.end()<size());
392 
393 #define PERCENT(part, all) ((100*(part))/(all))
394 #define MAX_BASES_TABLES 41 // 25
395 
396  ct_assert(used_bases_tables<=MAX_BASES_TABLES); // this is correct for DNA/RNA -> build_consensus_string() must be corrected for AMI/PRO
397 
398  const int left_idx = range.start();
399  const int right_idx = range.end();
400 
401 #if defined(DEBUG_CONSENSUS)
402  #define DUMPINT(var) do { if (dumpcol) fprintf(stderr, "%s=%i ", #var, var); } while(0)
403 #else
404  #define DUMPINT(var)
405 #endif
406 
407  if (sequenceUnits) {
408  for (int i=left_idx; i<=right_idx; i++) {
409 #if defined(DEBUG_CONSENSUS)
410  int column = i+1;
411  bool dumpcol = (column == 21);
412  DUMPINT(column);
413 #endif
414 
415  const int o = i-left_idx; // output offset
416 
417  int bases = 0; // count of all bases together
418  int base[MAX_BASES_TABLES]; // count of specific base
419  int max_base = -1; // maximum count of specific base
420  int max_base_idx = -1; // index of this base
421 
422  for (int j=0; j<used_bases_tables; j++) {
423  base[j] = linear_table(j)[i];
424  if (!isGap(index_to_upperChar(j))) {
425  bases += base[j];
426  if (base[j]>max_base) { // search for most used base
427  max_base = base[j];
428  max_base_idx = j;
429  }
430  }
431  }
432 
433  int gaps = sequenceUnits-bases; // count of gaps
434 
435  DUMPINT(sequenceUnits);
436  DUMPINT(gaps);
437  DUMPINT(bases);
438 
439  // What to do with gaps?
440 
441  if (gaps == sequenceUnits) {
442  consensus_string[o] = '=';
443  }
444  else if (BK.countgaps && PERCENT(gaps, sequenceUnits) >= BK.gapbound) {
445  DUMPINT(PERCENT(gaps,sequenceUnits));
446  DUMPINT(BK.gapbound);
447  consensus_string[o] = '-';
448  }
449  else {
450  char kchar = 0; // character for consensus
451  int kcount = 0; // count this character
452 
453  if (BK.group) { // simplify using IUPAC
454  if (ali_type == GB_AT_RNA || ali_type == GB_AT_DNA) {
455  int no_of_bases = 0; // count of different bases used to create iupac
456  char used_bases[MAX_BASES_TABLES+1]; // string containing those bases
457 
458  for (int j=0; j<used_bases_tables; j++) {
459  int bchar = index_to_upperChar(j);
460 
461  if (!isGap(bchar)) {
462  if (base[j]>0 && PERCENT(base[j],bases) >= BK.considbound) {
463 #if defined(DEBUG_CONSENSUS)
464  if (!kcount) DUMPINT(BK.considbound);
465 #endif
466  used_bases[no_of_bases++] = index_to_upperChar(j);
467  kcount += base[j];
468 
469  DUMPINT(base[j]);
470  DUMPINT(PERCENT(base[j],bases));
471  DUMPINT(kcount);
472  }
473  }
474  }
475  used_bases[no_of_bases] = 0;
476  kchar = iupac::encode(used_bases, ali_type);
477  }
478  else {
479  ct_assert(ali_type == GB_AT_AA);
480 
481  const int amino_groups = iupac::AA_GROUP_COUNT;
482  int group_count[amino_groups];
483 
484  for (int j=0; j<amino_groups; j++) {
485  group_count[j] = 0;
486  }
487  for (int j=0; j<used_bases_tables; j++) {
488  unsigned char bchar = index_to_upperChar(j);
489 
490  if (!isGap(bchar)) {
491  group_count[iupac::get_amino_group_for(bchar)] += base[j];
492  }
493  }
494 
495  int bestGroup = 0;
496  for (int j=0; j<amino_groups; j++) {
497  if (group_count[j]>kcount) {
498  bestGroup = j;
499  kcount = group_count[j];
500  }
501  }
502 
503  ct_assert(kcount>0);
504  if (PERCENT(kcount, bases) >= BK.considbound) {
506  }
507  else {
508  kchar = 'X';
509  kcount = bases;
510  }
511  }
512  }
513  if (!kcount) { // IUPAC grouping is either off OR didnt consider any bases
514  ct_assert(max_base); // expect at least one base to occur
515  ct_assert(max_base_idx >= 0);
516 
517  kchar = index_to_upperChar(max_base_idx);
518  kcount = max_base;
519  }
520 
521  ct_assert(kchar);
522  ct_assert(kcount);
523  ct_assert(kcount<=bases);
524 
525  // show as upper or lower case ?
526  int percent = PERCENT(kcount, BK.countgaps ? sequenceUnits : bases);
527  DUMPINT(percent);
528  DUMPINT(BK.upper);
529  DUMPINT(BK.lower);
530  if (percent>=BK.upper) {
531  consensus_string[o] = kchar;
532  }
533  else if (percent>=BK.lower) {
534  consensus_string[o] = tolower(kchar);
535  }
536  else {
537  consensus_string[o] = '.';
538  }
539  }
540  ct_assert(consensus_string[o]);
541 
542 #if defined(DEBUG_CONSENSUS)
543  if (dumpcol) fprintf(stderr, "result='%c'\n", consensus_string[o]);
544 #endif
545  }
546  }
547  else {
548  memset(consensus_string, '?', right_idx-left_idx+1);
549  }
550 #undef PERCENT
551 }
552 
553 // -----------------------
554 // BaseFrequencies
555 
556 bool BaseFrequencies::initialized = false;
557 Ambiguity BaseFrequencies::ambiguity_table[MAX_AMBIGUITY_CODES];
558 uint8_t BaseFrequencies::unitsPerSequence = 1;
559 unsigned char BaseFrequencies::char_to_index_tab[MAXCHARTABLE];
560 bool BaseFrequencies::is_gap[MAXCHARTABLE];
561 unsigned char BaseFrequencies::upper_index_chars[MAX_USED_BASES_TABLES];
562 int BaseFrequencies::used_bases_tables = 0;
563 GB_alignment_type BaseFrequencies::ali_type = GB_AT_RNA;
564 
565 inline void BaseFrequencies::set_char_to_index(unsigned char c, int index) {
566  ct_assert(index>=0 && index<used_bases_tables);
567  char_to_index_tab[upper_index_chars[index] = toupper(c)] = index;
568  char_to_index_tab[tolower(c)] = index;
569 }
570 
571 void BaseFrequencies::expand_tables() {
572  int i;
573  for (i=0; i<used_bases_tables; i++) {
574  linear_table(i).expand_table_entry_size();
575  }
576 }
577 
578 static const char *getAAgroupDefinition() {
579  using namespace iupac;
580  static SmartCharPtr aadef;
581 
582  if (aadef.isNull()) {
583  char buffer[2*26+1];
584  int off = 0;
585  for (int c = 'A'; c<='Z'; ++c) {
586  switch (get_amino_group_for(c)) {
587  case AA_GROUP_NONE: break; // cannot add: would add invalid characters -> add 'X' manually below.
588  case AA_GROUP_ILLEGAL: ct_assert(0); break;
589  default:
590  buffer[off++] = c;
591  buffer[off++] = ',';
592  break;
593  }
594  }
595  buffer[off] = 'X';
596  buffer[off+1] = 0;
597 
598  aadef = strdup(buffer);
599  }
600 
601  return &*aadef;
602 }
603 
604 void BaseFrequencies::setup(const char *gap_chars, GB_alignment_type ali_type_) {
605  const char *groups = NULp;
606  const char *ambiguous = NULp;
607 
608  ali_type = ali_type_;
609 
610  switch (ali_type) {
611  case GB_AT_RNA:
612  groups = "A,C,G,TU,"; // Note: terminal ',' defines a group for unknown characters
613  ambiguous = "MRWSYKVHDBN";
614  unitsPerSequence = 12;
615  break;
616 
617  case GB_AT_DNA:
618  groups = "A,C,G,UT,";
619  ambiguous = "MRWSYKVHDBN";
620  unitsPerSequence = 12;
621  break;
622 
623  case GB_AT_AA:
624  groups = getAAgroupDefinition();
625  unitsPerSequence = 1;
626  break;
627 
628  default:
629  ct_assert(0);
630  break;
631  }
632 
633  ct_assert(groups);
634 #if defined(ASSERTION_USED)
635  if (ali_type == GB_AT_AA) {
636  ct_assert(!ambiguous);
637  // ct_assert(strlen(ambiguous) <= MAX_AMBIGUITY_CODES); // @@@ later
638  }
639  else {
640  ct_assert(strlen(ambiguous) == MAX_AMBIGUITY_CODES);
641  }
642 #endif
643 
644  for (int i = 0; i<MAXCHARTABLE; ++i) is_gap[i] = false;
645 
646  int gapcharcount = 0;
647  char *unique_gap_chars = strdup(gap_chars);
648  for (int i = 0; gap_chars[i]; ++i) {
649  if (!is_gap[safeCharIndex(gap_chars[i])]) { // ignore duplicate occurrences in 'gap_chars'
650  is_gap[safeCharIndex(gap_chars[i])] = true;
651  unique_gap_chars[gapcharcount++] = gap_chars[i];
652  }
653  }
654 
655  used_bases_tables = gapcharcount;
656 
657  for (int i=0; groups[i]; i++) {
658  if (groups[i]==',') {
659  used_bases_tables++;
660  }
661  }
662 
663  ct_assert(used_bases_tables<=MAX_USED_BASES_TABLES);
664 
665  int idx = 0;
666  unsigned char init_val = used_bases_tables-1; // map all unknown stuff to last table
667  memset(char_to_index_tab, init_val, MAXCHARTABLE*sizeof(*char_to_index_tab));
668 
669  for (int i=0; groups[i]; i++) {
670  if (groups[i]==',') {
671  idx++;
672  }
673  else {
674  ct_assert(isupper(groups[i]));
675  set_char_to_index(groups[i], idx);
676  }
677  }
678 
679  const char *align_string_ptr = unique_gap_chars;
680  while (1) {
681  char c = *align_string_ptr++;
682  if (!c) break;
683  set_char_to_index(c, idx++);
684  }
685 
686  if (ambiguous) {
687  ct_assert(ali_type == GB_AT_DNA || ali_type == GB_AT_RNA); // @@@ amino ambiguities not impl
688 
689  const uint8_t indices2increment[MAX_TARGET_INDICES+1] = { 0, 0, 6, 4, 3 };
690  for (int i = 0; ambiguous[i]; ++i) {
691  const char *contained = iupac::decode(ambiguous[i], ali_type, false);
692 
693  Ambiguity& amb = ambiguity_table[i];
694 
695  amb.indices = strlen(contained);
697  amb.increment = indices2increment[amb.indices];
698 
699  for (int j = 0; j<amb.indices; ++j) {
700  int cidx = char_to_index_tab[safeCharIndex(contained[j])];
701  ct_assert(cidx<MAX_INDEX_TABLES); // has to be non-ambiguous
702  amb.index[j] = cidx;
703  }
704 
705  char_to_index_tab[toupper(ambiguous[i])] =
706  char_to_index_tab[tolower(ambiguous[i])] = i+MAX_INDEX_TABLES;
707  }
708  }
709 
710  free(unique_gap_chars);
711  ct_assert(idx==used_bases_tables);
712  initialized = true;
713 }
714 
716  : ignore(0)
717 {
718  ct_assert(initialized);
719 
720  bases_table = new SepBaseFreqPtr[used_bases_tables];
721 
722  for (int i=0; i<used_bases_tables; i++) {
723  bases_table[i] = new SepBaseFreq(maxseqlength);
724  }
725 
726  sequenceUnits = 0;
727 }
728 
729 void BaseFrequencies::init(int maxseqlength) {
730  int i;
731  for (i=0; i<used_bases_tables; i++) {
732  linear_table(i).init(maxseqlength);
733  }
734 
735  sequenceUnits = 0;
736 }
737 
738 void BaseFrequencies::bases_and_gaps_at(int column, int *bases, int *gaps) const {
739  int b = 0;
740  int g = 0;
741 
742  for (int i=0; i<used_bases_tables; i++) {
743  char c = upper_index_chars[i];
744 
745  if (isGap(c)) {
746  g += table(c)[column];
747  }
748  else {
749  b += table(c)[column];
750  }
751  }
752 
753  if (bases) {
754  *bases = b/unitsPerSequence;
755  ct_assert((b%unitsPerSequence) == 0); // could happen if an ambiguous code contains a gap
756  }
757  if (gaps) {
758  *gaps = g/unitsPerSequence;
759  ct_assert((g%unitsPerSequence) == 0); // could happen if an ambiguous code contains a gap
760  }
761 }
762 
763 double BaseFrequencies::max_frequency_at(int column, bool ignore_gaps) const {
764  int gaps = 0;
765  int minus = 0;
766  int maxb = 0;
767 
768  for (int i=0; i<used_bases_tables; i++) {
769  char c = upper_index_chars[i];
770 
771  if (isGap(c)) {
772  if (c == '-') {
773  minus += table(c)[column];
774  }
775  else {
776  gaps += table(c)[column];
777  }
778  }
779  else {
780  maxb = std::max(maxb, table(c)[column]);
781  }
782  }
783 
784  if (ignore_gaps) {
785  gaps += minus;
786  }
787  else {
788  maxb = std::max(maxb, minus);
789  }
790 
791  double mfreq;
792  if (sequenceUnits>gaps) {
793  mfreq = maxb/double(sequenceUnits-gaps);
794  }
795  else { // only gaps (but no '-')
796  ct_assert(sequenceUnits == gaps);
797  mfreq = 0.0;
798  }
799 
800 #if defined(ASSERTION_USED)
801  if (mfreq != 0.0) {
802  if (ali_type != GB_AT_AA) {
803  ct_assert(mfreq>=0.2); // wrong for amino acids
804  }
805  ct_assert(mfreq<=1.0);
806  }
807 #endif
808 
809  return mfreq;
810 }
811 
813  int i;
814  for (i=0; i<used_bases_tables; i++) {
815  delete bases_table[i];
816  }
817 
818  delete [] bases_table;
819 }
820 
822  int i;
823  int Size = size();
824  int start = Size-1;
825  int end = 0;
826 
827  ct_assert(Size==other.size());
828  for (i=0; i<used_bases_tables; i++) {
829  if (linear_table(i).firstDifference(other.linear_table(i), 0, start, &start)) {
830  if (end<start) {
831  end = start;
832  }
833  linear_table(i).lastDifference(other.linear_table(i), end+1, Size-1, &end);
834 
835  for (; i<used_bases_tables; i++) {
836  linear_table(i).firstDifference(other.linear_table(i), 0, start-1, &start);
837  if (end<start) {
838  end = start;
839  }
840  linear_table(i).lastDifference(other.linear_table(i), end+1, Size-1, &end);
841  }
842 
843  ct_assert(start<=end);
844 
845  static PosRange range;
846  range = PosRange(start, end);
847 
848  return &range;
849  }
850  }
851  return NULp;
852 }
853 void BaseFrequencies::add(const BaseFrequencies& other) {
854  if (other.ignore) return;
855  if (other.size()==0) return;
856  prepare_to_add_elements(other.added_sequences());
857  add(other, 0, other.size()-1);
858 }
859 void BaseFrequencies::add(const BaseFrequencies& other, int start, int end) {
860  if (other.ignore) return;
861 
862  test();
863  other.test();
864 
865  ct_assert(start<=end);
866 
867  int i;
868  for (i=0; i<used_bases_tables; i++) {
869  linear_table(i).add(other.linear_table(i), start, end);
870  }
871 
872  sequenceUnits += other.sequenceUnits;
873 
874  test();
875 }
876 
877 void BaseFrequencies::sub(const BaseFrequencies& other) {
878  if (other.ignore) return;
879  sub(other, 0, other.size()-1);
880 }
881 
882 void BaseFrequencies::sub(const BaseFrequencies& other, int start, int end) {
883  if (other.ignore) return;
884 
885  test();
886  other.test();
887  ct_assert(start<=end);
888 
889  int i;
890  for (i=0; i<used_bases_tables; i++) {
891  linear_table(i).sub(other.linear_table(i), start, end);
892  }
893 
894  sequenceUnits -= other.sequenceUnits;
895 
896  test();
897 }
898 
900  test();
901  Sub.test();
902  Add.test();
903 
904  ct_assert(!Sub.ignore && !Add.ignore);
905  ct_assert(range.is_part());
906 
907  int i;
908  for (i=0; i<used_bases_tables; i++) {
909  linear_table(i).sub_and_add(Sub.linear_table(i), Add.linear_table(i), range);
910  }
911 
912  test();
913 }
914 
915 const PosRange *BaseFrequencies::changed_range(const char *string1, const char *string2, int min_len) {
916  const unsigned long *range1 = (const unsigned long *)string1;
917  const unsigned long *range2 = (const unsigned long *)string2;
918  const int step = sizeof(*range1);
919 
920  int l = min_len/step;
921  int r = min_len%step;
922  int i;
923  int j;
924  int k;
925 
926  int start = -1, end = -1;
927 
928  for (i=0; i<l; i++) { // search wordwise (it's faster)
929  if (range1[i] != range2[i]) {
930  k = i*step;
931  for (j=0; j<step; j++) {
932  if (string1[k+j] != string2[k+j]) {
933  start = end = k+j;
934  break;
935  }
936  }
937 
938  break;
939  }
940  }
941 
942  k = l*step;
943  if (i==l) { // no difference found in word -> look at rest
944  for (j=0; j<r; j++) {
945  if (string1[k+j] != string2[k+j]) {
946  start = end = k+j;
947  break;
948  }
949  }
950 
951  if (j==r) { // the strings are equal
952  return NULp;
953  }
954  }
955 
956  ct_assert(start != -1 && end != -1);
957 
958  for (j=r-1; j>=0; j--) { // search rest backwards
959  if (string1[k+j] != string2[k+j]) {
960  end = k+j;
961  break;
962  }
963  }
964 
965  if (j==-1) { // not found in rest -> search words backwards
966  int m = start/step;
967  for (i=l-1; i>=m; i--) {
968  if (range1[i] != range2[i]) {
969  k = i*step;
970  for (j=step-1; j>=0; j--) {
971  if (string1[k+j] != string2[k+j]) {
972  end = k+j;
973  break;
974  }
975  }
976  break;
977  }
978  }
979  }
980 
981  ct_assert(start<=end);
982 
983  static PosRange range;
984  range = PosRange(start, end);
985 
986  return &range;
987 }
988 
989 #define INC_DEC(incdec) do { \
990  int idx = char_to_index_tab[c]; \
991  if (idx<MAX_INDEX_TABLES) { \
992  table(c).incdec(offset, unitsPerSequence); \
993  } \
994  else { \
995  Ambiguity& amb = ambiguity_table[idx-MAX_INDEX_TABLES]; \
996  for (int i = 0; i<amb.indices; ++i) { \
997  linear_table(amb.index[i]).incdec(offset, amb.increment); \
998  } \
999  } \
1000  } while(0)
1001 
1002 void BaseFrequencies::incr_short(unsigned char c, int offset) {
1003  INC_DEC(inc_short);
1004 }
1005 
1006 void BaseFrequencies::decr_short(unsigned char c, int offset) {
1007  INC_DEC(dec_short);
1008 }
1009 
1010 void BaseFrequencies::incr_long(unsigned char c, int offset) {
1011  INC_DEC(inc_long);
1012 }
1013 
1014 void BaseFrequencies::decr_long(unsigned char c, int offset) {
1015  INC_DEC(dec_long);
1016 }
1017 
1018 #undef INC_DEC
1019 
1020 void BaseFrequencies::add(const char *scan_string, int len) {
1021  test();
1022 
1023  prepare_to_add_elements(1);
1024 
1025  int i;
1026  int sz = size();
1027 
1028  if (get_table_entry_size()==SHORT_TABLE_ELEM_SIZE) {
1029  for (i=0; i<len; i++) {
1030  unsigned char c = scan_string[i];
1031  if (!c) break;
1032  incr_short(c, i);
1033  }
1034  SepBaseFreq& t = table('.');
1035  for (; i<sz; i++) {
1036  t.inc_short(i, unitsPerSequence);
1037  }
1038  }
1039  else {
1040  for (i=0; i<len; i++) {
1041  unsigned char c = scan_string[i];
1042  if (!c) break;
1043  incr_long(c, i);
1044  }
1045  SepBaseFreq& t = table('.');
1046  for (; i<sz; i++) { // LOOP_VECTORIZED[!>=620<7]
1047  t.inc_long(i, unitsPerSequence);
1048  }
1049  }
1050 
1051  sequenceUnits += unitsPerSequence;
1052 
1053  test();
1054 }
1055 
1056 void BaseFrequencies::sub(const char *scan_string, int len) {
1057  test();
1058 
1059  int i;
1060  int sz = size();
1061 
1062  if (get_table_entry_size()==SHORT_TABLE_ELEM_SIZE) {
1063  for (i=0; i<len; i++) {
1064  unsigned char c = scan_string[i];
1065  if (!c) break;
1066  decr_short(c, i);
1067  }
1068  SepBaseFreq& t = table('.');
1069  for (; i<sz; i++) {
1070  t.dec_short(i, unitsPerSequence);
1071  }
1072  }
1073  else {
1074  for (i=0; i<len; i++) {
1075  unsigned char c = scan_string[i];
1076  if (!c) break;
1077  decr_long(c, i);
1078  }
1079  SepBaseFreq& t = table('.');
1080  for (; i<sz; i++) { // LOOP_VECTORIZED[!>=620<7]
1081  t.dec_long(i, unitsPerSequence);
1082  }
1083  }
1084 
1085  sequenceUnits -= unitsPerSequence;
1086 
1087  test();
1088 }
1089 
1090 void BaseFrequencies::sub_and_add(const char *old_string, const char *new_string, PosRange range) {
1091  test();
1092  int start = range.start();
1093  int end = range.end();
1094  ct_assert(start<=end);
1095 
1096  if (get_table_entry_size()==SHORT_TABLE_ELEM_SIZE) {
1097  for (int i=start; i<=end; i++) {
1098  unsigned char o = old_string[i];
1099  unsigned char n = new_string[i];
1100 
1101  ct_assert(o && n); // passed string have to be defined in range
1102 
1103  if (o!=n) {
1104  decr_short(o, i);
1105  incr_short(n, i);
1106  }
1107  }
1108  }
1109  else {
1110  for (int i=start; i<=end; i++) {
1111  unsigned char o = old_string[i];
1112  unsigned char n = new_string[i];
1113 
1114  ct_assert(o && n); // passed string have to be defined in range
1115 
1116  if (o!=n) {
1117  decr_long(o, i);
1118  incr_long(n, i);
1119  }
1120  }
1121  }
1122 
1123  test();
1124 }
1125 
1127  for (int c=0; c<used_bases_tables; c++) {
1128  linear_table(c).change_table_length(new_length, 0);
1129  }
1130  test();
1131 }
1132 
1133 // --------------
1134 // tests:
1135 
1136 #if defined(TEST_CHAR_TABLE_INTEGRITY) || defined(ASSERTION_USED)
1137 
1139  int i;
1140  for (i=0; i<used_bases_tables; i++) {
1141  if (!linear_table(i).empty()) {
1142  return false;
1143  }
1144  }
1145  return true;
1146 }
1147 
1148 bool BaseFrequencies::ok() const {
1149  if (empty()) return true;
1150  if (is_ignored()) return true;
1151 
1152  if (sequenceUnits<0) {
1153  fprintf(stderr, "Negative # of sequences (%i) in BaseFrequencies\n", added_sequences());
1154  return false;
1155  }
1156 
1157  const int table_size = size();
1158  for (int i=0; i<table_size; i++) {
1159  int bases, gaps;
1160  bases_and_gaps_at(i, &bases, &gaps);
1161 
1162  if (!bases && !gaps) { // this occurs after insert column
1163  for (int j=i+1; j<table_size; j++) { // test if we find any bases till end of table !!!
1164 
1165  bases_and_gaps_at(j, &bases, NULp);
1166  if (bases) { // bases found -> empty position was illegal
1167  fprintf(stderr, "Zero in BaseFrequencies at column %i\n", i);
1168  return false;
1169  }
1170  }
1171  break;
1172  }
1173  else {
1174  if ((bases+gaps)!=added_sequences()) {
1175  fprintf(stderr, "bases+gaps should be equal to # of sequences at column %i (bases=%i, gaps=%i, added_sequences=%i)\n",
1176  i, bases, gaps, added_sequences());
1177  return false;
1178  }
1179  }
1180  }
1181 
1182  return true;
1183 }
1184 
1185 #endif // TEST_CHAR_TABLE_INTEGRITY || ASSERTION_USED
1186 
1187 #if defined(TEST_CHAR_TABLE_INTEGRITY)
1188 
1189 void BaseFrequencies::test() const {
1190 
1191  if (!ok()) {
1192  GBK_terminate("BaseFrequencies::test() failed");
1193  }
1194 }
1195 
1196 #endif // TEST_CHAR_TABLE_INTEGRITY
1197 
1198 
1199 // --------------------------------------------------------------------------------
1200 
1201 #ifdef UNIT_TESTS
1202 #ifndef TEST_UNIT_H
1203 #include <test_unit.h>
1204 #endif
1205 #include <arbdb.h>
1206 
1207 #define SETUP(gapChars,alitype) BaseFrequencies::setup(gapChars,alitype)
1208 
1209 void TEST_char_table() {
1210  for (int useAminoAcids = 0; useAminoAcids<=1; ++useAminoAcids) {
1211  const char *alphabeth = useAminoAcids ? "ABCDEFGHIJKLMNPQRSTVWXYZ-" : "ACGTN-";
1212  const int alphabeth_size = strlen(alphabeth);
1213 
1214  TEST_EXPECT_EQUAL(alphabeth_size, (useAminoAcids ? 23 : 4)+2); // "+2" is for gap and N/X
1215 
1216  const int SEQLEN = 30;
1217  char seq[SEQLEN+1];
1218 
1219  const char *gapChars = ".-"; // @@@ doesnt make any difference which gaps are used - why?
1220  // const char *gapChars = "-";
1221  // const char *gapChars = ".";
1222  // const char *gapChars = "A"; // makes me fail
1223  SETUP(gapChars, useAminoAcids ? GB_AT_AA : GB_AT_RNA);
1224 
1225  if (useAminoAcids) {
1226  // document used amino acid group definition:
1227  TEST_EXPECT_EQUAL(getAAgroupDefinition(), "A,B,C,D,E,F,G,H,I,J,K,L,M,N,P,Q,R,S,T,V,W,Y,Z,X");
1228  }
1229 
1231  // BK.lower = 70; BK.upper = 95; BK.gapbound = 60; // defaults from awars
1232  BK.lower = 40; BK.upper = 70; BK.gapbound = 40;
1233 
1234  const int LOOPS = 5;
1235  GB_random_seed(100);
1236 
1237  int seed[LOOPS];
1238  for (int loop = 0; loop<5; ++loop) {
1239  seed[loop] = GB_random(INT_MAX);
1240  }
1241 
1242  for (int loop = 0; loop<5; ++loop) {
1243  GB_random_seed(seed[loop]);
1244 
1245  BaseFrequencies tab(SEQLEN);
1246 
1247  // build some seq
1248  for (int c = 0; c<SEQLEN; ++c) {
1249  seq[c] = alphabeth[GB_random(alphabeth_size)];
1250  }
1251  seq[SEQLEN] = 0;
1252 
1253  TEST_EXPECT_EQUAL(strlen(seq), size_t(SEQLEN));
1254 
1255  const int add_count = 300;
1256  char *added_seqs[add_count];
1257 
1258  const char *s1; // length = SEQLEN
1259  const char *s2;
1260  if (useAminoAcids) {
1261  s1 = "ABCDEFGHIJKLMNPQRSTVWXYZabcdef";
1262  s2 = "ghijklmnpqrstvwxyzABCDEFGHIJKL";
1263  }
1264  else { // rna
1265  s1 = "ACGTACGTAcgtacgtaCGTACGTACGTAC"; // @@@ insert gaps! -> adapt changed results
1266  s2 = "MRWSYKVHDBNmrwsykvhdbnSYKVHDBN"; // use ambiguities
1267  }
1268 
1269  // add seq multiple times
1270  for (int a = 0; a<add_count; ++a) {
1271  tab.add(seq, SEQLEN);
1272  added_seqs[a] = strdup(seq);
1273 
1274  // modify 1 character in seq:
1275  int sidx = GB_random(SEQLEN);
1276  int aidx = GB_random(alphabeth_size);
1277  seq[sidx] = alphabeth[aidx];
1278 
1279  if (a == 16) { // with 15 sequences in tab
1280  // test sub_and_add (short table version)
1281  char *consensus = tab.build_consensus_string(BK);
1282 
1283  tab.add(s1, SEQLEN);
1284  PosRange r(0, SEQLEN-1);
1285  tab.sub_and_add(s1, s2, r);
1286  tab.sub_and_add(s2, consensus, r);
1287  tab.sub(consensus, SEQLEN);
1288 
1289  char *consensus2 = tab.build_consensus_string(BK);
1290  TEST_EXPECT_EQUAL(consensus, consensus2);
1291 
1292  free(consensus2);
1293  free(consensus);
1294  }
1295  }
1296 
1297  // build consensi (just check regression)
1298  // Note: semantic tests for consensus are in ../../NTREE/AP_consensus.cxx@TEST_nucleotide_consensus_and_maxFrequency
1299  {
1300  char *consensus = tab.build_consensus_string(BK);
1301  if (useAminoAcids) {
1302  switch (seed[loop]) {
1303  case 1080710223: TEST_EXPECT_EQUAL(consensus, "a.i.dXi.haai.hifdi.d.adXaa..dX"); break;
1304  case 290469779: TEST_EXPECT_EQUAL(consensus, ".......I.D.A......-f..iXi...dX"); break;
1305  case 346019116: TEST_EXPECT_EQUAL(consensus, "if.a-hd.hd..a.di-ddf.i..aXIaI."); break;
1306  case 1708473987: TEST_EXPECT_EQUAL(consensus, "aia..df...haDaadd.d.fha.ii.X.."); break;
1307  case 2080862653: TEST_EXPECT_EQUAL(consensus, "a.ia.id...a.aah.had......i..d."); break;
1308  default: TEST_EXPECTATION(all().of(that(consensus).is_equal_to("undef"), that(seed[loop]).is_equal_to(-1))); // default case is unwanted (logs data -> want one case above for each pair)
1309  }
1310  }
1311  else {
1312  switch (seed[loop]) {
1313  case 1080710223: TEST_EXPECT_EQUAL(consensus, "W.gKyc.sR.urSkGcy.kuy......RY-"); break;
1314  case 290469779: TEST_EXPECT_EQUAL(consensus, ".u-SW.Mgcu.uw-uu..-mmb.sgcRuY."); break;
1315  case 346019116: TEST_EXPECT_EQUAL(consensus, "y-m.-Suy.c.-u.-g-y.cWgr.cyGrgY"); break;
1316  case 1708473987: TEST_EXPECT_EQUAL(consensus, ".K.Kc--.cakyw.uW..a-.guuggwH-a"); break;
1317  case 2080862653: TEST_EXPECT_EQUAL(consensus, ".sgaR..--.uu.cMuuycsk-u.cu.Wc."); break;
1318  default: TEST_EXPECTATION(all().of(that(consensus).is_equal_to("undef"), that(seed[loop]).is_equal_to(-1))); // default case is unwanted (logs data -> want one case above for each pair)
1319  }
1320  }
1321 
1322  // test sub_and_add()
1323 
1324  tab.add(s1, SEQLEN);
1325  PosRange r(0, SEQLEN-1);
1326  tab.sub_and_add(s1, s2, r);
1327  tab.sub_and_add(s2, consensus, r);
1328  tab.sub(consensus, SEQLEN);
1329 
1330  char *consensus2 = tab.build_consensus_string(BK);
1331  TEST_EXPECT_EQUAL(consensus, consensus2);
1332 
1333  free(consensus2);
1334  free(consensus);
1335  }
1336 
1337  for (int a = 0; a<add_count; a++) {
1338  tab.sub(added_seqs[a], SEQLEN);
1339  freenull(added_seqs[a]);
1340  }
1341  {
1342  char *consensus = tab.build_consensus_string(BK);
1343  TEST_EXPECT_EQUAL(consensus, "??????????????????????????????"); // check tab is empty
1344  free(consensus);
1345  }
1346  }
1347  }
1348 }
1349 
1350 void TEST_SepBaseFreq() {
1351  const int LEN = 20;
1352  const int OFF1 = 6;
1353  const int OFF2 = 7; // adjacent to OFF1
1354 
1355  SepBaseFreq short1(LEN), short2(LEN);
1356  for (int i = 0; i<100; ++i) short1.inc_short(OFF1, 1);
1357  for (int i = 0; i<70; ++i) short1.inc_short(OFF2, 1);
1358  for (int i = 0; i<150; ++i) short2.inc_short(OFF1, 1);
1359  for (int i = 0; i<80; ++i) short2.inc_short(OFF2, 1);
1360 
1361  SepBaseFreq long1(LEN), long2(LEN);
1362  long1.expand_table_entry_size();
1363  long2.expand_table_entry_size();
1364  for (int i = 0; i<2000; ++i) long1.inc_long(OFF1, 1);
1365  for (int i = 0; i<2500; ++i) long2.inc_long(OFF1, 1);
1366 
1367  SepBaseFreq shortsum(LEN);
1368  shortsum.add(short1, 0, LEN-1); TEST_EXPECT_EQUAL(shortsum[OFF1], 100); TEST_EXPECT_EQUAL(shortsum[OFF2], 70);
1369  shortsum.add(short2, 0, LEN-1); TEST_EXPECT_EQUAL(shortsum[OFF1], 250); TEST_EXPECT_EQUAL(shortsum[OFF2], 150);
1370  shortsum.sub(short1, 0, LEN-1); TEST_EXPECT_EQUAL(shortsum[OFF1], 150); TEST_EXPECT_EQUAL(shortsum[OFF2], 80);
1371 
1372  shortsum.sub_and_add(short1, short2, PosRange(0, LEN-1)); TEST_EXPECT_EQUAL(shortsum[OFF1], 200); TEST_EXPECT_EQUAL(shortsum[OFF2], 90);
1373 
1374  // shortsum.add(long1, 0, LEN-1); // invalid operation -> terminate
1375 
1376  SepBaseFreq longsum(LEN);
1377  longsum.expand_table_entry_size();
1378  longsum.add(long1, 0, LEN-1); TEST_EXPECT_EQUAL(longsum[OFF1], 2000);
1379  longsum.add(long2, 0, LEN-1); TEST_EXPECT_EQUAL(longsum[OFF1], 4500);
1380  longsum.sub(long1, 0, LEN-1); TEST_EXPECT_EQUAL(longsum[OFF1], 2500);
1381  longsum.add(short1, 0, LEN-1); TEST_EXPECT_EQUAL(longsum[OFF1], 2600);
1382  longsum.sub(short2, 0, LEN-1); TEST_EXPECT_EQUAL(longsum[OFF1], 2450);
1383 
1384  longsum.sub_and_add(long1, long2, PosRange(0, LEN-1)); TEST_EXPECT_EQUAL(longsum[OFF1], 2950);
1385  longsum.sub_and_add(short1, short2, PosRange(0, LEN-1)); TEST_EXPECT_EQUAL(longsum[OFF1], 3000);
1386  longsum.sub_and_add(long1, short2, PosRange(0, LEN-1)); TEST_EXPECT_EQUAL(longsum[OFF1], 1150);
1387  longsum.sub_and_add(short1, long2, PosRange(0, LEN-1)); TEST_EXPECT_EQUAL(longsum[OFF1], 3550);
1388 
1389  int pos = -1;
1390  TEST_EXPECT_EQUAL(short1.firstDifference(short2, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF1);
1391  TEST_EXPECT_EQUAL(short1.lastDifference (short2, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF2);
1392  TEST_EXPECT_EQUAL(short1.firstDifference(long1, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF1);
1393  TEST_EXPECT_EQUAL(short1.lastDifference (long1, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF2);
1394  TEST_EXPECT_EQUAL(long2.firstDifference (short2, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF1);
1395  TEST_EXPECT_EQUAL(long2.lastDifference (short2, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF2);
1396  TEST_EXPECT_EQUAL(long2.firstDifference (long1, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF1);
1397  TEST_EXPECT_EQUAL(long2.lastDifference (long1, 0, LEN-1, &pos), 1); TEST_EXPECT_EQUAL(pos, OFF1);
1398 }
1399 
1400 #endif // UNIT_TESTS
1401 
1402 // --------------------------------------------------------------------------------
1403 
1404 
1405 
string result
#define INVALID_TABLE_OPERATION()
Definition: chartable.cxx:78
#define MAX_INDEX_TABLES
Definition: chartable.h:42
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
Definition: adlang1.cxx:914
group_matcher all()
Definition: test_unit.h:1011
void add(const SepBaseFreq &other, int start, int end)
Definition: chartable.cxx:80
BaseFrequencies(int maxseqlength=0)
Definition: chartable.cxx:715
CONSTEXPR_INLINE unsigned char safeCharIndex(char c)
Definition: dupstr.h:73
bool empty() const
Definition: chartable.cxx:1138
bool isGap(char c)
int start() const
Definition: pos_range.h:60
#define MAX_BASES_TABLES
void change_table_length(int new_length, int default_entry)
Definition: chartable.cxx:304
Definition: iupac.cxx:21
char * build_consensus_string(PosRange r, const ConsensusBuildParams &cbp) const
Definition: chartable.cxx:370
#define INC_DEC(incdec)
Definition: chartable.cxx:989
void init(int length)
Definition: chartable.cxx:37
void change_table_length(int new_length)
Definition: chartable.cxx:1126
bool is_part() const
Definition: pos_range.h:73
int firstDifference(const SepBaseFreq &other, int start, int end, int *firstDifferentPos) const
Definition: chartable.cxx:192
double max_frequency_at(int column, bool ignore_gaps) const
Definition: chartable.cxx:763
void dec_short(int offset, int decr)
Definition: chartable.h:128
#define MAX_TARGET_INDICES
Definition: chartable.h:43
char get_amino_consensus_char(Amino_Group ag)
Definition: iupac.cxx:95
int size() const
Definition: pos_range.h:69
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
FILE * seq
Definition: rns.c:46
uint8_t index[MAX_TARGET_INDICES]
Definition: chartable.h:60
static const char * getAAgroupDefinition()
Definition: chartable.cxx:578
static HelixNrInfo * start
static bool initialized
Definition: AW_advice.cxx:36
#define PERCENT(part, all)
void inc_short(int offset, int incr)
Definition: chartable.h:123
static void setup(const char *gap_chars, GB_alignment_type ali_type_)
Definition: chartable.cxx:604
const char * decode(char iupac, GB_alignment_type aliType, bool decode_amino_iupac_groups)
Definition: iupac.cxx:239
void init(int maxseqlength)
Definition: chartable.cxx:729
void GB_random_seed(unsigned seed)
Definition: admath.cxx:80
void sub_and_add(const BaseFrequencies &Sub, const BaseFrequencies &Add, PosRange range)
Definition: chartable.cxx:899
void inc_long(int offset, int incr)
Definition: chartable.h:133
static bool isGap(char c)
Definition: chartable.h:248
const PosRange * changed_range(const BaseFrequencies &other) const
Definition: chartable.cxx:821
void GBK_terminate(const char *error) __ATTR__NORETURN
Definition: arb_msg.cxx:509
SepBaseFreq(int maxseqlength)
Definition: chartable.cxx:288
#define MAXCHARTABLE
Definition: chartable.h:37
Amino_Group
Definition: iupac.h:26
void bases_and_gaps_at(int column, int *bases, int *gaps) const
Definition: chartable.cxx:738
void scan_string(ED4_multi_species_manager *parent, ED4_reference_terminals &refterms, const char *str, int *index, arb_progress &progress)
#define ct_assert(bed)
Definition: chartable.h:27
#define that(thing)
Definition: test_unit.h:1043
void dec_long(int offset, int decr)
Definition: chartable.h:137
#define LONG_TABLE_ELEM_SIZE
Definition: chartable.h:40
int GB_random(int range)
Definition: admath.cxx:88
bool is_gap(char c)
Definition: seq_search.hxx:48
#define is_equal_to(val)
Definition: test_unit.h:1025
GB_alignment_type
Definition: arbdb_base.h:61
int size() const
Definition: chartable.h:242
void expand_table_entry_size()
Definition: chartable.cxx:58
#define SHORT_TABLE_ELEM_SIZE
Definition: chartable.h:38
#define TEST_EXPECTATION(EXPCTN)
Definition: test_unit.h:1048
int lastDifference(const SepBaseFreq &other, int start, int end, int *lastDifferentPos) const
Definition: chartable.cxx:239
void sub(const SepBaseFreq &other, int start, int end)
Definition: chartable.cxx:110
Amino_Group get_amino_group_for(char aa)
Definition: iupac.cxx:89
#define MAX_USED_BASES_TABLES
Definition: chartable.h:46
#define LOOPS
Definition: db_server.cxx:20
bool ok() const
Definition: chartable.cxx:1148
char encode(const char bases[], GB_alignment_type aliType)
Definition: iupac.cxx:192
void sub_and_add(const SepBaseFreq &Sub, const SepBaseFreq &Add, PosRange range)
Definition: chartable.cxx:140
#define NULp
Definition: cxxforward.h:116
int end() const
Definition: pos_range.h:64
#define offset(field)
Definition: GLwDrawA.c:73
void build_consensus_string_to(char *consensus_string, ExplicitRange range, const ConsensusBuildParams &BK) const
Definition: chartable.cxx:386
int is_ignored() const
Definition: chartable.h:239
#define MAX_AMBIGUITY_CODES
Definition: chartable.h:44
size_t length
#define DUMPINT(var)
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
static int column
Definition: arb_a2ps.c:295
int added_sequences() const
Definition: chartable.h:243
#define max(a, b)
Definition: f2c.h:154