ARB
AP_codon_table.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : AP_codon_table.cxx //
4 // Purpose : codon definitions for DNA -> AA translation //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in January 2010 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 #include "AP_codon_table.hxx"
13 #include "AP_pro_a_nucs.hxx"
14 #include "iupac.h"
15 
16 #include <arb_global_defs.h>
17 #include <arb_str.h>
18 
19 #include <cctype>
20 
21 #define pn_assert(cond) arb_assert(cond)
22 
23 #define EMBL_BACTERIAL_TABLE_INDEX 11
24 #define AWT_CODON_TABLE_MAX_NAME_LENGTH 57 // increasing this limit forces GUI re-layout (look4: AWT_get_codon_code_name)
25 
26 #define VALID_PROTEIN "ABCDEFGHIJKLMNPQRSTVWXYZ*" // all possible translations
27 #define VALID_PROTEIN_NO_X "ABCDEFGHIJKLMNPQRSTVWYZ*" // same as VALID_PROTEIN w/o 'X'
28 
29 // ----------------------------------------------------------------------------------------------------
30 //
31 // Info about translation codes was taken from
32 // http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
33 // and
34 // https://en.wikipedia.org/wiki/List_of_genetic_codes
35 //
36 // Whenever adding new or correcting existing code tables, please
37 // - check data on NCBI webpage mentioned above
38 // - document last update in ../../HELP_SOURCE/oldhelp/transl_table.hlp@LAST_UPDATE_FROM_WEBPAGE
39 //
40 // ----------------------------------------------------------------------------------------------------
41 
43  {
44  // 0000000000111111111122222222223333333333444444444455555555556666 codon number (0-63)
45  // 0123456789012345678901234567890123456789012345678901234567890123
46  //
47  // "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG", base1
48  // "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG", base2
49  // "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" base3
50  {
51  " (1) Standard code",
52  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", // The first code in this table has to be 'Standard code'!
53  "---M------**--*----M---------------M----------------------------",
54  1 // arb:0
55  },
56  {
57  " (2) Vertebrate mitochondrial code",
58  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
59  "----------**--------------------MMMM----------**---M------------",
60  2 // arb:1
61  },
62  {
63  " (3) Yeast mitochondrial code",
64  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
65  "----------**----------------------MM----------------------------",
66  3 // arb:2
67  },
68  // " (X) 6789012345678901234567890123456789012345678901234567", // max.name length (57)
69  {
70  " (4) Coelenterate Mitochondrial + Mycoplasma/Spiroplasma",
71  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
72  "--MM------**-------M------------MMMM---------------M------------",
73  4 // arb:3
74  },
75  {
76  " (5) Invertebrate mitochondrial code",
77  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
78  "---M------**--------------------MMMM---------------M------------",
79  5 // arb:4
80  },
81  {
82  " (6) Ciliate, Dasycladacean and Hexamita nuclear code",
83  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
84  "--------------*--------------------M----------------------------",
85  6 // arb:5
86  },
87  {
88  " (9) Echinoderm and Flatworm mitochondrial code",
89  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
90  "----------**-----------------------M---------------M------------",
91  9 // arb:6
92  },
93  {
94  "(10) Euplotid nuclear code",
95  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
96  "----------**-----------------------M----------------------------",
97  10 // arb:7
98  },
99  // 0000000001111111111222222222233333333334444444444555555555566666
100  // 1234567890123456789012345678901234567890123456789012345678901234
101 
102  // "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG", base1
103  // "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG", base2
104  // "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" base3
105  {
106  "(11) Bacterial and Plant Plastid code",
107  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
108  "---M------**--*----M------------MMMM---------------M------------",
109  11 // arb:8
110  },
111  {
112  "(12) Alternative Yeast nuclear code",
113  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
114  "----------**--*----M---------------M----------------------------",
115  12 // arb:9
116  },
117  {
118  "(13) Ascidian mitochondrial code",
119  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
120  "---M------**----------------------MM---------------M------------",
121  13 // arb:10
122  },
123  {
124  "(14) Alternative Flatworm mitochondrial code",
125  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
126  "-----------*-----------------------M----------------------------",
127  14 // arb:11
128  },
129  {
130  "(15) Blepharisma nuclear code (deleted?)", // why is it no longer listed at NCBI?
131  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
132  "----------*---*--------------------M----------------------------", // converted to new format manually (no source)
133  15 // arb:12
134  },
135  {
136  "(16) Chlorophycean mitochondrial code",
137  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
138  "----------*---*--------------------M----------------------------",
139  16 // arb:13
140  },
141  {
142  "(21) Trematode mitochondrial code",
143  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
144  "----------**-----------------------M---------------M------------",
145  21 // arb:14
146  },
147  {
148  "(22) Scenedesmus obliquus mitochondrial code",
149  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
150  "------*---*---*--------------------M----------------------------",
151  22 // arb:15
152  },
153  {
154  "(23) Thraustochytrium mitochondrial code",
155  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
156  "--*-------**--*-----------------M--M---------------M------------",
157  23 // arb:16
158  },
159  {
160  "(24) Pterobranchia Mitochondrial Code",
161  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
162  "---M------**-------M---------------M---------------M------------",
163  24 // arb:17
164  },
165  {
166  "(25) Candidate Division SR1 and Gracilibacteria Code",
167  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
168  "---M------**-----------------------M---------------M------------",
169  25 // arb:18
170  },
171  {
172  "(26) Pachysolen tannophilus Nuclear Code",
173  "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
174  "----------**--*----M---------------M----------------------------",
175  26 // arb:19
176  },
177  {
178  "(27) Karyorelict Nuclear",
179  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
180  "--------------*--------------------M----------------------------",
181  27 // arb:20
182  },
183  {
184  "(28) Condylostoma Nuclear",
185  "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
186  "----------**--*--------------------M----------------------------",
187  28 // arb:21
188  },
189  {
190  "(29) Mesodinium Nuclear",
191  "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
192  "--------------*--------------------M----------------------------",
193  29 // arb:22
194  },
195  {
196  "(30) Peritrich Nuclear",
197  "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
198  "--------------*--------------------M----------------------------",
199  30 // arb:23
200  },
201  {
202  "(31) Blastocrithidia Nuclear",
203  "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
204  "----------**-----------------------M----------------------------",
205  31 // arb:24
206  },
207 
208  { NULp, NULp, NULp, 0 } // end of table-marker
209  };
210 
211 // When adding new genetic code:
212 // - increase AP_codon_table.hxx@AWT_CODON_TABLES
213 // - increase .@MAX_EMBL_TRANSL_TABLE_VALUE
214 // - add arb-codenr to .@ALL_TABLES
215 
216 #define MAX_EMBL_TRANSL_TABLE_VALUE 31 // maximum known EMBL transl_table value
217 
218 // --------------------------------------------------------------------------------
219 
220 int TTIT_embl2arb(int embl_code_nr) {
221  // returns -1 if embl_code_nr is not known by ARB
222 
223  static bool initialized = false;
224  static int arb_code_nr_table[MAX_EMBL_TRANSL_TABLE_VALUE+1]; // key: embl_code_nr, value: arb_code_nr or -1
225 
226  if (!initialized) {
227  for (int embl = 0; embl <= MAX_EMBL_TRANSL_TABLE_VALUE; ++embl) {
228  arb_code_nr_table[embl] = -1; // illegal table
229  }
230  for (int arb_code_nr = 0; arb_code_nr < AWT_CODON_TABLES; ++arb_code_nr) {
231  int embl = AWT_codon_def[arb_code_nr].embl_feature_transl_table;
232 
233  pn_assert(embl<=MAX_EMBL_TRANSL_TABLE_VALUE); // defined embl code is above limit
234  pn_assert(arb_code_nr_table[embl] == -1); // duplicate definition of EMBL table number
235 
236  arb_code_nr_table[embl] = arb_code_nr;
237  }
238  // should be index of 'Bacterial and Plant Plastid code'
239  // (otherwise maybe AWAR_PROTEIN_TYPE_bacterial_code_index is wrong)
241  pn_assert(arb_code_nr_table[1] == 0); // Standard code has to be on index zero!
242  pn_assert(arb_code_nr_table[MAX_EMBL_TRANSL_TABLE_VALUE] != -1); // arb_code_nr_table is defined too big
243 
244  initialized = true;
245  }
246 
247  if (embl_code_nr<0 || embl_code_nr>MAX_EMBL_TRANSL_TABLE_VALUE) return -1;
248 
249  int arb_code_nr = arb_code_nr_table[embl_code_nr];
250 #ifdef DEBUG
251  if (arb_code_nr != -1) {
252  pn_assert(arb_code_nr >= 0 && arb_code_nr < AWT_CODON_TABLES);
253  pn_assert(TTIT_arb2embl(arb_code_nr) == embl_code_nr);
254  }
255 #endif
256  return arb_code_nr;
257 }
258 
259 int TTIT_arb2embl(int arb_code_nr) {
260  pn_assert(arb_code_nr >= 0 && arb_code_nr<AWT_CODON_TABLES);
261  return AWT_codon_def[arb_code_nr].embl_feature_transl_table;
262 }
263 
264 
265 static bool codon_tables_initialized = false;
266 static char definite_translation[AWT_MAX_CODONS]; // contains 0 if ambiguous, otherwise it contains the definite translation
267 static char *ambiguous_codons[AWT_MAX_CODONS]; // for each ambiguous codon: contains all translations (each only once)
268 
269 static void addToAmbiguous(int codon_nr, char possible_translation) {
270  static uint8_t length[AWT_MAX_CODONS];
271 
272  char*& ambEntry = ambiguous_codons[codon_nr];
273  uint8_t& ambLen = length[codon_nr];
274 
275  if (!ambEntry) { // first insert
276  ambEntry = ARB_calloc<char>(AWT_MAX_CODONS+1);
277  ambEntry[0] = possible_translation;
278  ambLen = 1;
279  }
280  else if (!strchr(ambEntry, possible_translation)) {
281  ambEntry[ambLen++] = possible_translation;
282  }
283 }
284 
286  if (codon_tables_initialized) return;
287 
288  int codon_nr;
289  int code_nr;
290 
291  for (codon_nr=0; codon_nr<AWT_MAX_CODONS; codon_nr++) {
292  ambiguous_codons[codon_nr] = NULp;
293  }
294 
296  pn_assert(!AWT_codon_def[AWT_CODON_TABLES].aa); // Error in AWT_codon_def or AWT_CODON_CODES
297 
298  for (code_nr=0; code_nr<AWT_CODON_TABLES; code_nr++) {
299  const char *translation = AWT_codon_def[code_nr].aa;
300  const char *startStop = AWT_codon_def[code_nr].startStop;
301 
302  pn_assert(strlen(AWT_codon_def[code_nr].name) <= AWT_CODON_TABLE_MAX_NAME_LENGTH); // GUI layout depends on max. name length
303 
304  for (codon_nr=0; codon_nr<AWT_MAX_CODONS; codon_nr++) {
305  bool isOptionalStartStop = false;
306 
307  // check definition of 'translation' and 'startStop' is consistent:
308  switch (startStop[codon_nr]) {
309  case 'M': // defined as start-codon
310  pn_assert(translation[codon_nr] != '*'); // invalid def: stop AND start
311  isOptionalStartStop = translation[codon_nr] != 'M';
312  break;
313 
314  case '*': // defined as stop-codon (new def style)
315  pn_assert(translation[codon_nr] != 'M'); // invalid def: start AND stop
316  isOptionalStartStop = translation[codon_nr] != '*';
317  break;
318 
319  case '-': // neither start nor stop (new def style) not start (old def style)
320  pn_assert(translation[codon_nr] != '*'); // invalid def: stop codons have to be marked in 'Starts' definition
321  break;
322 
323  default:
324  pn_assert(0); // invalid character in startStop
325  break;
326  }
327 
328  // detect definite/ambiguous translations:
329  if (code_nr == 0) { // first table (no ambiguity possible yet)
330  if (isOptionalStartStop) {
331  addToAmbiguous(codon_nr, translation[codon_nr]);
332  addToAmbiguous(codon_nr, startStop[codon_nr]);
333  definite_translation[codon_nr] = 0;
334  }
335  else {
336  definite_translation[codon_nr] = translation[codon_nr];
337  }
338  }
339  else if (definite_translation[codon_nr]) { // is definite till now
340  if (definite_translation[codon_nr] != translation[codon_nr] || isOptionalStartStop) { // we found a different translation
341  addToAmbiguous(codon_nr, definite_translation[codon_nr]);
342  addToAmbiguous(codon_nr, translation[codon_nr]);
343  if (isOptionalStartStop) addToAmbiguous(codon_nr, startStop[codon_nr]);
344  definite_translation[codon_nr] = 0;
345  }
346  }
347  else { // is ambiguous
348  addToAmbiguous(codon_nr, translation[codon_nr]);
349  if (isOptionalStartStop) addToAmbiguous(codon_nr, startStop[codon_nr]);
350  }
351  }
352  }
353 
355 }
356 
357 // return 0..3 (ok) or 4 (failure)
inline int dna2idx(char c) {
    // Map a nucleotide character (either case, 'U' counts as 'T') to its index in "TCAG".
    // Any other character yields 4.
    if (c == 'T' || c == 't' || c == 'U' || c == 'u') return 0;
    if (c == 'C' || c == 'c') return 1;
    if (c == 'A' || c == 'a') return 2;
    if (c == 'G' || c == 'g') return 3;
    return 4;
}
368 
369 inline char idx2dna(int idx) {
370  pn_assert(idx>=0 && idx<4);
371  return "TCAG"[idx];
372 }
373 
374 inline int calc_codon_nr(const char *dna) {
375  int i1 = dna2idx(dna[0]); if (i1 == 4) return AWT_MAX_CODONS; // is not a codon
376  int i2 = dna2idx(dna[1]); if (i2 == 4) return AWT_MAX_CODONS;
377  int i3 = dna2idx(dna[2]); if (i3 == 4) return AWT_MAX_CODONS;
378 
379  int codon_nr = i1*16 + i2*4 + i3;
380  pn_assert(codon_nr>=0 && codon_nr<=AWT_MAX_CODONS);
381  return codon_nr;
382 }
383 
384 inline void build_codon(int codon_nr, char *to_buffer) {
385  pn_assert(codon_nr>=0 && codon_nr<AWT_MAX_CODONS);
386 
387  to_buffer[0] = idx2dna((codon_nr>>4)&3);
388  to_buffer[1] = idx2dna((codon_nr>>2)&3);
389  to_buffer[2] = idx2dna(codon_nr&3);
390 }
391 
392 const char* AWT_get_codon_code_name(int code) {
393  pn_assert(code>=0 && code<AWT_CODON_TABLES);
394  return AWT_codon_def[code].name;
395 }
396 
397 static const char *aa_3letter_name[26+1] = {
398  "Ala", // A
399  "Asx", // B (= D or N)
400  "Cys", // C
401  "Asp", // D
402  "Glu", // E
403  "Phe", // F
404  "Gly", // G
405  "His", // H
406  "Ile", // I
407  "Xle", // J (= I or L)
408  "Lys", // K
409  "Leu", // L
410  "Met", // M
411  "Asn", // N
412  NULp, // O
413  "Pro", // P
414  "Gln", // Q
415  "Arg", // R
416  "Ser", // S
417  "Thr", // T
418  NULp, // U
419  "Val", // V
420  "Trp", // W
421  "Xaa", // X
422  "Tyr", // Y
423  "Glx", // Z (= E or Q)
424  NULp
425 };
426 
427 const char *getAminoAcidAbbr(char aa) {
428  if (aa=='*') return "End";
429  aa = toupper(aa);
430  if (aa>='A' && aa<='Z') return aa_3letter_name[aa-'A'];
431  return NULp;
432 }
433 
434 #ifdef DEBUG
435 
436 inline char nextBase(char c) {
437  switch (c) {
438  case 'T': return 'C';
439  case 'C': return 'A';
440  case 'A': return 'G';
441 #if 0
442  case 'G': return 0;
443 #else
444  case 'G': return 'M';
445  case 'M': return 'R';
446  case 'R': return 'W';
447  case 'W': return 'S';
448  case 'S': return 'Y';
449  case 'Y': return 'K';
450  case 'K': return 'V';
451  case 'V': return 'H';
452  case 'H': return 'D';
453  case 'D': return 'B';
454  case 'B': return 'N';
455  case 'N': return 0;
456 #endif
457  default: pn_assert(0);
458  }
459  return 0;
460 }
461 
462 void AWT_dump_codons(TranslationTableIndexType type, bool skipX) {
463  // use for debugging
464 
465  const TransTables all_allowed;
466 
467  for (char c='*'; c<='Z'; c++) {
468  printf("Codons for '%c': ", c);
469 
470  if (skipX && c == 'X') {
471  fputs("skipped", stdout);
472  }
473  else {
474  bool first_line = true;
475  bool found = false;
476  for (char b1='T'; b1; b1=nextBase(b1)) {
477  for (char b2='T'; b2; b2=nextBase(b2)) {
478  for (char b3='T'; b3; b3=nextBase(b3)) {
479  char dna[4];
480  dna[0]=b1;
481  dna[1]=b2;
482  dna[2]=b3;
483  dna[3]=0;
484 
485  TransTables remaining;
486  if (AWT_is_codon(c, dna, all_allowed, remaining)) {
487  if (!first_line) fputs("\n ", stdout);
488  first_line = false;
489  printf("%s (%s)", dna, remaining.to_string(type));
490  found = true;
491  }
492  }
493  }
494  }
495  if (!found) fputs("none", stdout);
496  }
497  fputs("\n", stdout);
498  if (c=='*') c='A'-1;
499  }
500 }
501 #endif
502 
503 inline char isStartOrStopCodonNr(int codon_nr, int code_nr) {
504  char isStartStop = 0;
505  pn_assert(code_nr >= 0 && code_nr<AWT_CODON_TABLES);
506 
507  pn_assert(codon_nr != AWT_MAX_CODONS); // should not be called with IUPAC codons
508  pn_assert(codon_nr >= 0 && codon_nr<AWT_MAX_CODONS); // (use isStartOrStopCodon, isStartCodon or isStopCodon)
509 
510  if (codon_nr != AWT_MAX_CODONS) { // 'codon' is a clean codon (it contains no iupac-codes)
511  isStartStop = AWT_codon_def[code_nr].startStop[codon_nr];
512  if (isStartStop == '-') {
513  isStartStop = 0;
514  }
515  }
516 
517  arb_assert(implicated(isStartStop, isStartStop == '*' || isStartStop == 'M'));
518  return isStartStop;
519 }
520 
521 char AWT_translator::isStartOrStopCodon(const char *codon) const {
527  char result = 0;
528  int codon_nr = calc_codon_nr(codon);
529  if (codon_nr == AWT_MAX_CODONS) { // codon contains iupac codes (rare case -> brute force implementation ok)
530  TransTables allowed;
531  allowed.forbidAllBut(CodeNr());
532  TransTables remaining = allowed;
533 
534  bool is_start = AWT_is_codon('M', codon, allowed, remaining, NULp);
535  bool is_stop = is_start ? false : AWT_is_codon('*', codon, allowed, remaining, NULp);
536 
537  pn_assert(!(is_start && is_stop));
538  result = is_start ? 'M' : (is_stop ? '*' : 0);
539  }
540  else { // codon is a clean codon
541  result = isStartOrStopCodonNr(calc_codon_nr(codon), code_nr);
542  }
543  return result;
544 }
545 
546 inline bool protMatches(char p1, char p2) {
554  pn_assert(p1 != 'B' && p1 != 'Z' && p1 != 'J');
555  pn_assert(p1 == toupper(p1));
556  pn_assert(p2 == toupper(p2));
557 
558  if (p1 == p2) return true;
559  if (p2 == 'B') return p1 == 'D' || p1 == 'N';
560  if (p2 == 'J') return p1 == 'I' || p1 == 'L';
561  if (p2 == 'Z') return p1 == 'E' || p1 == 'Q';
562  return false;
563 }
564 inline bool containsProtMatching(const char *pstr, char p) {
568  pn_assert(p == toupper(p));
569  if (p == 'B') return strchr(pstr, 'D') || strchr(pstr, 'N');
570  if (p == 'J') return strchr(pstr, 'I') || strchr(pstr, 'L');
571  if (p == 'Z') return strchr(pstr, 'E') || strchr(pstr, 'Q');
572  return strchr(pstr, p);
573 }
574 inline bool isGap(char c) { return GAP::is_std_gap(c); }
575 
576 inline GB_ERROR neverTranslatesError(const char *dna, char protein) {
577  if (!strchr(VALID_PROTEIN, protein)) {
578  return GBS_global_string("'%c' is no valid amino acid", protein);
579  }
580  return GBS_global_string("'%c%c%c' never translates to '%c'", dna[0], dna[1], dna[2], protein);
581 }
582 
583 bool AWT_is_codon(char protein, const char *const dna, const TransTables& allowed, TransTables& remaining, const char **fail_reason_ptr) {
593  pn_assert(allowed.any());
595 
596  const char *fail_reason = NULp;
597  if (fail_reason_ptr) *fail_reason_ptr = NULp;
598 
599  bool is_codon = false;
600  int codon_nr = calc_codon_nr(dna);
601  int first_iupac_pos = -1;
602  int iupac_positions = 0;
603  bool decided = false;
604  bool general_failure = false;
605 
606  protein = toupper(protein);
607 
608  if (codon_nr==AWT_MAX_CODONS) { // dna is not a clean codon (i.e. it contains iupac-codes or gaps)
609  bool too_short = false;
610  int nucs_seen = 0;
611  for (int iupac_pos=0; iupac_pos<3 && !too_short && !fail_reason; iupac_pos++) {
612  char N = dna[iupac_pos];
613 
614  if (!N) too_short = true;
615  else if (!isGap(N)) {
616  nucs_seen++;
617  if (!strchr("ACGTU", N)) {
618  if (first_iupac_pos==-1) first_iupac_pos = iupac_pos;
619  iupac_positions++;
620  const char *decoded_iupac = iupac::decode(N, GB_AT_DNA, 0);
621  if (!decoded_iupac[0]) { // no valid IUPAC
622  fail_reason = GBS_global_string("Invalid character '%c' in DNA", N);
623  }
624  }
625  }
626  }
627 
628  if (!fail_reason && !nucs_seen) { // got no dna
629  fail_reason = "No nucleotides left";
630  }
631  else if (nucs_seen<3) {
632  too_short = true;
633  }
634 
635  if (fail_reason) {
636  decided = true; // fails for all proteins
637  }
638  else if (too_short) {
639  decided = true;
640  if (protein == 'X') {
641  is_codon = true;
642  }
643  else {
644  char dna_copy[4];
645  strncpy(dna_copy, dna, 3);
646  dna_copy[3] = 0;
647 
648  fail_reason = GBS_global_string("Not enough nucleotides (got '%s')", dna_copy);
649  }
650  }
651  }
652 
653  if (!decided) {
654  if (protein == 'X') {
655  TransTables allowed_copy = allowed;
656  const char *valid_prot = VALID_PROTEIN_NO_X;
657 
658  for (int i = 0; valid_prot[i]; ++i) {
659  if (AWT_is_codon(valid_prot[i], dna, allowed_copy, remaining)) {
660  allowed_copy.forbid(remaining);
661  if (allowed_copy.none()) break;
662  }
663  }
664 
665  if (allowed_copy.any()) {
666  is_codon = true;
667  remaining = allowed_copy;
668  }
669  else {
670  fail_reason = neverTranslatesError(dna, protein);
671  }
672  }
673  else if (codon_nr==AWT_MAX_CODONS) { // dna is a codon with one or more IUPAC codes
674  pn_assert(iupac_positions);
675  const char *decoded_iupac = iupac::decode(dna[first_iupac_pos], GB_AT_DNA, 0);
676  pn_assert(decoded_iupac[0]); // already should have been catched above
677 
678  char dna_copy[4];
679  memcpy(dna_copy, dna, 3);
680  dna_copy[3] = 0;
681 
682  bool all_are_codons = true;
683  bool one_is_codon = false;
684 
685  TransTables allowed_copy = allowed;
686 
687  for (int i=0; decoded_iupac[i]; i++) {
688  dna_copy[first_iupac_pos] = decoded_iupac[i];
689  const char *subfail;
690  if (!AWT_is_codon(protein, dna_copy, allowed_copy, remaining, &subfail)) {
691  all_are_codons = false;
692  if (!one_is_codon && ARB_strBeginsWith(subfail, "Not all ")) one_is_codon = true;
693  if (one_is_codon) break;
694  }
695  else {
696  one_is_codon = true;
697  allowed_copy = remaining;
698  }
699  }
700 
701  if (all_are_codons) {
702  pn_assert(allowed_copy.any());
703  remaining = allowed_copy;
704  is_codon = true;
705  }
706  else {
707  remaining.forbidAll();
708  dna_copy[first_iupac_pos] = dna[first_iupac_pos];
709  if (one_is_codon) {
710  fail_reason = GBS_global_string("Not all IUPAC-combinations of '%s' translate to '%c'", dna_copy, protein); // careful when changing this message (see above)
711  }
712  else {
713  fail_reason = neverTranslatesError(dna_copy, protein);
714  }
715  }
716  }
717  else if (definite_translation[codon_nr]) { // codon has a definite translation (i.e. translates equal for all code-tables)
718  char defTransl = definite_translation[codon_nr];
719 
720 #if defined(ASSERTION_USED)
721  bool optionalCodonExists = false;
722  for (int code_nr=0; code_nr<AWT_CODON_TABLES && !optionalCodonExists; code_nr++) {
723  char startStop = isStartOrStopCodonNr(codon_nr, code_nr);
724  if (startStop && startStop != defTransl) { // got optional start/stop codon
725  if (allowed.is_allowed(code_nr)) {
726  pn_assert(startStop == '*' || startStop == 'M');
727  optionalCodonExists = true;
728  }
729  }
730  }
731  pn_assert(!optionalCodonExists); // when this fails -> definite_translation[] is wrong
732 #endif
733 
734  int ok = protMatches(defTransl, protein);
735  if (ok) {
736  remaining = allowed;
737  is_codon = true;
738  }
739  else {
740  remaining.forbidAll();
741  fail_reason = GBS_global_string("'%c%c%c' translates to '%c', not to '%c'", dna[0], dna[1], dna[2], defTransl, protein);
742  general_failure = true;
743  }
744  }
745  else if (!containsProtMatching(ambiguous_codons[codon_nr], protein)) { // codon does not translate to protein in any code-table
746  remaining.forbidAll();
747  fail_reason = neverTranslatesError(dna, protein);
748  general_failure = true;
749  }
750  else {
751 #if defined(ASSERTION_USED)
752  bool correct_disallowed_translation = false;
753 #endif
754 
755  // Now codon translates to protein in at least 1 code-table!
756  // Check whether protein translates in any of the allowed code-tables and forbid rest
757  for (int code_nr=0; code_nr<AWT_CODON_TABLES; code_nr++) {
758  bool mayTranslate = protMatches(AWT_codon_def[code_nr].aa[codon_nr], protein);
759  if (!mayTranslate && (protein == '*' || protein == 'M')) {
760  char startOrStop = isStartOrStopCodonNr(codon_nr, code_nr);
761  mayTranslate = startOrStop && protMatches(startOrStop, protein);
762  }
763 
764  if (mayTranslate) { // may codon_nr translate to protein for code_nr
765  if (allowed.is_allowed(code_nr)) { // is this code allowed?
766  remaining.allow(code_nr);
767  is_codon = true;
768  }
769  else {
770  remaining.forbid(code_nr); // otherwise forbid code in future
771 #if defined(ASSERTION_USED)
772  correct_disallowed_translation = true;
773 #endif
774  }
775  }
776  else {
777  remaining.forbid(code_nr); // otherwise forbid code in future
778  }
779  }
780 
781  if (!is_codon) {
782  pn_assert(correct_disallowed_translation); // should be true because otherwise we shouldn't run into this else-branch
783  fail_reason = GBS_global_string("'%c%c%c' does not translate to '%c'", dna[0], dna[1], dna[2], protein);
784  }
785  }
786  }
787 
788  if (!is_codon) {
789  pn_assert(fail_reason);
790  if (fail_reason_ptr) {
791  if (!allowed.all() && !general_failure) {
792  int one = allowed.explicit_table();
793  if (one == -1) {
794  const char *left_tables = allowed.to_string(TTIT_EMBL);
795  pn_assert(left_tables[0]); // allowed should never be empty!
796 
797  fail_reason = GBS_global_string("%s (for any of the leftover trans-tables: %s)", fail_reason, left_tables);
798  }
799  else {
800  int one_embl = TTIT_arb2embl(one);
801  fail_reason = GBS_global_string("%s (for trans-table %i)", fail_reason, one_embl);
802  }
803  }
804 
805  *fail_reason_ptr = fail_reason; // set failure-reason if requested
806  }
807  }
808 #if defined(ASSERTION_USED)
809  else {
810  pn_assert(remaining.is_subset_of(allowed));
811  }
812 #endif
813  return is_codon;
814 }
815 
816 // -------------------------------------------------------------------------------- Codon_Group
817 
818 #if defined(DEBUG)
819 // #define DUMP_CODON_GROUP_EXPANSION
820 #endif
821 
// Set of codons: one flag per codon number.
class Codon_Group {
    char codon[64]; // index is calculated with calc_codon_nr

public:
    // select all codons translating to 'protein' in codon table 'code_nr'
    Codon_Group(char protein, int code_nr);

    // add all codons of 'other' to this group
    Codon_Group& operator += (const Codon_Group& other);

    // write the codons (plus IUPAC-combined codons) into 'to_buffer'; returns their number
    int expand(char *to_buffer) const;
};
832 
833 Codon_Group::Codon_Group(char protein, int code_nr) {
834  protein = toupper(protein);
835  pn_assert(protein=='*' || isalpha(protein));
836  pn_assert(code_nr>=0 && code_nr<AWT_CODON_TABLES);
837 
838  const char *amino_table = AWT_codon_def[code_nr].aa;
839  for (int i=0; i<AWT_MAX_CODONS; i++) {
840  codon[i] = amino_table[i]==protein;
841  }
842 }
843 
845  for (int i=0; i<AWT_MAX_CODONS; i++) {
846  codon[i] = codon[i] || other.codon[i];
847  }
848  return *this;
849 }
850 
inline int legal_dna_no(int i) {
    // 1 if 'i' is a valid nucleotide index (0..3), 0 otherwise
    return !(i<0 || i>=4);
}
852 
853 inline const char *buildMixedCodon(const char *const con1, const char *const con2) {
854  int mismatches = 0;
855  int mismatch_index = -1;
856  static char buf[4];
857 
858  for (int i=0; i<3; i++) {
859  if (con1[i]!=con2[i]) {
860  mismatches++;
861  mismatch_index = i;
862  }
863  else {
864  buf[i] = con1[i];
865  }
866  }
867 
868  if (mismatches==1) { // exactly one position differs between codons
869  pn_assert(mismatch_index!=-1);
870  buf[mismatch_index] = iupac::combine(con1[mismatch_index], con2[mismatch_index], GB_AT_DNA);
871  buf[3] = 0;
872 
873  if (memcmp(con1, buf, 3) == 0 ||
874  memcmp(con2, buf, 3) == 0)
875  {
876  return NULp;
877  }
878 
879 #if defined(DUMP_CODON_GROUP_EXPANSION)
880  printf(" buildMixedCodon('%c%c%c','%c%c%c') == '%s'\n",
881  con1[0], con1[1], con1[2],
882  con2[0], con2[1], con2[2],
883  buf);
884 #endif
885 
886  return buf;
887  }
888  return NULp;
889 }
890 
891 static int expandMore(const char *bufferStart, int no_of_condons, char*&to_buffer) {
892  int i, j;
893  const char *con1, *con2;
894  int added = 0;
895 
896  for (i=0; i<no_of_condons; i++) {
897  con1 = bufferStart+3*i;
898 
899  for (j=i+1; j<no_of_condons; j++) {
900  con2 = bufferStart+3*j;
901  const char *result = buildMixedCodon(con1, con2);
902  if (result) {
903  to_buffer[0] = 0;
904  // do we already have this codon?
905  const char *found;
906  const char *startSearch = bufferStart;
907  for (;;) {
908  found = strstr(startSearch, result);
909  if (!found) break;
910  int pos = (found-bufferStart);
911  if ((pos%3)==0) break; // yes already here!
912  startSearch = found+1; // was misaligned -> try behind
913  }
914 
915  if (!found) {
916  memmove(to_buffer, result, 3); to_buffer+=3;
917  added++;
918  }
919  }
920  }
921  }
922  return no_of_condons+added;
923 }
924 
925 int Codon_Group::expand(char *to_buffer) const {
926  int count = 0;
927  int i;
928  char *org_to_buffer = to_buffer;
929 
930  for (i=0; i<AWT_MAX_CODONS; i++) {
931  if (codon[i]) {
932  build_codon(i, to_buffer);
933  to_buffer += 3;
934  count++;
935  }
936  }
937 
938 #if defined(DUMP_CODON_GROUP_EXPANSION)
939  to_buffer[0] = 0;
940  printf("codons = '%s'\n", org_to_buffer);
941 #endif
942 
943  for (;;) {
944  int new_count = expandMore(org_to_buffer, count, to_buffer);
945  if (new_count==count) break; // nothing expanded -> done
946  count = new_count;
947 #if defined(DUMP_CODON_GROUP_EXPANSION)
948  to_buffer[0] = 0;
949  printf("codons (expandedMore) = '%s'\n", org_to_buffer);
950 #endif
951  }
952 
953  pn_assert(count==(int(to_buffer-org_to_buffer)/3));
954 
955  return count;
956 }
957 
958 // --------------------------------------------------------------------------------
959 
960 static Codon_Group *get_Codon_Group(char protein, int code_nr) {
961  pn_assert(code_nr>=0 && code_nr<AWT_CODON_TABLES);
962  protein = toupper(protein);
963  pn_assert(isalpha(protein) || protein=='*');
965 
966  Codon_Group *cgroup = NULp;
967 
968  if (protein=='B') {
969  cgroup = new Codon_Group('D', code_nr);
970  Codon_Group N('N', code_nr);
971  *cgroup += N;
972  }
973  else if (protein=='Z') {
974  cgroup = new Codon_Group('E', code_nr);
975  Codon_Group Q('Q', code_nr);
976  *cgroup += Q;
977  }
978  else {
979  cgroup = new Codon_Group(protein, code_nr);
980  }
981 
982  pn_assert(cgroup);
983 
984  return cgroup;
985 }
986 
987 #define MAX_CODON_LIST_LENGTH (70*3)
988 
989 const char *AP_get_codons(char protein, int code_nr) {
990  // get a list of all codons ("xyzxyzxyz...") encoding 'protein' in case we use Codon-Code 'code_nr'
991  // (includes all completely contained IUPAC-encoded codons at the end of list)
992  //
993  // Optional start-/stop-codons are not added
994  // (i.e. a query for 'M' or '*' may report "incomplete" results)
995 
996  Codon_Group *cgroup = get_Codon_Group(protein, code_nr);
997 
998  static char buffer[MAX_CODON_LIST_LENGTH+1];
999  int offset = 3*cgroup->expand(buffer);
1001  buffer[offset] = 0;
1002 
1003  delete cgroup;
1004 
1005  return buffer;
1006 }
1007 
1008 // --------------------------------------------------------------------------------
1009 
1010 #ifdef UNIT_TESTS
1011 #ifndef TEST_UNIT_H
1012 #include <test_unit.h>
1013 #endif
1014 
1015 static const char *startStopSummary() {
1016  // returns string showing summary for start/stop
1017  // position = codon_nr
1018  // content:
1019  // '*' -> translates to stop-codon for at least one code
1020  // 'M' -> translates to start-codon for at least one code
1021  // '2' -> both (not necessarily same code)
1022  // '-' -> does not translate to start or stop for any code
1023 
1024  static char result[AWT_MAX_CODONS+1];
1025 
1026  for (int codon = 0; codon<AWT_MAX_CODONS; ++codon) {
1027  char startStop = '-';
1028  for (int code = 0; code<AWT_CODON_TABLES && (startStop != '2'); ++code) {
1029  switch (isStartOrStopCodonNr(codon, code)) {
1030  case '*':
1031  switch (startStop) {
1032  case '*': break;
1033  case '-': startStop = '*'; break;
1034  case 'M': startStop = '2'; break;
1035  default: pn_assert(0); break;
1036  }
1037  break;
1038  case 'M':
1039  switch (startStop) {
1040  case 'M': break;
1041  case '-': startStop = 'M'; break;
1042  case '*': startStop = '2'; break;
1043  default: pn_assert(0); break;
1044  }
1045  break;
1046 
1047  case 0: break;
1048  default: pn_assert(0); break;
1049  }
1050  }
1051  result[codon] = startStop;
1052  }
1053  result[AWT_MAX_CODONS] = 0;
1054  return result;
1055 }
1056 static const char *optionality() {
1057  // returns string indicating whether start/stop-codon is optional
1058  // position = codon_nr
1059  // content:
1060  // '-' -> only non-optional start/stop
1061  // '!' -> only optional start/stop
1062  // '?' -> both
1063  // ' ' -> never start or stop
1064 
1065  static char result[AWT_MAX_CODONS+1];
1066 
1067  for (int codon = 0; codon<AWT_MAX_CODONS; ++codon) {
1068  char optional = ' ';
1069  for (int code = 0; code<AWT_CODON_TABLES && (optional != '?'); ++code) {
1070  char startStop = isStartOrStopCodonNr(codon, code);
1071  if (startStop) {
1072  bool is_optional = AWT_codon_def[code].aa[codon] != startStop;
1073 
1074  switch (optional) {
1075  case ' ': optional = is_optional ? '!' : '-'; break;
1076  case '-': optional = is_optional ? '?' : '-'; break;
1077  case '!': optional = is_optional ? '!' : '?'; break;
1078  default: pn_assert(0); break;
1079  }
1080  }
1081  }
1082 
1083 #if defined(ASSERTION_USED)
1084  bool sometimes_optional = optional == '!' || optional == '?';
1085  pn_assert(!sometimes_optional || !definite_translation[codon]);
1086 #endif
1087 
1088  result[codon] = optional;
1089  }
1090  result[AWT_MAX_CODONS] = 0;
1091 
1092  return result;
1093 }
1094 static const char *definite() {
1095  static char result[AWT_MAX_CODONS+1];
1096  for (int codon = 0; codon<AWT_MAX_CODONS; ++codon) {
1097  result[codon] = definite_translation[codon] ? definite_translation[codon] : ' ';
1098  }
1099  result[AWT_MAX_CODONS] = 0;
1100  return result;
1101 }
1102 static const char *ambig_count() {
1103  static char result[AWT_MAX_CODONS+1];
1104  for (int codon = 0; codon<AWT_MAX_CODONS; ++codon) {
1105  const char *amb = ambiguous_codons[codon];
1106  result[codon] = amb ? '0'+strlen(amb) : ' ';
1107  }
1108  result[AWT_MAX_CODONS] = 0;
1109  return result;
1110 }
1111 
1112 #define e2a(c) TTIT_embl2arb(c)
1113 
// Unit test for the codon tables:
// - compares the start/stop, optionality, definite-translation and ambiguity summaries
//   against fixed reference strings,
// - checks getAminoAcidAbbr(), protMatches() and AP_get_codons() for selected inputs,
// - runs AWT_is_codon() over three data tables:
//   * 'is_codon'      : protein/codon pairs expected to match (plus the tables remaining afterwards),
//   * 'not_codon'     : protein/codon pairs expected to fail (plus the reported reason),
//   * 'uncomb_codons' : pairs where the second check, restricted to the tables remaining
//                       from the first check, is expected to fail.
// Table lists in the expected strings are printed via to_string(TTIT_ARB), i.e. they contain arb table-numbers;
// e2a() converts an EMBL table number using TTIT_embl2arb().
1114 void TEST_codon_check() {
1116
1117  // 0000000000111111111122222222223333333333444444444455555555556666 codon number (0-63)
1118  // 0123456789012345678901234567890123456789012345678901234567890123
1119  //
1120  // "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" base1
1121  // "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" base2
1122  // "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" base3
1123  TEST_EXPECT_EQUAL(startStopSummary(), "--2M--*---**--*----M------------MMMM----------**---M------------");
1124  TEST_EXPECT_EQUAL(optionality (), " ?! - ?? ? ! !!?- -- ! ");
1125  TEST_EXPECT_EQUAL(definite (), "FF SS SYY CC W PPPPHHQQRRRR MTTTTNN KSS VVV AAAADDEEGGGG"); // optional start/stop codons shall never be definite
1126  TEST_EXPECT_EQUAL(ambig_count (), " 32 2 45 4 2225 222 2 45 2 "); // number of proteins in ambiguous_codons
1127
1128  TEST_EXPECT_EQUAL(getAminoAcidAbbr('*'), "End");
1129  TEST_EXPECT_EQUAL(getAminoAcidAbbr('C'), "Cys");
1130  TEST_EXPECT_EQUAL(getAminoAcidAbbr('B'), "Asx");
1131  TEST_EXPECT_EQUAL(getAminoAcidAbbr('b'), "Asx");
1132  TEST_EXPECT_EQUAL(getAminoAcidAbbr('J'), "Xle");
1134  TEST_EXPECT_EQUAL(getAminoAcidAbbr('X'), "Xaa");
1135  TEST_EXPECT_EQUAL(getAminoAcidAbbr('x'), "Xaa");
1139
1140  TEST_EXPECT(protMatches('V', 'V'));
1141  TEST_EXPECT(protMatches('N', 'B'));
1142  TEST_EXPECT(protMatches('E', 'Z'));
1143  TEST_EXPECT(!protMatches('N', 'Z'));
1144  TEST_EXPECT(!protMatches('V', 'Z'));
1145
1146  TEST_EXPECT_EQUAL(AP_get_codons('D', 0), "GATGACGAY");
1147  TEST_EXPECT_EQUAL(AP_get_codons('N', 0), "AATAACAAY");
1148  TEST_EXPECT_EQUAL(AP_get_codons('B', 0), "AAT" "AAC" "GAT" "GAC" "AAY" "RAT" "RAC" "GAY" "RAY"); // 'B' = 'D' or 'N'
1149
1150  TEST_EXPECT_EQUAL(AP_get_codons('L', 0), "TTATTGCTTCTCCTACTG" "TTRYTAYTGCTYCTWCTKCTMCTSCTRCTHCTBCTDCTVYTRCTN");
1151  TEST_EXPECT_EQUAL(AP_get_codons('L', 2), "TTATTG" "TTR");
1152  TEST_EXPECT_EQUAL(AP_get_codons('L', 9), "TTATTGCTTCTCCTAT" "TRYTACTYCTWCTMCTH");
1153  TEST_EXPECT_EQUAL(AP_get_codons('L', 13), "TTATTGTAGCTTCTCCTACTG" "TTRYTATWGYTGCTYCTWCTKCTMCTSCTRCTHCTBCTDCTVYTRCTN");
1154  TEST_EXPECT_EQUAL(AP_get_codons('L', 16), "TTGCTTCTCCTAC" "TGYTGCTYCTWCTKCTMCTSCTRCTHCTBCTDCTVCTN");
1155
1156  TEST_EXPECT_EQUAL(AP_get_codons('S', 0), "TCTTCCTCATCGAGTAGC" "TCYTCWTCKTCMTCSTCRAGYTCHTCBTCDTCVTCN");
1157  TEST_EXPECT_EQUAL(AP_get_codons('S', 4), "TCTTCCTCATCGAGTAGCAGAAGG" "TCYTCWTCKTCMTCSTCRAGYAGWAGKAGMAGSAGRTCHTCBTCDTCVAGHAGBAGDAGVTCNAGN");
1158  TEST_EXPECT_EQUAL(AP_get_codons('S', 9), "TCTTCCTCATCGCTGAGTAGC" "TCYTCWTCKTCMTCSTCRAGYTCHTCBTCDTCVTCN");
1159  TEST_EXPECT_EQUAL(AP_get_codons('S', 15), "TCTTCCTCGAGTAGC" "TCYTCKTCSAGYTCB");
1160
1161  // stop-codons:
1162  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 1)), "TAATAGTGA" "TARTRA"); // the 3 standard stop codons and their IUPAC covers
1163  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 2)), "TAATAGAGAAGG" "TARAGR");
1164  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 3)), "TAATAG" "TAR"); // not TGA
1165  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 4)), "TAATAG" "TAR");
1166  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 5)), "TAATAG" "TAR");
1167  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 9)), "TAATAG" "TAR");
1168  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(10)), "TAATAG" "TAR");
1169  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(13)), "TAATAG" "TAR");
1170  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(21)), "TAATAG" "TAR");
1171  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(15)), "TAATGA" "TRA"); // not TAG
1172  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(16)), "TAATGA" "TRA");
1173  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a( 6)), "TGA"); // not TAA TAG
1174  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(14)), "TAG"); // not TAA TGA
1175  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(22)), "TCATAATGA" "TMATSATRATVA");
1176  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(23)), "TTATAATAGTGA" "TWATKATARTRATDA");
1177
1178  {
1179  // Note: optional start/stop-codons are not added in Codon_Group,
1180  // because they would introduce ambiguous mapping.
1181
1182  // test optional stop-codons:
1183  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(27)), "");
1184  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(28)), "");
1185  TEST_EXPECT_EQUAL(AP_get_codons('*', e2a(31)), "");
1186
1187  // test optional start-codons:
1188  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a( 1)), "ATG"); // 3 (start-codons listed in table-definition)
1189  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a( 2)), "ATAATG" "ATR"); // 5
1190  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a( 3)), "ATAATG" "ATR"); // 2
1191  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a( 4)), "ATG"); // 8
1192  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a( 5)), "ATAATG" "ATR"); // 6
1193  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a( 6)), "ATG"); // 1
1194  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a(11)), "ATG"); // 7
1195  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a(13)), "ATAATG" "ATR"); // 4
1196  TEST_EXPECT_EQUAL(AP_get_codons('M', e2a(24)), "ATG"); // 4
1197  }
1198
1199  TEST_EXPECT_EQUAL(AP_get_codons('X', 0), ""); // @@@ wrong: TGR->X (or disallow call)
1200
1201  const TransTables allowed;
1202
1203  // ---------------------------
1204  // test valid codons
1205  struct test_is_codon {
1206  char protein;
1207  const char *codon;
1208  const char *tables;
1209  };
1210
1211 #define ALL_TABLES "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24" // contains arb table-numbers
1212
1213  test_is_codon is_codon[] = {
1214  { 'P', "CCC", ALL_TABLES },
1215  { 'P', "CCN", ALL_TABLES },
1216  { 'R', "CGN", ALL_TABLES },
1217
1218  { 'D', "GAY", ALL_TABLES },
1219  { 'N', "AAY", ALL_TABLES },
1220  { 'B', "AAY", ALL_TABLES }, // translates to 'N', but matches B(=D|N) for realigner
1221  { 'B', "GAY", ALL_TABLES }, // translates to 'D', but matches B(=D|N) for realigner
1222  { 'B', "RAY", ALL_TABLES }, // translates to 'D' or to 'N' (i.e. only matches 'B', see failing test for 'RAY' below)
1223  { 'B', "RAT", ALL_TABLES },
1224
1225  { 'Q', "CAR", ALL_TABLES },
1226  { 'E', "GAR", ALL_TABLES },
1227  { 'Z', "SAR", ALL_TABLES },
1228
1229  { 'X', "NNN", ALL_TABLES },
1230
1231  { 'L', "TTR", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15" ",17,18,19,20,21,22,23,24" }, { 'X', "TTR", "16" },
1232  { 'L', "YTA", "0,1"",3,4,5,6,7,8,9,10,11,12,13,14,15" ",17,18,19,20,21,22,23,24" }, { 'X', "YTA", "2,16" }, // Y=TC
1233  { 'L', "CTM", "0,1"",3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24" }, { 'T', "CTM", "2" }, // M=AC
1234  { 'L', "CTN", "0,1"",3,4,5,6,7,8"",10,11,12,13,14,15,16,17,18" ",20,21,22,23,24" }, { 'T', "CTN", "2" }, { 'X', "CTN", "9,19" },
1235  { 'L', "CTK", "0,1"",3,4,5,6,7,8"",10,11,12,13,14,15,16,17,18" ",20,21,22,23,24" }, { 'T', "CTK", "2" }, { 'X', "CTK", "9,19" }, // K=TG
1236
1237  { 'L', "TWG", "13,15" }, // W=AT
1238  { 'J', "TWG", "13,15" }, // translates to 'L', but matches J(=I|L) for realigner
1239  { 'X', "TWG", "0,1,2,3,4,5,6,7,8,9,10,11,12" ",14" ",16,17,18,19,20,21,22,23,24" }, // all but 'L<->TWG'
1240
1241  { 'S', "AGY", ALL_TABLES },
1242  { 'S', "TCY", ALL_TABLES },
1243  { 'S', "TCN", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,23,24" }, // all but 15 (where 'TCA->*')
1244  { 'S', "AGN", "4,6,11,14" },
1245  { 'S', "AGR", "4,6,11,14" },
1246
1247  { '*', "AGR", "1" }, // R=AG
1248  { 'G', "AGR", "10" },
1249  { 'X', "AGR", "17" },
1250  { 'R', "AGR", "0,2,3,5,7,8,9,12,13,15,16,18,19,20,21,22,23,24" },
1251
1252  { 'G', "AGA", "10" },
1253  { 'S', "AGA", "4,6,11,14,17" },
1254  { 'R', "AGA", "0,2,3,5,7,8,9,12,13,15,16,18,19,20,21,22,23,24" },
1255  { '*', "AGA", "1" },
1256
1257  { 'K', "AGG", "17" },
1258
1259  { 'W', "TGR", "1,2,3,4,6,10,11,14,17,20,21,24" },
1260  { 'X', "TGR", "0,5,7,8,9,12,13,15,16,18,19,22,23" }, // all but 'W<->TGR' (e.g. code==0: TGA->* & TGG->W => TGR->X)
1261
1262  { 'C', "TGW", "7" }, // W = AT
1263  { 'X', "TGW", "0,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24" }, // all but 'C<->TGW'
1264
1265  { 'C', "TGT", ALL_TABLES },
1266
1267  { 'C', "TGA", "7" },
1268  { 'G', "TGA", "18" },
1269  { 'W', "TGA", "1,2,3,4,6,10,11,14,17,20,21,24" },
1270  { '*', "TGA", "0,5,8,9,12,13,15,16,19,20,21,22,23" }, // standard stop codons
1271  { '*', "TAA", "0,1,2,3,4,6,7,8,9,10,12,13,14,15,16,17,18,19,21,24" },
1272  { '*', "TAG", "0,1,2,3,4,6,7,8,9,10,11,14,16,17,18,19,21,24" },
1273
1274  { '*', "TRA", "0,8,9,12,13,15,16,19,21" }, // R=AG
1275  { 'X', "TRA", "1,2,3,4,5,6,7,10,11,14,17,18,20,22,23,24" }, // all but '*<->TRA'
1276
1277  { '*', "TAR", "0,1,2,3,4,6,7,8,9,10,14,16,17,18,19,21,24" },
1278  { 'Y', "TAR", "22" },
1279  { 'E', "TAR", "23,24" },
1280  { 'Q', "TAR", "5,20,21" },
1281  { 'Z', "TAR", "5,20,21,23,24" }, // Z=EQ (TAR never translates to 'E', only 'Q')
1282  { 'X', "TAR", "11,12,13,15" },
1283
1284  { 'B', "AAW", "6,11,14" }, // W=AT
1285  { 'N', "AAW", "6,11,14" },
1286  { 'X', "AAW", "0,1,2,3,4,5,7,8,9,10,12,13,15,16,17,18,19,20,21,22,23,24" }, // all but 'B<->AAW' & 'N<->AAW'
1287
1288  { 'T', "CTG", "2" },
1289  { 'S', "CTG", "9" },
1290  { 'A', "CTG", "19" },
1291  { 'L', "CTG", "0,1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24" }, // all but 'T<->CTG', 'S<->CTG' & 'A<->CTG'
1292  { 'J', "CTG", "0,1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24" }, // same as for 'L'
1293  { 'M', "CTG", "0,3,8,9,17,19" }, // optional start-codon
1294
1295  { 'T', "CTR", "2" },
1296  { 'X', "CTR", "9,19" },
1297  { 'L', "CTR", "0,1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24" }, // all but 'T<->CTR' & 'X<->CTR'
1298
1299  { 'E', "KAR", "23,24" },
1300  // Q <->KAR fails (see below)
1301  { 'Z', "KAR", "5,20,21,23,24" }, // Z=E|Q
1302  { 'X', "KAR", "0,1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,22" },
1303
1304  { 'G', "KGA", "18" },
1305  { 'X', "KGA", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20,21,22,23,24" }, // all but G<->KGA
1306
1307  { 'E', "TAG", "23,24" },
1308  { 'Q', "TAG", "5,12,20,21" },
1309  { 'L', "TAG", "13,15" },
1310  { 'Y', "TAG", "22" },
1311  { 'J', "TAG", "13,15" }, // J=I|L
1312  { 'Z', "TAG", "5,12,20,21,23,24" }, // Z=E|Q
1313  { '*', "TAG", "0,1,2,3,4,6,7,8,9,10,11,14,16,17,18,19,21,24" },
1314
1315  { 'J', "WTA", "0,3,5,6,7,8,9,11,12,13,15,17,18,19,20,21,22,23,24" },
1316
1317  { 'X', "A-C", ALL_TABLES },
1318  { 'X', ".T.", ALL_TABLES },
1319
1320  // tests to protect buffer overflows in dna
1321  { 'X', "CG", ALL_TABLES },
1322  { 'X', "T", ALL_TABLES },
1323
1324  // 0000000000111111111122222222223333333333444444444455555555556666 codon number (0-63)
1325  // 0123456789012345678901234567890123456789012345678901234567890123
1326  //
1327  // "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" base1
1328  // "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" base2
1329  // "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" base3
1330  // "--2M--*---**--*----M------------MMMM----------**---M------------" (= startStopSummary)
1331  // " ?! - ?? ? ! !!?- -- ! " (= optionality: !=all start/stop optional; -=no start/stop optional, ?=mixed)
1332
1333  // test all start codons:
1334  { 'M', "TTA", "3" }, // start AND stop -> see ../ALILINK/TranslateRealign.cxx@TTA_AMBIGUITY
1335  { 'M', "TTG", "0,3,4,8,10,17,18" },
1336  { 'L', "TTG", ALL_TABLES },
1337  // M <->CTG already tested above
1338  { 'M', "ATT", "1,3,4,8,16" },
1339  { 'M', "ATC", "1,3,4,8" },
1340  { 'M', "ATA", "1,2,3,4,8,10,14" },
1341  { 'I', "ATA", "0,3,5,6,7,8,9,11,12,13,15,16,17,18,19,20,21,22,23,24" }, // optional for 3, 8
1342  { 'M', "ATG", ALL_TABLES }, // no optional start
1343  { 'M', "ATR", "1,2,3,4,8,10,14" }, // R = AG (code=3 -> ATA->IM ATG->M)
1344  { 'M', "ATM", "1,3,4,8" }, // M = AC
1345  { 'M', "ATS", "1,3,4,8" }, // S = CG
1346  { 'M', "ATY", "1,3,4,8" }, // Y = TC
1347  { 'M', "ATK", "1,3,4,8,16" }, // K = TG
1348  { 'M', "ATW", "1,3,4,8" }, // W = AT
1349  { 'M', "ATV", "1,3,4,8" }, // V = ACG
1350  { 'M', "ATB", "1,3,4,8" }, // B = TCG
1351  { 'M', "ATD", "1,3,4,8" }, // D = ATG
1352
1353  { 'M', "ATH", "1,3,4,8" }, // H = ACT
1354  { 'I', "ATH", "0,3,5,6,7,8,9,11,12,13,15,16,17,18,19,20,21,22,23,24" },
1355  { 'X', "ATH", "2,10,14" },
1356
1357  { 'M', "ATN", "1,3,4,8" }, // N = ATCG
1358  { 'M', "GTG", "1,3,4,6,8,10,14,16,17,18" },
1359
1360  // test all stop codons:
1361  { '*', "AGA", "1" }, // (DUPTEST)
1362  { '*', "AGG", "1" },
1363  { '*', "TAA", "0,1,2,3,4,6,7,8,9,10,12,13,14,15,16,17,18,19,21,24" },//(DUPTEST)
1364  { '*', "TAG", "0,1,2,3,4,6,7,8,9,10,11,14,16,17,18,19,21,24" }, // (DUPTEST)
1365  { '*', "TCA", "15" },
1366  { '*', "TGA", "0,5,8,9,12,13,15,16,19,20,21,22,23" }, // (DUPTEST)
1367  { '*', "TTA", "16" },
1368
1369  { '*', "TWA", "16" }, // W = AT
1370  { '*', "TMA", "15" }, // M = AC
1371  { '*', "TAR", "0,1,2,3,4,6,7,8,9,10,14,16,17,18,19,21,24" }, // R = AG (DUPTEST)
1372  { '*', "TRA", "0,8,9,12,13,15,16,19,21" }, // R = AG (DUPTEST)
1373  { '*', "AGR", "1" }, // R = AG (DUPTEST)
1374
1375  { 0, NULp, NULp}
1376  };
1377
// each entry has to be accepted without failure message and has to leave exactly the listed tables
1378  for (int c = 0; is_codon[c].protein; ++c) {
1379  const test_is_codon& C = is_codon[c];
1380  TEST_ANNOTATE(GBS_global_string("%c <- %s", C.protein, C.codon));
1381
1382  TransTables remaining;
1383  const char *failure;
1384  bool isCodon = AWT_is_codon(C.protein, C.codon, allowed, remaining, &failure);
1385
1386  TEST_EXPECT_NULL(failure);
1387  TEST_EXPECT(isCodon);
1388  TEST_EXPECT_EQUAL(remaining.to_string(TTIT_ARB), C.tables);
1389  }
1390
1391  // -----------------------------
1392  // test invalid codons
1393  struct test_not_codon {
1394  char protein;
1395  const char *codon;
1396  const char *error;
1397  };
1398  test_not_codon not_codon[] = {
1399  { 'P', "SYK", "Not all IUPAC-combinations of 'SYK' translate to 'P'" }, // correct (possible translations are PAL)
1400  { 'F', "SYK", "'SYK' never translates to 'F'" }, // correct failure
1401  { 'P', "NNN", "Not all IUPAC-combinations of 'NNN' translate to 'P'" }, // correct failure
1402  { 'D', "RAY", "Not all IUPAC-combinations of 'RAY' translate to 'D'" }, // correct failure
1403  { 'E', "SAR", "Not all IUPAC-combinations of 'SAR' translate to 'E'" }, // correct failure
1404  { 'Q', "KAR", "Not all IUPAC-combinations of 'KAR' translate to 'Q'" }, // correct failure
1405
1406  { 'S', "CYT", "'CYT' never translates to 'S'" }, // correct failure
1407
1408  { 'O', "RAY", "'O' is no valid amino acid" },
1409  { 'U', "AAA", "'U' is no valid amino acid" },
1410
1411  { 'L', "A-C", "Not enough nucleotides (got 'A-C')" }, // correct failure
1412  { 'V', ".T.", "Not enough nucleotides (got '.T.')" }, // correct failure
1413  { 'L', "...", "No nucleotides left" },
1414  { 'J', "...", "No nucleotides left" },
1415
1416  { 'I', "ATR", "Not all IUPAC-combinations of 'ATR' translate to 'I'" }, // R = AG // ok: 'ATG' translates to 'M', not to 'I'
1417
1418  { '*', "TYA", "Not all IUPAC-combinations of 'TYA' translate to '*'" }, // Y = TC; TCA(code=15) TTA(code=16) -> no code for both
1419  { '*', "TRR", "Not all IUPAC-combinations of 'TRR' translate to '*'" }, // R = AG (TGG does never translate to '*')
1420  { '*', "WGA", "Not all IUPAC-combinations of 'WGA' translate to '*'" }, // W = AT; AGA(1) TGA(other) -> no common codes
1421  { '*', "THA", "Not all IUPAC-combinations of 'THA' translate to '*'" }, // H = ACT; TAA(many) TCA(15) TTA(16) -> no code overlap between TCA and TTA
1422
1423  { 'X', "...", "No nucleotides left" },
1424  { 'X', "..", "No nucleotides left" },
1425  { 'X', "-", "No nucleotides left" },
1426  { 'X', "", "No nucleotides left" },
1427
1428  // test invalid chars
1429  { 'X', "AZA", "Invalid character 'Z' in DNA" },
1430  { 'X', "A@A", "Invalid character '@' in DNA" },
1431  { 'L', "AZA", "Invalid character 'Z' in DNA" },
1432
1433  // tests to protect buffer overflows in dna
1434
1435  { 'A', "--", "No nucleotides left" },
1436  { 'L', ".", "No nucleotides left" },
1437  { 'J', ".", "No nucleotides left" },
1438  { 'L', "AT", "Not enough nucleotides (got 'AT')" },
1439  { 'L', "C", "Not enough nucleotides (got 'C')" },
1440  { 'L', "", "No nucleotides left" },
1441
1442  { 0, NULp, NULp}
1443  };
// each entry has to be rejected with exactly the listed failure message
1444  for (int c = 0; not_codon[c].protein; ++c) {
1445  const test_not_codon& C = not_codon[c];
1446  TEST_ANNOTATE(GBS_global_string("%c <- %s", C.protein, C.codon));
1447
1448  TransTables remaining;
1449  const char *failure;
1450  bool isCodon = AWT_is_codon(C.protein, C.codon, allowed, remaining, &failure);
1451
1452  if (isCodon) { // the test-case makes no sense in 'not_codon'
1453  TEST_EXPECT_EQUAL(remaining.to_string(TTIT_ARB), ""); // -> move the failing test-case up into 'is_codon'-section
1454  }
1455  else {
1456  TEST_EXPECT_EQUAL(failure, C.error);
1457  }
1458  TEST_EXPECT(!isCodon);
1459  }
1460
1461  // ----------------------------------
1462  // test uncombinable codons
1463  struct test_uncombinable_codons {
1464  char protein1;
1465  const char *codon1;
1466  const char *tables;
1467  char protein2;
1468  const char *codon2;
1469  const char *error;
1470  };
1471  test_uncombinable_codons uncomb_codons[] = {
1472  { '*', "TTA", "16", 'E', "SAR", "Not all IUPAC-combinations of 'SAR' translate to 'E' (for trans-table 23)" },
1473  { '*', "TTA", "16", 'X', "TRA", "'TRA' never translates to 'X' (for trans-table 23)" },
1474  { 'L', "TAG", "13,15", 'X', "TRA", "'TRA' never translates to 'X' (for any of the leftover trans-tables: 16,22)" },
1475  { 'L', "TAG", "13,15", 'Q', "TAR", "'TAR' never translates to 'Q' (for any of the leftover trans-tables: 16,22)" },
1476  { '*', "TTA", "16", '*', "TCA", "'TCA' does not translate to '*' (for trans-table 23)" },
1477  { 'N', "AAA", "6,11,14", 'X', "AAW", "'AAW' never translates to 'X' (for any of the leftover trans-tables: 9,14,21)" },
1478  { 'N', "AAA", "6,11,14", 'K', "AAA", "'AAA' does not translate to 'K' (for any of the leftover trans-tables: 9,14,21)" },
1479
1480  { 0, NULp, NULp, 0, NULp, NULp}
1481  };
1482
// first check has to succeed and leave 'tables'; the second check only gets these tables and has to fail with 'error'
1483  for (int c = 0; uncomb_codons[c].protein1; ++c) {
1484  const test_uncombinable_codons& C = uncomb_codons[c];
1485  TEST_ANNOTATE(GBS_global_string("%c <- %s + %c <- %s", C.protein1, C.codon1, C.protein2, C.codon2));
1486
1487  TransTables remaining1;
1488  const char *failure;
1489  bool isCodon = AWT_is_codon(C.protein1, C.codon1, allowed, remaining1, &failure);
1490
1491  TEST_EXPECT(isCodon);
1492  TEST_EXPECT_EQUAL(remaining1.to_string(TTIT_ARB), C.tables);
1493
1494  // @@@ add separate test: show protein2/codon2 return true from AWT_is_codon if not called with remaining1
1495
1496  TransTables remaining2;
1497  isCodon = AWT_is_codon(C.protein2, C.codon2, remaining1, remaining2, &failure);
1498  TEST_EXPECT_EQUAL(failure, C.error);
1499  TEST_REJECT(isCodon);
1500
1501  }
1502 }
1503 
1504 #endif // UNIT_TESTS
1505 
1506 // --------------------------------------------------------------------------------
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
bool protMatches(char p1, char p2)
string result
GB_TYPES type
char idx2dna(int idx)
bool isGap(char c)
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
const char * AP_get_codons(char protein, int code_nr)
#define pn_assert(cond)
const char * to_string(TranslationTableIndexType type) const
#define VALID_PROTEIN_NO_X
static char * ambiguous_codons[AWT_MAX_CODONS]
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
void forbidAllBut(int nr)
static AWT_Codon_Code_Definition AWT_codon_def[AWT_CODON_TABLES+1]
static char definite_translation[AWT_MAX_CODONS]
Definition: trnsprob.h:20
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
char combine(char c1, char c2, GB_alignment_type ali)
Definition: iupac.cxx:231
void build_codon(int codon_nr, char *to_buffer)
#define VALID_PROTEIN
static bool initialized
Definition: AW_advice.cxx:36
int TTIT_embl2arb(int embl_code_nr)
char isStartOrStopCodonNr(int codon_nr, int code_nr)
#define TEST_EXPECT(cond)
Definition: test_unit.h:1312
const char * decode(char iupac, GB_alignment_type aliType, bool decode_amino_iupac_groups)
Definition: iupac.cxx:239
bool none() const
static int expandMore(const char *bufferStart, int no_of_condons, char *&to_buffer)
#define AWT_CODON_TABLE_MAX_NAME_LENGTH
int calc_codon_nr(const char *dna)
int CodeNr() const
#define AWT_MAX_CODONS
int TTIT_arb2embl(int arb_code_nr)
#define TEST_REJECT(cond)
Definition: test_unit.h:1314
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR neverTranslatesError(const char *dna, char protein)
bool all() const
char isStartOrStopCodon(const char *codon) const
bool any() const
const char * AWT_get_codon_code_name(int code)
Codon_Group(char protein, int code_nr)
fputs(TRACE_PREFIX, stderr)
Definition: trnsprob.h:20
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1307
void allow(int nr)
static void addToAmbiguous(int codon_nr, char possible_translation)
const int AWAR_PROTEIN_TYPE_bacterial_code_index
int expand(char *to_buffer) const
int explicit_table() const
#define AWT_CODON_TABLES
void forbid(int nr)
bool is_std_gap(const char c)
Codon_Group & operator+=(const Codon_Group &other)
#define NULp
Definition: cxxforward.h:97
static const char * aa_3letter_name[26+1]
#define EMBL_BACTERIAL_TABLE_INDEX
#define offset(field)
Definition: GLwDrawA.c:73
bool ARB_strBeginsWith(const char *str, const char *with)
Definition: arb_str.h:42
static bool codon_tables_initialized
bool containsProtMatching(const char *pstr, char p)
bool AWT_is_codon(char protein, const char *const dna, const TransTables &allowed, TransTables &remaining, const char **fail_reason_ptr)
void AP_initialize_codon_tables()
const char * buildMixedCodon(const char *const con1, const char *const con2)
static Codon_Group * get_Codon_Group(char protein, int code_nr)
size_t length
bool is_allowed(int nr) const
#define MAX_EMBL_TRANSL_TABLE_VALUE
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
int dna2idx(char c)
int legal_dna_no(int i)
bool is_subset_of(const TransTables &other) const
TranslationTableIndexType
const char * getAminoAcidAbbr(char aa)
#define MAX_CODON_LIST_LENGTH