21 #define pn_assert(cond) arb_assert(cond) // assertion alias used throughout this file; forwards 'cond' unchanged to arb_assert
23 #define EMBL_BACTERIAL_TABLE_INDEX 11 // EMBL table number of the "(11) Bacterial and Plant Plastid code" entry in the codon definition table
24 #define AWT_CODON_TABLE_MAX_NAME_LENGTH 57 // increasing this limit forces GUI re-layout (look4: AWT_get_codon_code_name)
26 #define VALID_PROTEIN "ABCDEFGHIJKLMNPQRSTVWXYZ*" // all possible translations
27 #define VALID_PROTEIN_NO_X "ABCDEFGHIJKLMNPQRSTVWYZ*" // same as VALID_PROTEIN w/o 'X'
52 "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
53 "---M------**--*----M---------------M----------------------------",
57 " (2) Vertebrate mitochondrial code",
58 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
59 "----------**--------------------MMMM----------**---M------------",
63 " (3) Yeast mitochondrial code",
64 "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
65 "----------**----------------------MM----------------------------",
70 " (4) Coelenterate Mitochondrial + Mycoplasma/Spiroplasma",
71 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
72 "--MM------**-------M------------MMMM---------------M------------",
76 " (5) Invertebrate mitochondrial code",
77 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
78 "---M------**--------------------MMMM---------------M------------",
82 " (6) Ciliate, Dasycladacean and Hexamita nuclear code",
83 "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
84 "--------------*--------------------M----------------------------",
88 " (9) Echinoderm and Flatworm mitochondrial code",
89 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
90 "----------**-----------------------M---------------M------------",
94 "(10) Euplotid nuclear code",
95 "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
96 "----------**-----------------------M----------------------------",
106 "(11) Bacterial and Plant Plastid code",
107 "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
108 "---M------**--*----M------------MMMM---------------M------------",
112 "(12) Alternative Yeast nuclear code",
113 "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
114 "----------**--*----M---------------M----------------------------",
118 "(13) Ascidian mitochondrial code",
119 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
120 "---M------**----------------------MM---------------M------------",
124 "(14) Alternative Flatworm mitochondrial code",
125 "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
126 "-----------*-----------------------M----------------------------",
130 "(15) Blepharisma nuclear code (deleted?)",
131 "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
132 "----------*---*--------------------M----------------------------",
136 "(16) Chlorophycean mitochondrial code",
137 "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
138 "----------*---*--------------------M----------------------------",
142 "(21) Trematode mitochondrial code",
143 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
144 "----------**-----------------------M---------------M------------",
148 "(22) Scenedesmus obliquus mitochondrial code",
149 "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
150 "------*---*---*--------------------M----------------------------",
154 "(23) Thraustochytrium mitochondrial code",
155 "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
156 "--*-------**--*-----------------M--M---------------M------------",
160 "(24) Pterobranchia Mitochondrial Code",
161 "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
162 "---M------**-------M---------------M---------------M------------",
166 "(25) Candidate Division SR1 and Gracilibacteria Code",
167 "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
168 "---M------**-----------------------M---------------M------------",
172 "(26) Pachysolen tannophilus Nuclear Code",
173 "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
174 "----------**--*----M---------------M----------------------------",
178 "(27) Karyorelict Nuclear",
179 "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
180 "--------------*--------------------M----------------------------",
184 "(28) Condylostoma Nuclear",
185 "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
186 "----------**--*--------------------M----------------------------",
190 "(29) Mesodinium Nuclear",
191 "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
192 "--------------*--------------------M----------------------------",
196 "(30) Peritrich Nuclear",
197 "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
198 "--------------*--------------------M----------------------------",
202 "(31) Blastocrithidia Nuclear",
203 "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
204 "----------**-----------------------M----------------------------",
216 #define MAX_EMBL_TRANSL_TABLE_VALUE 31 // maximum known EMBL transl_table value
228 arb_code_nr_table[embl] = -1;
234 pn_assert(arb_code_nr_table[embl] == -1);
236 arb_code_nr_table[embl] = arb_code_nr;
249 int arb_code_nr = arb_code_nr_table[embl_code_nr];
251 if (arb_code_nr != -1) {
272 char*& ambEntry = ambiguous_codons[codon_nr];
273 uint8_t& ambLen = length[codon_nr];
277 ambEntry[0] = possible_translation;
280 else if (!strchr(ambEntry, possible_translation)) {
281 ambEntry[ambLen++] = possible_translation;
292 ambiguous_codons[codon_nr] =
NULp;
299 const char *translation = AWT_codon_def[code_nr].
aa;
300 const char *startStop = AWT_codon_def[code_nr].
startStop;
305 bool isOptionalStartStop =
false;
308 switch (startStop[codon_nr]) {
311 isOptionalStartStop = translation[codon_nr] !=
'M';
316 isOptionalStartStop = translation[codon_nr] !=
'*';
330 if (isOptionalStartStop) {
333 definite_translation[codon_nr] = 0;
336 definite_translation[codon_nr] = translation[codon_nr];
339 else if (definite_translation[codon_nr]) {
340 if (definite_translation[codon_nr] != translation[codon_nr] || isOptionalStartStop) {
343 if (isOptionalStartStop)
addToAmbiguous(codon_nr, startStop[codon_nr]);
344 definite_translation[codon_nr] = 0;
349 if (isOptionalStartStop)
addToAmbiguous(codon_nr, startStop[codon_nr]);
361 case 'U':
case 'u':
return 0;
362 case 'C':
case 'c':
return 1;
363 case 'A':
case 'a':
return 2;
364 case 'G':
case 'g':
return 3;
379 int codon_nr = i1*16 + i2*4 + i3;
387 to_buffer[0] =
idx2dna((codon_nr>>4)&3);
388 to_buffer[1] =
idx2dna((codon_nr>>2)&3);
389 to_buffer[2] =
idx2dna(codon_nr&3);
394 return AWT_codon_def[code].
name;
428 if (aa==
'*')
return "End";
430 if (aa>=
'A' && aa<=
'Z')
return aa_3letter_name[aa-
'A'];
436 inline char nextBase(
char c) {
438 case 'T':
return 'C';
439 case 'C':
return 'A';
440 case 'A':
return 'G';
444 case 'G':
return 'M';
445 case 'M':
return 'R';
446 case 'R':
return 'W';
447 case 'W':
return 'S';
448 case 'S':
return 'Y';
449 case 'Y':
return 'K';
450 case 'K':
return 'V';
451 case 'V':
return 'H';
452 case 'H':
return 'D';
453 case 'D':
return 'B';
454 case 'B':
return 'N';
467 for (
char c=
'*'; c<=
'Z'; c++) {
468 printf(
"Codons for '%c': ", c);
470 if (skipX && c ==
'X') {
471 fputs(
"skipped", stdout);
474 bool first_line =
true;
476 for (
char b1=
'T'; b1; b1=nextBase(b1)) {
477 for (
char b2=
'T'; b2; b2=nextBase(b2)) {
478 for (
char b3=
'T'; b3; b3=nextBase(b3)) {
487 if (!first_line)
fputs(
"\n ", stdout);
489 printf(
"%s (%s)", dna, remaining.
to_string(type));
495 if (!found)
fputs(
"none", stdout);
504 char isStartStop = 0;
511 isStartStop = AWT_codon_def[code_nr].
startStop[codon_nr];
512 if (isStartStop ==
'-') {
535 bool is_stop = is_start ?
false :
AWT_is_codon(
'*', codon, allowed, remaining,
NULp);
538 result = is_start ?
'M' : (is_stop ?
'*' : 0);
554 pn_assert(p1 !=
'B' && p1 !=
'Z' && p1 !=
'J');
558 if (p1 == p2)
return true;
559 if (p2 ==
'B')
return p1 ==
'D' || p1 ==
'N';
560 if (p2 ==
'J')
return p1 ==
'I' || p1 ==
'L';
561 if (p2 ==
'Z')
return p1 ==
'E' || p1 ==
'Q';
569 if (p ==
'B')
return strchr(pstr,
'D') || strchr(pstr,
'N');
570 if (p ==
'J')
return strchr(pstr,
'I') || strchr(pstr,
'L');
571 if (p ==
'Z')
return strchr(pstr,
'E') || strchr(pstr,
'Q');
572 return strchr(pstr, p);
580 return GBS_global_string(
"'%c%c%c' never translates to '%c'", dna[0], dna[1], dna[2], protein);
596 const char *fail_reason =
NULp;
597 if (fail_reason_ptr) *fail_reason_ptr =
NULp;
599 bool is_codon =
false;
601 int first_iupac_pos = -1;
602 int iupac_positions = 0;
603 bool decided =
false;
604 bool general_failure =
false;
606 protein = toupper(protein);
609 bool too_short =
false;
611 for (
int iupac_pos=0; iupac_pos<3 && !too_short && !fail_reason; iupac_pos++) {
612 char N = dna[iupac_pos];
614 if (!N) too_short =
true;
615 else if (!
isGap(N)) {
617 if (!strchr(
"ACGTU", N)) {
618 if (first_iupac_pos==-1) first_iupac_pos = iupac_pos;
621 if (!decoded_iupac[0]) {
628 if (!fail_reason && !nucs_seen) {
629 fail_reason =
"No nucleotides left";
631 else if (nucs_seen<3) {
638 else if (too_short) {
640 if (protein ==
'X') {
645 strncpy(dna_copy, dna, 3);
654 if (protein ==
'X') {
658 for (
int i = 0; valid_prot[i]; ++i) {
659 if (
AWT_is_codon(valid_prot[i], dna, allowed_copy, remaining)) {
660 allowed_copy.
forbid(remaining);
661 if (allowed_copy.
none())
break;
665 if (allowed_copy.
any()) {
667 remaining = allowed_copy;
679 memcpy(dna_copy, dna, 3);
682 bool all_are_codons =
true;
683 bool one_is_codon =
false;
687 for (
int i=0; decoded_iupac[i]; i++) {
688 dna_copy[first_iupac_pos] = decoded_iupac[i];
690 if (!
AWT_is_codon(protein, dna_copy, allowed_copy, remaining, &subfail)) {
691 all_are_codons =
false;
692 if (!one_is_codon &&
ARB_strBeginsWith(subfail,
"Not all ")) one_is_codon =
true;
693 if (one_is_codon)
break;
697 allowed_copy = remaining;
701 if (all_are_codons) {
703 remaining = allowed_copy;
708 dna_copy[first_iupac_pos] = dna[first_iupac_pos];
710 fail_reason =
GBS_global_string(
"Not all IUPAC-combinations of '%s' translate to '%c'", dna_copy, protein);
717 else if (definite_translation[codon_nr]) {
718 char defTransl = definite_translation[codon_nr];
720 #if defined(ASSERTION_USED)
721 bool optionalCodonExists =
false;
722 for (
int code_nr=0; code_nr<
AWT_CODON_TABLES && !optionalCodonExists; code_nr++) {
724 if (startStop && startStop != defTransl) {
726 pn_assert(startStop ==
'*' || startStop ==
'M');
727 optionalCodonExists =
true;
741 fail_reason =
GBS_global_string(
"'%c%c%c' translates to '%c', not to '%c'", dna[0], dna[1], dna[2], defTransl, protein);
742 general_failure =
true;
748 general_failure =
true;
751 #if defined(ASSERTION_USED)
752 bool correct_disallowed_translation =
false;
758 bool mayTranslate =
protMatches(AWT_codon_def[code_nr].aa[codon_nr], protein);
759 if (!mayTranslate && (protein ==
'*' || protein ==
'M')) {
761 mayTranslate = startOrStop &&
protMatches(startOrStop, protein);
766 remaining.
allow(code_nr);
770 remaining.
forbid(code_nr);
771 #if defined(ASSERTION_USED)
772 correct_disallowed_translation =
true;
777 remaining.
forbid(code_nr);
782 pn_assert(correct_disallowed_translation);
783 fail_reason =
GBS_global_string(
"'%c%c%c' does not translate to '%c'", dna[0], dna[1], dna[2], protein);
790 if (fail_reason_ptr) {
791 if (!allowed.
all() && !general_failure) {
797 fail_reason =
GBS_global_string(
"%s (for any of the leftover trans-tables: %s)", fail_reason, left_tables);
801 fail_reason =
GBS_global_string(
"%s (for trans-table %i)", fail_reason, one_embl);
805 *fail_reason_ptr = fail_reason;
808 #if defined(ASSERTION_USED)
830 int expand(
char *to_buffer)
const;
834 protein = toupper(protein);
835 pn_assert(protein==
'*' || isalpha(protein));
838 const char *amino_table = AWT_codon_def[code_nr].
aa;
840 codon[i] = amino_table[i]==protein;
846 codon[i] = codon[i] || other.codon[i];
855 int mismatch_index = -1;
858 for (
int i=0; i<3; i++) {
859 if (con1[i]!=con2[i]) {
873 if (memcmp(con1, buf, 3) == 0 ||
874 memcmp(con2, buf, 3) == 0)
879 #if defined(DUMP_CODON_GROUP_EXPANSION)
880 printf(
" buildMixedCodon('%c%c%c','%c%c%c') == '%s'\n",
881 con1[0], con1[1], con1[2],
882 con2[0], con2[1], con2[2],
891 static int expandMore(
const char *bufferStart,
int no_of_condons,
char*&to_buffer) {
893 const char *con1, *con2;
896 for (i=0; i<no_of_condons; i++) {
897 con1 = bufferStart+3*i;
899 for (j=i+1; j<no_of_condons; j++) {
900 con2 = bufferStart+3*j;
906 const char *startSearch = bufferStart;
908 found = strstr(startSearch, result);
910 int pos = (found-bufferStart);
911 if ((pos%3)==0)
break;
912 startSearch = found+1;
916 memmove(to_buffer, result, 3); to_buffer+=3;
922 return no_of_condons+added;
928 char *org_to_buffer = to_buffer;
938 #if defined(DUMP_CODON_GROUP_EXPANSION)
940 printf(
"codons = '%s'\n", org_to_buffer);
944 int new_count =
expandMore(org_to_buffer, count, to_buffer);
945 if (new_count==count)
break;
947 #if defined(DUMP_CODON_GROUP_EXPANSION)
949 printf(
"codons (expandedMore) = '%s'\n", org_to_buffer);
953 pn_assert(count==(
int(to_buffer-org_to_buffer)/3));
962 protein = toupper(protein);
963 pn_assert(isalpha(protein) || protein==
'*');
973 else if (protein==
'Z') {
987 #define MAX_CODON_LIST_LENGTH (70*3) // NOTE(review): presumably room for 70 codons of 3 characters each -- confirm against the buffers sized with this constant
1015 static const char *startStopSummary() {
1027 char startStop =
'-';
1031 switch (startStop) {
1033 case '-': startStop =
'*';
break;
1034 case 'M': startStop =
'2';
break;
1039 switch (startStop) {
1041 case '-': startStop =
'M';
break;
1042 case '*': startStop =
'2';
break;
1051 result[codon] = startStop;
1056 static const char *optionality() {
1065 static char result[AWT_MAX_CODONS+1];
1068 char optional =
' ';
1072 bool is_optional = AWT_codon_def[code].
aa[codon] != startStop;
1075 case ' ': optional = is_optional ?
'!' :
'-';
break;
1076 case '-': optional = is_optional ?
'?' :
'-';
break;
1077 case '!': optional = is_optional ?
'!' :
'?';
break;
1083 #if defined(ASSERTION_USED)
1084 bool sometimes_optional = optional ==
'!' || optional ==
'?';
1085 pn_assert(!sometimes_optional || !definite_translation[codon]);
1088 result[codon] = optional;
1094 static const char *definite() {
1095 static char result[AWT_MAX_CODONS+1];
1097 result[codon] = definite_translation[codon] ? definite_translation[codon] :
' ';
1102 static const char *ambig_count() {
1103 static char result[AWT_MAX_CODONS+1];
1105 const char *amb = ambiguous_codons[codon];
1106 result[codon] = amb ?
'0'+strlen(amb) :
' ';
1112 #define e2a(c) TTIT_embl2arb(c) // shorthand for TTIT_embl2arb (takes an EMBL code number); defined directly ahead of TEST_codon_check
1114 void TEST_codon_check() {
1123 TEST_EXPECT_EQUAL(startStopSummary(),
"--2M--*---**--*----M------------MMMM----------**---M------------");
1125 TEST_EXPECT_EQUAL(definite (),
"FF SS SYY CC W PPPPHHQQRRRR MTTTTNN KSS VVV AAAADDEEGGGG");
1205 struct test_is_codon {
1211 #define ALL_TABLES "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24" // contains arb table-numbers
1213 test_is_codon is_codon[] = {
1214 {
'P',
"CCC", ALL_TABLES },
1215 {
'P',
"CCN", ALL_TABLES },
1216 {
'R',
"CGN", ALL_TABLES },
1218 {
'D',
"GAY", ALL_TABLES },
1219 {
'N',
"AAY", ALL_TABLES },
1220 {
'B',
"AAY", ALL_TABLES },
1221 {
'B',
"GAY", ALL_TABLES },
1222 {
'B',
"RAY", ALL_TABLES },
1223 {
'B',
"RAT", ALL_TABLES },
1225 {
'Q',
"CAR", ALL_TABLES },
1226 {
'E',
"GAR", ALL_TABLES },
1227 {
'Z',
"SAR", ALL_TABLES },
1229 {
'X',
"NNN", ALL_TABLES },
1231 {
'L',
"TTR",
"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15" ",17,18,19,20,21,22,23,24" }, {
'X',
"TTR",
"16" },
1232 {
'L',
"YTA",
"0,1"",3,4,5,6,7,8,9,10,11,12,13,14,15" ",17,18,19,20,21,22,23,24" }, {
'X',
"YTA",
"2,16" },
1233 {
'L',
"CTM",
"0,1"",3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24" }, {
'T',
"CTM",
"2" },
1234 {
'L',
"CTN",
"0,1"",3,4,5,6,7,8"",10,11,12,13,14,15,16,17,18" ",20,21,22,23,24" }, {
'T',
"CTN",
"2" }, {
'X',
"CTN",
"9,19" },
1235 {
'L',
"CTK",
"0,1"",3,4,5,6,7,8"",10,11,12,13,14,15,16,17,18" ",20,21,22,23,24" }, {
'T',
"CTK",
"2" }, {
'X',
"CTK",
"9,19" },
1237 {
'L',
"TWG",
"13,15" },
1238 {
'J',
"TWG",
"13,15" },
1239 {
'X',
"TWG",
"0,1,2,3,4,5,6,7,8,9,10,11,12" ",14" ",16,17,18,19,20,21,22,23,24" },
1241 {
'S',
"AGY", ALL_TABLES },
1242 {
'S',
"TCY", ALL_TABLES },
1243 {
'S',
"TCN",
"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,17,18,19,20,21,22,23,24" },
1244 {
'S',
"AGN",
"4,6,11,14" },
1245 {
'S',
"AGR",
"4,6,11,14" },
1247 {
'*',
"AGR",
"1" },
1248 {
'G',
"AGR",
"10" },
1249 {
'X',
"AGR",
"17" },
1250 {
'R',
"AGR",
"0,2,3,5,7,8,9,12,13,15,16,18,19,20,21,22,23,24" },
1252 {
'G',
"AGA",
"10" },
1253 {
'S',
"AGA",
"4,6,11,14,17" },
1254 {
'R',
"AGA",
"0,2,3,5,7,8,9,12,13,15,16,18,19,20,21,22,23,24" },
1255 {
'*',
"AGA",
"1" },
1257 {
'K',
"AGG",
"17" },
1259 {
'W',
"TGR",
"1,2,3,4,6,10,11,14,17,20,21,24" },
1260 {
'X',
"TGR",
"0,5,7,8,9,12,13,15,16,18,19,22,23" },
1262 {
'C',
"TGW",
"7" },
1263 {
'X',
"TGW",
"0,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24" },
1265 {
'C',
"TGT", ALL_TABLES },
1267 {
'C',
"TGA",
"7" },
1268 {
'G',
"TGA",
"18" },
1269 {
'W',
"TGA",
"1,2,3,4,6,10,11,14,17,20,21,24" },
1270 {
'*',
"TGA",
"0,5,8,9,12,13,15,16,19,20,21,22,23" },
1271 {
'*',
"TAA",
"0,1,2,3,4,6,7,8,9,10,12,13,14,15,16,17,18,19,21,24" },
1272 {
'*',
"TAG",
"0,1,2,3,4,6,7,8,9,10,11,14,16,17,18,19,21,24" },
1274 {
'*',
"TRA",
"0,8,9,12,13,15,16,19,21" },
1275 {
'X',
"TRA",
"1,2,3,4,5,6,7,10,11,14,17,18,20,22,23,24" },
1277 {
'*',
"TAR",
"0,1,2,3,4,6,7,8,9,10,14,16,17,18,19,21,24" },
1278 {
'Y',
"TAR",
"22" },
1279 {
'E',
"TAR",
"23,24" },
1280 {
'Q',
"TAR",
"5,20,21" },
1281 {
'Z',
"TAR",
"5,20,21,23,24" },
1282 {
'X',
"TAR",
"11,12,13,15" },
1284 {
'B',
"AAW",
"6,11,14" },
1285 {
'N',
"AAW",
"6,11,14" },
1286 {
'X',
"AAW",
"0,1,2,3,4,5,7,8,9,10,12,13,15,16,17,18,19,20,21,22,23,24" },
1288 {
'T',
"CTG",
"2" },
1289 {
'S',
"CTG",
"9" },
1290 {
'A',
"CTG",
"19" },
1291 {
'L',
"CTG",
"0,1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24" },
1292 {
'J',
"CTG",
"0,1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24" },
1293 {
'M',
"CTG",
"0,3,8,9,17,19" },
1295 {
'T',
"CTR",
"2" },
1296 {
'X',
"CTR",
"9,19" },
1297 {
'L',
"CTR",
"0,1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,20,21,22,23,24" },
1299 {
'E',
"KAR",
"23,24" },
1301 {
'Z',
"KAR",
"5,20,21,23,24" },
1302 {
'X',
"KAR",
"0,1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,22" },
1304 {
'G',
"KGA",
"18" },
1305 {
'X',
"KGA",
"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,19,20,21,22,23,24" },
1307 {
'E',
"TAG",
"23,24" },
1308 {
'Q',
"TAG",
"5,12,20,21" },
1309 {
'L',
"TAG",
"13,15" },
1310 {
'Y',
"TAG",
"22" },
1311 {
'J',
"TAG",
"13,15" },
1312 {
'Z',
"TAG",
"5,12,20,21,23,24" },
1313 {
'*',
"TAG",
"0,1,2,3,4,6,7,8,9,10,11,14,16,17,18,19,21,24" },
1315 {
'J',
"WTA",
"0,3,5,6,7,8,9,11,12,13,15,17,18,19,20,21,22,23,24" },
1317 {
'X',
"A-C", ALL_TABLES },
1318 {
'X',
".T.", ALL_TABLES },
1321 {
'X',
"CG", ALL_TABLES },
1322 {
'X',
"T", ALL_TABLES },
1334 {
'M',
"TTA",
"3" },
1335 {
'M',
"TTG",
"0,3,4,8,10,17,18" },
1336 {
'L',
"TTG", ALL_TABLES },
1338 {
'M',
"ATT",
"1,3,4,8,16" },
1339 {
'M',
"ATC",
"1,3,4,8" },
1340 {
'M',
"ATA",
"1,2,3,4,8,10,14" },
1341 {
'I',
"ATA",
"0,3,5,6,7,8,9,11,12,13,15,16,17,18,19,20,21,22,23,24" },
1342 {
'M',
"ATG", ALL_TABLES },
1343 {
'M',
"ATR",
"1,2,3,4,8,10,14" },
1344 {
'M',
"ATM",
"1,3,4,8" },
1345 {
'M',
"ATS",
"1,3,4,8" },
1346 {
'M',
"ATY",
"1,3,4,8" },
1347 {
'M',
"ATK",
"1,3,4,8,16" },
1348 {
'M',
"ATW",
"1,3,4,8" },
1349 {
'M',
"ATV",
"1,3,4,8" },
1350 {
'M',
"ATB",
"1,3,4,8" },
1351 {
'M',
"ATD",
"1,3,4,8" },
1353 {
'M',
"ATH",
"1,3,4,8" },
1354 {
'I',
"ATH",
"0,3,5,6,7,8,9,11,12,13,15,16,17,18,19,20,21,22,23,24" },
1355 {
'X',
"ATH",
"2,10,14" },
1357 {
'M',
"ATN",
"1,3,4,8" },
1358 {
'M',
"GTG",
"1,3,4,6,8,10,14,16,17,18" },
1361 {
'*',
"AGA",
"1" },
1362 {
'*',
"AGG",
"1" },
1363 {
'*',
"TAA",
"0,1,2,3,4,6,7,8,9,10,12,13,14,15,16,17,18,19,21,24" },
1364 {
'*',
"TAG",
"0,1,2,3,4,6,7,8,9,10,11,14,16,17,18,19,21,24" },
1365 {
'*',
"TCA",
"15" },
1366 {
'*',
"TGA",
"0,5,8,9,12,13,15,16,19,20,21,22,23" },
1367 {
'*',
"TTA",
"16" },
1369 {
'*',
"TWA",
"16" },
1370 {
'*',
"TMA",
"15" },
1371 {
'*',
"TAR",
"0,1,2,3,4,6,7,8,9,10,14,16,17,18,19,21,24" },
1372 {
'*',
"TRA",
"0,8,9,12,13,15,16,19,21" },
1373 {
'*',
"AGR",
"1" },
1378 for (
int c = 0; is_codon[c].protein; ++c) {
1379 const test_is_codon&
C = is_codon[c];
1383 const char *failure;
1384 bool isCodon =
AWT_is_codon(C.protein, C.codon, allowed, remaining, &failure);
1393 struct test_not_codon {
1398 test_not_codon not_codon[] = {
1399 {
'P',
"SYK",
"Not all IUPAC-combinations of 'SYK' translate to 'P'" },
1400 {
'F',
"SYK",
"'SYK' never translates to 'F'" },
1401 {
'P',
"NNN",
"Not all IUPAC-combinations of 'NNN' translate to 'P'" },
1402 {
'D',
"RAY",
"Not all IUPAC-combinations of 'RAY' translate to 'D'" },
1403 {
'E',
"SAR",
"Not all IUPAC-combinations of 'SAR' translate to 'E'" },
1404 {
'Q',
"KAR",
"Not all IUPAC-combinations of 'KAR' translate to 'Q'" },
1406 {
'S',
"CYT",
"'CYT' never translates to 'S'" },
1408 {
'O',
"RAY",
"'O' is no valid amino acid" },
1409 {
'U',
"AAA",
"'U' is no valid amino acid" },
1411 {
'L',
"A-C",
"Not enough nucleotides (got 'A-C')" },
1412 {
'V',
".T.",
"Not enough nucleotides (got '.T.')" },
1413 {
'L',
"...",
"No nucleotides left" },
1414 {
'J',
"...",
"No nucleotides left" },
1416 {
'I',
"ATR",
"Not all IUPAC-combinations of 'ATR' translate to 'I'" },
1418 {
'*',
"TYA",
"Not all IUPAC-combinations of 'TYA' translate to '*'" },
1419 {
'*',
"TRR",
"Not all IUPAC-combinations of 'TRR' translate to '*'" },
1420 {
'*',
"WGA",
"Not all IUPAC-combinations of 'WGA' translate to '*'" },
1421 {
'*',
"THA",
"Not all IUPAC-combinations of 'THA' translate to '*'" },
1423 {
'X',
"...",
"No nucleotides left" },
1424 {
'X',
"..",
"No nucleotides left" },
1425 {
'X',
"-",
"No nucleotides left" },
1426 {
'X',
"",
"No nucleotides left" },
1429 {
'X',
"AZA",
"Invalid character 'Z' in DNA" },
1430 {
'X',
"A@A",
"Invalid character '@' in DNA" },
1431 {
'L',
"AZA",
"Invalid character 'Z' in DNA" },
1435 {
'A',
"--",
"No nucleotides left" },
1436 {
'L',
".",
"No nucleotides left" },
1437 {
'J',
".",
"No nucleotides left" },
1438 {
'L',
"AT",
"Not enough nucleotides (got 'AT')" },
1439 {
'L',
"C",
"Not enough nucleotides (got 'C')" },
1440 {
'L',
"",
"No nucleotides left" },
1444 for (
int c = 0; not_codon[c].protein; ++c) {
1445 const test_not_codon& C = not_codon[c];
1449 const char *failure;
1450 bool isCodon =
AWT_is_codon(C.protein, C.codon, allowed, remaining, &failure);
1463 struct test_uncombinable_codons {
1471 test_uncombinable_codons uncomb_codons[] = {
1472 {
'*',
"TTA",
"16",
'E',
"SAR",
"Not all IUPAC-combinations of 'SAR' translate to 'E' (for trans-table 23)" },
1473 {
'*',
"TTA",
"16",
'X',
"TRA",
"'TRA' never translates to 'X' (for trans-table 23)" },
1474 {
'L',
"TAG",
"13,15",
'X',
"TRA",
"'TRA' never translates to 'X' (for any of the leftover trans-tables: 16,22)" },
1475 {
'L',
"TAG",
"13,15",
'Q',
"TAR",
"'TAR' never translates to 'Q' (for any of the leftover trans-tables: 16,22)" },
1476 {
'*',
"TTA",
"16",
'*',
"TCA",
"'TCA' does not translate to '*' (for trans-table 23)" },
1477 {
'N',
"AAA",
"6,11,14",
'X',
"AAW",
"'AAW' never translates to 'X' (for any of the leftover trans-tables: 9,14,21)" },
1478 {
'N',
"AAA",
"6,11,14",
'K',
"AAA",
"'AAA' does not translate to 'K' (for any of the leftover trans-tables: 9,14,21)" },
1483 for (
int c = 0; uncomb_codons[c].protein1; ++c) {
1484 const test_uncombinable_codons& C = uncomb_codons[c];
1485 TEST_ANNOTATE(
GBS_global_string(
"%c <- %s + %c <- %s", C.protein1, C.codon1, C.protein2, C.codon2));
1488 const char *failure;
1489 bool isCodon =
AWT_is_codon(C.protein1, C.codon1, allowed, remaining1, &failure);
1497 isCodon =
AWT_is_codon(C.protein2, C.codon2, remaining1, remaining2, &failure);
1504 #endif // UNIT_TESTS
bool protMatches(char p1, char p2)
#define implicated(hypothesis, conclusion)
const char * AP_get_codons(char protein, int code_nr)
const char * to_string(TranslationTableIndexType type) const
#define VALID_PROTEIN_NO_X
static char * ambiguous_codons[AWT_MAX_CODONS]
const char * GBS_global_string(const char *templat,...)
void forbidAllBut(int nr)
static AWT_Codon_Code_Definition AWT_codon_def[AWT_CODON_TABLES+1]
static char definite_translation[AWT_MAX_CODONS]
char buffer[MESSAGE_BUFFERSIZE]
char combine(char c1, char c2, GB_alignment_type ali)
void build_codon(int codon_nr, char *to_buffer)
int embl_feature_transl_table
int TTIT_embl2arb(int embl_code_nr)
char isStartOrStopCodonNr(int codon_nr, int code_nr)
#define TEST_EXPECT(cond)
const char * decode(char iupac, GB_alignment_type aliType, bool decode_amino_iupac_groups)
static int expandMore(const char *bufferStart, int no_of_condons, char *&to_buffer)
#define AWT_CODON_TABLE_MAX_NAME_LENGTH
int calc_codon_nr(const char *dna)
int TTIT_arb2embl(int arb_code_nr)
#define TEST_REJECT(cond)
static void error(const char *msg)
GB_ERROR neverTranslatesError(const char *dna, char protein)
char isStartOrStopCodon(const char *codon) const
const char * AWT_get_codon_code_name(int code)
Codon_Group(char protein, int code_nr)
fputs(TRACE_PREFIX, stderr)
#define TEST_EXPECT_NULL(n)
static void addToAmbiguous(int codon_nr, char possible_translation)
const int AWAR_PROTEIN_TYPE_bacterial_code_index
int expand(char *to_buffer) const
int explicit_table() const
bool is_std_gap(const char c)
Codon_Group & operator+=(const Codon_Group &other)
static const char * aa_3letter_name[26+1]
#define EMBL_BACTERIAL_TABLE_INDEX
bool ARB_strBeginsWith(const char *str, const char *with)
static bool codon_tables_initialized
bool containsProtMatching(const char *pstr, char p)
bool AWT_is_codon(char protein, const char *const dna, const TransTables &allowed, TransTables &remaining, const char **fail_reason_ptr)
void AP_initialize_codon_tables()
const char * buildMixedCodon(const char *const con1, const char *const con2)
static Codon_Group * get_Codon_Group(char protein, int code_nr)
bool is_allowed(int nr) const
#define MAX_EMBL_TRANSL_TABLE_VALUE
#define TEST_EXPECT_EQUAL(expr, want)
bool is_subset_of(const TransTables &other) const
TranslationTableIndexType
const char * getAminoAcidAbbr(char aa)
#define MAX_CODON_LIST_LENGTH