ARB
AP_pro_a_nucs.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : AP_pro_a_nucs.hxx //
4 // Purpose : nucleotide bit codes and codon <-> amino acid translation (AWT_translator) //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef AP_PRO_A_NUCS_HXX
12 #define AP_PRO_A_NUCS_HXX
13 
14 #ifndef ARBDB_H
15 #include <arbdb.h>
16 #endif
17 #ifndef ARBTOOLS_H
18 #include <arbtools.h>
19 #endif
20 
21 
// Bit-coded nucleotide values.
// The four bases and the known gap each use one distinct bit (1, 2, 4, 8, 16);
// values below the separator comment are combinations of those bits.
// NOTE(review): this listing skips original line 23 (directly after the opening brace),
// so an enumerator may be missing here - confirm against the real header.
22 enum AP_BASES {
24 
25  AP_A = 1,
26  AP_C = 2,
27  AP_G = 4,
28  AP_T = 8,
29  AP_GAP = 16, // known gap ('-')
30 
31  // -------------------- above are bit values, below combinations of them
32 
33  // @@@ define IUPAC here not in AP_pro_a_nucs.cxx@AP_create_dna_to_ap_bases
34 
// 31 == AP_A|AP_C|AP_G|AP_T|AP_GAP, i.e. all five bits set
35  AP_DOT = 31, // maybe gap, maybe some base (anything unknown, esp. '.', '?'; interpreted as dot)
36 
// 32 == number of distinct 5-bit combinations (0..31); not itself a valid bit combination
37  AP_MAX = 32 // amount of possible values
38 };
39 
// Non-copyable record holding three nucleotide bitsets.
// NOTE(review): original lines 41 and 44-45 are not part of this listing, so further
// members (and any constructor/destructor) are not visible here - confirm against the real header.
40 struct arb_r2a_pro_2_nucs : virtual Noncopyable {
// presumably one bitset per codon position (array has 3 entries) - TODO confirm
42  char nucbits[3]; // bitsets of nucs
43 
46 };
47 
// Non-copyable per-protein record; AWT_translator stores pointers to these in its s2str[256] table.
// NOTE(review): original lines 52 and 54-55 are not part of this listing, so further
// members (and any constructor/destructor) are not visible here - confirm against the real header.
48 struct arb_r2a_pro_2_nuc : virtual Noncopyable {
// presumably the single-character protein code - TODO confirm
49  char single_pro;
50  int index; // < 0x007fffff
51 
53 
56 };
57 
// Per-protein distance table: for each distance level (0, 1, 2) a bitmask of proteins
// within that distance, plus three nucleotide bitsets.
58 struct AWT_PDP { // distance definition for one protein
59  long patd[3]; // proteins at dist
60  // every bit in patd[x] represents one protein (used bits: 0-23)
61  // bit in patd[0] is set => distance == 0
62  // bit in patd[1] is set => distance <= 1
63  // bit in patd[2] is set => distance <= 2
64 
// presumably one bitset per codon position (array has 3 entries) - TODO confirm
65  char nucbits[3]; // bitsets of nucs
66 };
67 
// Forward declaration: AWT_distance_meter's constructor takes a pointer to an AWT_translator.
68 class AWT_translator;
69 
// NOTE(review): the class header of AWT_distance_meter (original line 70) and original line 75
// are not part of this listing; the lines below are the visible remainder of that class.
// Table of AWT_PDP pointers with 64 slots, indexed by the idx passed to getDistance().
71  AWT_PDP *dist_[64]; // sets of proteins with special distance (64 > max_aa)
72 
73 public:
// Constructs the meter from the given translator (implementation not shown here).
74  AWT_distance_meter(const AWT_translator *translator);
76 
// Return the table entry for idx. No bounds check is performed: idx has to be within [0, 64).
77  const AWT_PDP *getDistance(int idx) const { return dist_[idx]; }
78  AWT_PDP *getDistance(int idx) { return dist_[idx]; }
79 };
80 
81 
// Translator for one protein code table (selected by an ARB code number).
// Provides codon -> amino acid lookup via a hash table, start/stop codon queries and
// access to several lookup tables (protein index -> bitset, index -> single-char protein).
// Non-copyable.
82 class AWT_translator : virtual Noncopyable {
83 private:
84  mutable AWT_distance_meter *distance_meter; // (mutable to allow lazy-evaluation)
85 
86  int code_nr; // arb (not embl)
87  GB_HASH *t2i_hash; // hash table trin >> singlepro
88  arb_r2a_pro_2_nuc *s2str[256]; // singlecode protein >> dna ...
89 
90  long *pro_2_bitset; // aa-index(!) to bitset
91  char *nuc_2_bitset; // dna-character to bitset
92 
93  unsigned char index_2_spro[64]; // 64 > max_aa
94 
95  int realmax_aa; // number of real AA + stop codon
96  int max_aa; // plus ambiguous codes
97 
// private helpers (implemented elsewhere)
98  void build_table(unsigned char pbase, const char *nuc);
99  long *create_pro_to_bits() const;
100 
101 public:
102 
103  AWT_translator(int arb_protein_code_nr);
104  ~AWT_translator();
105 
// const accessor for the distance meter (implemented elsewhere)
106  const AWT_distance_meter *getDistanceMeter() const;
// NOTE(review): the signature line of the non-const overload (original line 107) is not part of
// this listing. Its visible body forwards to the const overload and casts the constness away
// from the result.
108  return const_cast<AWT_distance_meter*>(const_cast<const AWT_translator*>(this)->getDistanceMeter());
109  }
110 
// simple accessors; the index-taking ones perform no bounds check
111  int CodeNr() const { return code_nr; }
112  const arb_r2a_pro_2_nuc *S2str(int index) const { return s2str[index]; }
113  const arb_r2a_pro_2_nuc * const *S2strArray() const { return s2str; }
114 
115  long index2bitset(int index) const { return pro_2_bitset[index]; }
116  unsigned char index2spro(int index) const { return index_2_spro[index]; }
117 
118  int RealmaxAA() const { return realmax_aa; }
119  int MaxAA() const { return max_aa; } // incl. ambiguity codes
120 
// Look up 'codon' in t2i_hash; returns the stored value as char, or 'X' if the lookup yields 0.
121  char codon2aa(const char *codon) const {
122  long spro = GBS_read_hash(t2i_hash, codon);
123  return spro ? char(spro) : 'X';
124  }
125 
// Classifies 'codon' (implemented elsewhere). The wrappers below rely on '*' marking a stop
// and 'M' marking a start result; other return values are passed through unchanged.
126  char isStartOrStopCodon(const char *codon) const;
// Returns 0 if isStartOrStopCodon() reports '*', otherwise its result unchanged.
127  char isStartCodon(const char *codon) const {
128  char start = isStartOrStopCodon(codon);
129  return start == '*' ? 0 : start; // ignore stop
130  }
// Returns 0 if isStartOrStopCodon() reports 'M', otherwise its result unchanged.
131  char isStopCodon(const char *codon) const {
132  char stop = isStartOrStopCodon(codon);
133  return stop == 'M' ? 0 : stop; // ignore start
134  }
135 };
136 
// Name of the AWAR that holds the user-selected protein code
// (read by AWT_default_protein_type() and AWT_get_user_translator(), see below).
137 #define AWAR_PROTEIN_TYPE "nt/protein_codon_type"
138 
// NOTE(review): returns a raw char*; ownership of the returned table is not visible here - verify before freeing.
139 char *AP_create_dna_to_ap_bases(); // create dna 2 nuc_bitset
140 
141 // ------------------------------
142 
143 int AWT_default_protein_type(GBDATA *gb_main = NULp); // returns protein code selected in AWAR_PROTEIN_TYPE
144 
// Translator lookup functions.
// NOTE(review): both return raw pointers; ownership/lifetime is not visible here - verify before deleting.
145 AWT_translator *AWT_get_translator(int code_nr); // use explicit protein code
146 AWT_translator *AWT_get_user_translator(GBDATA *gb_main = NULp); // uses user setting for protein code from AWAR_PROTEIN_TYPE
147 // AWAR_PROTEIN_TYPE has to exist; the first call of AWT_get_user_translator needs 'gb_main' != 0
148 
149 #else
150 #error AP_pro_a_nucs.hxx included twice
151 #endif // AP_PRO_A_NUCS_HXX
AWT_translator * AWT_get_user_translator(GBDATA *gb_main=NULp)
AWT_distance_meter * getDistanceMeter()
char isStartCodon(const char *codon) const
const AWT_distance_meter * getDistanceMeter() const
AWT_distance_meter(const AWT_translator *translator)
char codon2aa(const char *codon) const
int MaxAA() const
AWT_PDP * getDistance(int idx)
AP_BASES
struct arb_r2a_pro_2_nucs * nucs
static HelixNrInfo * start
int RealmaxAA() const
int CodeNr() const
const arb_r2a_pro_2_nuc *const * S2strArray() const
struct arb_r2a_pro_2_nucs * next
char isStartOrStopCodon(const char *codon) const
char isStopCodon(const char *codon) const
AWT_translator * AWT_get_translator(int code_nr)
char * AP_create_dna_to_ap_bases()
AWT_translator(int arb_protein_code_nr)
#define NULp
Definition: cxxforward.h:116
long patd[3]
int AWT_default_protein_type(GBDATA *gb_main=NULp)
GBDATA * gb_main
Definition: adname.cxx:32
unsigned char index2spro(int index) const
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
const AWT_PDP * getDistance(int idx) const
long index2bitset(int index) const
const arb_r2a_pro_2_nuc * S2str(int index) const
char nucbits[3]