ARB
SQ_GroupData.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : SQ_GroupData.cxx //
4 // Purpose : Classes to store global information about sequences //
5 // //
6 // //
7 // Coded by Juergen Huber in July 2003 - February 2004 //
8 // Coded by Kai Bader (baderk@in.tum.de) in 2007 - 2008 //
9 // Copyright Department of Microbiology (Technical University Munich) //
10 // //
11 // Visit our web site at: http://www.arb-home.de/ //
12 // //
13 // ==================================================================== //
14 
15 
16 #include <cstdio>
17 #include <cctype>
18 #include "SQ_GroupData.h"
19 
20 using namespace std;
21 
23  size = 0;
24  avg_bases = 0;
25  nr_sequences = 0;
26  gc_prop = 0;
27  initialized = false;
28 }
29 
31 }
32 
35  cr.conformity = 0;
36  cr.deviation = 0;
37 
38  for (int i = 0; i < size; i++) {
39  int current[6] = { 0, 0, 0, 0, 0, 0 };
40 
41  // fill up current with decoded iupac values
42  switch (sequence[i]) {
43  case 'a':
44  case 'A':
45  current[0] = 100;
46  break;
47  case 't':
48  case 'T':
49  current[1] = 100;
50  break;
51  case 'c':
52  case 'C':
53  current[2] = 100;
54  break;
55  case 'g':
56  case 'G':
57  current[3] = 100;
58  break;
59  case 'u':
60  case 'U':
61  current[1] = 100;
62  break;
63  case 'r':
64  case 'R':
65  current[0] = 50;
66  current[3] = 50;
67  break;
68  case 'y':
69  case 'Y':
70  current[2] = 50;
71  current[1] = 50;
72  break;
73  case 'm':
74  case 'M':
75  current[0] = 50;
76  current[2] = 50;
77  break;
78  case 'k':
79  case 'K':
80  current[3] = 50;
81  current[1] = 50;
82  break;
83  case 'w':
84  case 'W':
85  current[0] = 50;
86  current[1] = 50;
87  break;
88  case 's':
89  case 'S':
90  current[3] = 50;
91  current[2] = 50;
92  break;
93  case 'b':
94  case 'B':
95  current[2] = 33;
96  current[1] = 33;
97  current[3] = 33;
98  break;
99  case 'd':
100  case 'D':
101  current[0] = 33;
102  current[1] = 33;
103  current[3] = 33;
104  break;
105  case 'h':
106  case 'H':
107  current[2] = 33;
108  current[1] = 33;
109  current[0] = 33;
110  break;
111  case 'v':
112  case 'V':
113  current[0] = 33;
114  current[2] = 33;
115  current[3] = 33;
116  break;
117  case 'n':
118  case 'N':
119  case 'x':
120  case 'X':
121  current[2] = 25;
122  current[1] = 25;
123  current[0] = 25;
124  current[3] = 25;
125  break;
126  case '.':
127  current[4] = 1;
128  break;
129  case '-':
130  current[5] = 1;
131  break;
132  default:
133  seq_assert(0);
134  // unhandled character
135  break;
136 
137  }
138 
139  int *cs = consensus[i].i;
140  double sum = (double) (cs[0] + cs[1] + cs[2] + cs[3] + cs[4] + cs[5]);
141 
142  for (int j = 0; j < 6; j++) {
143  int currentj = current[j];
144  if (currentj > 0) {
145  if (cs[j] > currentj) {
146  cr.conformity += (double) (cs[j] - currentj) / sum;
147  }
148  else {
149  cr.deviation += current[j];
150  }
151  }
152  }
153  }
154 
155  cr.conformity = cr.conformity / size; // set conformity in relation to sequencelength
156  cr.deviation = cr.deviation / size; // set deviation in relation to sequencelength
157  return cr;
158 }
159 
160 void SQ_GroupData_RNA::SQ_add_sequence(const char *sequence) {
161  for (int i = 0; i < size; i++) {
162  int *cs = consensus[i].i;
163  switch (sequence[i]) {
164  case 'a':
165  case 'A':
166  cs[0] += 100;
167  break;
168  case 't':
169  case 'T':
170  cs[1] += 100;
171  break;
172  case 'c':
173  case 'C':
174  cs[2] += 100;
175  break;
176  case 'g':
177  case 'G':
178  cs[3] += 100;
179  break;
180  case 'u':
181  case 'U':
182  cs[1] += 100;
183  break;
184  case 'r':
185  case 'R':
186  cs[0] += 50;
187  cs[3] += 50;
188  break;
189  case 'y':
190  case 'Y':
191  cs[2] += 50;
192  cs[1] += 50;
193  break;
194  case 'm':
195  case 'M':
196  cs[0] += 50;
197  cs[2] += 50;
198  break;
199  case 'k':
200  case 'K':
201  cs[3] += 50;
202  cs[1] += 50;
203  break;
204  case 'w':
205  case 'W':
206  cs[0] += 50;
207  cs[1] += 50;
208  break;
209  case 's':
210  case 'S':
211  cs[3] += 50;
212  cs[2] += 50;
213  break;
214  case 'b':
215  case 'B':
216  cs[2] += 33;
217  cs[1] += 33;
218  cs[3] += 33;
219  break;
220  case 'd':
221  case 'D':
222  cs[0] += 33;
223  cs[1] += 33;
224  cs[3] += 33;
225  break;
226  case 'h':
227  case 'H':
228  cs[2] += 33;
229  cs[1] += 33;
230  cs[0] += 33;
231  break;
232  case 'v':
233  case 'V':
234  cs[0] += 33;
235  cs[2] += 33;
236  cs[3] += 33;
237  break;
238  case 'n':
239  case 'x':
240  case 'N':
241  case 'X':
242  cs[2] += 25;
243  cs[1] += 25;
244  cs[0] += 25;
245  cs[3] += 25;
246  break;
247  case '.':
248  cs[4] += 1;
249  break;
250  case '-':
251  cs[5] += 1;
252  break;
253  default:
254  fprintf(stderr, "Illegal character '%c'", sequence[i]);
255  // seq_assert(0);
256  // unhandled character
257  break;
258  }
259  }
260 }
261 
STL namespace.
virtual ~SQ_GroupData()
static bool initialized
Definition: AW_advice.cxx:36
void SQ_add_sequence(const char *sequence) OVERRIDE
#define seq_assert(bed)
consensus_result SQ_calc_consensus(const char *sequence) const OVERRIDE