ARB
BI_helix.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : BI_helix.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "BI_helix.hxx"
12 #include <arbdbt.h>
13 #include <cctype>
14 
15 #define LEFT_HELIX "{[<("
16 #define RIGHT_HELIX "}]>)"
17 
18 
20  for (int i=0; i<PAIR_TYPE_COUNT; i++) {
21  pairs_def[i] = NULp;
22  char_bind[i] = NULp;
23  }
24 
25  pairs_def[HELIX_STRONG_PAIR] = strdup("CG AT AU");
26  char_bind[HELIX_STRONG_PAIR] = strdup("~");
27 
28  pairs_def[HELIX_NORMAL_PAIR] = strdup("GT GU");
29  char_bind[HELIX_NORMAL_PAIR] = strdup("-");
30 
31  pairs_def[HELIX_WEAK_PAIR] = strdup("GA GG");
32  char_bind[HELIX_WEAK_PAIR] = strdup("=");
33 
34  pairs_def[HELIX_NO_PAIR] = strdup("AA AC CC CT CU TT UU");
35  char_bind[HELIX_NO_PAIR] = strdup("#");
36 
37  pairs_def[HELIX_USER0] = strdup(".A .C .G .T .U");
38  char_bind[HELIX_USER0] = strdup("*");
39 
40  pairs_def[HELIX_USER1] = strdup("-A -C -G -T -U");
41  char_bind[HELIX_USER1] = strdup("#");
42 
43  pairs_def[HELIX_USER2] = strdup("-- .. -.");
44  char_bind[HELIX_USER2] = strdup("");
45 
46  pairs_def[HELIX_USER3] = strdup("");
47  char_bind[HELIX_USER3] = strdup("+");
48 
49  pairs_def[HELIX_USER4] = strdup("");
50  char_bind[HELIX_USER4] = strdup("");
51 
52  pairs_def[HELIX_USER5] = strdup("");
53  char_bind[HELIX_USER5] = strdup("");
54 
55  pairs_def[HELIX_DEFAULT] = strdup("");
56  char_bind[HELIX_DEFAULT] = strdup("%");
57 }
58 
60  for (int i=0; i<PAIR_TYPE_COUNT; i++) {
61  free(pairs_def[i]);
62  free(char_bind[i]);
63  }
64 }
65 
66 bool BI_pairdef::is_pairtype(char left, char right, BI_PAIR_TYPE pair_type) const {
67  const char *pairs = pairs_def[pair_type];
68  int len = strlen(pairs)-1;
69 
70  for (int i=0; i<len; i+=3) {
71  if ((pairs[i] == left && pairs[i+1] == right) ||
72  (pairs[i] == right && pairs[i+1] == left)) {
73  return true;
74  }
75  }
76  return false;
77 }
78 
79 char BI_pairdef::get_symbol(char left, char right) const {
80  left = toupper(left);
81  right = toupper(right);
82 
83  int erg = *char_bind[HELIX_DEFAULT];
84  for (int i = HELIX_STRONG_PAIR; i< PAIR_TYPE_COUNT; i++) {
85  if (is_pairtype(left, right, BI_PAIR_TYPE(i))) {
86  erg = *char_bind[i];
87  break;
88  }
89  }
90  if (!erg) erg = ' ';
91  return erg;
92 }
93 
94 int BI_pairdef::pair_strength(char left, char right) {
95  // returned values:
96  //
97  // 3 = strong helix
98  // 2 = normal helix
99  // 1 = weak helix
100  // 0 = no helix
101 
102  left = toupper(left);
103  right = toupper(right);
104 
105  if (is_pairtype(left, right, HELIX_STRONG_PAIR)) return 3;
106  if (is_pairtype(left, right, HELIX_NORMAL_PAIR)) return 2;
107  if (is_pairtype(left, right, HELIX_WEAK_PAIR)) return 1;
108  return 0;
109 }
110 
111 // --------------------------------------------------------------------------------
112 
113 char *BI_helix::helix_error = NULp;
114 
115 struct helix_stack {
116  struct helix_stack *next;
117 
118  long pos;
119  char c;
120 };
121 
123  entries = NULp;
124  Size = 0;
125 }
126 
128  if (entries) {
129  for (size_t i = 0; i<Size; ++i) {
130  if (entries[i].allocated) {
131  free(entries[i].helix_nr);
132  }
133  }
134  free(entries);
135  }
136 }
137 
138 
139 static void BI_helix_check_error(const char *key, long val, void *client_data) {
140  struct helix_stack *stack = (struct helix_stack *)val;
141  GB_ERROR *errorPtr = (GB_ERROR*)client_data;
142  if (!*errorPtr && stack) {
143  *errorPtr = GBS_global_string("Too many '%c' in Helix '%s' pos %li", stack->c, key, stack->pos);
144  }
145 }
146 
147 
148 static long BI_helix_free_hash(const char *, long val, void *) {
149  struct helix_stack *stack = (struct helix_stack *)val;
150  struct helix_stack *next;
151  for (; stack; stack = next) {
152  next = stack->next;
153  delete stack;
154  }
155  return 0;
156 }
157 
158 GB_ERROR BI_helix::initFromData(const char *helix_nr_in, const char *helix_in, size_t sizei) {
159  /* helix_nr string of helix identifiers
160  * helix helix
161  * size alignment len
162  */
163  clear_error();
164 
166  size_t pos;
167  char c;
168  char ident[256];
169  char *sident;
170 
171  helix_stack *laststack = NULp;
172  helix_stack *stack;
173 
174  Size = sizei;
175 
176  char *helix = NULp;
177  {
178  size_t len = strlen(helix_in);
179  if (len > Size) len = Size;
180 
181  char *h = ARB_alloc<char>(Size+1);
182  h[Size] = 0;
183 
184  if (len<Size) memset(h+len, '.', Size-len);
185  memcpy(h, helix_in, len);
186  helix = h;
187  }
188 
189  char *helix_nr = NULp;
190  if (helix_nr_in) {
191  size_t len = strlen(helix_nr_in);
192  if (len > Size) len = (int)Size;
193 
194  char *h = ARB_alloc<char>(Size+1);
195  h[Size] = 0;
196 
197  if (len<Size) memset(h+len, '.', (int)(Size-len));
198  memcpy(h, helix_nr_in, len);
199  helix_nr = h;
200  }
201 
202  strcpy(ident, "0");
203  long pos_scanned_till = -1;
204 
205  ARB_calloc(entries, Size);
206  sident = NULp;
207 
208  for (pos = 0; pos < Size; pos ++) {
209  if (helix_nr) {
210  if (long(pos)>pos_scanned_till && isalnum(helix_nr[pos])) {
211  for (int j=0; (pos+j)<Size; j++) {
212  char hn = helix_nr[pos+j];
213  if (isalnum(hn)) {
214  ident[j] = hn;
215  }
216  else {
217  ident[j] = 0;
218  pos_scanned_till = pos+j;
219  break;
220  }
221  }
222  }
223  }
224  c = helix[pos];
225  if (strchr(LEFT_HELIX, c)) { // push
226  laststack = (struct helix_stack *)GBS_read_hash(hash, ident);
227  stack = new helix_stack;
228  stack->next = laststack;
229  stack->pos = pos;
230  stack->c = c;
231  GBS_write_hash(hash, ident, (long)stack);
232  }
233  else if (strchr(RIGHT_HELIX, c)) { // pop
234  stack = (struct helix_stack *)GBS_read_hash(hash, ident);
235  if (!stack) {
236  bi_assert(!helix_error); // already have an error
237  helix_error = GBS_global_string_copy("Too many '%c' in Helix '%s' pos %zu", c, ident, pos);
238  goto helix_end;
239  }
240  if (strchr(RIGHT_HELIX, c)) {
241  entries[pos].is_pairpos = true;
242  entries[stack->pos].is_pairpos = true;
243  }
244  else {
245  c = tolower(c);
246  if (stack->c != c) {
247  bi_assert(!helix_error); // already have an error
248  helix_error = GBS_global_string_copy("Character '%c' pos %li doesn't match character '%c' pos %zu in Helix '%s'",
249  stack->c, stack->pos, c, pos, ident);
250  goto helix_end;
251  }
252 
253  arb_assert(!isalpha(c)); // shall not occur
254 
255  entries[pos].is_pairpos = false;
256  entries[stack->pos].is_pairpos = false;
257  }
258  entries[pos].pair_pos = stack->pos;
259  entries[stack->pos].pair_pos = pos;
260  GBS_write_hash(hash, ident, (long)stack->next);
261 
262  if (!sident || strcmp(sident+1, ident) != 0) {
263  ARB_alloc(sident, strlen(ident)+2);
264  sprintf(sident, "-%s", ident);
265 
266  entries[stack->pos].allocated = true;
267  }
268  entries[pos].helix_nr = sident+1;
269  entries[stack->pos].helix_nr = sident;
270  bi_assert((long)pos != stack->pos);
271 
272  delete stack;
273  }
274  }
275 
276  if (!get_error()) {
277  GB_ERROR error = NULp;
278  GBS_hash_do_const_loop(hash, BI_helix_check_error, (void*)&error);
279  if (error) set_error(error);
280  }
281 
282  helix_end :
284  GBS_free_hash(hash);
285 
286  free(helix_nr);
287  free(helix);
288 
289  return get_error();
290 }
291 
293  GB_transaction ta(gb_main);
294 
295  clear_error();
296 
297  GBDATA *gb_sai_data = GBT_get_SAI_data(gb_main);
298  long sizei = GBT_get_alignment_len(gb_main, alignment_name);
299 
300  if (sizei<=0) {
301  set_error(GB_await_error());
302  }
303  else {
304  char *helix_name = GBT_get_default_helix(gb_main);
305  char *helix_nr_name = GBT_get_default_helix_nr(gb_main);
306  GBDATA *gb_helix_nr_con = GBT_find_SAI_rel_SAI_data(gb_sai_data, helix_nr_name);
307  GBDATA *gb_helix_con = GBT_find_SAI_rel_SAI_data(gb_sai_data, helix_name);
308  GBDATA *gb_helix = NULp;
309  GBDATA *gb_helix_nr = NULp;
310 
311  if (gb_helix_nr_con) gb_helix_nr = GBT_find_sequence(gb_helix_nr_con, alignment_name);
312  if (gb_helix_con) gb_helix = GBT_find_sequence(gb_helix_con, alignment_name);
313 
314  if (!gb_helix) set_error(GBS_global_string("Can't find SAI:%s", helix_name));
315  else if (!gb_helix_nr) set_error(GBS_global_string("Can't find SAI:%s", helix_nr_name));
316  else {
317  initFromData(GB_read_char_pntr(gb_helix_nr), GB_read_char_pntr(gb_helix), sizei); // does set 'helix_error' // @@@ NOT_ALL_SAI_HAVE_DATA
318  }
319 
320  free(helix_name);
321  free(helix_nr_name);
322  }
323 
324  return get_error();
325 }
326 
328  GB_transaction ta(gb_main);
329 
330  char *alignment_name = GBT_get_default_alignment(gb_main);
331  if (!alignment_name) {
332  set_error(GB_await_error());
333  }
334  else {
335  init(gb_main, alignment_name); // does set 'helix_error'
336  free(alignment_name);
337  }
338 
339  return get_error();
340 }
341 
343  return entries[0].is_pairpos ? 0 : next_pair_position(0);
344 }
345 
346 long BI_helix::next_pair_position(size_t pos) const {
347  if (entries[pos].next_pair_pos == 0) {
348  size_t p;
349  long next_pos = -1;
350  for (p = pos+1; p<Size && next_pos == -1; ++p) {
351  if (entries[p].is_pairpos) {
352  next_pos = p;
353  }
354  else if (entries[p].next_pair_pos != 0) {
355  next_pos = entries[p].next_pair_pos;
356  }
357  }
358 
359  size_t q = p<Size ? p-2 : Size-1;
360 
361  for (p = pos; p <= q; ++p) {
362  bi_assert(entries[p].next_pair_pos == 0);
363  entries[p].next_pair_pos = next_pos;
364  }
365  }
366  return entries[pos].next_pair_pos;
367 }
368 
369 long BI_helix::first_position(const char *helix_Nr) const {
370  long pos;
371  for (pos = first_pair_position(); pos != -1; pos = next_pair_position(pos)) {
372  if (strcmp(helix_Nr, entries[pos].helix_nr) == 0) break;
373  }
374  return pos;
375 }
376 
377 long BI_helix::last_position(const char *helix_Nr) const {
378  long pos = first_position(helix_Nr);
379  if (pos != -1) {
380  long next_pos = next_pair_position(pos);
381  while (next_pos != -1 && strcmp(helix_Nr, entries[next_pos].helix_nr) == 0) {
382  pos = next_pos;
383  next_pos = next_pair_position(next_pos);
384  }
385  }
386  return pos;
387 }
388 
bool is_pairpos(size_t pos) const
Definition: BI_helix.hxx:117
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
#define LEFT_HELIX
Definition: BI_helix.cxx:15
BI_PAIR_TYPE
Definition: BI_helix.hxx:19
int pair_strength(char left, char right)
Definition: BI_helix.cxx:94
long pair_pos
Definition: BI_helix.hxx:74
long first_position(const char *helixNr) const
Definition: BI_helix.cxx:369
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
static char * alignment_name
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
bool is_pairpos
Definition: BI_helix.hxx:75
bool allocated
Definition: BI_helix.hxx:82
char * GBT_get_default_helix_nr(GBDATA *)
Definition: adtools.cxx:65
long last_position(const char *helixNr) const
Definition: BI_helix.cxx:377
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data)
Definition: adhash.cxx:559
long first_pair_position() const
Definition: BI_helix.cxx:342
static GB_ERROR get_error()
Definition: BI_helix.hxx:96
static long BI_helix_free_hash(const char *, long val, void *)
Definition: BI_helix.cxx:148
static void error(const char *msg)
Definition: mkptypes.cxx:96
#define bi_assert(bed)
Definition: BI_basepos.hxx:25
long next_pair_position(size_t pos) const
Definition: BI_helix.cxx:346
long next_pair_pos
Definition: BI_helix.hxx:78
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
Definition: adhash.cxx:545
char get_symbol(char left, char right) const
Definition: BI_helix.cxx:79
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
char * helix_nr
Definition: BI_helix.hxx:76
GB_ERROR initFromData(const char *helix_nr, const char *helix, size_t size)
Definition: BI_helix.cxx:158
char * GBT_get_default_helix(GBDATA *)
Definition: adtools.cxx:64
#define NULp
Definition: cxxforward.h:116
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
GBDATA * GBT_find_SAI_rel_SAI_data(GBDATA *gb_sai_data, const char *name)
Definition: aditem.cxx:171
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
GBDATA * GBT_get_SAI_data(GBDATA *gb_main)
Definition: aditem.cxx:154
#define RIGHT_HELIX
Definition: BI_helix.cxx:16
static void BI_helix_check_error(const char *key, long val, void *client_data)
Definition: BI_helix.cxx:139
const BI_PAIR_TYPE PAIR_TYPE_COUNT
Definition: BI_helix.hxx:36
GB_ERROR init(GBDATA *gb_main)
Definition: BI_helix.cxx:327
struct helix_stack * next
Definition: BI_helix.cxx:116
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253