ARB
BI_helix.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : BI_helix.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "BI_helix.hxx"
12 
13 #include <arbdbt.h>
14 
15 #include <cctype>
16 
17 #define LEFT_HELIX "{[<("
18 #define RIGHT_HELIX "}]>)"
19 #define LEFT_NONS "#*abcdefghij"
20 #define RIGHT_NONS "#*ABCDEFGHIJ"
21 
22 char *BI_helix::helix_error = NULp;
23 
24 struct helix_stack {
25  struct helix_stack *next;
26  long pos;
28  char c;
29 };
30 
31 void BI_helix::_init() {
32  int i;
33  for (i=0; i<HELIX_MAX; i++) pairs[i] = NULp;
34  for (i=0; i<HELIX_MAX; i++) char_bind[i] = NULp;
35 
36  entries = NULp;
37  Size = 0;
38 
39  pairs[HELIX_NONE]=strdup("");
40  char_bind[HELIX_NONE] = strdup(" ");
41 
42  pairs[HELIX_STRONG_PAIR]=strdup("CG AT AU");
43  char_bind[HELIX_STRONG_PAIR] = strdup("~");
44 
45  pairs[HELIX_PAIR]=strdup("GT GU");
46  char_bind[HELIX_PAIR] = strdup("-");
47 
48  pairs[HELIX_WEAK_PAIR]=strdup("GA");
49  char_bind[HELIX_WEAK_PAIR] = strdup("=");
50 
51  pairs[HELIX_NO_PAIR]=strdup("AA AC CC CT CU GG TU");
52  char_bind[HELIX_NO_PAIR] = strdup("#");
53 
54  pairs[HELIX_USER0]=strdup(".A .C .G .T .U");
55  char_bind[HELIX_USER0] = strdup("*");
56 
57  pairs[HELIX_USER1]=strdup("-A -C -G -T -U");
58  char_bind[HELIX_USER1] = strdup("#");
59 
60  pairs[HELIX_USER2]=strdup("UU TT");
61  char_bind[HELIX_USER2] = strdup("+");
62 
63  pairs[HELIX_USER3]=strdup("");
64  char_bind[HELIX_USER3] = strdup("");
65 
66  pairs[HELIX_DEFAULT]=strdup("");
67  char_bind[HELIX_DEFAULT] = strdup("");
68 
69  for (i=HELIX_NON_STANDARD0; i<=HELIX_NON_STANDARD9; i++) {
70  pairs[i] = strdup("");
71  char_bind[i] = strdup("");
72  }
73 
74  pairs[HELIX_NO_MATCH]=strdup("");
75  char_bind[HELIX_NO_MATCH] = strdup("|");
76 }
77 
79  _init();
80 }
81 
83  unsigned i;
84  for (i=0; i<HELIX_MAX; i++) free(pairs[i]);
85  for (i=0; i<HELIX_MAX; i++) free(char_bind[i]);
86 
87  if (entries) {
88  for (i = 0; i<Size; ++i) {
89  if (entries[i].allocated) {
90  free(entries[i].helix_nr);
91  }
92  }
93  free(entries);
94  }
95 }
96 
97 static void BI_helix_check_error(const char *key, long val, void *) {
98  struct helix_stack *stack = (struct helix_stack *)val;
99  if (!BI_helix::get_error() && stack) { // don't overwrite existing error
100  BI_helix::set_error(GBS_global_string("Too many '%c' in Helix '%s' pos %li", stack->c, key, stack->pos));
101  }
102 }
103 
104 
105 static long BI_helix_free_hash(const char *, long val, void *) {
106  struct helix_stack *stack = (struct helix_stack *)val;
107  struct helix_stack *next;
108  for (; stack; stack = next) {
109  next = stack->next;
110  delete stack;
111  }
112  return 0;
113 }
114 
115 const char *BI_helix::initFromData(const char *helix_nr_in, const char *helix_in, size_t sizei) {
116  /* helix_nr string of helix identifiers
117  * helix helix
118  * size alignment len
119  */
120  clear_error();
121 
123  size_t pos;
124  char c;
125  char ident[256];
126  char *sident;
127 
128  helix_stack *laststack = NULp;
129  helix_stack *stack;
130 
131  Size = sizei;
132 
133  char *helix = NULp;
134  {
135  size_t len = strlen(helix_in);
136  if (len > Size) len = Size;
137 
138  char *h = ARB_alloc<char>(Size+1);
139  h[Size] = 0;
140 
141  if (len<Size) memset(h+len, '.', Size-len);
142  memcpy(h, helix_in, len);
143  helix = h;
144  }
145 
146  char *helix_nr = NULp;
147  if (helix_nr_in) {
148  size_t len = strlen(helix_nr_in);
149  if (len > Size) len = (int)Size;
150 
151  char *h = ARB_alloc<char>(Size+1);
152  h[Size] = 0;
153 
154  if (len<Size) memset(h+len, '.', (int)(Size-len));
155  memcpy(h, helix_nr_in, len);
156  helix_nr = h;
157  }
158 
159  strcpy(ident, "0");
160  long pos_scanned_till = -1;
161 
162  ARB_calloc(entries, Size);
163  sident = NULp;
164 
165  for (pos = 0; pos < Size; pos ++) {
166  if (helix_nr) {
167  if (long(pos)>pos_scanned_till && isalnum(helix_nr[pos])) {
168  for (int j=0; (pos+j)<Size; j++) {
169  char hn = helix_nr[pos+j];
170  if (isalnum(hn)) {
171  ident[j] = hn;
172  }
173  else {
174  ident[j] = 0;
175  pos_scanned_till = pos+j;
176  break;
177  }
178  }
179  }
180  }
181  c = helix[pos];
182  if (strchr(LEFT_HELIX, c) || strchr(LEFT_NONS, c)) { // push
183  laststack = (struct helix_stack *)GBS_read_hash(hash, ident);
184  stack = new helix_stack;
185  stack->next = laststack;
186  stack->pos = pos;
187  stack->c = c;
188  GBS_write_hash(hash, ident, (long)stack);
189  }
190  else if (strchr(RIGHT_HELIX, c) || strchr(RIGHT_NONS, c)) { // pop
191  stack = (struct helix_stack *)GBS_read_hash(hash, ident);
192  if (!stack) {
193  bi_assert(!helix_error); // already have an error
194  helix_error = GBS_global_string_copy("Too many '%c' in Helix '%s' pos %zu", c, ident, pos);
195  goto helix_end;
196  }
197  if (strchr(RIGHT_HELIX, c)) {
198  entries[pos].pair_type = HELIX_PAIR;
199  entries[stack->pos].pair_type = HELIX_PAIR;
200  }
201  else {
202  c = tolower(c);
203  if (stack->c != c) {
204  bi_assert(!helix_error); // already have an error
205  helix_error = GBS_global_string_copy("Character '%c' pos %li doesn't match character '%c' pos %zu in Helix '%s'",
206  stack->c, stack->pos, c, pos, ident);
207  goto helix_end;
208  }
209  if (isalpha(c)) {
210  entries[pos].pair_type = (BI_PAIR_TYPE)(HELIX_NON_STANDARD0+c-'a');
211  entries[stack->pos].pair_type = (BI_PAIR_TYPE)(HELIX_NON_STANDARD0+c-'a');
212  }
213  else {
214  entries[pos].pair_type = HELIX_NO_PAIR;
215  entries[stack->pos].pair_type = HELIX_NO_PAIR;
216  }
217  }
218  entries[pos].pair_pos = stack->pos;
219  entries[stack->pos].pair_pos = pos;
220  GBS_write_hash(hash, ident, (long)stack->next);
221 
222  if (!sident || strcmp(sident+1, ident) != 0) {
223  ARB_alloc(sident, strlen(ident)+2);
224  sprintf(sident, "-%s", ident);
225 
226  entries[stack->pos].allocated = true;
227  }
228  entries[pos].helix_nr = sident+1;
229  entries[stack->pos].helix_nr = sident;
230  bi_assert((long)pos != stack->pos);
231 
232  delete stack;
233  }
234  }
235 
237 
238  helix_end :
240  GBS_free_hash(hash);
241 
242  free(helix_nr);
243  free(helix);
244 
245  return get_error();
246 }
247 
248 
249 const char *BI_helix::init(GBDATA *gb_helix_nr, GBDATA *gb_helix, size_t sizei) {
250  clear_error();
251 
252  if (!gb_helix) set_error("Can't find SAI:HELIX");
253  else if (!gb_helix_nr) set_error("Can't find SAI:HELIX_NR");
254  else {
255  GB_transaction ta(gb_helix);
256  initFromData(GB_read_char_pntr(gb_helix_nr), GB_read_char_pntr(gb_helix), sizei);
257  }
258 
259  return get_error();
260 }
261 
262 const char *BI_helix::init(GBDATA *gb_main, const char *alignment_name, const char *helix_nr_name, const char *helix_name) {
263  GB_transaction ta(gb_main);
264  clear_error();
265 
266  GBDATA *gb_sai_data = GBT_get_SAI_data(gb_main);
267  long size2 = GBT_get_alignment_len(gb_main, alignment_name);
268 
269  if (size2<=0) set_error(GB_await_error());
270  else {
271  GBDATA *gb_helix_nr_con = GBT_find_SAI_rel_SAI_data(gb_sai_data, helix_nr_name);
272  GBDATA *gb_helix_con = GBT_find_SAI_rel_SAI_data(gb_sai_data, helix_name);
273  GBDATA *gb_helix = NULp;
274  GBDATA *gb_helix_nr = NULp;
275 
276  if (gb_helix_nr_con) gb_helix_nr = GBT_find_sequence(gb_helix_nr_con, alignment_name);
277  if (gb_helix_con) gb_helix = GBT_find_sequence(gb_helix_con, alignment_name);
278 
279  init(gb_helix_nr, gb_helix, size2);
280  }
281 
282  return get_error();
283 }
284 
285 const char *BI_helix::init(GBDATA *gb_main, const char *alignment_name) {
286  GB_transaction ta(gb_main);
287 
288  char *helix = GBT_get_default_helix(gb_main);
289  char *helix_nr = GBT_get_default_helix_nr(gb_main);
290 
291  const char *err = init(gb_main, alignment_name, helix_nr, helix);
292 
293  free(helix);
294  free(helix_nr);
295 
296  return err;
297 }
298 
299 const char *BI_helix::init(GBDATA *gb_main) {
300  GB_transaction ta(gb_main);
301 
302  char *alignment_name = GBT_get_default_alignment(gb_main);
303  const char *err = init(gb_main, alignment_name);
304 
305  free(alignment_name);
306  return err;
307 }
308 
309 bool BI_helix::is_pairtype(char left, char right, BI_PAIR_TYPE pair_type) {
310  int len = strlen(pairs[pair_type])-1;
311  char *pai = pairs[pair_type];
312 
313  for (int i=0; i<len; i+=3) {
314  if ((pai[i] == left && pai[i+1] == right) ||
315  (pai[i] == right && pai[i+1] == left)) return true;
316  }
317  return false;
318 }
319 
320 int BI_helix::check_pair(char left, char right, BI_PAIR_TYPE pair_type) {
321  // return value:
322  // 2 = helix
323  // 1 = weak helix
324  // 0 = no helix
325  // -1 = nothing
326 
327  left = toupper(left);
328  right = toupper(right);
329  switch (pair_type) {
330  case HELIX_PAIR:
331  if (is_pairtype(left, right, HELIX_STRONG_PAIR) ||
332  is_pairtype(left, right, HELIX_PAIR)) return 2;
333  if (is_pairtype(left, right, HELIX_WEAK_PAIR)) return 1;
334  return 0;
335 
336  case HELIX_NO_PAIR:
337  if (is_pairtype(left, right, HELIX_STRONG_PAIR) ||
338  is_pairtype(left, right, HELIX_PAIR)) return 0;
339  return 1;
340 
341  default:
342  return is_pairtype(left, right, pair_type) ? 1 : 0;
343  }
344 }
345 
347  return entries[0].pair_type == HELIX_NONE ? next_pair_position(0) : 0;
348 }
349 
350 long BI_helix::next_pair_position(size_t pos) const {
351  if (entries[pos].next_pair_pos == 0) {
352  size_t p;
353  long next_pos = -1;
354  for (p = pos+1; p<Size && next_pos == -1; ++p) {
355  if (entries[p].pair_type != HELIX_NONE) {
356  next_pos = p;
357  }
358  else if (entries[p].next_pair_pos != 0) {
359  next_pos = entries[p].next_pair_pos;
360  }
361  }
362 
363  size_t q = p<Size ? p-2 : Size-1;
364 
365  for (p = pos; p <= q; ++p) {
366  bi_assert(entries[p].next_pair_pos == 0);
367  entries[p].next_pair_pos = next_pos;
368  }
369  }
370  return entries[pos].next_pair_pos;
371 }
372 
373 long BI_helix::first_position(const char *helix_Nr) const {
374  long pos;
375  for (pos = first_pair_position(); pos != -1; pos = next_pair_position(pos)) {
376  if (strcmp(helix_Nr, entries[pos].helix_nr) == 0) break;
377  }
378  return pos;
379 }
380 
381 long BI_helix::last_position(const char *helix_Nr) const {
382  long pos = first_position(helix_Nr);
383  if (pos != -1) {
384  long next_pos = next_pair_position(pos);
385  while (next_pos != -1 && strcmp(helix_Nr, entries[next_pos].helix_nr) == 0) {
386  pos = next_pos;
387  next_pos = next_pair_position(next_pos);
388  }
389  }
390  return pos;
391 }
392 
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
BI_PAIR_TYPE type
Definition: BI_helix.cxx:27
#define LEFT_HELIX
Definition: BI_helix.cxx:17
BI_PAIR_TYPE
Definition: BI_helix.hxx:18
long pair_pos
Definition: BI_helix.hxx:44
long first_position(const char *helixNr) const
Definition: BI_helix.cxx:373
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:706
static char * alignment_name
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
bool allocated
Definition: BI_helix.hxx:52
char * GBT_get_default_helix_nr(GBDATA *)
Definition: adtools.cxx:65
long last_position(const char *helixNr) const
Definition: BI_helix.cxx:381
char * char_bind[HELIX_MAX]
Definition: BI_helix.hxx:68
char * pairs[HELIX_MAX]
Definition: BI_helix.hxx:67
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
void GBS_hash_do_const_loop(const GB_HASH *hs, gb_hash_const_loop_type func, void *client_data)
Definition: adhash.cxx:562
long first_pair_position() const
Definition: BI_helix.cxx:346
#define LEFT_NONS
Definition: BI_helix.cxx:19
#define RIGHT_NONS
Definition: BI_helix.cxx:20
bool is_pairtype(char left, char right, BI_PAIR_TYPE pair_type)
Definition: BI_helix.cxx:309
static long BI_helix_free_hash(const char *, long val, void *)
Definition: BI_helix.cxx:105
BI_helix()
Definition: BI_helix.cxx:78
#define bi_assert(bed)
Definition: BI_basepos.hxx:25
long next_pair_position(size_t pos) const
Definition: BI_helix.cxx:350
long next_pair_pos
Definition: BI_helix.hxx:48
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:670
static void set_error(const char *err)
Definition: BI_helix.hxx:76
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
Definition: adhash.cxx:548
~BI_helix()
Definition: BI_helix.cxx:82
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
char * helix_nr
Definition: BI_helix.hxx:46
int check_pair(char left, char right, BI_PAIR_TYPE pair_type)
Definition: BI_helix.cxx:320
static char * get_error()
Definition: BI_helix.hxx:74
static ARB_init_perl_interface init
Definition: ARB_ext.c:101
char * GBT_get_default_helix(GBDATA *)
Definition: adtools.cxx:64
#define NULp
Definition: cxxforward.h:97
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:675
GBDATA * GBT_find_SAI_rel_SAI_data(GBDATA *gb_sai_data, const char *name)
Definition: aditem.cxx:169
static void clear_error()
Definition: BI_helix.hxx:75
const char * initFromData(const char *helix_nr, const char *helix, size_t size)
Definition: BI_helix.cxx:115
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:874
GBDATA * gb_main
Definition: adname.cxx:33
static void BI_helix_check_error(const char *key, long val, void *)
Definition: BI_helix.cxx:97
GBDATA * GBT_get_SAI_data(GBDATA *gb_main)
Definition: aditem.cxx:152
#define RIGHT_HELIX
Definition: BI_helix.cxx:18
BI_PAIR_TYPE pair_type
Definition: BI_helix.hxx:45
struct helix_stack * next
Definition: BI_helix.cxx:25
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253