ARB
ali_solution.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ali_solution.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "ali_solution.hxx"
12 
13 
14 // ----------------
15 // ALI_MAP
16 
17 ALI_MAP::ALI_MAP(unsigned long first_seq, unsigned long last_seq,
18  unsigned long first_ref, unsigned long last_ref)
19 {
20  unsigned long l;
21 
22  insert_counter = 0;
23  first_seq_base = first_seq;
24  last_seq_base = last_seq;
25  first_ref_base = first_ref;
26  last_ref_base = last_ref;
27 
28  mapping = (long **) CALLOC((unsigned int) (last_seq_base - first_seq_base + 1), sizeof(long));
29  inserted = (unsigned char **) CALLOC((unsigned int) ((last_seq_base - first_seq_base)/8) + 1, sizeof(unsigned char));
30  undefined = (unsigned char **) CALLOC((unsigned int) ((last_seq_base - first_seq_base)/8) + 1, sizeof(unsigned char));
31 
32  ali_out_of_memory_if(!mapping || !inserted || !undefined);
33 
34  for (l = 0; l < (last_seq_base - first_seq_base)/8 + 1; l++)
35  (*undefined)[l] = 0xff;
36 }
37 
39  unsigned long l;
40 
41  first_seq_base = map->first_seq_base;
42  last_seq_base = map->last_seq_base;
43  first_ref_base = map->first_ref_base;
44  last_ref_base = map->last_ref_base;
45 
46  mapping = (long **) CALLOC((unsigned int) (last_seq_base - first_seq_base + 1), sizeof(long));
47  inserted = (unsigned char **) CALLOC((unsigned int) ((last_seq_base - first_seq_base) / 8) + 1, sizeof(unsigned char));
48  undefined = (unsigned char **) CALLOC((unsigned int) ((last_seq_base - first_seq_base) / 8) + 1, sizeof(unsigned char));
49 
50  ali_out_of_memory_if(!mapping || !inserted || !undefined);
51 
52  for (l = 0; l < last_seq_base - first_seq_base + 1; l++) {
53  if (l < (last_seq_base - first_seq_base)/8 + 1) {
54  (*inserted)[l] = (*map->inserted)[l];
55  (*undefined)[l] = (*map->undefined)[l];
56  }
57  (*mapping)[l] = (*map->mapping)[l];
58  }
59 }
60 
62  unsigned long i;
63 
64  for (i = 1; i <= last_seq_base - first_seq_base; i++)
65  if ((*mapping)[i-1] >= (*mapping)[i])
66  return 0;
67 
68  return 1;
69 }
70 
72  int ins_counter;
73  int begin_flag = 0, undefined_flag;
74  unsigned long map_pos, seq_pos;
75  unsigned char *seq, *seq_buffer;
76 
77  seq_buffer = (unsigned char *) CALLOC((unsigned int)
78  (last_ref_base - first_ref_base + insert_counter + 1),
79  sizeof(unsigned char));
80 
81  ali_out_of_memory_if(!seq_buffer);
82 
83  seq = seq_buffer;
84  seq_pos = 0;
85  undefined_flag = 0;
86  ins_counter = 0;
87  for (map_pos = 0; map_pos <= last_seq_base - first_seq_base; map_pos++) {
88  if (!undefined_flag)
89  begin_flag = ref_seq->is_begin(first_seq_base + map_pos);
90  if (!(((*undefined)[map_pos/8]>>(7-(map_pos%8))) & 0x01)) {
91  for (; seq_pos < (unsigned long)((*mapping)[map_pos] + ins_counter); seq_pos++) {
92  if (begin_flag)
93  *seq++ = ALI_DOT_CODE;
94  else
95  *seq++ = ALI_GAP_CODE;
96  }
97  *seq++ = ref_seq->base(first_seq_base + map_pos);
98  seq_pos++;
99  undefined_flag = 0;
100  }
101  else {
102  undefined_flag = 1;
103  }
104  if ((*inserted)[map_pos/8]>>(7-(map_pos%8)) & 0x01)
105  ins_counter++;
106  }
107 
108  begin_flag = ref_seq->is_begin(first_seq_base + map_pos);
109  for (; seq_pos <= last_ref_base - first_ref_base + ins_counter; seq_pos++) {
110  if (begin_flag)
111  *seq++ = ALI_DOT_CODE;
112  else
113  *seq++ = ALI_GAP_CODE;
114  }
115 
116  return new ALI_SEQUENCE(ref_seq->name(), seq_buffer,
117  last_ref_base - first_ref_base + insert_counter + 1);
118 }
119 
121  int begin_flag = 0, undefined_flag;
122  unsigned long map_pos, seq_pos;
123  unsigned char *seq, *seq_buffer;
124 
125  seq_buffer = (unsigned char *) CALLOC((unsigned int) (last_ref_base - first_ref_base + 1), sizeof(unsigned char));
126  ali_out_of_memory_if(!seq_buffer);
127 
128  seq = seq_buffer;
129  seq_pos = 0;
130  undefined_flag = 0;
131  for (map_pos = 0; map_pos <= last_seq_base - first_seq_base; map_pos++) {
132  if (!undefined_flag)
133  begin_flag = ref_seq->is_begin(first_seq_base + map_pos);
134  if (!((*undefined)[map_pos/8]>>(7-(map_pos%8)) & 0x01) &&
135  !((*inserted)[map_pos/8]>>(7-(map_pos%8)) & 0x01)) {
136  for (; seq_pos < (unsigned long)((*mapping)[map_pos]); seq_pos++) {
137  if (begin_flag)
138  *seq++ = ALI_DOT_CODE;
139  else
140  *seq++ = ALI_GAP_CODE;
141  }
142  *seq++ = ref_seq->base(first_seq_base + map_pos);
143  seq_pos++;
144  undefined_flag = 0;
145  }
146  else
147  undefined_flag = 1;
148  }
149 
150  begin_flag = ref_seq->is_begin(first_seq_base + map_pos);
151  for (; seq_pos <= last_ref_base - first_ref_base; seq_pos++) {
152  if (begin_flag)
153  *seq++ = ALI_DOT_CODE;
154  else
155  *seq++ = ALI_GAP_CODE;
156  }
157 
158  return new ALI_SEQUENCE(ref_seq->name(), seq_buffer,
159  last_ref_base - first_ref_base + 1);
160 }
161 
163  unsigned long map_pos;
164  ALI_MAP *inv_map;
165 
166  inv_map = new ALI_MAP(first_ref_base, last_ref_base,
167  first_seq_base, last_seq_base);
168 
169  for (map_pos = 0; map_pos <= last_seq_base - first_seq_base; map_pos++) {
170  if (!((*undefined)[map_pos/8]>>(7-(map_pos%8)) & 0x01) &&
171  !((*inserted)[map_pos/8]>>(7-(map_pos%8)) & 0x01)) {
172  inv_map->set(first_ref_base + (*mapping)[map_pos],
173  map_pos);
174  }
175  }
176 
177  return inv_map;
178 }
179 
181  int ins_counter;
182  unsigned long map_pos, seq_pos;
183  char *seq, *seq_buffer;
184 
185  seq_buffer = (char *) CALLOC((last_ref_base - first_ref_base + insert_counter + 2),
186  sizeof(char));
187 
188  seq = seq_buffer;
189  seq_pos = 0;
190  ins_counter = 0;
191  for (map_pos = 0; map_pos <= last_seq_base - first_seq_base; map_pos++) {
192  if (!(((*undefined)[map_pos/8]>>(7-(map_pos%8))) & 0x01)) {
193  for (; seq_pos < (unsigned long)((*mapping)[map_pos] + ins_counter); seq_pos++) {
194  *seq++ = '.';
195  }
196  if ((*inserted)[map_pos/8]>>(7-(map_pos%8)) & 0x01)
197  *seq++ = 'X';
198  else
199  *seq++ = '.';
200  seq_pos++;
201  }
202  if ((*inserted)[map_pos/8]>>(7-(map_pos%8)) & 0x01)
203  ins_counter++;
204  }
205 
206  for (; seq_pos <= last_ref_base - first_ref_base + ins_counter; seq_pos++) {
207  *seq++ = '.';
208  }
209 
210  *seq = '\0';
211 
212  return seq_buffer;
213 }
214 
215 // -------------------------
216 // ALI_SUB_SOLUTION
217 
219  ALI_MAP *map;
220  ALI_TLIST<ALI_MAP *> *list;
221 
222  profile = solution->profile;
223 
224  if (!solution->map_list.is_empty()) {
225  list = &solution->map_list;
226  map = new ALI_MAP(list->first());
227  map_list.append_end(map);
228  while (list->has_next()) {
229  map = new ALI_MAP(list->next());
230  map_list.append_end(map);
231  }
232  }
233 }
234 
236  ALI_MAP *map;
237 
238  if (!map_list.is_empty()) {
239  map = map_list.first();
240  delete map;
241  while (map_list.has_next()) {
242  map = map_list.next();
243  delete map;
244  }
245  }
246 }
247 
249  unsigned long *start, unsigned long *end,
250  unsigned long *start_ref, unsigned long *end_ref,
251  unsigned long area_number)
252 {
253  ALI_MAP *map;
254  unsigned long last_of_prev, last_of_prev_ref;
255  unsigned long area_number_akt;
256 
257  if (map_list.is_empty()) {
258  if (area_number > 0)
259  return 0;
260  *start = 0;
261  *end = profile->sequence_length() - 1;
262  *start_ref = 0;
263  *end_ref = profile->length() - 1;
264  return 1;
265  }
266 
267  area_number_akt = 0;
268  map = map_list.first();
269  if (map->first_base() > 0 &&
270  map->first_reference_base() > 0) {
271  if (area_number_akt == area_number) {
272  *start = 0;
273  *end = map->first_base() - 1;
274  *start_ref = 0;
275  *end_ref = map->first_reference_base() - 1;
276  return 1;
277  }
278  area_number_akt++;
279  }
280 
281  last_of_prev = map->last_base();
282  last_of_prev_ref = map->last_reference_base();
283  while (map_list.has_next()) {
284  map = map_list.next();
285  if (map->first_base() > last_of_prev + 1) {
286  if (area_number_akt == area_number) {
287  *start = last_of_prev + 1;
288  *end = map->first_base() - 1;
289  *start_ref = last_of_prev_ref + 1;
290  *end_ref = map->first_reference_base() - 1;
291  return 1;
292  }
293  area_number_akt++;
294  }
295  last_of_prev = map->last_base();
296  last_of_prev_ref = map->last_reference_base();
297  }
298 
299  if (map->last_base() < profile->sequence_length() - 1 &&
300  area_number_akt == area_number) {
301  *start = map->last_base() + 1;
302  *end = profile->sequence_length() - 1;
303  *start_ref = map->last_reference_base() + 1;
304  *end_ref = profile->length() - 1;
305  return 1;
306  }
307 
308  return 0;
309 }
310 
312  ALI_MAP *map;
313  unsigned long last_of_prev;
314  unsigned long counter;
315 
316  if (map_list.is_empty())
317  return 1;
318 
319  counter = 0;
320  map = map_list.first();
321  if (map->first_base() > 0 && map->first_reference_base() > 0)
322  counter++;
323 
324  last_of_prev = map->last_base();
325  while (map_list.has_next()) {
326  map = map_list.next();
327  if (map->first_base() > last_of_prev + 1)
328  counter++;
329  last_of_prev = map->last_base();
330  }
331 
332  if (map->last_base() < profile->sequence_length() - 1)
333  counter++;
334 
335  return counter;
336 }
337 
338 
340  ALI_MAP *map;
341  unsigned long last_of_prev, last_of_prev_ref;
342 
343  if (map_list.is_empty()) {
344  if (in_map->last_base() < profile->sequence_length() &&
345  in_map->last_reference_base() < profile->length())
346  return 1;
347  return 0;
348  }
349 
350  map = map_list.first();
351  if (in_map->last_base() < map->first_base() &&
352  in_map->last_reference_base() < map->first_reference_base()) {
353  return 1;
354  }
355 
356  last_of_prev = map->last_base();
357  last_of_prev_ref = map->last_reference_base();
358  while (map_list.has_next()) {
359  map = map_list.next();
360  if (last_of_prev < in_map->first_base() &&
361  in_map->last_base() < map->first_base() &&
362  last_of_prev_ref < in_map->first_reference_base() &&
363  in_map->last_reference_base() < map->first_reference_base()) {
364  return 1;
365  }
366  last_of_prev = map->last_base();
367  last_of_prev_ref = map->last_reference_base();
368  }
369 
370  if (map->last_base() < in_map->first_base() &&
371  in_map->last_base() < profile->sequence_length() &&
372  map->last_reference_base() < in_map->first_base() &&
373  in_map->last_base() < profile->length()) {
374  return 1;
375  }
376 
377  return 0;
378 }
379 
381  ALI_MAP *map;
382  unsigned long last_of_prev, last_of_prev_ref;
383 
384  if (map_list.is_empty()) {
385  if (in_map->last_base() < profile->sequence_length() &&
386  in_map->last_reference_base() < profile->length()) {
387  map_list.append_end(in_map);
388  return 1;
389  }
390  return 0;
391  }
392 
393  map = map_list.first();
394  if (in_map->last_base() < map->first_base() &&
395  in_map->last_reference_base() < map->first_reference_base()) {
396  map_list.insert_bevor(in_map);
397  return 1;
398  }
399 
400  last_of_prev = map->last_base();
401  last_of_prev_ref = map->last_reference_base();
402  while (map_list.has_next()) {
403  map = map_list.next();
404  if (last_of_prev < in_map->first_base() &&
405  in_map->last_base() < map->first_base() &&
406  last_of_prev_ref < in_map->first_reference_base() &&
407  in_map->last_reference_base() < map->first_reference_base()) {
408  map_list.insert_bevor(in_map);
409  return 1;
410  }
411  last_of_prev = map->last_base();
412  last_of_prev_ref = map->last_reference_base();
413  }
414 
415  if (map->last_base() < in_map->first_base() &&
416  in_map->last_base() < profile->sequence_length() &&
417  map->last_reference_base() < in_map->first_reference_base() &&
418  in_map->last_reference_base() < profile->length()) {
419  map_list.append_end(in_map);
420  return 1;
421  }
422 
423  return 0;
424 }
425 
427  ALI_MAP *map;
428 
429  if (map_list.is_empty())
430  return 0;
431 
432  map = map_list.first();
433  if (map == del_map) {
434  map_list.delete_element();
435  return 1;
436  }
437 
438  while (map_list.has_next()) {
439  map = map_list.next();
440  if (map == del_map) {
441  map_list.delete_element();
442  return 1;
443  }
444  }
445 
446  return 0;
447 }
448 
450  ALI_MAP *map, *new_map;
451  unsigned long i;
452  unsigned long last_pos;
453  unsigned long first_base_of_first, first_reference_of_first;
454  unsigned long last_base_of_last, last_reference_of_last;
455 
456  // check if maps are closed
457  if (map_list.is_empty())
458  return NULp;
459 
460  map = map_list.first();
461  first_base_of_first = map->first_base();
462  first_reference_of_first = map->first_reference_base();
463  last_base_of_last = map->last_base();
464  last_reference_of_last = map->last_reference_base();
465 
466  while (map_list.has_next()) {
467  map = map_list.next();
468  if (last_base_of_last != map->first_base() - 1 ||
469  last_reference_of_last != map->first_reference_base() - 1)
470  ali_fatal_error("maps are not compact",
471  "ALI_SUB_SOLUTION::make_one_map()");
472  last_base_of_last = map->last_base();
473  last_reference_of_last = map->last_reference_base();
474  }
475 
476  new_map = new ALI_MAP(first_base_of_first, last_base_of_last,
477  first_reference_of_first, last_reference_of_last);
478 
479  map = map_list.first();
480  do {
481  last_pos = 0;
482  for (i = map->first_base(); i <= map->last_base(); i++) {
483  if (map->is_undefined(i))
484  ali_fatal_error("Unexpected value",
485  "ALI_SUB_SOLUTION::make_one_map()");
486  if ((unsigned long)(map->position(i)) < last_pos)
487  ali_fatal_error("Inconsistent positions",
488  "ALI_SUB_SOLUTION::make_one_map()");
489  last_pos = map->position(i);
490 
491  if (map->is_inserted(i))
492  new_map->set(i, map->first_reference_base() +
493  map->position(i) - first_reference_of_first, 1);
494  else
495  new_map->set(i, map->first_reference_base() +
496  map->position(i) - first_reference_of_first, 0);
497  }
498 
499  if (map_list.has_next())
500  map = map_list.next();
501  else
502  map = NULp;
503  } while (map);
504 
505  return new_map;
506 }
507 
509  ALI_MAP *map;
510 
511  printf("ALI_SUB_SOLUTION:\n");
512  if (!map_list.is_empty()) {
513  map = map_list.first();
514  printf("(%ld to %ld) -> (%ld to %ld)\n",
515  map->first_base(), map->last_base(),
517  while (map_list.has_next()) {
518  map = map_list.next();
519  printf("(%ld to %ld) -> (%ld to %ld)\n",
520  map->first_base(), map->last_base(),
522  }
523  }
524 }
525 
void ali_out_of_memory_if(bool cond)
Definition: ali_misc.hxx:52
long position(unsigned long base)
int has_next()
Definition: ali_tlist.hxx:168
long
Definition: AW_awar.cxx:152
ALI_MAP(unsigned long first_seq_base, unsigned long last_seq_base, unsigned long first_ref_base, unsigned long last_ref_base)
unsigned long length()
char * insert_marker()
void delete_element()
Definition: ali_tlist.hxx:355
#define ALI_GAP_CODE
Definition: ali_misc.hxx:38
unsigned long first_base()
unsigned long first_reference_base()
FILE * seq
Definition: rns.c:46
int is_undefined(unsigned long base)
static HelixNrInfo * start
int is_konsistent(ALI_MAP *map)
#define ALI_DOT_CODE
Definition: ali_misc.hxx:40
void * CALLOC(long i, long j)
Definition: ali_misc.hxx:56
int is_konsistent()
int free_area(unsigned long *start, unsigned long *end, unsigned long *start_ref, unsigned long *end_ref, unsigned long area_number=0)
unsigned char base(unsigned long position)
ALI_SEQUENCE * sequence_without_inserts(ALI_NORM_SEQUENCE *ref_seq)
ALI_MAP * make_one_map()
int is_empty()
Definition: ali_tlist.hxx:165
ALI_MAP * inverse_without_inserts()
void set(unsigned long base, unsigned long pos, int insert=-1)
unsigned long last_reference_base()
int insert(ALI_MAP *map)
void ali_fatal_error(const char *message, const char *func)
Definition: ali_main.cxx:85
unsigned long number_of_free_areas()
ALI_SUB_SOLUTION(ALI_PROFILE *prof)
unsigned long sequence_length()
#define NULp
Definition: cxxforward.h:116
int is_inserted(unsigned long base)
void insert_bevor(T &a)
Definition: ali_tlist.hxx:311
int delete_map(ALI_MAP *map)
unsigned long last_base()
void append_end(T &a)
Definition: ali_tlist.hxx:198
int is_begin(unsigned long pos)
ALI_SEQUENCE * sequence(ALI_NORM_SEQUENCE *ref_seq)