ARB
ali_prealigner.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ali_prealigner.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "ali_prealigner.hxx"
12 #include "ali_aligner.hxx"
13 
14 void ali_prealigner_mask::insert(ALI_MAP * in_map, float costs) {
15  // Insert a new map
16  unsigned long i;
17 
18  calls++;
19  if (!map) {
20  map = in_map;
21  cost_of_binding = costs;
22  }
23  else {
24  if (costs > cost_of_binding) {
25  delete in_map;
26  return;
27  }
28  if (map->first_base() != in_map->first_base() ||
29  map->last_base() != in_map->last_base() ||
32  ali_fatal_error("Incompatible maps",
33  "ali_prealigner_mask::insert()");
34  }
35  if (costs < cost_of_binding) {
36  delete map;
37  map = in_map;
38  cost_of_binding = costs;
39  last_new = calls;
40  last_joins = 0;
41  return;
42  }
43  joins++;
44  last_joins++;
45  for (i = map->first_base(); i <= map->last_base(); i++)
46  if ((!map->is_undefined(i)) && map->position(i) != in_map->position(i))
47  map->undefine(i);
48  delete in_map;
49  }
50 }
51 
53  // Delete expensive parts of solution
54  ALI_MAP *inverse_map;
55  unsigned long start_hel, end_hel;
56  unsigned long start_seq, end_seq;
57  unsigned long start_mapped, end_mapped;
58  unsigned long start_ok=0, end_ok=0;
59  int start_ok_flag;
60  unsigned long found_helix;
61  unsigned long error_counter;
62 
63  unsigned long map_pos, i, j;
64  float max_cost, helix_cost;
65  unsigned long helix_counter;
66  long compl_pos;
67  unsigned char base1, base2;
68 
69  printf("MASK : calls = %ld joins = %ld last_new = %ld last_joins = %ld\n",
71 
72  max_cost = profile->w_sub_maximum() * context->max_cost_of_sub_percent;
73 
74  // Delete expensive Bases
75  error_counter = 0;
76  for (i = map->first_base(); i <= map->last_base(); i++) {
77  if (!(map->is_inserted(i)) &&
78  profile->w_sub(map->position(i), i) > max_cost) {
79  error_counter++;
80  if (error_counter > context->error_count)
81  map->undefine(i);
82  else {
83  if (error_counter == context->error_count) {
84  for (j = i - error_counter + 1; j <= i; j++)
85  map->undefine(j);
86  }
87  }
88  }
89  else {
90  // If error was in helix => delete helix total
91  if (error_counter > 0 &&
92  profile->is_in_helix(map->position(i - 1), &start_hel, &end_hel)) {
93  for (j = i - 1; map->position(j) >= long(start_hel); j--) ;
94  for (; map->position(j) <= long(end_hel); j++)
95  map->undefine(j);
96  }
97  error_counter = 0;
98  }
99  }
100 
101  // Delete expensive Helizes
102  inverse_map = map->inverse_without_inserts();
103  for (i = inverse_map->first_base(); i <= inverse_map->last_base(); i++) {
104  // found a helix
105  if (profile->is_in_helix(i, &start_hel, &end_hel)) {
106  if (i != start_hel)
107  ali_fatal_error("Inconsistent positions",
108  "ali_prealigner_mask::delete_expensive()");
109  compl_pos = profile->complement_position(start_hel);
110  // only forward bindings
111  if (compl_pos > long(end_hel)) {
112  helix_cost = 0.0;
113  helix_counter = 0;
114  while (i <= end_hel) {
115  // is binding ?
116  if (compl_pos > 0) {
117  if (!inverse_map->is_undefined(i)) {
118  base1 = (profile->sequence())->base(
119  inverse_map->first_reference_base() +
120  inverse_map->position(i));
121  }
122  else {
123  base1 = ALI_GAP_CODE;
124  }
125  if (!inverse_map->is_undefined(compl_pos)) {
126  base2 = (profile->sequence())->base(
127  inverse_map->first_reference_base() +
128  inverse_map->position(compl_pos));
129  }
130  else {
131  base2 = ALI_GAP_CODE;
132  }
133  if (base1 != ALI_GAP_CODE || base2 != ALI_GAP_CODE) {
134  helix_cost += profile->w_bind(i, base1, compl_pos, base2);
135  helix_counter++;
136  }
137  }
138  i++;
139  compl_pos = profile->complement_position(i);
140  }
141  if (helix_counter > 0) helix_cost /= helix_counter;
142  if (helix_cost > context->max_cost_of_helix) {
143  for (j = start_hel; j <= end_hel; j++) {
144  if (!inverse_map->is_undefined(j)) {
145  map->undefine(inverse_map->first_reference_base() + inverse_map->position(j));
146  }
147  }
148  for (j = profile->complement_position(end_hel); long(j) <= profile->complement_position(start_hel); j++) {
149  if (!inverse_map->is_undefined(j)) {
150  map->undefine(inverse_map->first_reference_base() + inverse_map->position(j));
151  }
152  }
153  }
154  }
155  i = end_hel;
156  }
157  }
158  delete inverse_map;
159 
160  // Check for good parts
161  for (map_pos = map->first_base(); map_pos <= map->last_base(); map_pos++) {
162  // search next defined segment
163  if (!map->is_undefined(map_pos)) {
164  // find start and end of segment
165  start_seq = map_pos;
166  start_mapped = map->position(map_pos);
167  for (map_pos++;
168  map_pos <= map->last_base() && (!map->is_undefined(map_pos));
169  map_pos++) ;
170 
171  end_seq = map_pos - 1;
172  end_mapped = map->position(end_seq);
173 
174  // Check segment for helizes
175  found_helix = 0;
176  start_ok_flag = 0;
177  for (i = start_seq; i <= end_seq; i++) {
178  if (profile->is_in_helix(map->position(i), &start_hel, &end_hel)) {
179  found_helix++;
180  // Helix is inside the segment
181  if (start_hel >= start_mapped && end_hel <= end_mapped) {
182  if (start_ok_flag == 0) {
183  start_ok = start_hel;
184  start_ok_flag = 1;
185  }
186  end_ok = end_hel;
187  }
188  }
189  }
190 
191  // Found good helizes
192  if (start_ok_flag == 1) {
193  for (i = start_seq; map->position(i) < long(start_ok); i++) map->undefine(i);
194  for (i = end_seq; map->position(i) > long(end_ok); i--) map->undefine(i);
195  }
196  else {
197  // Found bad helizes
198  if (found_helix > 0) {
199  for (i = start_seq; i <= end_seq; i++)
200  map->undefine(i);
201  }
202  // Segment without helix
203  else {
204  if (end_seq - start_seq + 1 >= (unsigned long)((2 * context->interval_border) + context->interval_center)) {
205  for (i = start_seq; i < start_seq + context->interval_border; i++) map->undefine(i);
206  for (i = end_seq; i > end_seq - context->interval_border; i--) map->undefine(i);
207  }
208  else {
209  for (i = start_seq; i <= end_seq; i++) map->undefine(i);
210  }
211  }
212  }
213  }
214  }
215 }
216 
217 // -----------------------
218 // ALI_PREALIGNER
219 
220 inline float ALI_PREALIGNER::minimum2(float a, float b) {
221  return (a < b) ? a : b;
222 }
223 
224 inline float ALI_PREALIGNER::minimum3(float a, float b, float c) {
225  return (a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c);
226 }
227 
228 
229 inline void ALI_PREALIGNER::calculate_first_column_first_cell(ali_prealigner_cell * akt_cell) {
230  float v1, v2;
231 
232  v1 = profile->w_ins_multi_cheap(start_x, start_y) + profile->w_sub_multi_gap_cheap(start_y, start_y);
233  v2 = profile->w_sub(start_y, start_x);
234 
235  akt_cell->d = minimum2(v1, v2);
236 
237  if (akt_cell->d == v1) path_map->set(0, 0, ALI_UP | ALI_LEFT);
238  if (akt_cell->d == v2) path_map->set(0, 0, ALI_DIAG);
239 }
240 
241 inline void ALI_PREALIGNER::calculate_first_column_cell(ali_prealigner_cell * up_cell,
242  ali_prealigner_cell * akt_cell,
243  unsigned long pos_y)
244 {
245  float v1, v2, v3;
246  unsigned long positiony;
247 
248  positiony = start_y + pos_y;
249 
250  v1 = up_cell->d + profile->w_sub_gap(positiony);
251  v2 = profile->w_sub_multi_gap_cheap(start_y, positiony - 1) + profile->w_sub(positiony, start_x);
252  v3 = profile->w_sub_multi_gap_cheap(start_y, positiony) + profile->w_ins(start_x, positiony);
253 
254  akt_cell->d = minimum3(v1, v2, v3);
255 
256  if (v1 == akt_cell->d) path_map->set(0, pos_y, ALI_UP);
257  if (v2 == akt_cell->d) path_map->set(0, pos_y, ALI_DIAG);
258  if (v3 == akt_cell->d) path_map->set(0, pos_y, ALI_LEFT);
259 }
260 
261 void ALI_PREALIGNER::calculate_first_column(ali_prealigner_column * akt_column) {
262  unsigned long pos_y;
263 
264  calculate_first_column_first_cell(&(*akt_column->cells)[0]);
265 
266  for (pos_y = 1; pos_y < akt_column->column_length; pos_y++)
267  calculate_first_column_cell(&(*akt_column->cells)[pos_y - 1],
268  &(*akt_column->cells)[pos_y],
269  pos_y);
270 }
271 
272 
273 inline void ALI_PREALIGNER::calculate_first_cell(ali_prealigner_cell * left_cell,
274  ali_prealigner_cell * akt_cell,
275  unsigned long pos_x)
276 {
277  float v1, v2, v3;
278  unsigned long positionx;
279 
280  positionx = start_x + pos_x;
281 
282  v1 = profile->w_ins_multi_cheap(start_x, positionx) + profile->w_sub_gap(start_y);
283  v2 = profile->w_ins_multi_cheap(start_x, positionx - 1) + profile->w_sub(start_y, positionx);
284  v3 = left_cell->d + profile->w_ins(positionx, start_y);
285 
286  akt_cell->d = minimum3(v1, v2, v3);
287 
288  if (v1 == akt_cell->d) path_map->set(pos_x, 0, ALI_UP);
289  if (v2 == akt_cell->d) path_map->set(pos_x, 0, ALI_DIAG);
290  if (v3 == akt_cell->d) path_map->set(pos_x, 0, ALI_LEFT);
291 }
292 
293 inline void ALI_PREALIGNER::calculate_cell(ali_prealigner_cell * diag_cell, ali_prealigner_cell * left_cell,
294  ali_prealigner_cell * up_cell, ali_prealigner_cell * akt_cell,
295  unsigned long pos_x, unsigned long pos_y)
296 {
297  float v1, v2, v3;
298  unsigned long positionx, positiony;
299 
300  positionx = start_x + pos_x;
301  positiony = start_y + pos_y;
302 
303  v1 = up_cell->d + profile->w_sub_gap(positiony);
304  v2 = diag_cell->d + profile->w_sub(positiony, positionx);
305  v3 = left_cell->d + profile->w_ins(positionx, positiony);
306 
307  akt_cell->d = minimum3(v1, v2, v3);
308 
309  if (v1 == akt_cell->d) path_map->set(pos_x, pos_y, ALI_UP);
310  if (v2 == akt_cell->d) path_map->set(pos_x, pos_y, ALI_DIAG);
311  if (v3 == akt_cell->d) path_map->set(pos_x, pos_y, ALI_LEFT);
312 }
313 
314 void ALI_PREALIGNER::calculate_column(ali_prealigner_column * prev_col,
315  ali_prealigner_column * akt_col,
316  unsigned long pos_x)
317 {
318  unsigned long pos_y;
319 
320  calculate_first_cell(&(*prev_col->cells)[0], &(*akt_col->cells)[0], pos_x);
321 
322  for (pos_y = 1; pos_y < akt_col->column_length; pos_y++)
323  calculate_cell(&(*prev_col->cells)[pos_y - 1], &(*prev_col->cells)[pos_y],
324  &(*akt_col->cells)[pos_y - 1], &(*akt_col->cells)[pos_y],
325  pos_x, pos_y);
326 }
327 
328 inline void ALI_PREALIGNER::calculate_last_column_first_cell(ali_prealigner_cell * left_cell,
329  ali_prealigner_cell * akt_cell,
330  unsigned long pos_x)
331 {
332  float v1, v2, v3;
333  unsigned long positionx;
334 
335  positionx = start_x + pos_x;
336 
337  v1 = profile->w_ins_multi_cheap(start_x, positionx) + profile->w_sub_gap_cheap(start_y);
338  v2 = profile->w_ins_multi_cheap(start_x, positionx - 1) + profile->w_sub(start_y, positionx);
339  v3 = left_cell->d + profile->w_ins(positionx, start_y);
340 
341  akt_cell->d = minimum3(v1, v2, v3);
342 
343  if (v1 == akt_cell->d) path_map->set(pos_x, 0, ALI_UP);
344  if (v2 == akt_cell->d) path_map->set(pos_x, 0, ALI_DIAG);
345  if (v3 == akt_cell->d) path_map->set(pos_x, 0, ALI_LEFT);
346 }
347 
348 inline void ALI_PREALIGNER::calculate_last_column_cell(ali_prealigner_cell * diag_cell, ali_prealigner_cell * left_cell,
349  ali_prealigner_cell * up_cell, ali_prealigner_cell * akt_cell,
350  unsigned long pos_x, unsigned long pos_y)
351 {
352  float v1, v2, v3;
353  unsigned long positionx, positiony;
354 
355  positionx = start_x + pos_x;
356  positiony = start_y + pos_y;
357 
358  v1 = up_cell->d + profile->w_sub_gap_cheap(positiony);
359  v2 = diag_cell->d + profile->w_sub(positiony, positionx);
360  v3 = left_cell->d + profile->w_ins(positionx, positiony);
361 
362  akt_cell->d = minimum3(v1, v2, v3);
363 
364  if (v1 == akt_cell->d) path_map->set(pos_x, pos_y, ALI_UP);
365  if (v2 == akt_cell->d) path_map->set(pos_x, pos_y, ALI_DIAG);
366  if (v3 == akt_cell->d) path_map->set(pos_x, pos_y, ALI_LEFT);
367 }
368 
369 void ALI_PREALIGNER::calculate_last_column(ali_prealigner_column * prev_col,
370  ali_prealigner_column * akt_col,
371  unsigned long pos_x)
372 {
373  unsigned long pos_y;
374 
375  calculate_last_column_first_cell(&(*prev_col->cells)[0],
376  &(*akt_col->cells)[0], pos_x);
377 
378  for (pos_y = 1; pos_y < akt_col->column_length; pos_y++)
379  calculate_last_column_cell(&(*prev_col->cells)[pos_y - 1],
380  &(*prev_col->cells)[pos_y],
381  &(*akt_col->cells)[pos_y - 1], &(*akt_col->cells)[pos_y],
382  pos_x, pos_y);
383 }
384 
385 
386 void ALI_PREALIGNER::calculate_matrix() {
387  unsigned long pos_x;
388  ali_prealigner_column *akt_col, *prev_col, *h_col;
389 
390  akt_col = new ali_prealigner_column(end_y - start_y + 1);
391  prev_col = new ali_prealigner_column(end_y - start_y + 1);
392 
393  calculate_first_column(prev_col);
394 
395  for (pos_x = 1; pos_x < end_x - start_x + 1; pos_x++) {
396  if (pos_x == end_x - start_x || profile->is_internal_last(pos_x))
397  calculate_last_column(prev_col, akt_col, pos_x);
398  else
399  calculate_column(prev_col, akt_col, pos_x);
400  h_col = akt_col;
401  akt_col = prev_col;
402  prev_col = h_col;
403  }
404 
405  delete akt_col;
406  delete prev_col;
407 }
408 
409 void ALI_PREALIGNER::generate_solution(ALI_MAP * map) {
410  // Generate a sub_solution by deleting all undefined segments
411  ALI_MAP *seg_map;
412  unsigned long map_pos;
413  unsigned long start_seg, end_seg, pos_seg;
414 
415  sub_solution = new ALI_SUB_SOLUTION(profile);
416 
417  for (map_pos = map->first_base(); map_pos <= map->last_base(); map_pos++) {
418  // search for segment
419  for (start_seg = map_pos;
420  start_seg <= map->last_base() && map->is_undefined(start_seg);
421  start_seg++) ;
422 
423  if (start_seg <= map->last_base()) {
424  for (end_seg = start_seg;
425  end_seg <= map->last_base() && (!map->is_undefined(end_seg));
426  end_seg++) ;
427 
428  end_seg--;
429 
430  seg_map = new ALI_MAP(start_seg,
431  end_seg,
432  map->position(start_seg),
433  map->position(end_seg));
434  // Copy segment
435  for (pos_seg = start_seg; pos_seg <= end_seg; pos_seg++) {
436  if (map->is_inserted(pos_seg))
437  seg_map->set(pos_seg,
438  map->position(pos_seg) - map->position(start_seg), 1);
439  else
440  seg_map->set(pos_seg,
441  map->position(pos_seg) - map->position(start_seg), 0);
442  }
443 
444  if (sub_solution->insert(seg_map) != 1)
445  ali_fatal_error("Inconsistent solution?",
446  "ALI_PREALIGNER::generate_solution()");
447 
448  map_pos = end_seg;
449  }
450  }
451 }
452 
453 void ALI_PREALIGNER::generate_result_mask(ALI_TSTACK < unsigned char >*stack) {
454  // generate the result mask from an stack of operations
455  ALI_SEQUENCE *seq;
456  float cost_of_bindings;
457  ALI_MAP *map;
458  unsigned long seq_pos, dest_pos;
459  long i;
460 
461  map = new ALI_MAP(start_x, end_x, start_y, end_y);
462 
463  seq_pos = start_x;
464  dest_pos = 0;
465  for (i = (long) stack->akt_size() - 1; i >= 0; i--) {
466  switch (stack->get(i)) {
467  case ALI_PREALIGNER_INS:
468  map->set(seq_pos++, dest_pos, 1);
469  break;
470  case ALI_PREALIGNER_SUB:
471  map->set(seq_pos++, dest_pos++, 0);
472  break;
473  case ALI_PREALIGNER_DEL:
474  dest_pos++;
475  break;
477  map->set(seq_pos, dest_pos, 1);
478  map->undefine(seq_pos++);
479  break;
481  map->set(seq_pos, dest_pos++, 0);
482  map->undefine(seq_pos++);
483  break;
485  dest_pos++;
486  break;
487  default:
488  ali_fatal_error("Unexpected value",
489  "ALI_PREALIGNER::generate_result_mask()");
490  }
491  }
492 
493  if (result_mask_counter > 0)
494  result_mask_counter--;
495 
496  seq = map->sequence_without_inserts(profile->sequence());
497  cost_of_bindings = profile->w_binding(map->first_reference_base(), seq);
498  delete seq;
499 
500  // make the intersection
501  result_mask.insert(map, cost_of_bindings);
502 }
503 
504 void ALI_PREALIGNER::mapper_post(ALI_TSTACK < unsigned char >*stack, unsigned long ins_nu, unsigned long del_nu) {
505  // Fill the stack with rest DELs or INSs
506  if (ins_nu > 0 && del_nu > 0)
507  ali_fatal_error("Unexpected values",
508  "ALI_PREALIGNER::mapper_post()");
509 
510  if (ins_nu > 0) {
511  stack->push(ALI_PREALIGNER_INS, ins_nu);
512  generate_result_mask(stack);
513  stack->pop(ins_nu);
514  }
515  else {
516  if (del_nu > 0) {
517  stack->push(ALI_PREALIGNER_DEL, del_nu);
518  generate_result_mask(stack);
519  stack->pop(del_nu);
520  } else
521  generate_result_mask(stack);
522  }
523 }
524 
525 void ALI_PREALIGNER::mapper_post_multi(ALI_TSTACK < unsigned char >*stack, unsigned long ins_nu, unsigned long del_nu) {
526  // Fill the stack with rest DELs or INSs (with MULTI_FLAG)
527  if (ins_nu > 0 && del_nu > 0)
528  ali_fatal_error("Unexpected values",
529  "ALI_PREALIGNER::mapper_post_multi()");
530 
531  if (ins_nu > 0) {
533  generate_result_mask(stack);
534  stack->pop(ins_nu);
535  }
536  else {
537  if (del_nu > 0) {
539  generate_result_mask(stack);
540  stack->pop(del_nu);
541  } else
542  generate_result_mask(stack);
543  }
544 }
545 
546 void ALI_PREALIGNER::mapper_random(ALI_TSTACK < unsigned char >*stack, unsigned long pos_x, unsigned long pos_y) {
547  // generate a stack of operations by taking a random path of the pathmap
548  unsigned long next_x, next_y;
549  unsigned long random;
550  unsigned char value;
551  unsigned long stack_counter = 0;
552 
553 
554 
555  next_x = pos_x;
556  next_y = pos_y;
557  while (next_x <= pos_x && next_y <= pos_y) {
558  stack_counter++;
559 
560  random = GB_random(6);
561 
562  value = path_map->get_value(next_x, next_y);
563  if (value == 0)
564  ali_fatal_error("Unexpected value (1)",
565  "ALI_PREALIGNER::mapper_random()");
566 
567  switch (random) {
568  case 0:
569  if (value & ALI_UP) {
570  stack->push(ALI_PREALIGNER_DEL);
571  next_y--;
572  }
573  else {
574  if (value & ALI_DIAG) {
575  stack->push(ALI_PREALIGNER_SUB);
576  next_x--;
577  next_y--;
578  }
579  else {
580  stack->push(ALI_PREALIGNER_INS);
581  next_x--;
582  }
583  }
584  break;
585  case 1:
586  if (value & ALI_UP) {
587  stack->push(ALI_PREALIGNER_DEL);
588  next_y--;
589  }
590  else {
591  if (value & ALI_LEFT) {
592  stack->push(ALI_PREALIGNER_INS);
593  next_x--;
594  }
595  else {
596  stack->push(ALI_PREALIGNER_SUB);
597  next_x--;
598  next_y--;
599  }
600  }
601  break;
602  case 2:
603  if (value & ALI_DIAG) {
604  stack->push(ALI_PREALIGNER_SUB);
605  next_x--;
606  next_y--;
607  }
608  else {
609  if (value & ALI_UP) {
610  stack->push(ALI_PREALIGNER_DEL);
611  next_y--;
612  }
613  else {
614  stack->push(ALI_PREALIGNER_INS);
615  next_x--;
616  }
617  }
618  break;
619  case 3:
620  if (value & ALI_DIAG) {
621  stack->push(ALI_PREALIGNER_SUB);
622  next_x--;
623  next_y--;
624  }
625  else {
626  if (value & ALI_LEFT) {
627  stack->push(ALI_PREALIGNER_INS);
628  next_x--;
629  }
630  else {
631  stack->push(ALI_PREALIGNER_DEL);
632  next_y--;
633  }
634  }
635  break;
636  case 4:
637  if (value & ALI_LEFT) {
638  stack->push(ALI_PREALIGNER_INS);
639  next_x--;
640  }
641  else {
642  if (value & ALI_UP) {
643  stack->push(ALI_PREALIGNER_DEL);
644  next_y--;
645  }
646  else {
647  stack->push(ALI_PREALIGNER_SUB);
648  next_x--;
649  next_y--;
650  }
651  }
652  break;
653  case 5:
654  if (value & ALI_LEFT) {
655  stack->push(ALI_PREALIGNER_INS);
656  next_x--;
657  }
658  else {
659  if (value & ALI_DIAG) {
660  stack->push(ALI_PREALIGNER_SUB);
661  next_x--;
662  next_y--;
663  }
664  else {
665  stack->push(ALI_PREALIGNER_DEL);
666  next_y--;
667  }
668  }
669  break;
670  default:
671  ali_fatal_error("Unexpected random value",
672  "ALI_PREALIGNER::mapper_random()");
673  }
674  }
675 
676  if (next_x <= pos_x) {
677  mapper_post(stack, next_x + 1, 0);
678  }
679  else {
680  if (next_y <= pos_y) {
681  mapper_post(stack, 0, next_y + 1);
682  }
683  else {
684  mapper_post(stack, 0, 0);
685  }
686  }
687 
688  if (stack_counter > 0)
689  stack->pop(stack_counter);
690 }
691 
692 void ALI_PREALIGNER::mapper(ALI_TSTACK < unsigned char >*stack, unsigned long pos_x, unsigned long pos_y) {
693  // generate a stack of operations by taking every path
694  unsigned char value;
695  unsigned long stack_counter = 0;
696 
697  value = path_map->get_value(pos_x, pos_y);
698 
699  if (pos_x == 0 || pos_y == 0) {
700  if (value & ALI_UP) {
701  stack->push(ALI_PREALIGNER_DEL);
702  if (pos_y == 0)
703  mapper_post(stack, pos_x + 1, 0);
704  else
705  mapper(stack, pos_x, pos_y - 1);
706  stack->pop();
707  }
708  if (value & ALI_DIAG) {
709  stack->push(ALI_PREALIGNER_SUB);
710  if (pos_y > 0) {
711  mapper_post(stack, 0, pos_y);
712  }
713  else {
714  if (pos_x > 0) {
715  mapper_post(stack, pos_x, 0);
716  }
717  else {
718  mapper_post(stack, 0, 0);
719  }
720  }
721  stack->pop();
722  }
723  if (value & ALI_LEFT) {
724  stack->push(ALI_PREALIGNER_INS);
725  if (pos_x == 0)
726  mapper_post(stack, 0, pos_y + 1);
727  else
728  mapper(stack, pos_x - 1, pos_y);
729  stack->pop();
730  }
731  return;
732  }
733  // follow an unique path
734  while (value == ALI_UP || value == ALI_DIAG || value == ALI_LEFT) {
735  stack_counter++;
736  switch (value) {
737  case ALI_UP:
738  stack->push(ALI_PREALIGNER_DEL);
739  pos_y--;
740  break;
741  case ALI_DIAG:
742  stack->push(ALI_PREALIGNER_SUB);
743  pos_x--;
744  pos_y--;
745  break;
746  case ALI_LEFT:
747  stack->push(ALI_PREALIGNER_INS);
748  pos_x--;
749  break;
750  }
751 
752  value = path_map->get_value(pos_x, pos_y);
753 
754  if (pos_x == 0 || pos_y == 0) {
755  if (value & ALI_UP) {
756  stack->push(ALI_PREALIGNER_DEL);
757  if (pos_y == 0)
758  mapper_post(stack, pos_x + 1, 0);
759  else
760  mapper(stack, pos_x, pos_y - 1);
761  stack->pop();
762  }
763  if (value & ALI_DIAG) {
764  stack->push(ALI_PREALIGNER_SUB);
765  if (pos_y > 0) {
766  mapper_post(stack, 0, pos_y);
767  }
768  else {
769  if (pos_x > 0) {
770  mapper_post(stack, pos_x, 0);
771  }
772  else {
773  mapper_post(stack, 0, 0);
774  }
775  }
776  stack->pop();
777  }
778  if (value & ALI_LEFT) {
779  stack->push(ALI_PREALIGNER_INS);
780  if (pos_x == 0)
781  mapper_post(stack, 0, pos_y + 1);
782  else
783  mapper(stack, pos_x - 1, pos_y);
784  stack->pop();
785  }
786  if (stack_counter > 0)
787  stack->pop(stack_counter);
788 
789  return;
790  }
791  }
792 
793  if (value & ALI_UP) {
794  stack->push(ALI_PREALIGNER_DEL);
795  mapper(stack, pos_x, pos_y - 1);
796  stack->pop();
797  }
798  if (value & ALI_DIAG) {
799  stack->push(ALI_PREALIGNER_SUB);
800  mapper(stack, pos_x - 1, pos_y - 1);
801  stack->pop();
802  }
803  if (value & ALI_LEFT) {
804  stack->push(ALI_PREALIGNER_INS);
805  mapper(stack, pos_x - 1, pos_y);
806  stack->pop();
807  }
808  if (stack_counter > 0)
809  stack->pop(stack_counter);
810 }
811 
812 
813 void ALI_PREALIGNER::make_map() {
814  // make the result map from the path matrix
815  unsigned long number_of_sol;
817 
818  stack = new ALI_TSTACK < unsigned char >(end_x - start_x + end_y - start_y + 3);
819  ali_out_of_memory_if(!stack);
820 
821  number_of_sol = number_of_solutions();
822  printf("%lu solutions generated\n", number_of_sol);
823 
824  if (number_of_sol == 0 || number_of_sol > result_mask_counter) {
825  ali_message("Starting random mapping");
826  do {
827  mapper_random(stack, end_x - start_x, end_y - start_y);
828  } while (result_mask_counter > 0);
829  }
830  else {
831  ali_message("Starting systematic mapping");
832  mapper(stack, end_x - start_x, end_y - start_y);
833  }
834 
835  delete stack;
836 
837  ali_message("Mapping finished");
838 }
839 
840 
841 void ALI_PREALIGNER::generate_approximation(ALI_SUB_SOLUTION * work_sol) {
842  // generate an approximation of a complete solution
843  ALI_MAP *map;
844  ALI_SEQUENCE *seq;
845  char *ins_marker;
846  float binding_costs;
847 
848  map = work_sol->make_one_map();
849  if (!map)
850  ali_fatal_error("Can't make one map",
851  "ALI_PREALIGNER::generate_approximation()");
852 
853  seq = map->sequence_without_inserts(profile->sequence());
854  binding_costs = profile->w_binding(map->first_base(), seq);
855  delete seq;
856 
857  ins_marker = map->insert_marker();
858 
859  result_approx.insert(map, ins_marker, binding_costs);
860 }
861 
862 void ALI_PREALIGNER::mapper_approximation(unsigned long area_no, ALI_TARRAY < ALI_TLIST < ALI_MAP * >*>*map_lists, ALI_SUB_SOLUTION * work_sol) {
863  // combine subsolutions for an approximation
864  ALI_TLIST < ALI_MAP * >*map_list;
865  ALI_MAP *map;
866 
867  // stop mapping at last area
868  if (area_no > map_lists->size())
869  return;
870 
871  if (area_no == map_lists->size()) {
872  generate_approximation(work_sol);
873  return;
874  }
875  // map area number 'area_no'
876  map_list = map_lists->get(area_no);
877  if (map_list->is_empty())
878  ali_fatal_error("Found empty list",
879  "ALI_PREALIGNER::mapper_approximation()");
880 
881  // combine all possibilities
882  map = map_list->first();
883  do {
884  if (!work_sol->insert(map))
885  ali_fatal_error("Can't insert map",
886  "ALI_PREALIGNER::mapper_approximation()");
887 
888  mapper_approximation(area_no + 1, map_lists, work_sol);
889 
890  if (!work_sol->delete_map(map))
891  ali_fatal_error("Can't delete map",
892  "ALI_PREALIGNER::mapper_approximation()");
893 
894  if (map_list->has_next())
895  map = map_list->next();
896  else
897  map = NULp;
898  } while (map);
899 }
900 
901 void ALI_PREALIGNER::make_approximation(ALI_PREALIGNER_CONTEXT * context) {
902  // Make an approximation by aligning the undefined sections
903  ALI_SUB_SOLUTION *work_solution;
904  ALI_ALIGNER_CONTEXT aligner_context;
906  ALI_ALIGNER *aligner;
907  unsigned long area_number;
908  unsigned long start_seq, end_seq, start_ref, end_ref;
909 
910  ali_message("Align free areas");
911 
912  work_solution = new ALI_SUB_SOLUTION(sub_solution);
913 
914  aligner_context.max_number_of_maps = context->max_number_of_maps_aligner;
915 
916  area_number = sub_solution->number_of_free_areas();
917  printf("number of areas = %ld (Maximal %ld solutions)\n", area_number,
918  aligner_context.max_number_of_maps);
919 
920  map_lists = new ALI_TARRAY < ALI_TLIST < ALI_MAP * >*>(area_number);
921 
922  // generate Solutions for all free areas
923  area_number = 0;
924  while (sub_solution->free_area(&start_seq, &end_seq, &start_ref, &end_ref,
925  area_number)) {
926  printf("aligning area %ld (%ld,%ld) - (%ld,%ld)\n",
927  area_number, start_seq, end_seq, start_ref, end_ref);
928 
929  aligner = new ALI_ALIGNER(&aligner_context, profile,
930  start_seq, end_seq, start_ref, end_ref);
931  map_lists->set(area_number, aligner->solutions());
932 
933  printf("%d solutions generated\n",
934  map_lists->get(area_number)->cardinality());
935 
936  delete aligner;
937  area_number++;
938  }
939 
940  // combine and evaluate the solutions
941  mapper_approximation(0, map_lists, work_solution);
942 
943  delete work_solution;
944 
945  ali_message("Free areas aligned");
946 }
947 
948 
949 unsigned long ALI_PREALIGNER::number_of_solutions() {
950  // approximate the number of solutions in the pathmap
951 #define INFINIT 1000000
952 #define ADD(a, b) if (a>=INFINIT || b>=INFINIT) { a = INFINIT; } else { a += b; }
953 
954  unsigned long pos_x, pos_y, col_length;
955  unsigned long number;
956  unsigned char value;
957  unsigned long *column1, *column2, *elem_akt_col, *elem_left_col;
958 
959  col_length = end_y - start_y + 1;
960  column1 = (unsigned long *) CALLOC((unsigned int) col_length, sizeof(unsigned long));
961  column2 = (unsigned long *) CALLOC((unsigned int) col_length, sizeof(unsigned long));
962  ali_out_of_memory_if(!column1 || !column2);
963 
964  ali_message("Start: Checking number of solutions");
965 
966  if (end_x - (start_x & 0x01)) {
967  elem_akt_col = column1 + col_length - 1;
968  elem_left_col = column2 + col_length - 1;
969  }
970  else {
971  elem_akt_col = column2 + col_length - 1;
972  elem_left_col = column1 + col_length - 1;
973  }
974 
975  number = 0;
976  *elem_akt_col = 1;
977  for (pos_x = end_x - start_x; pos_x > 0;) {
978  *(elem_left_col) = 0;
979  for (pos_y = end_y - start_y; pos_y > 0; pos_y--) {
980  *(elem_left_col - 1) = 0;
981  value = path_map->get_value(pos_x, pos_y);
982  if (value & ALI_UP) {
983  ADD(*(elem_akt_col - 1), *elem_akt_col);
984  }
985  if (value & ALI_DIAG) {
986  ADD(*(elem_left_col - 1), *elem_akt_col);
987  }
988  if (value & ALI_LEFT) {
989  ADD(*(elem_left_col), *elem_akt_col);
990  }
991  elem_akt_col--;
992  elem_left_col--;
993  }
994  value = path_map->get_value(pos_x, 0);
995  if (value & ALI_UP) {
996  ADD(number, *elem_akt_col);
997  }
998  if (value & ALI_DIAG) {
999  ADD(number, *elem_akt_col);
1000  }
1001  if (value & ALI_LEFT) {
1002  ADD(*(elem_left_col), *elem_akt_col);
1003  }
1004  pos_x--;
1005  // toggle the columns
1006  if (pos_x & 0x01) {
1007  elem_akt_col = column1 + col_length - 1;
1008  elem_left_col = column2 + col_length - 1;
1009  }
1010  else {
1011  elem_akt_col = column2 + col_length - 1;
1012  elem_left_col = column1 + col_length - 1;
1013  }
1014  }
1015 
1016  for (pos_y = end_y - start_y; pos_y > 0; pos_y--) {
1017  value = path_map->get_value(0, pos_y);
1018  if (value & ALI_UP) {
1019  ADD(*(elem_akt_col - 1), *elem_akt_col);
1020  }
1021  if (value & ALI_DIAG) {
1022  ADD(number, *elem_akt_col);
1023  }
1024  if (value & ALI_LEFT) {
1025  ADD(number, *elem_akt_col);
1026  }
1027  elem_akt_col--;
1028  }
1029 
1030  ADD(number, *elem_akt_col);
1031 
1032  ali_message("End: Checking number of solutions");
1033 
1034  free(column1);
1035  free(column2);
1036 
1037  return number;
1038 }
1039 
1040 
1042  ALI_PROFILE * prof,
1043  unsigned long sx, unsigned long ex,
1044  unsigned long sy, unsigned long ey)
1045 {
1046  profile = prof;
1047 
1048  start_x = sx;
1049  end_x = ex;
1050  start_y = sy;
1051  end_y = ey;
1052 
1053  result_mask_counter = context->max_number_of_maps;
1054 
1055  ali_message("Prealigning");
1056 
1057  path_map = new ALI_PATHMAP(end_x - start_x + 1, end_y - start_y + 1);
1058 
1059  calculate_matrix();
1060 
1061  make_map();
1062 
1063  result_mask.delete_expensive(context, profile);
1064  delete path_map;
1065 
1066  generate_solution(result_mask.map);
1067 
1068  make_approximation(context);
1069 
1070  ali_message("Prealigning finished");
1071 }
float w_bind(unsigned long pos_1, unsigned char base_1, unsigned long pos_2, unsigned char base_2)
void insert(ALI_MAP *in_map, float costs)
#define ALI_PREALIGNER_INS
void ali_out_of_memory_if(bool cond)
Definition: ali_misc.hxx:52
long position(unsigned long base)
unsigned long last_new
ALI_PREALIGNER(ALI_PREALIGNER_CONTEXT *context, ALI_PROFILE *profile, unsigned long start_x, unsigned long end_x, unsigned long start_y, unsigned long end_y)
int has_next()
Definition: ali_tlist.hxx:168
#define ADD(a, b)
long
Definition: AW_awar.cxx:154
float w_sub_gap_cheap(unsigned long positiony)
float w_sub_multi_gap_cheap(unsigned long start, unsigned long end)
unsigned long column_length
float w_binding(unsigned long first_position_of_sequence, ALI_SEQUENCE *sequence)
int is_in_helix(unsigned long pos, unsigned long *first, unsigned long *last)
float w_ins(unsigned long, unsigned long)
float w_sub_maximum()
#define ALI_DIAG
Definition: ali_pathmap.hxx:20
char * insert_marker()
#define ALI_GAP_CODE
Definition: ali_misc.hxx:38
unsigned long first_base()
unsigned long first_reference_base()
FILE * seq
Definition: rns.c:46
void delete_expensive(ALI_PREALIGNER_CONTEXT *context, ALI_PROFILE *profile)
void set(unsigned long position, T value)
Definition: ali_tarray.hxx:49
int is_undefined(unsigned long base)
void set(unsigned long x, unsigned long y, unsigned char val, ALI_TARRAY< ali_pathmap_up_pointer > *up_pointer=NULp)
Definition: ali_pathmap.cxx:36
void push(T value, unsigned long count=1)
Definition: ali_tstack.hxx:36
void insert(ALI_MAP *in_map, char *in_insert_marker, float costs)
void * CALLOC(long i, long j)
Definition: ali_misc.hxx:56
int free_area(unsigned long *start, unsigned long *end, unsigned long *start_ref, unsigned long *end_ref, unsigned long area_number=0)
ALI_NORM_SEQUENCE * sequence()
int complement_position(unsigned long pos)
#define ALI_PREALIGNER_DEL
unsigned long akt_size()
Definition: ali_tstack.hxx:33
ALI_SEQUENCE * sequence_without_inserts(ALI_NORM_SEQUENCE *ref_seq)
#define ALI_LEFT
Definition: ali_pathmap.hxx:19
#define ALI_PREALIGNER_SUB
#define ALI_UP
Definition: ali_pathmap.hxx:21
ALI_MAP * make_one_map()
int is_empty()
Definition: ali_tlist.hxx:165
T get(unsigned long position)
Definition: ali_tarray.hxx:54
int is_internal_last(unsigned long pos)
int GB_random(int range)
Definition: admath.cxx:88
ALI_TLIST< ALI_MAP * > * solutions()
ALI_MAP * inverse_without_inserts()
void set(unsigned long base, unsigned long pos, int insert=-1)
unsigned long last_reference_base()
unsigned long last_joins
int insert(ALI_MAP *map)
void ali_fatal_error(const char *message, const char *func)
Definition: ali_main.cxx:85
unsigned long number_of_free_areas()
float w_sub(unsigned long positiony, unsigned long positionx)
#define ALI_PREALIGNER_MULTI_FLAG
void ali_message(const char *message)
Definition: ali_misc.hxx:46
float w_ins_multi_cheap(unsigned long startx, unsigned long endx)
unsigned char get_value(unsigned long x, unsigned long y)
Definition: ali_pathmap.hxx:44
#define NULp
Definition: cxxforward.h:97
int is_inserted(unsigned long base)
T pop(unsigned long count=1)
Definition: ali_tstack.hxx:42
int delete_map(ALI_MAP *map)
unsigned long last_base()
ali_prealigner_cell ** cells
T get(unsigned long position)
Definition: ali_tstack.hxx:55
void undefine(unsigned long base)
float w_sub_gap(unsigned long positiony)