ARB
ali_aligner.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ali_aligner.hxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef ALI_ALIGNER_HXX
12 #define ALI_ALIGNER_HXX
13 
14 #ifndef ALI_SOLUTION_HXX
15 #include "ali_solution.hxx"
16 #endif
17 #ifndef ALI_TSTACK_HXX
18 #include "ali_tstack.hxx"
19 #endif
20 #ifndef ALI_PATHMAP_HXX
21 #include "ali_pathmap.hxx"
22 #endif
23 
24 #define ALI_ALIGNER_INS 1
25 #define ALI_ALIGNER_SUB 2
26 #define ALI_ALIGNER_DEL 3
27 #define ALI_ALIGNER_MULTI_FLAG 4
28 
31 };
32 
33 // Structure of a cell of the distance matrix
35  float d1, d2, d3;
37 
39  d1 = d2 = d3 = 0.0;
40  starts = NULp;
41  }
42 
43  void print() {
44  printf("%4.2f %4.2f %4.2f %8p", d1, d2, d3, starts);
45  }
46 };
47 
48 // Structure of a column of the distance matrix
49 struct ali_aligner_column : virtual Noncopyable {
50  unsigned long column_length;
52 
53  ali_aligner_column(unsigned long length) {
54  column_length = length;
55  cells = (ali_aligner_cell**) calloc((unsigned int) column_length, sizeof(ali_aligner_cell));
56  ali_out_of_memory_if(!cells);
57  }
59  if (cells)
60  free((char *) cells);
61  }
62 
63  void print() {
64  unsigned int i;
65  for (i = 0; i < column_length; i++) {
66  printf("%2d: ", i);
67  (*cells)[i].print();
68  printf("\n");
69  }
70  }
71 };
72 
73 // Structure of a LONG deletion (multi gap) in the distance matrix
75  unsigned long start;
76  float costs;
77  unsigned char operation;
78 
79  ali_aligner_dellist_elem(unsigned long s = 0, float c = 0.0,
80  unsigned char op = 0) {
81  start = s;
82  costs = c;
83  operation = op;
84  }
85 
86  void print() {
87  printf("(%3ld %4.2f %2d)", start, costs, operation);
88  }
89 };
90 
91 // Structure of the list of LONG deletions in the distance matrix
92 struct ali_aligner_dellist : virtual Noncopyable {
95 
97  profile = p;
98  }
101  if (!list_of_dels.is_empty()) {
102  elem = list_of_dels.first();
103  while (list_of_dels.has_next()) {
104  delete elem;
105  elem = list_of_dels.next();
106  }
107  delete elem;
108  }
109  }
110 
111  void print() {
112  printf("DEL_LIST: ");
113  if (!list_of_dels.is_empty()) {
114  list_of_dels.first()->print();
115  while (list_of_dels.has_next()) {
116  printf(", ");
117  list_of_dels.next()->print();
118  }
119  }
120  printf("\n");
121  }
122  void print_cont(unsigned long position) {
124  if (!list_of_dels.is_empty()) {
125  elem = list_of_dels.first();
126  while (list_of_dels.has_next()) {
127  elem->print();
128  printf("\t %f\n", profile->gap_percent(elem->start, position));
129  elem = list_of_dels.next();
130  }
131  elem->print();
132  printf("\t %f\n", profile->gap_percent(elem->start, position));
133  }
134  }
135  unsigned long length() {
136  return list_of_dels.cardinality();
137  }
138  void make_empty() {
140  if (!list_of_dels.is_empty()) {
141  elem = list_of_dels.first();
142  while (list_of_dels.has_next()) {
143  delete elem;
144  elem = list_of_dels.next();
145  }
146  delete elem;
147  }
148  list_of_dels.make_empty();
149  }
150  void insert(unsigned long start, float costs, unsigned char operation) {
151  ali_aligner_dellist_elem *new_elem;
152 
153  new_elem = new ali_aligner_dellist_elem(start, costs, operation);
154  list_of_dels.append_end(new_elem);
155  }
156  float update(unsigned long position);
158  unsigned long y_offset);
159  void optimize(unsigned long position);
160 };
161 
162 
163 // Structure of the virtual cell at the left buttom
166  float d1, d2, d3;
169 
171  profile = prof;
172  d1 = d2 = d3 = -1.0;
173  }
174 
175  void insert_left(unsigned long start, unsigned char operation, float costs) {
177  if (costs < d3 || d3 == -1.0) {
178  left_starts.make_empty();
179  d3 = costs;
180  }
181  if (costs == d3) {
182  left.start = start;
183  left.operation = operation;
184  left_starts.append_end(left);
185  }
186  }
187  void insert_up(unsigned long start, unsigned char operation, float costs) {
189  if (costs < d1 || d1 == -1.0) {
190  up_starts.make_empty();
191  d1 = costs;
192  }
193  if (costs == d1) {
194  up.start = start;
195  up.operation = operation;
196  up_starts.append_end(up);
197  }
198  }
199  void update_border(unsigned long start_x, unsigned long end_x,
200  unsigned long start_y, unsigned long end_y)
201  {
202  float cost;
203 
204  cost = profile->w_del_multi(start_y, end_y) +
205  profile->w_ins_multi(start_x, end_x);
206 
207  insert_left(0, ALI_UP, cost);
208  insert_up(0, ALI_LEFT, cost);
209  }
210  void update_left(ali_aligner_cell *akt_cell, unsigned long akt_pos,
211  unsigned long start_x, unsigned long end_x) {
212  float min;
213  unsigned char operation = 0;
214 
215  min = (akt_cell->d1 < akt_cell->d2) ? akt_cell->d1 : akt_cell->d2;
216  if (akt_cell->d1 == min)
217  operation |= ALI_UP;
218  if (akt_cell->d2 == min)
219  operation |= ALI_DIAG;
220 
221  insert_left(akt_pos, operation,
222  min + profile->w_ins_multi_cheap(start_x + akt_pos, end_x));
223  }
224  void update_up(ali_aligner_cell *akt_cell, unsigned long akt_pos,
225  unsigned long start_y, unsigned long end_y) {
226  float min;
227  unsigned char operation = 0;
228 
229  min = (akt_cell->d3 < akt_cell->d2) ? akt_cell->d3 : akt_cell->d2;
230  if (akt_cell->d3 == min)
231  operation |= ALI_LEFT;
232  if (akt_cell->d2 == min)
233  operation |= ALI_DIAG;
234 
235  insert_up(akt_pos, operation,
236  min + profile->w_del_multi_cheap(start_y + akt_pos, end_y));
237  }
239  unsigned long start_y, unsigned long end_y) {
240  unsigned long cell;
241 
242  // Value for start := start + 1 (because of -1)
243 
244  for (cell = 0; cell < akt_col->column_length - 1; cell++)
245  update_up(&(*akt_col->cells)[cell], cell + 1, start_y, end_y);
246 
247  d2 = (*akt_col->cells)[akt_col->column_length - 1].d2;
248  }
249 
250  void print() {
252  printf("d1 = %f, d2 = %f, d3 = %f\n", d1, d2, d3);
253  printf("left starts = ");
254  if (!left_starts.is_empty()) {
255  elem = left_starts.first();
256  printf("<(%ld %d)", (long)elem.start, elem.operation);
257  while (left_starts.has_next()) {
258  elem = left_starts.next();
259  printf(", (%ld %d)", (long)elem.start, elem.operation);
260  }
261  printf(">\n");
262  }
263  else {
264  printf("empty\n");
265  }
266  printf("up starts = ");
267  if (!up_starts.is_empty()) {
268  elem = up_starts.first();
269  printf("<(%ld %d)", elem.start, elem.operation);
270  while (up_starts.has_next()) {
271  elem = up_starts.next();
272  printf(", (%ld %d)", (long)elem.start, elem.operation);
273  }
274  printf(">\n");
275  }
276  else {
277  printf("empty\n");
278  }
279  }
280 };
281 
282 
283 // Structure for collecting all possible solution
284 struct ali_aligner_result : virtual Noncopyable {
286 
288  map_list = new ALI_TLIST<ALI_MAP *>;
289  }
291  if (map_list) {
292  clear();
293  delete map_list;
294  }
295  }
296 
297  void insert(ALI_MAP *in_map) {
298  map_list->append_end(in_map);
299  }
300  void clear() {
301  if (!map_list->is_empty()) {
302  delete map_list->first();
303  while (map_list->has_next())
304  delete map_list->next();
305  }
306  }
307 };
308 
309 
310 // Class of the extended aligner
311 class ALI_ALIGNER : virtual Noncopyable {
312  ALI_PROFILE *profile;
313  ALI_PATHMAP *path_map[3];
314  ali_aligner_last_cell *last_cell;
315 
316  ali_aligner_result result;
317  unsigned long result_counter;
318  unsigned long start_x, end_x, start_y, end_y;
319 
320  float minimum2(float a, float b);
321  float minimum3(float a, float b, float c);
322 
323  void calculate_first_column_first_cell(
324  ali_aligner_cell *akt_cell, ali_aligner_dellist *del_list);
325  void calculate_first_column_second_cell(
326  ali_aligner_cell *up_cell, ali_aligner_cell *akt_cell,
327  ali_aligner_dellist *del_list);
328  void calculate_first_column_cell(
329  ali_aligner_cell *up_cell, ali_aligner_cell *akt_cell,
330  unsigned long pos_y, ali_aligner_dellist *del_list);
331  void calculate_first_column(ali_aligner_column *akt_col,
332  ali_aligner_dellist *del_list);
333 
334  void calculate_second_column_first_cell(
335  ali_aligner_cell *left_cell, ali_aligner_cell *akt_cell,
336  ali_aligner_dellist *del_list);
337  void calculate_second_column_second_cell(
338  ali_aligner_cell *diag_cell, ali_aligner_cell *left_cell,
339  ali_aligner_cell *up_cell, ali_aligner_cell *akt_cell,
340  ali_aligner_dellist *del_list);
341  void calculate_second_column_cell(
342  ali_aligner_cell *diag_cell, ali_aligner_cell *left_cell,
343  ali_aligner_cell *up_cell, ali_aligner_cell *akt_cell,
344  unsigned long pos_y, ali_aligner_dellist *del_list);
345  void calculate_second_column(ali_aligner_column *prev_col,
346  ali_aligner_column *akt_col,
347  ali_aligner_dellist *del_list);
348 
349  void calculate_first_cell(
350  ali_aligner_cell *left_cell, ali_aligner_cell *akt_cell,
351  unsigned long pos_x, ali_aligner_dellist *del_list);
352  void calculate_second_cell(
353  ali_aligner_cell *diag_cell, ali_aligner_cell *left_cell,
354  ali_aligner_cell *up_cell, ali_aligner_cell *akt_cell,
355  unsigned long pos_x, ali_aligner_dellist *del_list);
356  void calculate_cell(ali_aligner_cell *diag_cell, ali_aligner_cell *left_cell,
357  ali_aligner_cell *up_cell, ali_aligner_cell *akt_cell,
358  unsigned long pos_x, unsigned long pos_y,
359  ali_aligner_dellist *del_list);
360  void calculate_column(ali_aligner_column *prev_col,
361  ali_aligner_column *akt_col,
362  unsigned long pos_x,
363  ali_aligner_dellist *del_list);
364 
365  void calculate_matrix();
366 
367  void generate_result(ALI_TSTACK<unsigned char> *stack);
368  void mapper_pre(ALI_TSTACK<unsigned char> *stack,
369  unsigned long pos_x, unsigned long pos_y,
370  unsigned char operation,
371  int random_mapping_flag = 0);
372  void mapper_post(ALI_TSTACK<unsigned char> *stack,
373  unsigned long pos_x, unsigned long pos_y);
374  void mapper_pre_random_up(
377  void mapper_pre_random_left(
380  void mapper_random(ALI_TSTACK<unsigned char> *stack, int plane,
381  unsigned long pos_x, unsigned long pos_y);
382  void mapper(ALI_TSTACK<unsigned char> *stack, int plane,
383  unsigned long pos_x, unsigned long pos_y);
384  void make_map_random(ALI_TSTACK<unsigned char> *stack);
385  void make_map_systematic(ALI_TSTACK<unsigned char> *stack);
386  void make_map();
387 
388  unsigned long number_of_solutions();
389 
390 public:
391  ALI_ALIGNER(ALI_ALIGNER_CONTEXT *context, ALI_PROFILE *profile,
392  unsigned long start_x, unsigned long end_x,
393  unsigned long start_y, unsigned long end_y);
395  ALI_TLIST<ALI_MAP *> *ret = result.map_list;
396  result.map_list = NULp;
397  return ret;
398  }
399 };
400 
401 
402 #else
403 #error ali_aligner.hxx included twice
404 #endif // ALI_ALIGNER_HXX
ALI_TLIST< ali_aligner_dellist_elem * > list_of_dels
Definition: ali_aligner.hxx:94
float w_ins_multi(unsigned long startx, unsigned long endx)
void ali_out_of_memory_if(bool cond)
Definition: ali_misc.hxx:52
ALI_TLIST< ali_pathmap_up_pointer > up_starts
float update(unsigned long position)
Definition: ali_aligner.cxx:16
int has_next()
Definition: ali_tlist.hxx:168
void optimize(unsigned long position)
Definition: ali_aligner.cxx:79
void print_cont(unsigned long position)
void make_empty()
Definition: ali_tlist.hxx:116
ALI_TARRAY< ali_pathmap_up_pointer > * starts
Definition: ali_aligner.hxx:36
ali_aligner_cell ** cells
Definition: ali_aligner.hxx:51
#define ALI_DIAG
Definition: ali_pathmap.hxx:20
int cardinality()
Definition: ali_tlist.hxx:162
float w_del_multi_cheap(unsigned long start, unsigned long end)
ALI_TLIST< ALI_MAP * > * map_list
static HelixNrInfo * start
ALI_PROFILE * profile
Definition: ali_aligner.hxx:93
unsigned char operation
Definition: ali_pathmap.hxx:27
ALI_TARRAY< ali_pathmap_up_pointer > * starts(float costs, unsigned long y_offset)
Definition: ali_aligner.cxx:37
void insert_up(unsigned long start, unsigned char operation, float costs)
#define ALI_LEFT
Definition: ali_pathmap.hxx:19
#define ALI_UP
Definition: ali_pathmap.hxx:21
void insert_left(unsigned long start, unsigned char operation, float costs)
int is_empty()
Definition: ali_tlist.hxx:165
void update_border(unsigned long start_x, unsigned long end_x, unsigned long start_y, unsigned long end_y)
void insert(unsigned long start, float costs, unsigned char operation)
GB_write_int const char GB_write_autoconv_string WRITE_SKELETON(write_pointer, GBDATA *,"%p", GB_write_pointer) char *AW_awa if)(!gb_var) return strdup("")
Definition: AW_awar.cxx:163
ALI_TLIST< ALI_MAP * > * solutions()
void update_left(ali_aligner_cell *akt_cell, unsigned long akt_pos, unsigned long start_x, unsigned long end_x)
ali_aligner_last_cell(ALI_PROFILE *prof)
ALI_ALIGNER(ALI_ALIGNER_CONTEXT *context, ALI_PROFILE *profile, unsigned long start_x, unsigned long end_x, unsigned long start_y, unsigned long end_y)
float gap_percent(unsigned long begin, unsigned long end)
float w_ins_multi_cheap(unsigned long startx, unsigned long endx)
ali_aligner_dellist(ALI_PROFILE *p)
Definition: ali_aligner.hxx:96
ali_aligner_column(unsigned long length)
Definition: ali_aligner.hxx:53
ALI_PROFILE * profile
ali_aligner_dellist_elem(unsigned long s=0, float c=0.0, unsigned char op=0)
Definition: ali_aligner.hxx:79
#define NULp
Definition: cxxforward.h:116
unsigned long length()
void insert(ALI_MAP *in_map)
void update_up(ali_aligner_cell *akt_cell, unsigned long akt_pos, unsigned long start_y, unsigned long end_y)
ALI_TLIST< ali_pathmap_up_pointer > left_starts
void append_end(T &a)
Definition: ali_tlist.hxx:198
size_t length
#define min(a, b)
Definition: f2c.h:153
unsigned long column_length
Definition: ali_aligner.hxx:50
void update_up(ali_aligner_column *akt_col, unsigned long start_y, unsigned long end_y)
float w_del_multi(unsigned long start, unsigned long end)
GB_write_int const char s
Definition: AW_awar.cxx:154