ARB
ClustalV.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ClustalV.cxx //
4 // Purpose : //
5 // //
6 // Coded based on ClustalV by Ralf Westram (coder@reallysoft.de) //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 #include "ClustalV.hxx"
13 #include "seq_search.hxx"
14 
15 #include <cctype>
16 #include <climits>
17 
18 
19 // ----------------------------------------------------------------
20 
21 #define MASTER_GAP_OPEN 50
22 #define CHEAP_GAP_OPEN 20 // penalty for creating a gap (if we have gaps in master)
23 
24 #define DEFAULT_GAP_OPEN 30 // penalty for creating a gap
25 
26 #define MASTER_GAP_EXTEND 18
27 #define CHEAP_GAP_EXTEND 5 // penalty for extending a gap (if we have gaps in master)
28 
29 #define DEFAULT_GAP_EXTEND 10 // penalty for extending a gap
30 
31 #define DEFAULT_IMPROBABLY_MUTATION 10 // penalty for mutations
32 #define DEFAULT_PROBABLY_MUTATION 4 // penalty for special mutations (A<->G,C<->U/T) (only if 'weighted')
33 
34 #define DYNAMIC_PENALTIES // otherwise you get FIXED PENALTIES (=no cheap penalties)
35 
36 // ----------------------------------------------------------------
37 
38 #define MAX_GAP_OPEN_DISCOUNT (DEFAULT_GAP_OPEN-CHEAP_GAP_OPEN) // maximum subtracted from DEFAULT_GAP_OPEN
39 #define MAX_GAP_EXTEND_DISCOUNT (DEFAULT_GAP_EXTEND-CHEAP_GAP_EXTEND) // maximum subtracted from DEFAULT_GAP_EXTEND
40 
41 #define MAXN 2 // Maximum number of sequences (both groups)
42 
43 static bool module_initialized = false;
44 
45 typedef int Boolean;
46 
49 
50 #define MAX_BASETYPES 21
51 
52 static int xover;
53 static int little_pam;
54 static int big_pam;
55 static int pamo[(MAX_BASETYPES-1)*MAX_BASETYPES/2];
57 
58 static int pos1;
59 static int pos2;
60 static int **naa1; // naa1[basetype][position] counts bases for each position of all sequences in group1
61 static int **naa2; // naa2[basetype][position] same for group2
62 static int **naas; //
63 static int seqlen_array[MAXN+1]; // length of all sequences
64 static unsigned char *seq_array[MAXN+1]; // the sequences
65 static int group[MAXN+1]; // group of sequence
66 static int alist[MAXN+1]; // indices of sequences to be aligned
67 static int fst_list[MAXN+1];
68 static int snd_list[MAXN+1];
69 
70 static int nseqs; // # of sequences
71 static int weights[MAX_BASETYPES][MAX_BASETYPES]; // weights[b1][b2] : penalty for mutation from base 'b1' to base 'b2'
72 
73 #if defined(ASSERTION_USED)
74 static size_t displ_size = 0;
75 #endif // ASSERTION_USED
76 
77 static int *displ; // displ == 0 -> base in both , displ<0 -> displ gaps in slave, displ>0 -> displ gaps in master
78 static int *zza; // column (left->right) of align matrix (minimum of all paths to this matrix element)
79 static int *zzb; // -------------- " ------------------- (minimum of all paths, where gap inserted into slave)
80 static int *zzc; // column (left<-right) of align matrix (minimum of all paths to this matrix element)
81 static int *zzd; // -------------- " ------------------- (minimum of all paths, where gap inserted into slave)
82 static int print_ptr;
83 static int last_print;
84 
85 static const int *gaps_before_position;
86 
87 #if defined(DEBUG)
88 // #define MATRIX_DUMP
89 // #define DISPLAY_DIFF
90 #endif // DEBUG
91 
92 #ifdef MATRIX_DUMP
93 # define IF_MATRIX_DUMP(xxx) xxx
94 # define DISPLAY_MATRIX_SIZE 3000
95 
96 static int vertical [DISPLAY_MATRIX_SIZE+2][DISPLAY_MATRIX_SIZE+2];
97 static int verticalOpen [DISPLAY_MATRIX_SIZE+2][DISPLAY_MATRIX_SIZE+2];
98 static int diagonal [DISPLAY_MATRIX_SIZE+2][DISPLAY_MATRIX_SIZE+2];
99 static int horizontal [DISPLAY_MATRIX_SIZE+2][DISPLAY_MATRIX_SIZE+2];
100 static int horizontalOpen [DISPLAY_MATRIX_SIZE+2][DISPLAY_MATRIX_SIZE+2];
101 
102 #else
103 # define IF_MATRIX_DUMP(xxx)
104 #endif
105 
106 inline int master_gap_open(int beforePosition) {
107 #ifdef DYNAMIC_PENALTIES
108  long gaps = gaps_before_position[beforePosition-1];
110 
111  /* return
112  gaps >= MAX_GAP_OPEN_DISCOUNT
113  ? DEFAULT_GAP_OPEN-MAX_GAP_OPEN_DISCOUNT
114  : DEFAULT_GAP_OPEN-gaps; */
115 #else
116  return DEFAULT_GAP_OPEN;
117 #endif
118 }
119 inline int master_gap_extend(int beforePosition) {
120 #ifdef DYNAMIC_PENALTIES
121  long gaps = gaps_before_position[beforePosition-1];
123  /* return
124  gaps >= MAX_GAP_EXTEND_DISCOUNT
125  ? DEFAULT_GAP_EXTEND-MAX_GAP_EXTEND_DISCOUNT
126  : DEFAULT_GAP_EXTEND-gaps; */
127 #else
128  return DEFAULT_GAP_EXTEND;
129 #endif
130 }
131 
132 inline int master_gapAtWithOpenPenalty(int atPosition, int length, int penalty) {
133  if (length<=0) return 0;
134 
135  int beforePosition = atPosition;
136  int afterPosition = atPosition-1;
137 
138  while (length--) {
139  int p1 = master_gap_extend(beforePosition);
140  int p2 = master_gap_extend(afterPosition+1);
141 
142  if (p1<p2 && beforePosition>1) {
143  penalty += p1;
144  beforePosition--;
145  }
146  else {
147  penalty += p2;
148  afterPosition++;
149  }
150  }
151 
152  return penalty;
153 }
154 
155 inline int master_gapAt(int atPosition, int length) {
156  return master_gapAtWithOpenPenalty(atPosition, length, master_gap_open(atPosition));
157 }
158 
159 inline int slave_gap_open(int /* beforePosition */) {
160  return DEFAULT_GAP_OPEN;
161 }
162 
163 inline int slave_gap_extend(int /* beforePosition */) {
164  return DEFAULT_GAP_EXTEND;
165 }
166 
167 inline int slave_gapAtWithOpenPenalty(int atPosition, int length, int penalty) {
168  return length<=0 ? 0 : penalty + length*slave_gap_extend(atPosition);
169 }
170 
171 inline int slave_gapAt(int atPosition, int length) {
172  return slave_gapAtWithOpenPenalty(atPosition, length, slave_gap_open(atPosition));
173 }
174 
175 #define UNKNOWN_ACID 255
176 static const char *amino_acid_order = "XCSTPAGNDEQHRKMILVFYW";
177 
178 #define NUCLEIDS 16
179 static const char *nucleic_acid_order = "-ACGTUMRWSYKVHDBN";
180 static const char *nucleic_maybe_A = "-A----AAA---AAA-A";
181 static const char *nucleic_maybe_C = "--C---C--CC-CC-CC";
182 static const char *nucleic_maybe_G = "---G---G-G-GG-GGG";
183 static const char *nucleic_maybe_T = "----T---T-TT-TTTT";
184 static const char *nucleic_maybe_U = "-----U--U-UU-UUUU";
185 static const char *nucleic_maybe[6] = { NULp, nucleic_maybe_A, nucleic_maybe_C, nucleic_maybe_G, nucleic_maybe_T, nucleic_maybe_U };
186 
187 /*
188  * M = A or C S = G or C V = A or G or C N = A or C or G or T
189  * R = A or G Y = C or T H = A or C or T
190  * W = A or T K = G or T D = A or G or T
191  */
192 
193 #define cheap_if(cond) ((cond) ? 1 : 2)
194 static int baseCmp(unsigned char c1, unsigned char c2) {
195  // c1,c2 == 1=A,2=C (==index of character in nucleic_acid_order[])
196  // returns 0 for equal
197  // 1 for probably mutations
198  // 2 for improbably mutations
199 #define COMPARABLE_BASES 5
200 
201  if (c1==c2) return 0;
202 
203  if (c2<c1) { // swap
204  int c3 = c1;
205 
206  c1 = c2;
207  c2 = c3;
208  }
209 
210  if (c2<=COMPARABLE_BASES) {
212 
213  switch (c1) {
214  case 0: return 2;
215  case 1: return cheap_if(c2==3); // A->G
216  case 2: return cheap_if(c2==4 || c2==5); // C->T/U
217  case 3: return cheap_if(c2==1); // G->A
218  case 4: if (c2==5) return 0; // T->U
219  FALLTHROUGH;
220  case 5: if (c2==4) return 0; // U->T
221  return cheap_if(c2==2); // T/U->C
222  default: fa_assert(0);
223  }
224  }
225 
226  int bestMatch = 3;
227  if (c1<=COMPARABLE_BASES) {
228  for (int i=1; i<=COMPARABLE_BASES; i++) {
229  if (isalpha(nucleic_maybe[i][c2])) { // 'c2' maybe a 'i'
230  int match = baseCmp(c1, i);
231  if (match<bestMatch) bestMatch = match;
232  }
233  }
234  }
235  else {
236  for (int i=1; i<=COMPARABLE_BASES; i++) {
237  if (isalpha(nucleic_maybe[i][c1])) { // 'c1' maybe a 'i'
238  int match = baseCmp(i, c2);
239  if (match<bestMatch) bestMatch = match;
240  }
241  }
242  }
243 
244  fa_assert(bestMatch>=0 && bestMatch<=2);
245  return bestMatch;
246 }
247 #undef cheap_if
248 
249 int baseMatch(char c1, char c2) {
250  // c1,c2 == ACGUTRS...
251  // returns 0 for equal
252  // 1 for probably mutations
253  // 2 for improbably mutations
254  // -1 if one char is illegal
255 
256  const char *p1 = strchr(nucleic_acid_order, c1);
257  const char *p2 = strchr(nucleic_acid_order, c2);
258 
259  fa_assert(c1);
260  fa_assert(c2);
261 
262  if (p1 && p2) return baseCmp(p1-nucleic_acid_order, p2-nucleic_acid_order);
263  return -1;
264 }
265 
266 static int getPenalty(char c1, char c2) {
267  // c1,c2 = A=0,C=1,... s.o.
268  switch (baseCmp(c1, c2)) {
269  case 1: return DEFAULT_PROBABLY_MUTATION;
270  case 2: return DEFAULT_IMPROBABLY_MUTATION;
271  default: break;
272  }
273 
274  return 0;
275 }
276 
277 static char *result[3]; // result buffers
278 
279 static char pam250mt[] = {
280  12,
281  0, 2,
282  -2, 1, 3,
283  -3, 1, 0, 6,
284  -2, 1, 1, 1, 2,
285  -3, 1, 0, -1, 1, 5,
286  -4, 1, 0, -1, 0, 0, 2,
287  -5, 0, 0, -1, 0, 1, 2, 4,
288  -5, 0, 0, -1, 0, 0, 1, 3, 4,
289  -5, -1, -1, 0, 0, -1, 1, 2, 2, 4,
290  -3, -1, -1, 0, -1, -2, 2, 1, 1, 3, 6,
291  -4, 0, -1, 0, -2, -3, 0, -1, -1, 1, 2, 6,
292  -5, 0, 0, -1, -1, -2, 1, 0, 0, 1, 0, 3, 5,
293  -5, -2, -1, -2, -1, -3, -2, -3, -2, -1, -2, 0, 0, 6,
294  -2, -1, 0, -2, -1, -3, -2, -2, -2, -2, -2, -2, -2, 2, 5,
295  -6, -3, -2, -3, -2, -4, -3, -4, -3, -2, -2, -3, -3, 4, 2, 6,
296  -2, -1, 0, -1, 0, -1, -2, -2, -2, -2, -2, -2, -2, 2, 4, 2, 4,
297  -4, -3, -3, -5, -4, -5, -4, -6, -5, -5, -2, -4, -5, 0, 1, 2, -1, 9,
298  0, -3, -3, -5, -3, -5, -2, -4, -4, -4, 0, -4, -4, -2, -1, -1, -2, 7, 10,
299  -8, -2, -5, -6, -6, -7, -4, -7, -7, -5, -3, 2, -3, -4, -5, -2, -6, 0, 0, 17
300 };
301 
302 static char *matptr = pam250mt;
303 
304 #if (defined(DISPLAY_DIFF) || defined(MATRIX_DUMP))
305 static void p_decode(const unsigned char *naseq, unsigned char *seq, int l) {
306  int len = strlen(amino_acid_order);
307 
308  for (int i=1; i<=l && naseq[i]; i++) {
309  fa_assert(naseq[i]<len);
310  seq[i] = amino_acid_order[naseq[i]];
311  }
312 }
313 
314 static void n_decode(const unsigned char *naseq, unsigned char *seq, int l) {
315  int len = strlen(nucleic_acid_order);
316 
317  for (int i=1; i<=l && naseq[i]; i++) {
318  fa_assert(naseq[i]<len);
319  seq[i] = nucleic_acid_order[naseq[i]];
320  }
321 }
322 #endif
323 
325  return "ClustalV-aligner: MAXLEN is dimensioned to small for this sequence";
326 }
327 
328 inline void *ckalloc(size_t bytes, ARB_ERROR& error) {
329  if (error) return NULp;
330 
331  void *ret = malloc(bytes);
332 
333  if (!ret) error = "out of memory";
334  return ret;
335 }
336 
337 static ARB_ERROR init_myers(long max_seq_length) {
339 
340  naa1 = (int **)ckalloc(MAX_BASETYPES * sizeof (int *), error);
341  naa2 = (int **)ckalloc(MAX_BASETYPES * sizeof (int *), error);
342  naas = (int **)ckalloc(MAX_BASETYPES * sizeof (int *), error);
343 
344  for (int i=0; i<MAX_BASETYPES && !error; i++) {
345  naa1[i] = (int *)ckalloc((max_seq_length+1)*sizeof(int), error);
346  naa2[i] = (int *)ckalloc((max_seq_length+1)*sizeof(int), error);
347  naas[i] = (int *)ckalloc((max_seq_length+1)*sizeof(int), error);
348  }
349  return error;
350 }
351 
352 static void make_pamo(int nv) {
353  little_pam=big_pam=matptr[0];
354  for (int i=0; i<210; ++i) { // LOOP_VECTORIZED
355  int c=matptr[i];
356  little_pam=(little_pam<c) ? little_pam : c;
357  big_pam=(big_pam>c) ? big_pam : c;
358  }
359  for (int i=0; i<210; ++i) { // LOOP_VECTORIZED
360  pamo[i] = matptr[i]-little_pam;
361  }
362  nv -= little_pam;
363  big_pam -= little_pam;
364  xover = big_pam - nv;
365  /*
366  fprintf(stdout,"\n\nxover= %d, big_pam = %d, little_pam=%d, nv = %d\n\n"
367  ,xover,big_pam,little_pam,nv);
368  */
369 }
370 
371 static void fill_pam() {
372  int pos = 0;
373 
374  for (int i=0; i<20; ++i) {
375  for (int j=0; j<=i; ++j) {
376  pam[i][j]=pamo[pos++];
377  }
378  }
379 
380  for (int i=0; i<20; ++i) {
381  for (int j=0; j<=i; ++j) { // LOOP_VECTORIZED[!<8.1]
382  pam[j][i]=pam[i][j];
383  }
384  }
385 
386  if (dnaflag) {
387  xover=4;
388  big_pam=8;
389  for (int i=0; i<=NUCLEIDS; ++i) {
390  for (int j=0; j<=NUCLEIDS; ++j) {
391  weights[i][j] = getPenalty(i, j);
392  }
393  }
394  }
395  else {
396  for (int i=1; i<MAX_BASETYPES; ++i) {
397  for (int j=1; j<MAX_BASETYPES; ++j) { // LOOP_VECTORIZED
398  weights[i][j] = big_pam - pam[i-1][j-1];
399  }
400  }
401  for (int i=0; i<MAX_BASETYPES; ++i) {
402  weights[0][i] = xover;
403  weights[i][0] = xover;
404  }
405  }
406 }
407 
408 static void exit_myers() {
409  for (int i=0; i<MAX_BASETYPES; i++) {
410  free(naas[i]);
411  free(naa2[i]);
412  free(naa1[i]);
413  }
414  free(naas);
415  free(naa2);
416  free(naa1);
417 }
418 
419 static ARB_ERROR init_show_pair(long max_seq_length) {
421 
422 #if defined(ASSERTION_USED)
423  displ_size = (2*max_seq_length + 1);
424 #endif // ASSERTION_USED
425 
426  displ = (int *) ckalloc((2*max_seq_length + 1) * sizeof (int), error);
427  last_print = 0;
428 
429  zza = (int *)ckalloc((max_seq_length+1) * sizeof (int), error);
430  zzb = (int *)ckalloc((max_seq_length+1) * sizeof (int), error);
431 
432  zzc = (int *)ckalloc((max_seq_length+1) * sizeof (int), error);
433  zzd = (int *)ckalloc((max_seq_length+1) * sizeof (int), error);
434 
435  return error;
436 }
437 
438 static void exit_show_pair() {
439  freenull(zzd);
440  freenull(zzc);
441  freenull(zzb);
442  freenull(zza);
443  freenull(displ);
444 #if defined(ASSERTION_USED)
445  displ_size = 0;
446 #endif // ASSERTION_USED
447 }
448 
449 inline int set_displ(int offset, int value) {
450  fa_assert(offset >= 0 && offset<int(displ_size));
451  displ[offset] = value;
452  return displ[offset];
453 }
454 inline int decr_displ(int offset, int value) {
455  fa_assert(offset >= 0 && offset<int(displ_size));
456  displ[offset] -= value;
457  return displ[offset];
458 }
459 
460 
461 inline void add(int v) {
462  // insert 'v' gaps into master ???
463  if (last_print<0 && print_ptr>0) {
464  set_displ(print_ptr-1, v);
465  set_displ(print_ptr++, last_print);
466  }
467  else {
468  last_print = set_displ(print_ptr++, v);
469  }
470 }
471 
472 inline int calc_weight(int iat, int jat, int v1, int v2) {
473  fa_assert(pos1==1 && pos2==1);
474  unsigned char j = seq_array[alist[1]][v2+jat-1];
475  return j<128 ? naas[j][v1+iat-1] : 0;
476 }
477 
478 #ifdef MATRIX_DUMP
479 
480 inline const unsigned char *lstr(const unsigned char *s, int len) {
481  static unsigned char *lstr_ss = 0;
482 
483  freeset(lstr_ss, (unsigned char*)strndup((const char*)s, len));
484  return lstr_ss;
485 }
486 
487 inline const unsigned char *nstr(unsigned char *cp, int length) {
488  const unsigned char *s = lstr(cp, length);
489  (dnaflag ? n_decode : p_decode)(s-1, const_cast<unsigned char *>(s-1), length);
490  return s;
491 }
492 
493 inline void dumpMatrix(int x0, int y0, int breite, int hoehe, int mitte_vert) {
494  char *sl = ARB_alloc<char>(hoehe+3);
495  char *ma = ARB_alloc<char>(breite+3);
496 
497  sprintf(ma, "-%s-", nstr(seq_array[1]+x0, breite));
498  sprintf(sl, "-%s-", nstr(seq_array[2]+y0, hoehe));
499 
500  printf(" ");
501 
502  {
503  int b;
504  for (b=0; b<=mitte_vert; b++) {
505  printf("%5c", ma[b]);
506  }
507  printf(" MID");
508  for (b++; b<=breite+1+1; b++) {
509  printf("%5c", ma[b-1]);
510  }
511  }
512 
513  for (int h=0; h<=hoehe+1; h++) {
514  printf("\n%c vertical: ", sl[h]);
515  for (int b=0; b<=breite+1+1; b++) printf("%5i", vertical[b][h]);
516  printf("\n verticalOpen: ");
517  for (int b=0; b<=breite+1+1; b++) printf("%5i", verticalOpen[b][h]);
518  printf("\n diagonal: ");
519  for (int b=0; b<=breite+1+1; b++) printf("%5i", diagonal[b][h]);
520  printf("\n horizontal: ");
521  for (int b=0; b<=breite+1+1; b++) printf("%5i", horizontal[b][h]);
522  printf("\n horizontalOpen:");
523  for (int b=0; b<=breite+1+1; b++) printf("%5i", horizontalOpen[b][h]);
524  printf("\n");
525  }
526 
527  printf("--------------------\n");
528 
529  free(ma);
530  free(sl);
531 }
532 #endif
533 
534 static int diff(int v1, int v2, int v3, int v4, int st, int en) {
535  /* v1,v3 master sequence (v1=offset, v3=length)
536  * v2,v4 slave sequence (v2=offset, v4=length)
537  * st,en gap_open-penalties for start and end of the sequence
538  *
539  * returns costs for inserted gaps
540  */
541 #ifdef DEBUG
542 # ifdef MATRIX_DUMP
543  int display_matrix = 0;
544 
545  fa_assert(v3<=(DISPLAY_MATRIX_SIZE+2)); // width
546  fa_assert(v4<=(DISPLAY_MATRIX_SIZE)); // height
547 
548 # define DISPLAY_MATRIX_ELEMENTS ((DISPLAY_MATRIX_SIZE+2)*(DISPLAY_MATRIX_SIZE+2))
549 
550  memset(vertical, -1, DISPLAY_MATRIX_ELEMENTS*sizeof(int));
551  memset(verticalOpen, -1, DISPLAY_MATRIX_ELEMENTS*sizeof(int));
552  memset(diagonal, -1, DISPLAY_MATRIX_ELEMENTS*sizeof(int));
553  memset(horizontal, -1, DISPLAY_MATRIX_ELEMENTS*sizeof(int));
554  memset(horizontalOpen, -1, DISPLAY_MATRIX_ELEMENTS*sizeof(int));
555 
556 # endif
557 #endif
558  static int deep;
559  deep++;
560 
561 #if (defined (DEBUG) && 0)
562  {
563  char *d = lstr(seq_array[1]+v1, v3);
564 
565  (dnaflag ? n_decode : p_decode)(d-1, d-1, v3);
566 
567  for (int cnt=0; cnt<deep; cnt++) putchar(' ');
568  printf("master = '%s' (penalties left=%i right=%i)\n", d, st, en);
569 
570  d = lstr(seq_array[2]+v2, v4);
571  (dnaflag ? n_decode : p_decode)(d-1, d-1, v4);
572 
573  for (cnt=0; cnt<deep; cnt++) putchar(' ');
574  printf("slave = '%s'\n", d);
575  }
576 #endif
577 
578  if (v4<=0) { // if slave sequence is empty
579  if (v3>0) {
580  if (last_print<0 && print_ptr>0) {
581  last_print = decr_displ(print_ptr-1, v3); // add ..
582  }
583  else {
584  last_print = set_displ(print_ptr++, -(v3)); // .. or insert gap of length 'v3' into slave
585  }
586  }
587 
588  deep--;
589  return master_gapAt(v1, v3);
590  }
591 
592  int ctrj = 0;
593  if (v3<=1) {
594  if (v3<=0) { // if master sequence is empty
595  add(v4); // ??? insert gap length 'v4' into master ???
596  deep--;
597  return slave_gapAt(v2, v4);
598  }
599 
600  fa_assert(v3==1);
601  // if master length == 1
602 
603  if (v4==1) {
604  if (st>en) st=en;
605 
607 
608  int ctrc = slave_gapAtWithOpenPenalty(v2, v4, st);
609  ctrj = 0;
610 
611  for (int j=1; j<=v4; ++j) {
612  int k = slave_gapAt(v2, j-1) + calc_weight(1, j, v1, v2) + slave_gapAt(v2+j, v4-j);
613  if (k<ctrc) { ctrc = k; ctrj = j; }
614  }
615 
616  if (!ctrj) {
617  add(v4);
618  if (last_print<0 && print_ptr>0) {
619  last_print = decr_displ(print_ptr-1, 1);
620  }
621  else {
622  last_print = set_displ(print_ptr++, -1);
623  }
624  }
625  else {
626  if (ctrj>1) add(ctrj-1);
627  set_displ(print_ptr++, last_print = 0);
628  if (ctrj<v4) add(v4-ctrj);
629  }
630 
631  deep--;
632  return ctrc;
633  }
634  }
635 
636  fa_assert(v3>=1 && v4>=1);
637 
638  // slave length >= 1
639  // master length >= 1
640 
641 # define AF 0
642 
643  // first column of matrix (slave side):
644  IF_MATRIX_DUMP(vertical[0][0]=)
645  zza[0] = 0;
646  int p = master_gap_open(v1);
647  for (int j=1; j<=v4; j++) {
648  p += master_gap_extend(v1);
649  IF_MATRIX_DUMP(vertical[0][j]=)
650  zza[j] = p;
651  IF_MATRIX_DUMP(verticalOpen[0][j]=)
652  zzb[j] = p + master_gap_open(v1);
653  }
654 
655  // left half of the matrix
656  p = st;
657  int ctri = v3 / 2;
658  for (int i=1; i<=ctri; i++) {
659  int n = zza[0];
660  p += master_gap_extend(v1+i+AF);
661  int k = p;
662  IF_MATRIX_DUMP(vertical[i][0]=)
663  zza[0] = k;
664  int l = p + master_gap_open(v1+i+AF);
665 
666  for (int j=1; j<=v4; j++) {
667  // from above (gap in master (behind position i))
668  IF_MATRIX_DUMP(verticalOpen[i][j]=) k += master_gap_open(v1+i+AF)+master_gap_extend(v1+i+AF); // (1)
669  IF_MATRIX_DUMP(vertical[i][j]=) l += master_gap_extend(v1+i+AF); // (2)
670  if (k<l) l = k; // l=min((1),(2))
671 
672  // from left (gap in slave (behind position j))
673  IF_MATRIX_DUMP(horizontalOpen[i][j]=) k = zza[j] + slave_gap_open(v2+j+AF)+slave_gap_extend(v2+j+AF); // (3)
674  int m;
675  IF_MATRIX_DUMP(horizontal[i][j]=) m = zzb[j] + slave_gap_extend(v2+j+AF); // (4)
676  if (k<m) m = k; // m=min((3),(4))
677 
678  // diagonal (no gaps)
679  IF_MATRIX_DUMP(diagonal[i][j]=) k = n + calc_weight(i, j, v1, v2); // (5)
680  if (l<k) k = l;
681  if (m<k) k = m; // k = minimum of all paths
682 
683  n = zza[j]; // minimum of same row; one column to the left
684  zza[j] = k; // minimum of all paths to this matrix position
685  zzb[j] = m; // minimum of those two paths, where gap was inserted into slave
686  }
687  }
688 
689 # define MHO 1 // X-Offset for second half of matrix-arrays (only used to insert MID-column into dumpMatrix())
690 # define BO 1 // Offset for second half of matrix (cause now the indices i and j are smaller as above)
691 
692  // last column of matrix:
693 
694  zzb[0]=zza[0];
695  IF_MATRIX_DUMP(vertical[v3+1+MHO][v4+1]=)
696  zzc[v4]=0;
697  p = master_gap_open(v1+v3);
698  for (int j=v4-1; j>-1; j--) {
699  p += master_gap_extend(v1+v3);
700  IF_MATRIX_DUMP(vertical[v3+1+MHO][j+BO]=)
701  zzc[j] = p;
702  IF_MATRIX_DUMP(verticalOpen[v3+1+MHO][j+BO]=)
703  zzd[j] = p+master_gap_open(v1+v3);
704  }
705 
706  // right half of matrix (backwards):
707  p = en;
708  for (int i=v3-1; i>=ctri; i--) {
709  int n = zzc[v4];
710  p += master_gap_extend(v1+i);
711  int k = p;
712  IF_MATRIX_DUMP(vertical[i+BO+MHO][v4+1]=)
713  zzc[v4] = k;
714  int l = p+master_gap_open(v1+i);
715 
716  for (int j=v4-1; j>=0; j--) {
717  // from below (gap in master (in front of position (i+BO)))
718  IF_MATRIX_DUMP(verticalOpen[i+BO+MHO][j+BO]=) k += master_gap_open(v1+i)+master_gap_extend(v1+i); // (1)
719  IF_MATRIX_DUMP(vertical[i+BO+MHO][j+BO]=) l += master_gap_extend(v1+i); // (2)
720  if (k<l) l = k; // l=min((1),(2))
721 
722  // from right (gap in slave (in front of position (j+BO)))
723  IF_MATRIX_DUMP(horizontalOpen[i+BO+MHO][j+BO]=) k = zzc[j] + slave_gap_open(v2+j) + slave_gap_extend(v2+j); // (3)
724  int m;
725  IF_MATRIX_DUMP(horizontal[i+BO+MHO][j+BO]=) m = zzd[j] + slave_gap_extend(v2+j); // (4)
726  if (k<m) m = k; // m=min((3),(4))
727 
728  // diagonal (no gaps)
729  IF_MATRIX_DUMP(diagonal[i+BO+MHO][j+BO]=) k = n + calc_weight(i+BO, j+BO, v1, v2); // (5)
730  if (l<k) k = l;
731  if (m<k) k = m; // k = minimum of all paths
732 
733  n = zzc[j]; // minimum of same row; one column to the right
734  zzc[j] = k; // minimum of all paths to this matrix position
735  zzd[j] = m; // minimum of those two paths, where gap was inserted into slave
736  }
737  }
738 
739 #undef BO
740 
741  // connect rightmost column of first half (column ctri)
742  // with leftmost column of second half (column ctri+1)
743 
744  zzd[v4] = zzc[v4];
745 
746  int ctrc = INT_MAX;
747  int flag = 0;
748 
749  for (int j=(ctri ? 0 : 1); j<=v4; j++) {
750  int k;
751  IF_MATRIX_DUMP(vertical[ctri+MHO][j]=)
752  k = zza[j] + zzc[j]; // sum up both calculations (=diagonal=no gaps)
753 
754  if (k<ctrc || ((k==ctrc) && (zza[j]!=zzb[j]) && (zzc[j]==zzd[j]))) {
755  ctrc = k;
756  ctrj = j;
757  flag = 1;
758  }
759  }
760 
761  for (int j=v4; j>=(ctri ? 0 : 1); j--) {
762  int k;
763  IF_MATRIX_DUMP(verticalOpen[ctri+MHO][j]=)
764  k = zzb[j] + zzd[j] // paths where gaps were inserted into slave (left and right side!)
765  - slave_gap_open(j); // subtract gap_open-penalty which was added twice (at left and right end of gap)
766 
767  if (k<ctrc) {
768  ctrc = k;
769  ctrj = j;
770  flag = 2;
771  }
772  }
773 
774  fa_assert(flag>=1 && flag<=2);
775 
776 #undef MHO
777 
778 #ifdef MATRIX_DUMP
779  if (display_matrix) {
780  dumpMatrix(v1, v2, v3, v4, ctri);
781  }
782 #endif
783 
784  // Conquer recursively around midpoint
785 
786  if (flag==1) { // Type 1 gaps
787  diff(v1, v2, ctri, ctrj, st, master_gap_open(v1+ctri)); // includes midpoint ctri and ctrj
788  diff(v1+ctri, v2+ctrj, v3-ctri, v4-ctrj, master_gap_open(v1+ctri), en);
789  }
790  else {
791  diff(v1, v2, ctri-1, ctrj, st, 0); // includes midpoint ctrj
792 
793  if (last_print<0 && print_ptr>0) { // Delete 2
794  last_print = decr_displ(print_ptr-1, 2);
795  }
796  else {
797  last_print = set_displ(print_ptr++, -2);
798  }
799 
800  diff(v1+ctri+1, v2+ctrj, v3-ctri-1, v4-ctrj, 0, en);
801  }
802 
803  deep--;
804  return ctrc; // Return the score of the best alignment
805 }
806 
807 static void do_align(int& score, long act_seq_length) {
808  pos1 = pos2 = 0;
809 
810  // clear statistics
811 
812  for (int i=1; i<=act_seq_length; ++i) {
813  for (int j=0; j<MAX_BASETYPES; ++j) {
814  naa1[j][i] = naa2[j][i]=0;
815  naas[j][i]=0;
816  }
817  }
818 
819  // create position statistics for each group
820  // [here every group contains only one seq]
821 
822  int l1 = 0;
823  int l2 = 0;
824  for (int i=1; i<=nseqs; ++i) {
825  if (group[i]==1) {
826  fst_list[++pos1]=i;
827  for (int j=1; j<=seqlen_array[i]; ++j) {
828  unsigned char b = seq_array[i][j];
829  if (b<128) {
830  ++naa1[b][j];
831  ++naa1[0][j];
832  }
833  }
834  if (seqlen_array[i]>l1) l1=seqlen_array[i];
835  }
836  else if (group[i]==2) {
837  snd_list[++pos2]=i;
838  for (int j=1; j<=seqlen_array[i]; ++j) {
839  unsigned char b = seq_array[i][j];
840  if (b<128) {
841  ++naa2[b][j];
842  ++naa2[0][j];
843  }
844  }
845  if (seqlen_array[i]>l2) l2=seqlen_array[i];
846  }
847  }
848 
849  if (pos1>=pos2) {
850  int t_arr[MAX_BASETYPES];
851 
852  for (int i=1; i<=pos2; ++i) alist[i]=snd_list[i];
853  for (int n=1; n<=l1; ++n) {
854  for (int i=1; i<MAX_BASETYPES; ++i) t_arr[i]=0;
855  for (int i=1; i<MAX_BASETYPES; ++i) {
856  if (naa1[i][n]>0) {
857  for (int j=1; j<MAX_BASETYPES; ++j) { // LOOP_VECTORIZED
858  t_arr[j] += (weights[i][j]*naa1[i][n]);
859  }
860  }
861  }
862  int k = naa1[0][n];
863  if (k>0) {
864  for (int i=1; i<MAX_BASETYPES; ++i) {
865  naas[i][n]=t_arr[i]/k;
866  }
867  }
868  }
869  }
870  else {
871  fa_assert(0); // should never occur if MAXN==2
872  }
873 
874  score = diff(1, 1, l1, l2, master_gap_open(1), master_gap_open(l1+1)); // Myers and Miller alignment now
875 }
876 
877 static int add_ggaps(long /* max_seq_length */) {
878  int pos = 1;
879  int to_do = print_ptr;
880 
881  for (int i=0; i<to_do; ++i) { // was: 1 .. <=to_do
882  if (displ[i]==0) {
883  result[1][pos]=result[2][pos]='*';
884  ++pos;
885  }
886  else {
887  int k = displ[i];
888  if (k>0) {
889  for (int j=0; j<=k-1; ++j) { // LOOP_VECTORIZED =0[>=9<11] =1 // completely fails with 9.x and 10.x; 1x with 4.9.2 + 5.4 + 5.5 + 6.5 + 7.2 + 7.5 + 8.1 + 8.5
890  result[2][pos+j]='*';
891  result[1][pos+j]='-';
892  }
893  pos += k;
894  }
895  else {
896  k = (displ[i]<0) ? displ[i] * -1 : displ[i];
897  for (int j=0; j<=k-1; ++j) { // LOOP_VECTORIZED =0[>=9<11] =1 // completely fails with 9.x and 10.x; 1x with 4.9.2 + 5.4 + 5.5 + 6.5 + 7.2 + 7.5 + 8.1 + 8.5
898  result[1][pos+j]='*';
899  result[2][pos+j]='-';
900  }
901  pos += k;
902  }
903  }
904  }
905 
906 #ifdef DEBUG
907  result[1][pos] = 0;
908  result[2][pos] = 0;
909 #endif
910 
911  return pos-1;
912 }
913 
914 
915 static int res_index(const char *t, char c) {
916  if (t[0]==c) return UNKNOWN_ACID;
917  for (int i=1; t[i]; i++) {
918  if (t[i]==c) return i;
919  }
920  return 0;
921 }
922 
923 static void p_encode(const unsigned char *seq, unsigned char *naseq, int l) {
924  // code seq as ints .. use -2 for gap
925  bool warned = false;
926 
927  for (int i=1; i<=l; i++) {
928  int c = res_index(amino_acid_order, seq[i]);
929 
930  if (!c) {
931  if (seq[i] == '*') {
932  c = -2;
933  }
934  else {
935  if (!warned && seq[i] != '?') { // consensus contains ? and it means X
936  char buf[100];
937  sprintf(buf, "Illegal character '%c' in sequence data", seq[i]);
938  aw_message(buf);
939  warned = true;
940  }
941  c = res_index(amino_acid_order, 'X');
942  }
943  }
944 
945  fa_assert(c>0 || c == -2);
946  naseq[i] = c;
947  }
948 }
949 
950 static void n_encode(const unsigned char *seq, unsigned char *naseq, int l) {
951  // code seq as ints .. use -2 for gap
952  bool warned = false;
953  for (int i=1; i<=l; i++) {
954  int c = res_index(nucleic_acid_order, seq[i]);
955 
956  if (!c) {
957  if (!warned && seq[i] != '?') {
958  char buf[100];
959  sprintf(buf, "Illegal character '%c' in sequence data", seq[i]);
960  aw_message(buf);
961  warned = true;
962  }
963  c = res_index(nucleic_acid_order, 'N');
964  }
965 
966  fa_assert(c>0);
967  naseq[i] = c;
968  }
969 }
970 
972  int weighted,
973  const char *seq1,
974  int length1,
975  const char *seq2,
976  int length2,
977  const int *gapsBefore1, // size of array = length1+1
978  int max_seq_length,
979  // result params:
980  const char*& res1,
981  const char*& res2,
982  int& reslen,
983  int& score)
984 {
986  gaps_before_position = gapsBefore1;
987 
988  if (!module_initialized) { // initialize only once
989  dnaflag = is_dna;
990  is_weight = weighted;
991 
992  error = init_myers(max_seq_length);
993  if (!error) error = init_show_pair(max_seq_length);
994  make_pamo(0);
995  fill_pam();
996 
997  for (int i=1; i<=2 && !error; i++) {
998  seq_array[i] = (unsigned char*)ckalloc((max_seq_length+2)*sizeof(char), error);
999  result[i] = (char*)ckalloc((max_seq_length+2)*sizeof(char), error);
1000  group[i] = i;
1001  }
1002 
1003  if (!error) module_initialized = true;
1004  }
1005  else {
1006  if (dnaflag!=is_dna || is_weight!=weighted) {
1007  error = "Please call ClustalV_exit() between calls that differ in\n"
1008  "one of the following parameters:\n"
1009  " is_dna, weighted";
1010  }
1011  }
1012 
1013  if (!error) {
1014  nseqs = 2;
1015  print_ptr = 0;
1016 
1017 #if defined(DEBUG) && 1
1018  memset(&seq_array[1][1], 0, max_seq_length*sizeof(seq_array[1][1]));
1019  memset(&seq_array[2][1], 0, max_seq_length*sizeof(seq_array[2][1]));
1020  memset(&displ[1], 0xff, max_seq_length*sizeof(displ[1]));
1021  seq_array[1][0] = '_';
1022  seq_array[2][0] = '_';
1023 #endif
1024 
1025  {
1026  typedef void (*encoder)(const unsigned char*, unsigned char*, int);
1027  encoder encode = dnaflag ? n_encode : p_encode;
1028 
1029  encode((const unsigned char*)(seq1-1), seq_array[1], length1);
1030  seqlen_array[1] = length1;
1031  encode((const unsigned char*)(seq2-1), seq_array[2], length2);
1032  seqlen_array[2] = length2;
1033 
1034  do_align(score, max(length1, length2));
1035  int alignedLength = add_ggaps(max_seq_length);
1036 
1037  result[1][alignedLength+1] = 0;
1038  result[2][alignedLength+1] = 0;
1039 
1040  res1 = result[1]+1;
1041  res2 = result[2]+1;
1042  reslen = alignedLength;
1043  }
1044  }
1045 
1046  return error;
1047 }
1048 
1050  if (module_initialized) {
1051  module_initialized = false;
1052 
1053  for (int i=1; i<=2; i++) {
1054  free(result[i]);
1055  free(seq_array[i]);
1056  }
1057  exit_show_pair();
1058  exit_myers();
1059  }
1060 }
1061 
1062 // --------------------------------------------------------------------------------
1063 
1064 #ifdef UNIT_TESTS
1065 #ifndef TEST_UNIT_H
1066 #include <test_unit.h>
1067 #endif
1068 
1069 static arb_test::match_expectation clustal_aligns(const char *i1, const char *i2, const char *expected1, const char *expected2, int expected_score) {
1070  size_t l1 = strlen(i1);
1071  size_t l2 = strlen(i2);
1072 
1073  const char *result1 = NULp;
1074  const char *result2 = NULp;
1075  int result_len; // test result value?
1076  int score;
1077 
1078  int gaps_size = l1+1;
1079  int no_gaps_before1[gaps_size];
1080  memset(no_gaps_before1, 0, gaps_size*sizeof(*no_gaps_before1));
1081 
1083  i1, l1,
1084  i2, l2,
1085  no_gaps_before1,
1086  max(l1, l2),
1087  result1,
1088  result2,
1089  result_len,
1090  score);
1091 
1092  using namespace arb_test;
1093  expectation_group expected;
1094 
1095  expected.add(doesnt_report_error(error.deliver()));
1096  expected.add(that(result1).is_equal_to(expected1));
1097  expected.add(that(result2).is_equal_to(expected2));
1098  expected.add(that(score).is_equal_to(expected_score));
1099 
1100  return all().ofgroup(expected);
1101 }
1102 
1103 #define TEST_CLUSTAL_ALIGNS(i1,i2,o1,o2,score) TEST_EXPECTATION(clustal_aligns(i1,i2,o1,o2,score))
1104 
1105 void TEST_clustalV() {
1106  TEST_CLUSTAL_ALIGNS("ACGTTGCAACGT",
1107  "ACGTTGCAACGT",
1108  "************",
1109  "************",
1110  138);
1111 
1112  TEST_CLUSTAL_ALIGNS("ACGTTAACGT",
1113  "ACGTTGCAACGT",
1114  "*****--*****",
1115  "************",
1116  207);
1117 
1118  TEST_CLUSTAL_ALIGNS("ACGTTGCAACGT",
1119  "ACCAACGT",
1120  "************",
1121  "*----*******",
1122  156);
1123 
1124  TEST_CLUSTAL_ALIGNS("ACGTTGCAACGT",
1125  "AGCTGTCACAGT",
1126  "************",
1127  "************",
1128  187);
1129 
1130  TEST_CLUSTAL_ALIGNS("ACGTTGCAACGT",
1131  "ACGTACGTACGT",
1132  "************",
1133  "************",
1134  164);
1135 
1136  TEST_CLUSTAL_ALIGNS("ACGTTGCAACGT",
1137  "ACGTACGT",
1138  "************",
1139  "****----****",
1140  162);
1141 
1142  TEST_CLUSTAL_ALIGNS("ACGTTGCAACGT",
1143  "AACGGTTC",
1144  "************",
1145  // "ACGTTGCAACGT",
1146  // "A----ACGGTTC",
1147  "*----*******",
1148  193);
1149 }
1150 
1151 #endif // UNIT_TESTS
1152 
1153 // --------------------------------------------------------------------------------
static int * zza
Definition: ClustalV.cxx:78
static int nseqs
Definition: ClustalV.cxx:70
static int last_print
Definition: ClustalV.cxx:83
static int getPenalty(char c1, char c2)
Definition: ClustalV.cxx:266
void * ckalloc(size_t bytes, ARB_ERROR &error)
Definition: ClustalV.cxx:328
group_matcher all()
Definition: test_unit.h:1011
#define UNKNOWN_ACID
Definition: ClustalV.cxx:175
int decr_displ(int offset, int value)
Definition: ClustalV.cxx:454
static void do_align(int &score, long act_seq_length)
Definition: ClustalV.cxx:807
#define MAX_BASETYPES
Definition: ClustalV.cxx:50
#define MAXN
Definition: ClustalV.cxx:41
static char * result[3]
Definition: ClustalV.cxx:277
static int ** naa2
Definition: ClustalV.cxx:61
static int * zzb
Definition: ClustalV.cxx:79
static Boolean dnaflag
Definition: ClustalV.cxx:47
match_expectation doesnt_report_error(const char *error)
Definition: test_unit.h:1105
int Boolean
Definition: ClustalV.cxx:45
void add(int v)
Definition: ClustalV.cxx:461
static int pos1
Definition: ClustalV.cxx:58
static const char * nucleic_acid_order
Definition: ClustalV.cxx:179
int master_gap_open(int beforePosition)
Definition: ClustalV.cxx:106
#define DEFAULT_PROBABLY_MUTATION
Definition: ClustalV.cxx:32
#define cheap_if(cond)
Definition: ClustalV.cxx:193
int master_gapAtWithOpenPenalty(int atPosition, int length, int penalty)
Definition: ClustalV.cxx:132
static void make_pamo(int nv)
Definition: ClustalV.cxx:352
static int res_index(const char *t, char c)
Definition: ClustalV.cxx:915
static char * matptr
Definition: ClustalV.cxx:302
static void fill_pam()
Definition: ClustalV.cxx:371
static int add_ggaps(long)
Definition: ClustalV.cxx:877
#define MAX_GAP_EXTEND_DISCOUNT
Definition: ClustalV.cxx:39
#define NUCLEIDS
Definition: ClustalV.cxx:178
FILE * seq
Definition: rns.c:46
#define MASTER_GAP_EXTEND
Definition: ClustalV.cxx:26
#define IF_MATRIX_DUMP(xxx)
Definition: ClustalV.cxx:103
static char pam250mt[]
Definition: ClustalV.cxx:279
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
static int ** naas
Definition: ClustalV.cxx:62
int calc_weight(int iat, int jat, int v1, int v2)
Definition: ClustalV.cxx:472
GB_ERROR deliver() const
Definition: arb_error.h:116
#define MAX_GAP_OPEN_DISCOUNT
Definition: ClustalV.cxx:38
static void p_encode(const unsigned char *seq, unsigned char *naseq, int l)
Definition: ClustalV.cxx:923
static void exit_show_pair()
Definition: ClustalV.cxx:438
int slave_gap_open(int)
Definition: ClustalV.cxx:159
static Boolean is_weight
Definition: ClustalV.cxx:48
static int group[MAXN+1]
Definition: ClustalV.cxx:65
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
static const char * nucleic_maybe_U
Definition: ClustalV.cxx:184
static void error(const char *msg)
Definition: mkptypes.cxx:96
static int snd_list[MAXN+1]
Definition: ClustalV.cxx:68
static int * zzc
Definition: ClustalV.cxx:80
expectation_group & add(const expectation &e)
Definition: test_unit.h:812
Definition: f2c.h:84
int set_displ(int offset, int value)
Definition: ClustalV.cxx:449
#define AF
#define that(thing)
Definition: test_unit.h:1043
ARB_ERROR ClustalV_align(int is_dna, int weighted, const char *seq1, int length1, const char *seq2, int length2, const int *gapsBefore1, int max_seq_length, const char *&res1, const char *&res2, int &reslen, int &score)
Definition: ClustalV.cxx:971
int master_gapAt(int atPosition, int length)
Definition: ClustalV.cxx:155
Definition: align.cxx:35
static int * zzd
Definition: ClustalV.cxx:81
void ClustalV_exit()
Definition: ClustalV.cxx:1049
static int ** naa1
Definition: ClustalV.cxx:60
static const char * nucleic_maybe_T
Definition: ClustalV.cxx:183
static int big_pam
Definition: ClustalV.cxx:54
#define DEFAULT_IMPROBABLY_MUTATION
Definition: ClustalV.cxx:31
static int pos2
Definition: ClustalV.cxx:59
#define DEFAULT_GAP_EXTEND
Definition: ClustalV.cxx:29
static const char * nucleic_maybe[6]
Definition: ClustalV.cxx:185
int baseMatch(char c1, char c2)
Definition: ClustalV.cxx:249
#define is_equal_to(val)
Definition: test_unit.h:1025
static int xover
Definition: ClustalV.cxx:52
long int flag
Definition: f2c.h:39
#define MASTER_GAP_OPEN
Definition: ClustalV.cxx:21
static ARB_ERROR init_myers(long max_seq_length)
Definition: ClustalV.cxx:337
static int pamo[(MAX_BASETYPES-1)*MAX_BASETYPES/2]
Definition: ClustalV.cxx:55
static int pam[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:56
static ARB_ERROR init_show_pair(long max_seq_length)
Definition: ClustalV.cxx:419
static bool module_initialized
Definition: ClustalV.cxx:43
int slave_gapAtWithOpenPenalty(int atPosition, int length, int penalty)
Definition: ClustalV.cxx:167
static int little_pam
Definition: ClustalV.cxx:53
static void n_encode(const unsigned char *seq, unsigned char *naseq, int l)
Definition: ClustalV.cxx:950
static void exit_myers()
Definition: ClustalV.cxx:408
static int baseCmp(unsigned char c1, unsigned char c2)
Definition: ClustalV.cxx:194
static size_t displ_size
Definition: ClustalV.cxx:74
fa_assert(chars< MESSAGE_BUFFERSIZE)
static const int * gaps_before_position
Definition: ClustalV.cxx:85
#define DEFAULT_GAP_OPEN
Definition: ClustalV.cxx:24
static const char * nucleic_maybe_A
Definition: ClustalV.cxx:180
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
static int seqlen_array[MAXN+1]
Definition: ClustalV.cxx:63
#define BO
ARB_ERROR MAXLENtooSmall()
Definition: ClustalV.cxx:324
char encode(const char bases[], GB_alignment_type aliType)
Definition: iupac.cxx:192
#define COMPARABLE_BASES
#define NULp
Definition: cxxforward.h:116
static const char * nucleic_maybe_G
Definition: ClustalV.cxx:182
#define offset(field)
Definition: GLwDrawA.c:73
int master_gap_extend(int beforePosition)
Definition: ClustalV.cxx:119
char * strndup(const char *str, int len)
Definition: global.h:102
static const char * nucleic_maybe_C
Definition: ClustalV.cxx:181
#define MHO
#define FALLTHROUGH
Definition: cxxforward.h:130
int slave_gapAt(int atPosition, int length)
Definition: ClustalV.cxx:171
int slave_gap_extend(int)
Definition: ClustalV.cxx:163
static unsigned char * seq_array[MAXN+1]
Definition: ClustalV.cxx:64
static const char * amino_acid_order
Definition: ClustalV.cxx:176
size_t length
static int print_ptr
Definition: ClustalV.cxx:82
static int * displ
Definition: ClustalV.cxx:77
static int fst_list[MAXN+1]
Definition: ClustalV.cxx:67
#define max(a, b)
Definition: f2c.h:154
GB_write_int const char s
Definition: AW_awar.cxx:154