ARB
align.cxx
Go to the documentation of this file.
1 // =============================================================
2 /* */
3 // File : align.c
4 // Purpose :
5 /* */
6 // Institute of Microbiology (Technical University Munich)
7 // http://www.arb-home.de/
8 /* */
9 // =============================================================
10 
11 #include "mem.h"
12 #include "trnsprob.h"
13 
14 #define EXTERN
15 #include "i-hopper.h"
16 
17 #include <cxxforward.h>
18 
19 #include <stdio.h>
20 #include <math.h>
21 
22 #define MSIZE 512
23 #define ESIZE 36
24 
25 //============================================================================
26 
27 #define MINDIST 0.001
28 #define MAXDIST 1.000
29 
30 #define MAXGAP 16
31 #define NOWHERE (MAXGAP+1)
32 
33 #define MINLENGTH 5
34 
35 typedef struct score {
36  double score;
37  int up,down;
38 } Score;
39 
40 
41 typedef struct fragment {
43  struct fragment *next;
44 } Fragment;
45 
46 typedef struct island {
52 } Island;
53 
54 static double **GP,**GS;
55 
56 // #define TEST
57 
58 #ifdef TEST
59 
60 #define TELEN 193
61 static double TE[TELEN][TELEN];
62 
63 static int te=TRUE;
64 
65 #endif
66 
67 static Score **U;
68 
70 
71 static Island *Z,**ZB,**ZE;
72 
73 //============================================================================
74 
75 static void initScore(double ***pP,double ***pS) {
76 
77  *pP=newDoubleMatrix(N,N);
78  *pS=newDoubleMatrix(N1,N1);
79 
80 }
81 
82 //............................................................................
83 
84 static void uninitScore(double ***pP,double ***pS) {
85 
86  freeMatrix(pS);
87  freeMatrix(pP);
88 
89 }
90 
91 //............................................................................
92 
93 static void updateScore(
94  double **P,double **S,
95  double F[N],double X[6],double dist
96  ) {
97  int i,j;
98  double ***SS,s,smin,smax;
99 
100  P[T][T]=F[T]*F[T]; P[T][C]=F[T]*F[C]; P[T][A]=F[T]*F[A]; P[T][G]=F[T]*F[G];
101  P[C][T]=F[C]*F[T]; P[C][C]=F[C]*F[C]; P[C][A]=F[C]*F[A]; P[C][G]=F[C]*F[G];
102  P[A][T]=F[A]*F[T]; P[A][C]=F[A]*F[C]; P[A][A]=F[A]*F[A]; P[A][G]=F[A]*F[G];
103  P[G][T]=F[G]*F[T]; P[G][C]=F[G]*F[C]; P[G][A]=F[G]*F[A]; P[G][G]=F[G]*F[G];
104 
105  initTrnsprob(&SS);
106  updateTrnsprob(SS,F,X,REV);
107  getTrnsprob(S,SS,dist);
108  uninitTrnsprob(&SS);
109  S[T][T]/=F[T]; S[T][C]/=F[C]; S[T][A]/=F[A]; S[T][G]/=F[G];
110  S[C][T]/=F[T]; S[C][C]/=F[C]; S[C][A]/=F[A]; S[C][G]/=F[G];
111  S[A][T]/=F[T]; S[A][C]/=F[C]; S[A][A]/=F[A]; S[A][G]/=F[G];
112  S[G][T]/=F[T]; S[G][C]/=F[C]; S[G][A]/=F[A]; S[G][G]/=F[G];
113 
114  for(i=0;i<N;i++) for(j=0;j<N;j++) S[i][j]=log(S[i][j]);
115 
116  S[T][N]=F[T]*S[T][T]+F[C]*S[T][C]+F[A]*S[T][A]+F[G]*S[T][G];
117  S[C][N]=F[T]*S[C][T]+F[C]*S[C][C]+F[A]*S[C][A]+F[G]*S[C][G];
118  S[A][N]=F[T]*S[A][T]+F[C]*S[A][C]+F[A]*S[A][A]+F[G]*S[A][G];
119  S[G][N]=F[T]*S[G][T]+F[C]*S[G][C]+F[A]*S[G][A]+F[G]*S[G][G];
120  S[N][T]=F[T]*S[T][T]+F[C]*S[C][T]+F[A]*S[A][T]+F[G]*S[G][T];
121  S[N][C]=F[T]*S[T][C]+F[C]*S[C][C]+F[A]*S[A][C]+F[G]*S[G][C];
122  S[N][A]=F[T]*S[T][A]+F[C]*S[C][A]+F[A]*S[A][A]+F[G]*S[G][A];
123  S[N][G]=F[T]*S[T][G]+F[C]*S[C][G]+F[A]*S[A][G]+F[G]*S[G][G];
124  S[N][N]=F[T]*S[T][N]+F[C]*S[C][N]+F[A]*S[A][N]+F[G]*S[G][N];
125 
126  smin=0.; smax=0.;
127  for(i=0;i<N;i++)
128  for(j=0;j<N;j++) {
129  s=S[i][j];
130  if(s<smin) smin=s; else
131  if(s>smax) smax=s;
132  }
133 
134 }
135 
136 //============================================================================
137 
138 static void initEntropy(double ***EE) {
139  *EE=newDoubleMatrix(ESIZE+1,MSIZE);
140 }
141 
142 //............................................................................
143 
144 static void uninitEntropy(double ***EE) {
145  freeMatrix(EE);
146 }
147 
148 //............................................................................
149 
150 static double getEntropy(double **E,double m,int l) {
151  int k,ia,ib,ja,jb;
152  double *M,mmin,idm,la,lb,ma,mb;
153 
154  M=E[0]; ma=0.5*(M[1]-M[0]); mmin=M[0]-ma; idm=0.5/ma;
155 
156  k=floor((m-mmin)*idm);
157  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
158 
159  if(k==0) {ja=k; jb=k+1;} else
160  if(k==MSIZE-1) {ja=k-1; jb=k; } else
161  if(m>M[k]) {ja=k; jb=k+1;} else
162  {ja=k-1; jb=k; }
163 
164  ma=M[ja]; mb=M[jb];
165 
166  if(l<=16) {
167  return(
168  ((mb-m)*E[l][ja]+(m-ma)*E[l][jb])/(mb-ma)
169  );
170  }
171  else {
172  if(l<=32) {
173  if(l<=18) {la=16; lb=18; ia=16; ib=17;} else
174  if(l<=20) {la=18; lb=20; ia=17; ib=18;} else
175  if(l<=22) {la=20; lb=22; ia=18; ib=19;} else
176  if(l<=24) {la=22; lb=24; ia=19; ib=20;} else
177  if(l<=26) {la=24; lb=26; ia=20; ib=21;} else
178  if(l<=28) {la=26; lb=28; ia=21; ib=22;} else
179  if(l<=30) {la=28; lb=30; ia=22; ib=23;} else
180  if(l<=32) {la=30; lb=32; ia=23; ib=24;}
181  } else
182  if(l<=64) {
183  if(l<=36) {la=32; lb=36; ia=24; ib=25;} else
184  if(l<=40) {la=36; lb=40; ia=25; ib=26;} else
185  if(l<=44) {la=40; lb=44; ia=26; ib=27;} else
186  if(l<=48) {la=44; lb=48; ia=27; ib=28;} else
187  if(l<=52) {la=48; lb=52; ia=28; ib=29;} else
188  if(l<=56) {la=52; lb=56; ia=29; ib=30;} else
189  if(l<=60) {la=56; lb=60; ia=30; ib=31;} else
190  if(l<=64) {la=60; lb=64; ia=31; ib=32;}
191  }
192  else {
193  if(l<= 128) {la= 64; lb= 128; ia=32; ib=33;} else
194  if(l<= 256) {la= 128; lb= 256; ia=33; ib=34;} else
195  if(l<= 512) {la= 256; lb= 512; ia=34; ib=35;} else
196  {la= 512; lb=1024; ia=35; ib=36;}
197  }
198  return(
199  (
200  +(lb-l)*((mb-m)*E[ia][ja]+(m-ma)*E[ia][jb])
201  +(l-la)*((mb-m)*E[ib][ja]+(m-ma)*E[ib][jb])
202  )
203  /((lb-la)*(mb-ma))
204  );
205  }
206 
207 }
208 
209 //............................................................................
210 
211 static void updateEntropy(double **P,double **S,double **E) {
212  int i,j,k,l,ll,lll;
213  double *M,m,dm,idm,mmin,mmax;
214 
215  mmin=0.; mmax=0.;
216  for(i=0;i<N;i++)
217  for(j=0;j<N;j++) {m=S[i][j]; if(m<mmin) mmin=m; else if(m>mmax) mmax=m;}
218  dm=(mmax-mmin)/MSIZE;
219  idm=1./dm;
220 
221  M=E[0];
222  for(i=0,m=mmin+0.5*dm;i<MSIZE;i++,m+=dm) M[i]=m;
223 
224  for(i=0;i<MSIZE;i++) E[1][i]=0.;
225  for(i=0;i<N;i++)
226  for(j=0;j<N;j++) {
227  k=floor((S[i][j]-mmin)*idm);
228  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
229  E[1][k]+=P[i][j];
230  }
231 
232  for(k=0;k<MSIZE;k++) E[2][k]=0.;
233  for(i=0;i<MSIZE;i++)
234  for(j=0;j<MSIZE;j++) {
235  m=(M[i]+M[j])/2;
236  k=floor((m-mmin)*idm);
237  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
238  E[2][k]+=E[1][i]*E[1][j];
239  }
240 
241  for(ll=2,lll=1;ll<=8;ll++,lll++) {
242  l=ll+lll;
243  for(k=0;k<MSIZE;k++) E[l][k]=0.;
244  for(i=0;i<MSIZE;i++)
245  for(j=0;j<MSIZE;j++) {
246  m=(ll*M[i]+lll*M[j])/l;
247  k=floor((m-mmin)*idm);
248  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
249  E[l][k]+=E[ll][i]*E[lll][j];
250  }
251  l=ll+ll;
252  for(k=0;k<MSIZE;k++) E[l][k]=0.;
253  for(i=0;i<MSIZE;i++)
254  for(j=0;j<MSIZE;j++) {
255  m=(M[i]+M[j])/2;
256  k=floor((m-mmin)*idm);
257  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
258  E[l][k]+=E[ll][i]*E[ll][j];
259  }
260  }
261 
262  for(l=17,ll=l-8;l<=24;l++,ll++) {
263  for(k=0;k<MSIZE;k++) E[l][k]=0.;
264  for(i=0;i<MSIZE;i++)
265  for(j=0;j<MSIZE;j++) {
266  m=(M[i]+M[j])/2;
267  k=floor((m-mmin)*idm);
268  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
269  E[l][k]+=E[ll][i]*E[ll][j];
270  }
271  }
272 
273  for(l=25,ll=l-8;l<=32;l++,ll++) {
274  for(k=0;k<MSIZE;k++) E[l][k]=0.;
275  for(i=0;i<MSIZE;i++)
276  for(j=0;j<MSIZE;j++) {
277  m=(M[i]+M[j])/2;
278  k=floor((m-mmin)*idm);
279  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
280  E[l][k]+=E[ll][i]*E[ll][j];
281  }
282  }
283 
284  for(l=33,ll=l-1;l<=36;l++,ll++) {
285  for(k=0;k<MSIZE;k++) E[l][k]=0.;
286  for(i=0;i<MSIZE;i++)
287  for(j=0;j<MSIZE;j++) {
288  m=(M[i]+M[j])/2;
289  k=floor((m-mmin)*idm);
290  if(k>=MSIZE) k=MSIZE-1; else if(k<0) k=0;
291  E[l][k]+=E[ll][i]*E[ll][j];
292  }
293  }
294 
295  for(l=1;l<=ESIZE;l++) {m=0.; for(k=MSIZE-1;k>=0;k--) m=E[l][k]+=m;}
296 
297  for(i=1;i<=ESIZE;i++)
298  for(j=0;j<MSIZE;j++) E[i][j]=(E[i][j]>REAL_MIN)?log(E[i][j]):LOG_REAL_MIN;
299 
300  {
301  FILE *fp;
302  int nbp;
303  double m0,dm2;
304 
305  nbp=22;
306  fp=fopen("distrib.txt","wt");
307  fprintf(fp,"\n(* score per basepair distribution for gap-less random path 1=Smin 100=Smax *)\nListPlot3D[({");
308  m0=M[0]; dm2=(M[MSIZE-1]-M[0])/99.;
309  for(i=1;i<=nbp;i++) {
310  fprintf(fp,"\n{%f",exp(getEntropy(E,m0,i)));
311  for(j=1;j<=99;j++) fprintf(fp,",%f",exp(getEntropy(E,m0+dm2*j,i)));
312  fprintf(fp,"}"); if(i<nbp) fprintf(fp,",");
313  }
314  fprintf(fp,"}),PlotRange->{{1,100},{1,%d},{0,1}},ViewPoint->{1.5,1.1,0.8}]\n",nbp);
315  fclose(fp);
316 
317  }
318 
319 }
320 
321 //============================================================================
322 
323 static Island *newIsland(char *X,char *Y,int i,int j,int d) {
324  Island *p; Fragment *f,**ff,*L; int k,ii,jj,iii,jjj,l; double s;
325 
326  p=(Island*)newBlock(sizeof(Island));
327 
328  ii=i; jj=j; l=0; s=0.;
329 
330  if(d>0) {
331  ff=&L;
332  for(;;) {
333  s+=GS[(int)X[ii]][(int)Y[jj]];
334  if(++l==1) {iii=ii; jjj=jj;}
335  k=U[ii][jj].up;
336  if(k) {
337  f=(Fragment*)newBlock(sizeof(Fragment));
338  f->beginX=iii; f->beginY=jjj; f->length=l;
339  l=0; *ff=f; ff=&f->next;
340  }
341  if(k==NOWHERE) break;
342  ii++; jj++; if(k>=0) ii+=k; else jj-=k;
343  }
344  *ff=NULp;
345  p->beginX= i; p->beginY= j;
346  p->endX =ii; p->endY =jj;
347  }
348  else {
349  L=NULp;
350  for(;;) {
351  s+=GS[(int)X[ii]][(int)Y[jj]];
352  if(++l==1) {iii=ii; jjj=jj;}
353  k=U[ii][jj].down;
354  if(k) {
355  f=(Fragment*)newBlock(sizeof(Fragment));
356  f->beginX=iii-l+1; f->beginY=jjj-l+1; f->length=l;
357  l=0; f->next=L; L=f;
358  }
359  if(k==NOWHERE) break;
360  ii--; jj--; if(k>=0) ii-=k; else jj+=k;
361  }
362  p->beginX=ii; p->beginY=jj;
363  p->endX = i; p->endY = j;
364  }
365 
366  p->fragments=L;
367 
368  p->score=s+GapC*expectedScore;
369 
370  return(p);
371 }
372 
373 //............................................................................
374 
375 static void freeIsland(Island **pp) {
376  Island *p; Fragment *q;
377 
378  p=*pp;
379 
380  while(p->fragments) {q=p->fragments; p->fragments=q->next; freeBlock(&q);}
381 
382  freeBlock(pp);
383 
384 }
385 
386 //............................................................................
387 
388 static void registerIsland(Island *f) {
389  Island **pli;
390 
391  f->next=Z; Z=f;
392 
393  pli=&ZB[f->beginX+f->beginY];
394  f->nextBeginIndex=*pli; *pli=f;
395 
396  pli=&ZE[f->endX+f->endY];
397  f->nextEndIndex=*pli; *pli=f;
398 
399 }
400 
401 //............................................................................
402 
403 static Island *selectUpperIslands(Island *f,int nX,int nY,int *incomplete) {
404  int i,j; Island *l,*g;
405 
406  l=NULp;
407 
408  for(i=f->endX+f->endY+2,j=nX+nY-2;i<=j;i++)
409  for(g=ZB[i];g;g=g->nextBeginIndex)
410  if(g->beginX>f->endX&&g->beginY>f->endY) {
411  if(!g->hasUpper) {*incomplete=TRUE; return(NULp);}
412  g->nextSelected=l; l=g;
413  }
414 
415  *incomplete=FALSE;
416 
417  return(l);
418 }
419 
420 //............................................................................
421 
422 static Island *selectLowerIslands(Island *f,int *incomplete) {
423  int i; Island *l,*g;
424 
425  l=NULp;
426 
427  for(i=f->beginX+f->beginY-2;i>=0;i--)
428  for(g=ZE[i];g;g=g->nextEndIndex)
429  if(g->endX<f->beginX&&g->endY<f->beginY) {
430  if(!g->hasLower) {*incomplete=TRUE; return(NULp);}
431  g->nextSelected=l; l=g;
432  }
433 
434  *incomplete=FALSE;
435 
436  return(l);
437 }
438 
439 //............................................................................
440 
441 static int areEqual(Island *a,Island *b) {
442  Fragment *fa,*fb;
443 
444  if( a->beginX != b->beginX ) return(FALSE);
445  if( a->beginY != b->beginY ) return(FALSE);
446  if( a->endX != b->endX ) return(FALSE);
447  if( a->endY != b->endY ) return(FALSE);
448 
449  fa=a->fragments; fb=b->fragments;
450  while(fa&&fb) {
451  if( fa->beginX != fb->beginX ) return(FALSE);
452  if( fa->beginY != fb->beginY ) return(FALSE);
453  if( fa->length != fb->length ) return(FALSE);
454  fa=fa->next; fb=fb->next;
455  }
456  if(fa||fb) return(FALSE);
457 
458  return(TRUE);
459 }
460 
461 //............................................................................
462 
463 static int isUnique(Island *f) {
464  Island *v;
465 
466  for(v=ZB[f->beginX+f->beginY];v;v=v->nextBeginIndex)
467  if(areEqual(f,v)) return(FALSE);
468 
469  return(TRUE);
470 }
471 
472 //............................................................................
473 
474 static int isSignificant(Island *f) {
475  int l;
476  Fragment *q;
477 
478  l=0; for(q=f->fragments;q;q=q->next) l+=q->length;
479 
480  if(l<MINLENGTH) return(FALSE);
481 
482  if(getEntropy(GE,f->score/l,l)<=LogThres) return(TRUE);
483 
484  return(FALSE);
485 }
486 
487 //............................................................................
488 
489 static int I,J,K;
490 
491 //....
492 
493 static void drawLowerPath(Island *f,int nX,char *X,char *XX,int nY,char *Y,char *YY) {
494  int k;
495  Fragment *q;
496 
497  if(f->lower) drawLowerPath(f->lower,nX,X,XX,nY,Y,YY);
498 
499  for(q=f->fragments;q;q=q->next) {
500  while(I<q->beginX) {XX[K]=decodeBase(X[I++]); YY[K]='-'; K++;}
501  while(J<q->beginY) {XX[K]='-'; YY[K]=decodeBase(Y[J++]); K++;}
502  for(k=0;k<q->length;k++)
503  {XX[K]=decodeBase(X[I++]); YY[K]=decodeBase(Y[J++]); K++;}
504  }
505 
506 }
507 
508 //....
509 
510 static void drawPath(Island *f,int nX,char *X,char *XX,int nY,char *Y,char *YY) {
511  int k;
512  Island *p;
513  Fragment *q;
514 
515  I=0; J=0; K=0;
516 
517  if(f->lower) drawLowerPath(f->lower,nX,X,XX,nY,Y,YY);
518 
519  for(p=f;p;p=p->upper)
520  for(q=p->fragments;q;q=q->next) {
521  while(I<q->beginX) {XX[K]=decodeBase(X[I++]); YY[K]='-'; K++;}
522  while(J<q->beginY) {XX[K]='-'; YY[K]=decodeBase(Y[J++]); K++;}
523  for(k=0;k<q->length;k++)
524  {XX[K]=decodeBase(X[I++]); YY[K]=decodeBase(Y[J++]); K++;}
525  }
526 
527  while(I<nX) {XX[K]=decodeBase(X[I++]); YY[K]='-'; K++;}
528  while(J<nY) {XX[K]='-'; YY[K]=decodeBase(Y[J++]); K++;}
529 
530  XX[K]='\0';
531  YY[K]='\0';
532 
533 }
534 
535 //............................................................................
536 
537 static void drawIsland(Island *f) {
538  int i,j,k;
539  Fragment *q;
540 
541 #ifdef TEST
542  double score;
543  score=f->score;
544 #endif
545 
546  for(q=f->fragments;q;q=q->next) {
547  for(i=q->beginX,j=q->beginY,k=0;k<q->length;k++,i++,j++) {
548 
549 #ifdef TEST
550  if(
551  i>=0&&i<TELEN&&j>=0&&j<TELEN // &&score>TE[i][j]
552  ) {
553  TE[i][j]=score;
554  }
555 #endif
556 
557  }
558  }
559 
560 }
561 
562 //============================================================================
563 
564 static double secS(int i,int j,char X[],int secX[],char Y[],int secY[]) {
565 
566  if(secX[i]||secY[j]) {
567  if(secX[i]&&secY[j]) return(GS[(int)X[i]][(int)Y[j]]-Supp*expectedScore);
568  return(-1.e34);
569  }
570 
571  return(GS[(int)X[i]][(int)Y[j]]);
572 }
573 
574 //============================================================================
575 
576 static void AlignTwo(
577  int nX,char X[],int secX[],char XX[],int nY,char Y[],int secY[],char YY[]
578  ) {
579  int r,changed,i,j,k,ii,jj,startx,stopx,starty,stopy;
580  double s,ss;
581  Island *z,*zz,*zzz,*best;
582 
583  //OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
584 
585  Z=NULp; for(i=nX+nY-2;i>=0;i--) {ZB[i]=NULp; ZE[i]=NULp;}
586 
587  startx=0; stopx=nX-1;
588  for(i=startx,ii=i-1;i<=stopx;i++,ii++) {
589  starty=0; stopy=nY-1;
590  for(j=starty,jj=j-1;j<=stopy;j++,jj++) {
591  s=0.; r=NOWHERE;
592  if(i>startx&&j>starty) {
593  ss=U[ii][jj].score; if(ss>s) {s=ss; r=0;}
594  for(k=1;k<=MAXGAP&&ii-k>=0;k++) {
595  ss=U[ii-k][jj].score+(GapA+k*GapB)*expectedScore; if(ss>s) {s=ss; r= k;}
596  }
597  for(k=1;k<=MAXGAP&&jj-k>=0;k++) {
598  ss=U[ii][jj-k].score+(GapA+k*GapB)*expectedScore; if(ss>s) {s=ss; r=-k;}
599  }
600  }
601  s+=secS(i,j,X,secX,Y,secY); if(s<0.) {s=0.; r=NOWHERE;}
602  U[i][j].score=s;
603  U[i][j].down=r;
604  }
605  }
606 
607  startx=0; stopx=nX-1;
608  for(i=startx;i<=stopx;i++) {
609  starty=0; stopy=nY-1;
610  for(j=starty;j<=stopy;j++) {
611  ii=i; jj=j; s=U[ii][jj].score; U[ii][jj].up=NOWHERE;
612  for(;;) {
613  k=U[ii][jj].down; if(k==NOWHERE) break;
614  ii--; jj--; if(k>=0) ii-=k; else jj+=k;
615  if(U[ii][jj].score>s) break;
616  U[ii][jj].score=s; U[ii][jj].up=k;
617  }
618  }
619  }
620 
621  startx=0; stopx=nX-1;
622  for(i=startx;i<=stopx;i++) {
623  starty=0; stopy=nY-1;
624  for(j=starty;j<=stopy;j++) {
625  if(U[i][j].down!=NOWHERE) continue;
626  if(U[i][j].up==NOWHERE) continue;
627  z=newIsland(X,Y,i,j,1);
628  if(isUnique(z)&&isSignificant(z)) registerIsland(z);
629  else freeIsland(&z);
630  }
631  }
632 
633  //----
634 
635  startx=nX-1; stopx=0;
636  for(i=startx,ii=i+1;i>=stopx;i--,ii--) {
637  starty=nY-1; stopy=0;
638  for(j=starty,jj=j+1;j>=stopy;j--,jj--) {
639  s=0.; r=NOWHERE;
640  if(i<startx&&j<starty) {
641  ss=U[ii][jj].score; if(ss>s) {s=ss; r=0;}
642  for(k=1;k<=MAXGAP&&ii+k<nX;k++) {
643  ss=U[ii+k][jj].score+(GapA+k*GapB)*expectedScore; if(ss>s) {s=ss; r= k;}
644  }
645  for(k=1;k<=MAXGAP&&jj+k<nY;k++) {
646  ss=U[ii][jj+k].score+(GapA+k*GapB)*expectedScore; if(ss>s) {s=ss; r=-k;}
647  }
648  }
649  s+=secS(i,j,X,secX,Y,secY); if(s<0.) {s=0.; r=NOWHERE;}
650  U[i][j].score=s;
651  U[i][j].up=r;
652  }
653  }
654 
655  startx=nX-1; stopx=0;
656  for(i=startx;i>=stopx;i--) {
657  starty=nY-1; stopy=0;
658  for(j=starty;j>=stopy;j--) {
659  ii=i; jj=j; s=U[ii][jj].score; U[ii][jj].down=NOWHERE;
660  for(;;) {
661  k=U[ii][jj].up; if(k==NOWHERE) break;
662  ii++; jj++; if(k>=0) ii+=k; else jj-=k;
663  if(U[ii][jj].score>s) break;
664  U[ii][jj].score=s; U[ii][jj].down=k;
665  }
666  }
667  }
668 
669  startx=nX-1; stopx=0;
670  for(i=startx;i>=stopx;i--) {
671  starty=nY-1; stopy=0;
672  for(j=starty;j>=stopy;j--) {
673  if(U[i][j].up!=NOWHERE) continue;
674  if(U[i][j].down==NOWHERE) continue;
675  z=newIsland(X,Y,i,j,-1);
676  if(isUnique(z)&&isSignificant(z)) registerIsland(z);
677  else freeIsland(&z);
678  }
679  }
680 
681  //*****************
682 
683  for(z=Z;z;z=z->next) {z->hasUpper=FALSE; z->upper=NULp; z->upperScore=0.;}
684  do { changed=FALSE;
685  for(z=Z;z;z=z->next) {
686  if(z->hasUpper) continue;
687  zz=selectUpperIslands(z,nX,nY,&i); if(i) continue;
688  if(zz) {
689  s=0.; best=NULp;
690  for(zzz=zz;zzz;zzz=zzz->nextSelected) {
691  if(zzz->upperScore+zzz->score>s) {s=zzz->upperScore+zzz->score; best=zzz;}
692  }
693  if(best) {z->upper=best; z->upperScore=s;}
694  }
695  z->hasUpper=TRUE;
696  changed=TRUE;
697  }
698  } while(changed);
699 
700  for(z=Z;z;z=z->next) {z->hasLower=FALSE; z->lower=NULp; z->lowerScore=0.;}
701  do { changed=FALSE;
702  for(z=Z;z;z=z->next) {
703  if(z->hasLower) continue;
704  zz=selectLowerIslands(z,&i); if(i) continue;
705  if(zz) {
706  s=0.; best=NULp;
707  for(zzz=zz;zzz;zzz=zzz->nextSelected) {
708  if(zzz->lowerScore+zzz->score>s) {s=zzz->lowerScore+zzz->score; best=zzz;}
709  }
710  if(best) {z->lower=best; z->lowerScore=s;}
711  }
712  z->hasLower=TRUE;
713  changed=TRUE;
714  }
715  } while(changed);
716 
717  //*****************
718 
719  s=0.; best=NULp;
720  for(z=Z;z;z=z->next) {
721  ss=z->score+z->upperScore+z->lowerScore;
722  if(ss>s) {best=z; s=ss;}
723  }
724 
725 #ifdef TEST
726  for(i=0;i<TELEN;i++) for(j=0;j<TELEN;j++) TE[i][j]=0.;
727 #endif
728 
729  if(best) { drawPath(best,nX,X,XX,nY,Y,YY);
730  drawIsland(best);
731  for(z=best->upper;z;z=z->upper) drawIsland(z);
732  for(z=best->lower;z;z=z->lower) drawIsland(z);
733  }
734 
735  //OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
736 
737 #ifdef TEST
738 
739  if(te) {FILE *fp; te=FALSE;
740 
741  fp=fopen("subst.txt","wt");
742  fprintf(fp,"\n(* substitution matrix *)\nListDensityPlot[{");
743  for(i=0;i<N;i++) {
744  fprintf(fp,"\n{%f",GS[i][0]);
745  for(j=1;j<N;j++) fprintf(fp,",%f",GS[i][j]);
746  fprintf(fp,"}"); if(i<N-1) fprintf(fp,",");
747  }
748  fprintf(fp,"}]\n");
749  fclose(fp);
750 
751  fp=fopen("correl.txt","w");
752  fprintf(fp,"\n(* correlation matrix *)\n{");
753  for(i=0;i<TELEN&&i<nX;i++) {
754  fprintf(fp,"\n{");
755  fprintf(fp,"%f",secS(i,0,X,secX,Y,secY));
756  for(j=1;j<TELEN&&j<nY;j++) fprintf(fp,",%f",secS(i,j,X,secX,Y,secY));
757  if(i==TELEN-1||i==nX-1)fprintf(fp,"}"); else fprintf(fp,"},");
758  }
759  fprintf(fp,"}//ListDensityPlot\n");
760  fclose(fp);
761 
762  fp=fopen("path.txt","w");
763  fprintf(fp,"\n(* pairwise alignment *)\n{");
764  for(i=0;i<TELEN&&i<nX;i++) {
765  fprintf(fp,"\n{");
766  fprintf(fp,"%f\n",TE[i][0]);
767  for(j=1;j<TELEN&&j<nY;j++) fprintf(fp,",%f",TE[i][j]);
768  if(i==TELEN-1||i==nX-1)fprintf(fp,"}"); else fprintf(fp,"},");
769  }
770  fprintf(fp,"}//ListDensityPlot\n");
771  fclose(fp);
772 
773  for(i=0;i<TELEN;i++) for(j=0;j<TELEN;j++) TE[i][j]=0.;
774  for(z=Z;z;z=z->next) drawIsland(z);
775 
776  fp=fopen("islands.txt","w");
777  fprintf(fp,"\n(* smith-waterman-islands *)\n{");
778  for(i=0;i<TELEN&&i<nX;i++) {
779  fprintf(fp,"\n{");
780  fprintf(fp,"%f\n",TE[i][0]);
781  for(j=1;j<TELEN&&j<nY;j++) fprintf(fp,",%f",TE[i][j]);
782  if(i==TELEN-1||i==nX-1)fprintf(fp,"}"); else fprintf(fp,"},");
783  }
784  fprintf(fp,"}//ListDensityPlot\n");
785  fclose(fp);
786 
787  }
788 
789 #endif
790 
791  //OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
792 
793  while(Z) {z=Z; Z=Z->next; freeIsland(&z);}
794 
795 }
796 
797 //............................................................................
798 
799 void Align(
800  int nX,char X[],int secX[],char **XX,int nY,char Y[],int secY[],char **YY,
801  int freqs,double fT,double fC,double fA,double fG,
802  double rTC,double rTA,double rTG,double rCA,double rCG,double rAG,
803  double dist,double supp,double gapA,double gapB,double gapC,double thres
804  ) {
805  double F[N],R[6];
806  char *s;
807  int i,j,maxlen;
808 
809  *XX = (char*)newBlock((nX+nY+1)*sizeof(char));
810  *YY = (char*)newBlock((nX+nY+1)*sizeof(char));
811 
812  Supp=supp;
813  GapA=gapA;
814  GapB=gapB;
815  GapC=gapC;
816  Thres=thres;
817 
818  if(dist>MAXDIST||dist<MINDIST) {Error="Bad argument"; return;}
819 
820  if(GapA<0.||GapB<0.) {Error="Bad argument"; return;}
821 
822  if(Thres>1.||Thres<=0.) {Error="Bad argument"; return;}
823  LogThres=log(Thres);
824 
825  if(freqs) {
826  if(fT<=0.||fC<=0.||fA<=0.||fG<=0.) {Error="Bad argument"; return;}
827  }
828  else {
829  fT=0.; fC=0.; fA=0.; fG=0.;
830  for(s=X;*s;s++) {
831  switch(*s) {
832  case 'T': fT++; break; case 'C': fC++; break;
833  case 'A': fA++; break; case 'G': fG++; break;
834  default: fT+=0.25; fC+=0.25; fA+=0.25; fG+=0.25;
835  }
836  }
837  for(s=Y;*s;s++) {
838  switch(*s) {
839  case 'T': fT++; break; case 'C': fC++; break;
840  case 'A': fA++; break; case 'G': fG++; break;
841  default: fT+=0.25; fC+=0.25; fA+=0.25; fG+=0.25;
842  }
843  }
844  }
845 
846  if(rTC<=0.||rTA<=0.||rTG<=0.||rCA<=0.||rCG<=0.||rAG<=0.) {Error="Bad argument"; return;}
847 
848  normalizeBaseFreqs(F,fT,fC,fA,fG);
849  normalizeRateParams(R,rTC,rTA,rTG,rCA,rCG,rAG);
850 
851  maxlen=nX>nY?nX:nY;
852 
853  for(i=0;i<nX;i++) X[i]=encodeBase(X[i]);
854  for(i=0;i<nY;i++) Y[i]=encodeBase(Y[i]);
855 
856  U=(Score **)newMatrix(maxlen,maxlen,sizeof(Score));
857  ZB=(Island **)newVector(2*maxlen,sizeof(Island *));
858  ZE=(Island **)newVector(2*maxlen,sizeof(Island *));
859 
860  initScore(&GP,&GS);
861  initEntropy(&GE);
862 
863  updateScore(GP,GS,F,R,dist);
865 
866  expectedScore=0.;
867  for(i=0;i<N;i++) for(j=0;j<N;j++) expectedScore+=GP[i][j]*GS[i][j];
868 
869  AlignTwo(nX,X,secX,*XX,nY,Y,secY,*YY);
870 
871  uninitEntropy(&GE);
872  uninitScore(&GP,&GS);
873 
874  freeBlock(&ZE);
875  freeBlock(&ZB);
876  freeMatrix(&U);
877 
878  for(i=0;i<nX;i++) X[i]=decodeBase(X[i]);
879  for(i=0;i<nY;i++) Y[i]=decodeBase(Y[i]);
880 
881 }
void initTrnsprob(double ****PP)
Definition: trnsprob.cxx:155
static int I
Definition: align.cxx:489
char encodeBase(char c)
Definition: trnsprob.cxx:80
static Island * newIsland(char *X, char *Y, int i, int j, int d)
Definition: align.cxx:323
static double GapA
Definition: align.cxx:69
Definition: trnsprob.h:21
void uninitTrnsprob(double ****PP)
Definition: trnsprob.cxx:167
static void uninitEntropy(double ***EE)
Definition: align.cxx:144
static void drawPath(Island *f, int nX, char *X, char *XX, int nY, char *Y, char *YY)
Definition: align.cxx:510
struct island * next
Definition: align.cxx:50
int beginX
Definition: align.cxx:48
void normalizeBaseFreqs(double *F, double fT, double fC, double fA, double fG)
Definition: trnsprob.cxx:110
static double Thres
Definition: align.cxx:69
struct fragment Fragment
static double GapB
Definition: align.cxx:69
#define FALSE
Definition: defs.h:21
Definition: trnsprob.h:20
int beginX
Definition: align.cxx:42
int down
Definition: align.cxx:37
static double LogThres
Definition: align.cxx:69
Definition: align.cxx:46
static double ** GP
Definition: align.cxx:54
static int areEqual(Island *a, Island *b)
Definition: align.cxx:441
int endX
Definition: align.cxx:48
double score
Definition: align.cxx:36
#define MAXDIST
Definition: align.cxx:28
static double GapC
Definition: align.cxx:69
Definition: trnsprob.h:20
static void initEntropy(double ***EE)
Definition: align.cxx:138
static double expectedScore
Definition: align.cxx:69
int beginY
Definition: align.cxx:42
#define MINDIST
Definition: align.cxx:27
void updateTrnsprob(double ***PP, double *F, double *X, short M)
Definition: trnsprob.cxx:226
char decodeBase(char c)
Definition: trnsprob.cxx:95
struct island * upper
Definition: align.cxx:50
static Island * Z
Definition: align.cxx:71
#define REAL_MIN
Definition: defs.h:26
int up
Definition: align.cxx:37
static void AlignTwo(int nX, char X[], int secX[], char XX[], int nY, char Y[], int secY[], char YY[])
Definition: align.cxx:576
#define MINLENGTH
Definition: align.cxx:33
static void registerIsland(Island *f)
Definition: align.cxx:388
double score
Definition: align.cxx:47
struct island * nextEndIndex
Definition: align.cxx:50
#define NOWHERE
Definition: align.cxx:31
static Island ** ZB
Definition: align.cxx:71
int hasUpper
Definition: align.cxx:49
static Island * selectLowerIslands(Island *f, int *incomplete)
Definition: align.cxx:422
Definition: trnsprob.h:20
static void updateEntropy(double **P, double **S, double **E)
Definition: align.cxx:211
static double Supp
Definition: align.cxx:69
static int isSignificant(Island *f)
Definition: align.cxx:474
static double getEntropy(double **E, double m, int l)
Definition: align.cxx:150
#define newVector(n, s)
Definition: mem.h:23
Definition: align.cxx:35
int hasLower
Definition: align.cxx:49
#define MSIZE
Definition: align.cxx:22
static double ** GS
Definition: align.cxx:54
static Island * selectUpperIslands(Island *f, int nX, int nY, int *incomplete)
Definition: align.cxx:403
struct score Score
static void * M
Definition: mem.cxx:18
#define ESIZE
Definition: align.cxx:23
static Island ** ZE
Definition: align.cxx:71
void * newBlock(size_t s)
Definition: mem.cxx:39
static int isUnique(Island *f)
Definition: align.cxx:463
static double ** GE
Definition: align.cxx:69
static int J
Definition: align.cxx:489
int endY
Definition: align.cxx:48
static void initScore(double ***pP, double ***pS)
Definition: align.cxx:75
struct fragment * next
Definition: align.cxx:43
static double secS(int i, int j, char X[], int secX[], char Y[], int secY[])
Definition: align.cxx:564
Definition: trnsprob.h:20
#define freeBlock(v)
Definition: mem.h:25
struct island * lower
Definition: align.cxx:50
struct island Island
int length
Definition: align.cxx:42
double lowerScore
Definition: align.cxx:47
void ** newMatrix(size_t nrow, size_t ncol, size_t s)
Definition: mem.cxx:87
static void updateScore(double **P, double **S, double F[N], double X[6], double dist)
Definition: align.cxx:93
#define MAXGAP
Definition: align.cxx:30
#define LOG_REAL_MIN
Definition: defs.h:27
void getTrnsprob(double **P, double ***PP, double d)
Definition: trnsprob.cxx:179
static void drawLowerPath(Island *f, int nX, char *X, char *XX, int nY, char *Y, char *YY)
Definition: align.cxx:493
#define NULp
Definition: cxxforward.h:116
#define TRUE
Definition: defs.h:17
struct island * nextSelected
Definition: align.cxx:50
struct fragment * fragments
Definition: align.cxx:51
Definition: trnsprob.h:20
static void drawIsland(Island *f)
Definition: align.cxx:537
void normalizeRateParams(double *X, double a, double b, double c, double d, double e, double f)
Definition: trnsprob.cxx:132
Definition: trnsprob.h:20
double upperScore
Definition: align.cxx:47
void Align(int nX, char X[], int secX[], char **XX, int nY, char Y[], int secY[], char **YY, int freqs, double fT, double fC, double fA, double fG, double rTC, double rTA, double rTG, double rCA, double rCG, double rAG, double dist, double supp, double gapA, double gapB, double gapC, double thres)
Definition: align.cxx:799
size_t length
static void uninitScore(double ***pP, double ***pS)
Definition: align.cxx:84
struct island * nextBeginIndex
Definition: align.cxx:50
static Params P
Definition: arb_probe.cxx:81
int beginY
Definition: align.cxx:48
#define freeMatrix(m)
Definition: mem.h:40
static int K
Definition: align.cxx:489
#define newDoubleMatrix(r, c)
Definition: mem.h:34
static Score ** U
Definition: align.cxx:67
static void freeIsland(Island **pp)
Definition: align.cxx:375
GB_write_int const char s
Definition: AW_awar.cxx:154