31 #define NOWHERE (MAXGAP+1)
61 static double TE[TELEN][TELEN];
94 double **
P,
double **S,
95 double F[
N],
double X[6],
double dist
98 double ***SS,
s,smin,smax;
109 S[
T][
T]/=F[
T]; S[
T][
C]/=F[
C]; S[
T][
A]/=F[
A]; S[
T][
G]/=F[
G];
110 S[
C][
T]/=F[
T]; S[
C][
C]/=F[
C]; S[
C][
A]/=F[
A]; S[
C][
G]/=F[
G];
111 S[
A][
T]/=F[
T]; S[
A][
C]/=F[
C]; S[
A][
A]/=F[
A]; S[
A][
G]/=F[
G];
112 S[
G][
T]/=F[
T]; S[
G][
C]/=F[
C]; S[
G][
A]/=F[
A]; S[
G][
G]/=F[
G];
114 for(i=0;i<
N;i++)
for(j=0;j<
N;j++) S[i][j]=log(S[i][j]);
130 if(s<smin) smin=
s;
else
152 double *
M,mmin,idm,la,lb,ma,mb;
154 M=E[0]; ma=0.5*(M[1]-M[0]); mmin=M[0]-ma; idm=0.5/ma;
156 k=floor((m-mmin)*idm);
159 if(k==0) {ja=k; jb=k+1;}
else
160 if(k==
MSIZE-1) {ja=k-1; jb=k; }
else
161 if(m>M[k]) {ja=k; jb=k+1;}
else
168 ((mb-m)*E[l][ja]+(m-ma)*E[l][jb])/(mb-ma)
173 if(l<=18) {la=16; lb=18; ia=16; ib=17;}
else
174 if(l<=20) {la=18; lb=20; ia=17; ib=18;}
else
175 if(l<=22) {la=20; lb=22; ia=18; ib=19;}
else
176 if(l<=24) {la=22; lb=24; ia=19; ib=20;}
else
177 if(l<=26) {la=24; lb=26; ia=20; ib=21;}
else
178 if(l<=28) {la=26; lb=28; ia=21; ib=22;}
else
179 if(l<=30) {la=28; lb=30; ia=22; ib=23;}
else
180 if(l<=32) {la=30; lb=32; ia=23; ib=24;}
183 if(l<=36) {la=32; lb=36; ia=24; ib=25;}
else
184 if(l<=40) {la=36; lb=40; ia=25; ib=26;}
else
185 if(l<=44) {la=40; lb=44; ia=26; ib=27;}
else
186 if(l<=48) {la=44; lb=48; ia=27; ib=28;}
else
187 if(l<=52) {la=48; lb=52; ia=28; ib=29;}
else
188 if(l<=56) {la=52; lb=56; ia=29; ib=30;}
else
189 if(l<=60) {la=56; lb=60; ia=30; ib=31;}
else
190 if(l<=64) {la=60; lb=64; ia=31; ib=32;}
193 if(l<= 128) {la= 64; lb= 128; ia=32; ib=33;}
else
194 if(l<= 256) {la= 128; lb= 256; ia=33; ib=34;}
else
195 if(l<= 512) {la= 256; lb= 512; ia=34; ib=35;}
else
196 {la= 512; lb=1024; ia=35; ib=36;}
200 +(lb-l)*((mb-m)*E[ia][ja]+(m-ma)*E[ia][jb])
201 +(l-la)*((mb-m)*E[ib][ja]+(m-ma)*E[ib][jb])
213 double *
M,m,dm,idm,mmin,mmax;
217 for(j=0;j<
N;j++) {m=S[i][j];
if(m<mmin) mmin=m;
else if(m>mmax) mmax=m;}
218 dm=(mmax-mmin)/
MSIZE;
222 for(i=0,m=mmin+0.5*dm;i<
MSIZE;i++,m+=dm) M[i]=m;
224 for(i=0;i<
MSIZE;i++) E[1][i]=0.;
227 k=floor((S[i][j]-mmin)*idm);
228 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
232 for(k=0;k<
MSIZE;k++) E[2][k]=0.;
234 for(j=0;j<
MSIZE;j++) {
236 k=floor((m-mmin)*idm);
237 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
238 E[2][k]+=E[1][i]*E[1][j];
241 for(ll=2,lll=1;ll<=8;ll++,lll++) {
243 for(k=0;k<
MSIZE;k++) E[l][k]=0.;
245 for(j=0;j<
MSIZE;j++) {
246 m=(ll*M[i]+lll*M[j])/l;
247 k=floor((m-mmin)*idm);
248 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
249 E[l][k]+=E[ll][i]*E[lll][j];
252 for(k=0;k<
MSIZE;k++) E[l][k]=0.;
254 for(j=0;j<
MSIZE;j++) {
256 k=floor((m-mmin)*idm);
257 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
258 E[l][k]+=E[ll][i]*E[ll][j];
262 for(l=17,ll=l-8;l<=24;l++,ll++) {
263 for(k=0;k<
MSIZE;k++) E[l][k]=0.;
265 for(j=0;j<
MSIZE;j++) {
267 k=floor((m-mmin)*idm);
268 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
269 E[l][k]+=E[ll][i]*E[ll][j];
273 for(l=25,ll=l-8;l<=32;l++,ll++) {
274 for(k=0;k<
MSIZE;k++) E[l][k]=0.;
276 for(j=0;j<
MSIZE;j++) {
278 k=floor((m-mmin)*idm);
279 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
280 E[l][k]+=E[ll][i]*E[ll][j];
284 for(l=33,ll=l-1;l<=36;l++,ll++) {
285 for(k=0;k<
MSIZE;k++) E[l][k]=0.;
287 for(j=0;j<
MSIZE;j++) {
289 k=floor((m-mmin)*idm);
290 if(k>=MSIZE) k=MSIZE-1;
else if(k<0) k=0;
291 E[l][k]+=E[ll][i]*E[ll][j];
295 for(l=1;l<=
ESIZE;l++) {m=0.;
for(k=MSIZE-1;k>=0;k--) m=E[l][k]+=m;}
297 for(i=1;i<=
ESIZE;i++)
306 fp=fopen(
"distrib.txt",
"wt");
307 fprintf(fp,
"\n(* score per basepair distribution for gap-less random path 1=Smin 100=Smax *)\nListPlot3D[({");
308 m0=M[0]; dm2=(M[MSIZE-1]-M[0])/99.;
309 for(i=1;i<=nbp;i++) {
311 for(j=1;j<=99;j++) fprintf(fp,
",%f",exp(
getEntropy(E,m0+dm2*j,i)));
312 fprintf(fp,
"}");
if(i<nbp) fprintf(fp,
",");
314 fprintf(fp,
"}),PlotRange->{{1,100},{1,%d},{0,1}},ViewPoint->{1.5,1.1,0.8}]\n",nbp);
328 ii=i; jj=j; l=0; s=0.;
333 s+=
GS[(
int)X[ii]][(
int)Y[jj]];
334 if(++l==1) {iii=ii; jjj=jj;}
339 l=0; *ff=f; ff=&f->
next;
342 ii++; jj++;
if(k>=0) ii+=k;
else jj-=k;
351 s+=
GS[(
int)X[ii]][(
int)Y[jj]];
352 if(++l==1) {iii=ii; jjj=jj;}
360 ii--; jj--;
if(k>=0) ii-=k;
else jj+=k;
408 for(i=f->
endX+f->
endY+2,j=nX+nY-2;i<=j;i++)
456 if(fa||fb)
return(
FALSE);
510 static void drawPath(
Island *f,
int nX,
char *X,
char *XX,
int nY,
char *Y,
char *YY) {
519 for(p=f;p;p=p->
upper)
551 i>=0&&i<TELEN&&j>=0&&j<TELEN // &&score>TE[i][j]
564 static double secS(
int i,
int j,
char X[],
int secX[],
char Y[],
int secY[]) {
566 if(secX[i]||secY[j]) {
571 return(
GS[(
int)X[i]][(
int)Y[j]]);
577 int nX,
char X[],
int secX[],
char XX[],
int nY,
char Y[],
int secY[],
char YY[]
579 int r,changed,i,j,k,ii,jj,startx,stopx,starty,stopy;
585 Z=
NULp;
for(i=nX+nY-2;i>=0;i--) {ZB[i]=
NULp; ZE[i]=
NULp;}
587 startx=0; stopx=nX-1;
588 for(i=startx,ii=i-1;i<=stopx;i++,ii++) {
589 starty=0; stopy=nY-1;
590 for(j=starty,jj=j-1;j<=stopy;j++,jj++) {
592 if(i>startx&&j>starty) {
593 ss=U[ii][jj].
score;
if(ss>s) {s=ss; r=0;}
594 for(k=1;k<=
MAXGAP&&ii-k>=0;k++) {
597 for(k=1;k<=
MAXGAP&&jj-k>=0;k++) {
601 s+=
secS(i,j,X,secX,Y,secY);
if(s<0.) {s=0.; r=
NOWHERE;}
607 startx=0; stopx=nX-1;
608 for(i=startx;i<=stopx;i++) {
609 starty=0; stopy=nY-1;
610 for(j=starty;j<=stopy;j++) {
614 ii--; jj--;
if(k>=0) ii-=k;
else jj+=k;
615 if(U[ii][jj].
score>s)
break;
621 startx=0; stopx=nX-1;
622 for(i=startx;i<=stopx;i++) {
623 starty=0; stopy=nY-1;
624 for(j=starty;j<=stopy;j++) {
625 if(U[i][j].down!=
NOWHERE)
continue;
626 if(U[i][j].up==
NOWHERE)
continue;
635 startx=nX-1; stopx=0;
636 for(i=startx,ii=i+1;i>=stopx;i--,ii--) {
637 starty=nY-1; stopy=0;
638 for(j=starty,jj=j+1;j>=stopy;j--,jj--) {
640 if(i<startx&&j<starty) {
641 ss=U[ii][jj].
score;
if(ss>s) {s=ss; r=0;}
642 for(k=1;k<=
MAXGAP&&ii+k<nX;k++) {
645 for(k=1;k<=
MAXGAP&&jj+k<nY;k++) {
649 s+=
secS(i,j,X,secX,Y,secY);
if(s<0.) {s=0.; r=
NOWHERE;}
655 startx=nX-1; stopx=0;
656 for(i=startx;i>=stopx;i--) {
657 starty=nY-1; stopy=0;
658 for(j=starty;j>=stopy;j--) {
662 ii++; jj++;
if(k>=0) ii+=k;
else jj-=k;
663 if(U[ii][jj].
score>s)
break;
669 startx=nX-1; stopx=0;
670 for(i=startx;i>=stopx;i--) {
671 starty=nY-1; stopy=0;
672 for(j=starty;j>=stopy;j--) {
673 if(U[i][j].up!=
NOWHERE)
continue;
674 if(U[i][j].down==
NOWHERE)
continue;
685 for(z=Z;z;z=z->
next) {
702 for(z=Z;z;z=z->
next) {
720 for(z=Z;z;z=z->
next) {
722 if(ss>s) {best=z; s=ss;}
726 for(i=0;i<TELEN;i++)
for(j=0;j<TELEN;j++) TE[i][j]=0.;
729 if(best) {
drawPath(best,nX,X,XX,nY,Y,YY);
739 if(te) {FILE *fp; te=
FALSE;
741 fp=fopen(
"subst.txt",
"wt");
742 fprintf(fp,
"\n(* substitution matrix *)\nListDensityPlot[{");
744 fprintf(fp,
"\n{%f",
GS[i][0]);
745 for(j=1;j<
N;j++) fprintf(fp,
",%f",
GS[i][j]);
746 fprintf(fp,
"}");
if(i<N-1) fprintf(fp,
",");
751 fp=fopen(
"correl.txt",
"w");
752 fprintf(fp,
"\n(* correlation matrix *)\n{");
753 for(i=0;i<TELEN&&i<nX;i++) {
755 fprintf(fp,
"%f",
secS(i,0,X,secX,Y,secY));
756 for(j=1;j<TELEN&&j<nY;j++) fprintf(fp,
",%f",
secS(i,j,X,secX,Y,secY));
757 if(i==TELEN-1||i==nX-1)fprintf(fp,
"}");
else fprintf(fp,
"},");
759 fprintf(fp,
"}//ListDensityPlot\n");
762 fp=fopen(
"path.txt",
"w");
763 fprintf(fp,
"\n(* pairwise alignment *)\n{");
764 for(i=0;i<TELEN&&i<nX;i++) {
766 fprintf(fp,
"%f\n",TE[i][0]);
767 for(j=1;j<TELEN&&j<nY;j++) fprintf(fp,
",%f",TE[i][j]);
768 if(i==TELEN-1||i==nX-1)fprintf(fp,
"}");
else fprintf(fp,
"},");
770 fprintf(fp,
"}//ListDensityPlot\n");
773 for(i=0;i<TELEN;i++)
for(j=0;j<TELEN;j++) TE[i][j]=0.;
776 fp=fopen(
"islands.txt",
"w");
777 fprintf(fp,
"\n(* smith-waterman-islands *)\n{");
778 for(i=0;i<TELEN&&i<nX;i++) {
780 fprintf(fp,
"%f\n",TE[i][0]);
781 for(j=1;j<TELEN&&j<nY;j++) fprintf(fp,
",%f",TE[i][j]);
782 if(i==TELEN-1||i==nX-1)fprintf(fp,
"}");
else fprintf(fp,
"},");
784 fprintf(fp,
"}//ListDensityPlot\n");
800 int nX,
char X[],
int secX[],
char **XX,
int nY,
char Y[],
int secY[],
char **YY,
801 int freqs,
double fT,
double fC,
double fA,
double fG,
802 double rTC,
double rTA,
double rTG,
double rCA,
double rCG,
double rAG,
803 double dist,
double supp,
double gapA,
double gapB,
double gapC,
double thres
826 if(fT<=0.||fC<=0.||fA<=0.||fG<=0.) {
Error=
"Bad argument";
return;}
829 fT=0.; fC=0.; fA=0.; fG=0.;
832 case 'T': fT++;
break;
case 'C': fC++;
break;
833 case 'A': fA++;
break;
case 'G': fG++;
break;
834 default: fT+=0.25; fC+=0.25; fA+=0.25; fG+=0.25;
839 case 'T': fT++;
break;
case 'C': fC++;
break;
840 case 'A': fA++;
break;
case 'G': fG++;
break;
841 default: fT+=0.25; fC+=0.25; fA+=0.25; fG+=0.25;
846 if(rTC<=0.||rTA<=0.||rTG<=0.||rCA<=0.||rCG<=0.||rAG<=0.) {
Error=
"Bad argument";
return;}
869 AlignTwo(nX,X,secX,*XX,nY,Y,secY,*YY);
void initTrnsprob(double ****PP)
static Island * newIsland(char *X, char *Y, int i, int j, int d)
void uninitTrnsprob(double ****PP)
static void uninitEntropy(double ***EE)
static void drawPath(Island *f, int nX, char *X, char *XX, int nY, char *Y, char *YY)
void normalizeBaseFreqs(double *F, double fT, double fC, double fA, double fG)
static int areEqual(Island *a, Island *b)
static void initEntropy(double ***EE)
static double expectedScore
void updateTrnsprob(double ***PP, double *F, double *X, short M)
static void AlignTwo(int nX, char X[], int secX[], char XX[], int nY, char Y[], int secY[], char YY[])
static void registerIsland(Island *f)
struct island * nextEndIndex
static Island * selectLowerIslands(Island *f, int *incomplete)
static void updateEntropy(double **P, double **S, double **E)
static int isSignificant(Island *f)
static double getEntropy(double **E, double m, int l)
static Island * selectUpperIslands(Island *f, int nX, int nY, int *incomplete)
void * newBlock(size_t s)
static int isUnique(Island *f)
static void initScore(double ***pP, double ***pS)
static double secS(int i, int j, char X[], int secX[], char Y[], int secY[])
void ** newMatrix(size_t nrow, size_t ncol, size_t s)
static void updateScore(double **P, double **S, double F[N], double X[6], double dist)
void getTrnsprob(double **P, double ***PP, double d)
static void drawLowerPath(Island *f, int nX, char *X, char *XX, int nY, char *Y, char *YY)
struct island * nextSelected
struct fragment * fragments
static void drawIsland(Island *f)
void normalizeRateParams(double *X, double a, double b, double c, double d, double e, double f)
void Align(int nX, char X[], int secX[], char **XX, int nY, char Y[], int secY[], char **YY, int freqs, double fT, double fC, double fA, double fG, double rTC, double rTA, double rTG, double rCA, double rCG, double rAG, double dist, double supp, double gapA, double gapB, double gapC, double thres)
static void uninitScore(double ***pP, double ***pS)
struct island * nextBeginIndex
#define newDoubleMatrix(r, c)
static void freeIsland(Island **pp)
GB_write_int const char s