17 #define epsilon 0.000001 // a small number
19 double di_protdist::pameigs[20] = {
20 -0.022091252, -0.019297602, 0.000004760, -0.017477817,
21 -0.016575549, -0.015504543, -0.002112213, -0.002685727,
22 -0.002976402, -0.013440755, -0.012926992, -0.004293227,
23 -0.005356688, -0.011064786, -0.010480731, -0.008760449,
24 -0.007142318, -0.007381851, -0.007806557, -0.008127024
27 double di_protdist::pamprobs[20][20] = {
29 -0.01522976, -0.00746819, -0.13934468, 0.11755315, -0.00212101,
30 0.01558456, -0.07408235, -0.00322387, 0.01375826, 0.00448826,
31 0.00154174, 0.02013313, -0.00159183, -0.00069275, -0.00399898,
32 0.08414055, -0.01188178, -0.00029870, 0.00220371, 0.00042546
35 -0.07765582, -0.00712634, -0.03683209, -0.08065755, -0.00462872,
36 -0.03791039, 0.10642147, -0.00912185, 0.01436308, -0.00133243,
37 0.00166346, 0.00624657, -0.00003363, -0.00128729, -0.00690319,
38 0.17442028, -0.05373336, -0.00078751, -0.00038151, 0.01382718
41 -0.08810973, -0.04081786, -0.04066004, -0.04736004, -0.03275406,
42 -0.03761164, -0.05047487, -0.09086213, -0.03269598, -0.03558015,
43 -0.08407966, -0.07970977, -0.01504743, -0.04011920, -0.05182232,
44 -0.07026991, -0.05846931, -0.01016998, -0.03047472, -0.06280511
47 0.02513756, -0.00578333, 0.09865453, 0.01322314, -0.00310665,
48 0.05880899, -0.09252443, -0.02986539, -0.03127460, 0.01007539,
49 -0.00360119, -0.01995024, 0.00094940, -0.00145868, -0.01388816,
50 0.11358341, -0.12127513, -0.00054696, -0.00055627, 0.00417284
53 0.16517316, -0.00254742, -0.03318745, -0.01984173, 0.00031890,
54 -0.02817810, 0.02661678, -0.01761215, 0.01665112, 0.10513343,
55 -0.00545026, 0.01827470, -0.00207616, -0.00763758, -0.01322808,
56 -0.02202576, -0.07434204, 0.00020593, 0.00119979, -0.10827873
59 0.16088826, 0.00056313, -0.02579303, -0.00319655, 0.00037228,
60 -0.03193150, 0.01655305, -0.03028640, 0.01367746, -0.11248153,
61 0.00778371, 0.02675579, 0.00243718, 0.00895470, -0.01729803,
62 -0.02686964, -0.08262584, 0.00011794, -0.00225134, 0.09415650
65 -0.01739295, 0.00572017, -0.00712592, -0.01100922, -0.00870113,
66 -0.00663461, -0.01153857, -0.02248432, -0.00382264, -0.00358612,
67 -0.00139345, -0.00971460, -0.00133312, 0.01927783, -0.01053838,
68 -0.00911362, -0.01010908, 0.09417598, 0.01763850, -0.00955454
71 0.01728888, 0.01344211, 0.01200836, 0.01857259, -0.17088517,
72 0.01457592, 0.01997839, 0.02844884, 0.00839403, 0.00196862,
73 0.01391984, 0.03270465, 0.00347173, -0.01940984, 0.01233979,
74 0.00542887, 0.01008836, 0.00126491, -0.02863042, 0.00449764
77 -0.02881366, -0.02184155, -0.01566086, -0.02593764, -0.04050907,
78 -0.01539603, -0.02576729, -0.05089606, -0.00597430, 0.02181643,
79 0.09835597, -0.04040940, 0.00873512, 0.12139434, -0.02427882,
80 -0.02945238, -0.01566867, -0.01606503, 0.09475319, 0.02238670
83 0.04080274, -0.02869626, -0.05191093, -0.08435843, 0.00021141,
84 0.13043842, 0.00871530, 0.00496058, -0.02797641, -0.00636933,
85 0.02243277, 0.03640362, -0.05735517, 0.00196918, -0.02218934,
86 -0.00608972, 0.02872922, 0.00047619, 0.00151285, 0.00883489
89 -0.02623824, 0.00331152, 0.03640692, 0.04260231, -0.00038223,
90 -0.07480340, -0.01022492, -0.00426473, 0.01448116, 0.01456847,
91 0.05786680, 0.03368691, -0.10126924, -0.00147454, 0.01275395,
92 0.00017574, -0.01585206, -0.00015767, -0.00231848, 0.02310137
95 -0.00846258, -0.01508106, -0.01967505, -0.02772004, 0.01248253,
96 -0.01331243, -0.02569382, -0.04461524, -0.02207075, 0.04663443,
97 0.19347923, -0.02745691, 0.02288515, -0.04883849, -0.01084597,
98 -0.01947187, -0.00081675, 0.00516540, -0.07815919, 0.08035585
101 -0.06553111, 0.09756831, 0.00524326, -0.00885098, 0.00756653,
102 0.02783099, -0.00427042, -0.16680359, 0.03951331, -0.00490540,
103 0.01719610, 0.15018204, 0.00882722, -0.00423197, -0.01919217,
104 -0.02963619, -0.01831342, -0.00524338, 0.00011379, -0.02566864
107 -0.07494341, -0.11348850, 0.00241343, -0.00803016, 0.00492438,
108 0.00711909, -0.00829147, 0.05793337, 0.02734209, 0.02059759,
109 -0.02770280, 0.14128338, 0.01532479, 0.00364307, 0.05968116,
110 -0.06497960, -0.08113941, 0.00319445, -0.00104222, 0.03553497
113 0.05948223, -0.08959930, 0.03269977, -0.03272374, -0.00365667,
114 -0.03423294, -0.06418925, -0.05902138, 0.05746317, -0.02580596,
115 0.01259572, 0.05848832, 0.00672666, 0.00233355, -0.05145149,
116 0.07348503, 0.11427955, 0.00142592, -0.01030651, -0.04862799
119 -0.01606880, 0.05200845, -0.01212967, -0.06824429, -0.00234304,
120 0.01094203, -0.07375538, 0.08808629, 0.12394822, 0.02231351,
121 -0.03608265, -0.06978045, -0.00618360, 0.00274747, -0.01921876,
122 -0.01541969, -0.02223856, -0.00107603, -0.01251777, 0.05412534
125 0.01688843, 0.05784728, -0.02256966, -0.07072251, -0.00422551,
126 -0.06261233, -0.08502830, 0.08925346, -0.08529597, 0.01519343,
127 -0.05008258, 0.10931873, 0.00521033, 0.02593305, -0.00717855,
128 0.02291527, 0.02527388, -0.00266188, -0.00871160, 0.02708135
131 -0.04233344, 0.00076379, 0.01571257, 0.04003092, 0.00901468,
132 0.00670577, 0.03459487, 0.12420216, -0.00067366, -0.01515094,
133 0.05306642, 0.04338407, 0.00511287, 0.01036639, -0.17867462,
134 -0.02289440, -0.03213205, 0.00017924, -0.01187362, -0.03933874
137 0.01284817, -0.01685622, 0.00724363, 0.01687952, -0.00882070,
138 -0.00555957, 0.01676246, -0.05560456, -0.00966893, 0.06197684,
139 -0.09058758, 0.00880607, 0.00108629, -0.08308956, -0.08056832,
140 -0.00413297, 0.02973107, 0.00092948, 0.07010111, 0.13007418
143 0.00700223, -0.01347574, 0.00691332, 0.03122905, 0.00310308,
144 0.00946862, 0.03455040, -0.06712536, -0.00304506, 0.04267941,
145 -0.10422292, -0.01127831, -0.00549798, 0.11680505, -0.03352701,
146 -0.00084536, 0.01631369, 0.00095063, -0.09570217, 0.06480321
150 void di_protdist::maketrans() {
152 long i, j, k, m, n,
s;
154 long sub[3], newsub[3];
157 double TEMP, TEMP1, TEMP2, TEMP3;
159 for (i = 0; i <= 19; i++) {
161 for (j = 0; j <= 19; j++)
168 g[0] = freqc + freqt;
169 g[1] = freqc + freqt;
170 g[2] = freqa + freqg;
171 g[3] = freqa + freqg;
176 fracchange = xi * (2 * f[0] * f[1] / g[0] + 2 * f[2] * f[3] / g[2]) +
177 xv * (1 - TEMP * TEMP - TEMP1 * TEMP1 - TEMP2 * TEMP2 - TEMP3 * TEMP3);
178 for (i = 0; i <= 3; i++) {
179 for (j = 0; j <= 3; j++) {
180 for (k = 0; k <= 3; k++) {
181 if (trans[i][j][k] !=
STOP)
182 sum += f[i] * f[j] * f[k];
186 for (i = 0; i <= 3; i++) {
188 for (j = 0; j <= 3; j++) {
190 for (k = 0; k <= 3; k++) {
193 for (m = 0; m <= 2; m++) {
195 for (n = 1; n <= 4; n++) {
196 memcpy(newsub, sub,
sizeof(
long) * 3L);
198 x = f[i] * f[j] * f[k] / (3.0 * sum);
199 if (((s == 1 || s == 2) && (n == 3 || n == 4)) ||
200 ((n == 1 || n == 2) && (s == 3 || s == 4)))
203 x *= xi * f[n - 1] / g[n - 1] + xv * f[n - 1];
204 b2 = trans[newsub[0] - 1][newsub[1] - 1][newsub[2] - 1];
208 if (cat[b1] != cat[b2]) {
209 prob[b1][b2] += x * ease;
210 prob[b1][b1] += x * (1.0 - ease);
221 for (i = 0; i <= 19; i++)
223 for (i = 0; i <= 19; i++) {
224 for (j = 0; j <= 19; j++)
225 prob[i][j] /= sqrt(pi[i] * pi[j]);
230 void di_protdist::code() {
233 trans[0][0][0] =
PHE;
234 trans[0][0][1] =
PHE;
235 trans[0][0][2] =
LEU;
236 trans[0][0][3] =
LEU;
237 trans[0][1][0] =
SER;
238 trans[0][1][1] =
SER;
239 trans[0][1][2] =
SER;
240 trans[0][1][3] =
SER;
241 trans[0][2][0] =
TYR;
242 trans[0][2][1] =
TYR;
243 trans[0][2][2] =
STOP;
244 trans[0][2][3] =
STOP;
245 trans[0][3][0] =
CYS;
246 trans[0][3][1] =
CYS;
247 trans[0][3][2] =
STOP;
248 trans[0][3][3] =
TRP;
249 trans[1][0][0] =
LEU;
250 trans[1][0][1] =
LEU;
251 trans[1][0][2] =
LEU;
252 trans[1][0][3] =
LEU;
253 trans[1][1][0] =
PRO;
254 trans[1][1][1] =
PRO;
255 trans[1][1][2] =
PRO;
256 trans[1][1][3] =
PRO;
257 trans[1][2][0] =
HIS;
258 trans[1][2][1] =
HIS;
259 trans[1][2][2] =
GLN;
260 trans[1][2][3] =
GLN;
261 trans[1][3][0] =
ARG;
262 trans[1][3][1] =
ARG;
263 trans[1][3][2] =
ARG;
264 trans[1][3][3] =
ARG;
265 trans[2][0][0] =
ILEU;
266 trans[2][0][1] =
ILEU;
267 trans[2][0][2] =
ILEU;
268 trans[2][0][3] =
MET;
269 trans[2][1][0] =
THR;
270 trans[2][1][1] =
THR;
271 trans[2][1][2] =
THR;
272 trans[2][1][3] =
THR;
273 trans[2][2][0] =
ASN;
274 trans[2][2][1] =
ASN;
275 trans[2][2][2] =
LYS;
276 trans[2][2][3] =
LYS;
277 trans[2][3][0] =
SER;
278 trans[2][3][1] =
SER;
279 trans[2][3][2] =
ARG;
280 trans[2][3][3] =
ARG;
281 trans[3][0][0] =
VAL;
282 trans[3][0][1] =
VAL;
283 trans[3][0][2] =
VAL;
284 trans[3][0][3] =
VAL;
285 trans[3][1][0] =
ALA;
286 trans[3][1][1] =
ALA;
287 trans[3][1][2] =
ALA;
288 trans[3][1][3] =
ALA;
289 trans[3][2][0] =
ASP;
290 trans[3][2][1] =
ASP;
291 trans[3][2][2] =
GLU;
292 trans[3][2][3] =
GLU;
293 trans[3][3][0] =
GLY;
294 trans[3][3][1] =
GLY;
295 trans[3][3][2] =
GLY;
296 trans[3][3][3] =
GLY;
304 trans[0][3][2] =
TRP;
308 trans[0][3][2] =
TRP;
309 trans[2][3][2] =
STOP;
310 trans[2][3][3] =
STOP;
311 trans[2][0][2] =
MET;
315 trans[0][3][2] =
TRP;
316 trans[2][0][2] =
MET;
317 trans[2][3][2] =
SER;
321 trans[0][3][2] =
TRP;
322 trans[1][0][2] =
THR;
323 trans[2][0][2] =
MET;
328 void di_protdist::transition() {
331 double freqr = freqa + freqg;
332 double freqy = freqc + freqt;
336 double aa = ttratio * freqr * freqy - freqa * freqg - freqc * freqt;
337 double bb = freqa * freqgr + freqc *
freqty;
342 if (xi <= 0.0 && xi >= -
epsilon) {
346 GBK_terminate(
"This transition-transversion ratio is impossible with these base frequencies");
350 void di_protdist::givens(
di_aa_matrix a,
long i,
long j,
long n,
double ctheta,
double stheta,
bool left) {
354 for (
long k = 0; k < n; k++) {
355 double d = ctheta * a[i - 1][k] + stheta * a[j - 1][k];
356 a[j - 1][k] = ctheta * a[j - 1][k] - stheta * a[i - 1][k];
361 for (
long k = 0; k < n; k++) {
362 double d = ctheta * a[k][i - 1] + stheta * a[k][j - 1];
363 a[k][j - 1] = ctheta * a[k][j - 1] - stheta * a[k][i - 1];
369 void di_protdist::coeffs(
double x,
double y,
double *c,
double *s,
double accuracy) {
371 double root = hypot(x, y);
372 if (root < accuracy) {
382 void di_protdist::tridiag(
di_aa_matrix a,
long n,
double accuracy) {
387 for (i = 2; i < n; i++) {
388 for (j = i + 1; j <= n; j++) {
389 coeffs(a[i - 2][i - 1], a[i - 2][j - 1], &c, &s, accuracy);
390 givens(a, i, j, n, c, s,
true);
391 givens(a, i, j, n, c, s,
false);
392 givens(eigvecs, i, j, n, c, s,
true);
397 void di_protdist::shiftqr(
di_aa_matrix a,
long n,
double accuracy) {
399 for (
long i = n; i >= 2; i--) {
401 const double& ai1 = a[i - 1][i - 1];
402 const double& ai2 = a[i - 2][i - 2];
403 const double d = hypot(ai2 - ai1, a[i - 1][i - 2]);
404 double approx = ai2 + ai1;
407 approx = (approx - d) / 2.0;
410 approx = (approx + d) / 2.0;
413 for (
long j = 0; j < i; j++) {
417 for (
long j = 1; j < i; j++) {
420 coeffs(a[j - 1][j - 1], a[j][j - 1], &c, &s, accuracy);
421 givens(a, j, j + 1, i, c, s,
true);
422 givens(a, j, j + 1, i, c, s,
false);
423 givens(eigvecs, j, j + 1, n, c, s,
true);
426 for (
long j = 0; j < i; j++) {
430 while (fabs(a[i - 1][i - 2]) > accuracy);
441 for (i = 0; i < n; i++) {
442 for (j = 0; j < n; j++)
446 tridiag(proba, n, accuracy);
447 shiftqr(proba, n, accuracy);
448 for (i = 0; i < n; i++)
449 eig[i] = proba[i][i];
450 for (i = 0; i <= 19; i++) {
451 for (j = 0; j <= 19; j++)
452 proba[i][j] = sqrt(pi[j]) * eigvecs[i][j];
458 void di_protdist::pameigen() {
460 memcpy(prob, pamprobs,
sizeof(pamprobs));
461 memcpy(eig, pameigs,
sizeof(pameigs));
465 void di_protdist::build_exptteig(
double tt) {
467 for (m = 0; m <= 19; m++) {
468 exptteig[m] = exp(tt * eig[m]);
472 void di_protdist::predict(
double ,
long nb1,
long nb2) {
474 for (
long m = 0; m <= 19; m++) {
475 double q = prob[m][nb1] * prob[m][nb2] * exptteig[m];
477 double TEMP = eig[m];
479 d2p += TEMP * TEMP * q;
483 void di_protdist::build_predikt_table(
int pos) {
484 double tt = pos_2_tt(pos);
487 akt_slopes = slopes[pos] = ARB_calloc<di_paa_matrix>(1);
488 akt_curves = curves[pos] = ARB_calloc<di_paa_matrix>(1);
489 akt_infs = infs[pos] = ARB_calloc<di_bool_matrix>(1);
492 for (
int b2 =
ALA; b2 <= b1; b2++) {
552 else if (b2 ==
GLX) {
562 akt_slopes[0][b1][b2] = dp / p;
563 akt_curves[0][b1][b2] = d2p / p - dp * dp / (p * p);
564 akt_infs[0][b1][b2] = 0;
565 akt_slopes[0][b2][b1] = akt_slopes[0][b1][b2];
566 akt_curves[0][b2][b1] = akt_curves[0][b1][b2];
567 akt_infs[0][b2][b1] = 0;
570 akt_infs[0][b1][b2] = 1;
571 akt_infs[0][b2][b1] = 1;
578 int di_protdist::tt_2_pos(
double tt) {
581 pos = DI_RESOLUTION * DI_MAX_DIST - 1;
587 double di_protdist::pos_2_tt(
int pos) {
592 void di_protdist::build_akt_predikt(
double tt) {
594 int pos = tt_2_pos(tt);
596 build_predikt_table(pos);
598 akt_slopes = slopes[pos];
599 akt_curves = curves[pos];
600 akt_infs = infs[pos];
609 long i, j, k, m, n, iterations;
610 double delta, slope, curv;
618 for (i = 0; i < spp && !
error; i++) {
619 matrix->
set(i, i, 0.0);
623 for (k = 0; k <chars; k++) {
625 if (b1 <=
VAL)
continue;
626 if (b1 ==
ASX || b1 ==
GLX)
continue;
631 for (j = 0; j < i && !
error; j++) {
644 build_akt_predikt(tt);
647 for (k = chars; k >0; k--) {
650 if (predict_infinity(b1, b2)) {
653 slope += predict_slope(b1, b2);
654 curv += predict_curve(b1, b2);
657 if (!predict_infinity(b1, b2)) {
662 tt = -1.0 / fracchange;
665 int npos = tt_2_pos(tt);
666 int d = npos - pos;
if (d<0) d=-d;
673 if ((slope > 0.0 && delta < 0.0) || (slope < 0.0 && delta > 0.0))
675 if (tt + delta < 0 && tt <=
epsilon) {
686 }
while (iterations < 20);
693 for (k = chars; k >0; k--) {
696 if (b1 <=
VAL && b2 <=
VAL) {
707 double rel = 1 - (double) m / n;
708 double drel = 1.0 - rel - 0.2 * rel * rel;
710 aw_message(
GBS_global_string(
"Warning: distance between sequences '%s' and '%s' is too large for kimura formula", entries[i]->name, entries[j]->name));
719 tt = (n-m)/(
double)n;
730 matrix->
set(i, j, fracchange * tt);
734 if (aborted_flag && error) *aborted_flag =
true;
739 void di_protdist::clean_slopes() {
775 memset(trans, 0,
sizeof(trans));
776 memset(pi, 0,
sizeof(pi));
789 for (
int i = 0; i<(DI_RESOLUTION*
DI_MAX_DIST); ++i) {
double di_aa_matrix[DI_MAX_AA][DI_MAX_AA]
void set(size_t i, size_t j, T val)
di_protdist(di_codetype codei, di_cattype cati, long nentries, DI_ENTRY **entries, long seq_len, AP_smatrix *matrixi)
const char * GBS_global_string(const char *templat,...)
void GBK_terminate(const char *error) __ATTR__NORETURN
static void error(const char *msg)
GB_ERROR makedists(bool *aborted_flag)
AP_sequence_simple_protein * get_prot_seq()
CONSTEXPR_INLINE long matrix_halfsize(long entries, bool inclusive_diagonal)
void aw_message(const char *msg)
const ap_pro * get_sequence() const
void inc_and_check_user_abort(GB_ERROR &error)
GB_write_int const char s