ARB
arb_dnarates.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_dnarates.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 /* Maximum likelihood site rate calculation, Gary Olsen, 1991, 1992.
12  *
13  * Portions based on the program dnaml version 3.3 by Joseph Felsenstein
14  *
15  * Copyright notice from dnaml:
16  *
17  * version 3.3. (c) Copyright 1986, 1990 by the University of Washington
18  * and Joseph Felsenstein. Written by Joseph Felsenstein. Permission is
19  * granted to copy and use this program provided no fee is charged for it
20  * and provided that this copyright notice is not removed.
21  */
22 
23 // Conversion to C by Gary Olsen, 1991
24 
25 #define programName "DNAml_rates"
26 #define programVersion "1.0.0"
27 #define programDate "April 11, 1992"
28 
29 #include "DNAml_rates_1_0.h"
30 
31 #include <aw_awar_defs.hxx>
32 
33 #include <unistd.h>
34 #include <cmath>
35 
36 #define assert(bed) arb_assert(bed)
37 
38 // Global variables
39 
40 static xarray
42 
43 static double
44 dLidki[maxpatterns], // change in pattern i likelihood with rate
45  bestki[maxpatterns], // best rate for pattern i
46  bestLi[maxpatterns], // best likelihood found for pattern i
47  patrate[maxpatterns]; // current rate for pattern i
48 
49 static double
50 xi, xv, ttratio, // transition/transversion info
51  freqa, freqc, freqg, freqt, // base frequencies
54  fracchange; // random matching frequency (in a sense)
55 
56 static int
57 info[maxsites+1], // number of informative nucleotides
58  patsite[maxsites+1], // site corresponding to pattern
59  pattern[maxsites+1], // pattern number corresponding to site
60  patweight[maxsites+1], // weight of given pattern
61  weight[maxsites+1]; // weight of sequence site
62 
63 static int
64 categs, // number of rate categories
65  endsite, // number of unique sequence patterns
66  mininfo, // minimum number of informative sequences for rate est
67  numsp, // number of species
68  sites, // number of input sequence positions
69  weightsum; // sum of weights of positions in analysis
70 
71 static bool
72 anerror, // error flag
73  freqsfrom, // use empirical base frequencies
74  interleaved, // input data are in interleaved format
75  printdata, // echo data to output stream
76  writefile, // put weight and rate data in file
77  userweights; // use user-supplied position weight mask
78 
79 static char
80 *y[maxsp+1]; // sequence data array
81 
82 
83 // =======================================================================
84 // PROGRAM
85 // =======================================================================
86 
87 static void getnums(FILE *INFILE) {
88  // input number of species, number of sites
89  printf("\n%s, version %s, %s\n\n",
92  programDate);
93  printf("Portions based on Joseph Felsenstein's Nucleic acid sequence\n");
94  printf("Maximum Likelihood method, version 3.3\n\n");
95 
96  if (fscanf(INFILE, "%d %d", &numsp, &sites) != 2) {
97  printf("ERROR: Problem reading number of species and sites\n");
98  anerror = true;
99  return;
100  }
101  printf("%d Species, %d Sites\n\n", numsp, sites);
102 
103  if (numsp > maxsp) {
104  printf("ERROR: Too many species; adjust program constants\n");
105  anerror = true;
106  }
107  else if (numsp < 4) {
108  printf("ERROR: Too few species (need at least 4)\n");
109  anerror = true;
110  }
111 
112  if (sites > maxsites) {
113  printf("ERROR: Too many sites; adjust program constants\n");
114  anerror = true;
115  }
116  else if (sites < 1) {
117  printf("ERROR: Too few sites\n");
118  anerror = true;
119  }
120 }
121 
122 inline int min(int i, int j) { return i<j ? i : j; }
123 inline bool digit(int ch) { return ch >= '0' && ch <= '9'; }
124 inline bool white(int ch) { return ch == ' ' || ch == '\n' || ch == '\t'; }
125 
126 static void uppercase(int *chptr) {
127  // convert character to upper case -- either ASCII or EBCDIC
128  int ch = *chptr;
129  if ((ch >= 'a' && ch <= 'i') ||
130  (ch >= 'j' && ch <= 'r') ||
131  (ch >= 's' && ch <= 'z'))
132  {
133  *chptr = ch + 'A' - 'a';
134  }
135 }
136 
137 
138 static int base36(int ch) {
139  if (ch >= '0' && ch <= '9') return ch - '0';
140  else if (ch >= 'A' && ch <= 'I') return ch - 'A' + 10;
141  else if (ch >= 'J' && ch <= 'R') return ch - 'J' + 19;
142  else if (ch >= 'S' && ch <= 'Z') return ch - 'S' + 28;
143  else if (ch >= 'a' && ch <= 'i') return ch - 'a' + 10;
144  else if (ch >= 'j' && ch <= 'r') return ch - 'j' + 19;
145  else if (ch >= 's' && ch <= 'z') return ch - 's' + 28;
146  else return -1;
147 }
148 
149 
150 static int itobase36(int i) {
151  if (i < 0) return '?';
152  if (i < 10) return i + '0';
153  if (i < 19) return i - 10 + 'A';
154  if (i < 28) return i - 19 + 'J';
155  if (i < 36) return i - 28 + 'S';
156  return '?';
157 }
158 
159 
160 static int findch(int c, FILE *INFILE) {
161  int ch;
162  while ((ch = getc(INFILE)) != EOF && ch != c) ;
163  return ch;
164 }
165 
166 
167 static void inputweights(FILE *INFILE) {
168  // input the character weights 0, 1, 2 ... 9, A, B, ... Y, Z
169 
170  for (int i = 2; i <= nmlngth; i++) (void) getc(INFILE);
171 
172  weightsum = 0;
173  int i = 1;
174  while (i <= sites) {
175  int ch = getc(INFILE);
176  int wi = base36(ch);
177  if (wi >= 0) {
178  weightsum += weight[i++] = wi;
179  }
180  else if (! white(ch)) {
181  printf("ERROR: Bad weight character: '%c'", ch);
182  printf(" Weights must be a digit or a letter.\n");
183  anerror = true;
184  return;
185  }
186  }
187 
188  if (findch('\n', INFILE) == EOF) { // skip to end of line
189  printf("ERROR: Missing newline at end of weight data\n");
190  anerror = true;
191  }
192 }
193 
194 
195 static void getoptions(FILE *INFILE) {
196  categs = 0; // Number of rate categories
197  freqsfrom = false; // Use empirical base frequencies
198  interleaved = true; // By default, data format is interleaved
199  mininfo = MIN_INFO; // Default minimum number of informative seqs
200  printdata = false; // Don't echo data to output stream
201  ttratio = 2.0; // Transition/transversion rate ratio
202  userweights = false; // User-defined position weights
203  writefile = false; // Do not write to file
204 
205  int extranum = 0;
206  int ch;
207  while ((ch = getc(INFILE)) != '\n' && ch != EOF) {
208  uppercase (& ch);
209  switch (ch) {
210  case '1': printdata = ! printdata; break;
211  case 'C': categs = -1; extranum++; break;
212  case 'F': freqsfrom = true; break;
213  case 'I': interleaved = ! interleaved; break;
214  case 'L': break;
215  case 'M': mininfo = 0; extranum++; break;
216  case 'T': ttratio = -1.0; extranum++; break;
217  case 'U': break;
218  case 'W': userweights = true; weightsum = 0; extranum++; break;
219  case 'Y': writefile = ! writefile; break;
220  case ' ': break;
221  case '\t': break;
222  default:
223  printf("ERROR: Bad option character: '%c'\n", ch);
224  anerror = true;
225  return;
226  }
227  }
228 
229  if (ch == EOF) {
230  printf("ERROR: End-of-file in options list\n");
231  anerror = true;
232  return;
233  }
234 
235  // process lines with auxiliary data
236 
237  while (extranum-- && ! anerror) {
238  ch = getc(INFILE);
239  uppercase (& ch);
240  switch (ch) {
241 
242  case 'C':
243  if (categs >= 0) {
244  printf("ERROR: Unexpected Categories data\n");
245  anerror = true;
246  }
247  else if (fscanf(INFILE, "%d", &categs) != 1 || findch('\n', INFILE)==EOF) {
248  printf("ERROR: Problem reading number of rate categories\n");
249  anerror = true;
250  }
251  else if (categs < 1 || categs > maxcategories) {
252  printf("ERROR: Bad number of rate categories: %d\n", categs);
253  anerror = true;
254  }
255  break;
256 
257  case 'M': // Minimum informative sequences
258  if (mininfo > 0) {
259  printf("ERROR: Unexpected Min informative residues data\n");
260  anerror = true;
261  }
262  else if (fscanf(INFILE, "%d", &mininfo)!=1 || findch('\n', INFILE)==EOF) {
263  printf("ERROR: Problem reading min informative residues\n");
264  anerror = true;
265  }
266  else if (mininfo < 2 || mininfo > numsp) {
267  printf("ERROR: Bad number for informative residues: %d\n",
268  mininfo);
269  anerror = true;
270  }
271  break;
272 
273  case 'T': // Transition/transversion ratio
274  if (ttratio >= 0.0) {
275  printf("ERROR: Unexpected Transition/transversion data\n");
276  anerror = true;
277  }
278  else if (fscanf(INFILE, "%lf", &ttratio)!=1 || findch('\n', INFILE)==EOF) {
279  printf("ERROR: Problem reading transition/transversion data\n");
280  anerror = true;
281  }
282  break;
283 
284  case 'W': // Weights
285  if (! userweights || weightsum > 0) {
286  printf("ERROR: Unexpected Weights data\n");
287  anerror = true;
288  }
289  else {
290  inputweights(INFILE);
291  }
292  break;
293 
294  default:
295  printf("ERROR: Auxiliary data line starts with '%c'\n", ch);
296  anerror = true;
297  break;
298  }
299  }
300 
301  if (anerror) return;
302 
303  if (categs < 0) {
304  printf("ERROR: Category data missing from input\n");
305  anerror = true;
306  return;
307  }
308 
309  if (mininfo <= 0) {
310  printf("ERROR: Minimum informative residues missing from input\n");
311  anerror = true;
312  return;
313  }
314  else {
315  printf("There must be at least %d informative residues per column\n\n", mininfo);
316  }
317 
318  if (ttratio < 0.0) {
319  printf("ERROR: Transition/transversion data missing from input\n");
320  anerror = true;
321  return;
322  }
323 
324  if (! userweights) {
325  for (int i = 1; i <= sites; i++) weight[i] = 1; // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
326  weightsum = sites;
327  }
328  else if (weightsum < 1) {
329  printf("ERROR: Weight data invalid or missing from input\n");
330  anerror = true;
331  return;
332  }
333 
334 }
335 
336 
337 static void getbasefreqs(FILE *INFILE) {
338  if (freqsfrom) printf("Empirical ");
339  printf("Base Frequencies:\n\n");
340 
341  if (! freqsfrom) {
342  if (fscanf(INFILE, "%lf%lf%lf%lf", &freqa, &freqc, &freqg, &freqt) != 4
343  || findch('\n', INFILE) == EOF) {
344  printf("ERROR: Problem reading user base frequencies\n");
345  anerror = true;
346  }
347  }
348 
349  printf(" A %10.5f\n", freqa);
350  printf(" C %10.5f\n", freqc);
351  printf(" G %10.5f\n", freqg);
352  printf(" T(U) %10.5f\n\n", freqt);
353  freqr = freqa + freqg;
354  invfreqr = 1.0/freqr;
355  freqar = freqa * invfreqr;
356  freqgr = freqg * invfreqr;
357  freqy = freqc + freqt;
358  invfreqy = 1.0/freqy;
359  freqcy = freqc * invfreqy;
360  freqty = freqt * invfreqy;
361  printf("Transition/transversion ratio = %10.6f\n\n", ttratio);
362  double suma = ttratio*freqr*freqy - (freqa*freqg + freqc*freqt);
363  double sumb = freqa*freqgr + freqc*freqty;
364  xi = suma/(suma+sumb);
365  xv = 1.0 - xi;
366  ttratio = xi / xv;
367  if (xi <= 0.0) {
368  printf("WARNING: This transition/transversion ratio is\n");
369  printf(" impossible with these base frequencies!\n");
370  xi = 3/5;
371  xv = 2/5;
372  printf("Transition/transversion parameter reset\n\n");
373  }
374  printf("(Transition/transversion parameter = %10.6f)\n\n", xi/xv);
376  + xv*(1.0 - freqa*freqa - freqc*freqc
377  - freqg*freqg - freqt*freqt);
378 }
379 
380 
381 static void getyspace() {
382  long size = 4 * (sites/4 + 1);
383  char *y0 = (char*)malloc((unsigned) (sizeof(char) * size * (numsp+1)));
384 
385  if (!y0) {
386  printf("ERROR: Unable to obtain space for data array\n");
387  anerror = true;
388  }
389  else {
390  for (int i = 0; i <= numsp; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
391  y[i] = y0;
392  y0 += size;
393  }
394  }
395 }
396 
397 
398 static void setuptree(tree *tr, const int numSp) {
399  nodeptr p = NULp;
400 
401  for (int i = 1; i <= numSp; i++) { // Set-up tips
402  p = (nodeptr)malloc(sizeof(node));
403  if (!p) { anerror = true; break; }
404 
405  p->x = NULp;
406  p->tip = NULp;
407  p->number = i;
408  p->next = NULp;
409  p->back = NULp;
410  tr->nodep[i] = p;
411  }
412 
413  const int nodes = leafs_2_nodes(numSp, ROOTED);
414  for (int i = numSp+1; i <= nodes && !anerror; i++) { // internal nodes
415  nodeptr q = NULp;
416  for (int j = 1; j <= 3; j++) {
417  p = (nodeptr)malloc(sizeof(node));
418  if (!p) { anerror = true; break; }
419 
420  p->x = NULp;
421  p->tip = NULp;
422  p->number = i;
423  p->next = q;
424  p->back = NULp;
425  q = p;
426  }
427  if (!anerror) {
428  p->next->next->next = p;
429  tr->nodep[i] = p;
430  }
431  }
432 
433  tr->likelihood = unlikely;
434  tr->start = tr->nodep[1];
435  tr->mxtips = numSp;
436  tr->ntips = 0;
437  tr->nextnode = 0;
438  tr->opt_level = 0;
439  tr->smoothed = false;
440 
441  if (anerror) printf("ERROR: Unable to obtain sufficient memory");
442 }
443 
444 
445 static void freeTreeNode(nodeptr p) {
446  // Free a tree node (sector)
447  if (p) {
448  if (p->x) {
449  free(p->x->a);
450  free(p->x);
451  }
452  free(p);
453  }
454 }
455 
456 static void freeTree(tree *tr) {
457  int leafs = tr->mxtips;
458  int nodes = leafs_2_nodes(leafs, ROOTED);
459 
460  for (int i = 1; i <= leafs; i++) freeTreeNode(tr->nodep[i]);
461 
462  for (int i = leafs+1; i <= nodes; i++) {
463  nodeptr p = tr->nodep[i];
464  if (p) {
465  nodeptr q = p->next;
466  if (q) {
467  freeTreeNode(q->next);
468  freeTreeNode(q);
469  }
470  freeTreeNode(p);
471  }
472  }
473 }
474 
475 
476 static void getdata(tree *tr, FILE *INFILE) {
477  // read sequences
478 
479  int meaning[256]; // meaning of input characters
480  {
481  for (int i = 0; i <= 255; i++) meaning[i] = 0;
482 
483  meaning['A'] = 1;
484  meaning['B'] = 14;
485  meaning['C'] = 2;
486  meaning['D'] = 13;
487  meaning['G'] = 4;
488  meaning['H'] = 11;
489  meaning['K'] = 12;
490  meaning['M'] = 3;
491  meaning['N'] = 15;
492  meaning['O'] = 15;
493  meaning['R'] = 5;
494  meaning['S'] = 6;
495  meaning['T'] = 8;
496  meaning['U'] = 8;
497  meaning['V'] = 7;
498  meaning['W'] = 9;
499  meaning['X'] = 15;
500  meaning['Y'] = 10;
501  meaning['?'] = 15;
502  meaning['-'] = 15;
503  }
504 
505  int basesread = 0;
506  int basesnew = 0;
507 
508  bool allread = false;
509  bool firstpass = true;
510 
511  int ch = ' ';
512 
513  while (! allread) {
514  for (int i = 1; i <= numsp; i++) { // Read data line
515  if (firstpass) { // Read species names
516  int j = 1;
517  while (white(ch = getc(INFILE))) { // Skip blank lines
518  if (ch == '\n') j = 1; else j++;
519  }
520 
521  if (j > nmlngth) {
522  printf("ERROR: Blank name for species %d; ", i);
523  printf("check number of species,\n");
524  printf(" number of sites, and interleave option.\n");
525  anerror = true;
526  return;
527  }
528 
529  char *nameptr = tr->nodep[i]->name;
530  for (int k = 1; k < j; k++) *nameptr++ = ' ';
531 
532  while (ch != '\n' && ch != EOF) {
533  if (ch == '_' || white(ch)) ch = ' ';
534  *nameptr++ = ch;
535  if (++j > nmlngth) break;
536  ch = getc(INFILE);
537  }
538 
539  while (j++ <= nmlngth) *nameptr++ = ' ';
540  *nameptr = '\0'; // add null termination
541 
542  if (ch == EOF) {
543  printf("ERROR: End-of-file in name of species %d\n", i);
544  anerror = true;
545  return;
546  }
547  }
548 
549  int j = basesread;
550  while ((j < sites) && ((ch = getc(INFILE)) != EOF)
551  && ((! interleaved) || (ch != '\n'))) {
552  uppercase (& ch);
553  if (meaning[ch] || ch == '.') {
554  j++;
555  if (ch == '.') {
556  if (i != 1) ch = y[1][j];
557  else {
558  printf("ERROR: Dot (.) found at site %d of sequence 1\n", j);
559  anerror = true;
560  return;
561  }
562  }
563  y[i][j] = ch;
564  }
565  else if (white(ch) || digit(ch)) ;
566  else {
567  printf("ERROR: Bad base (%c) at site %d of sequence %d\n",
568  ch, j, i);
569  anerror = true;
570  return;
571  }
572  }
573 
574  if (ch == EOF) {
575  printf("ERROR: End-of-file at site %d of sequence %d\n", j, i);
576  anerror = true;
577  return;
578  }
579 
580  if (! firstpass && (j == basesread)) i--; // no data on line
581  else if (i == 1) basesnew = j;
582  else if (j != basesnew) {
583  printf("ERROR: Sequences out of alignment\n");
584  printf(" %d (instead of %d) residues read in sequence %d\n",
585  j - basesread, basesnew - basesread, i);
586  anerror = true;
587  return;
588  }
589 
590  while (ch != '\n' && ch != EOF) ch = getc(INFILE); // flush line
591  }
592  firstpass = false;
593  basesread = basesnew;
594  allread = (basesread >= sites);
595  }
596 
597  // Print listing of sequence alignment
598 
599  if (printdata) {
600  int j = nmlngth - 5 + ((sites + ((sites-1)/10))/2);
601  if (j < nmlngth - 1) j = nmlngth - 1;
602  if (j > 37) j = 37;
603  printf("Name"); for (int i=1; i<=j; i++) putchar(' '); printf("Sequences\n");
604  printf("----"); for (int i=1; i<=j; i++) putchar(' '); printf("---------\n");
605  putchar('\n');
606 
607  for (int i = 1; i <= sites; i += 60) {
608  int l = i + 59;
609  if (l > sites) l = sites;
610 
611  if (userweights) {
612  printf("Weights ");
613  for (j = 11; j <= nmlngth+3; j++) putchar(' ');
614  for (int k = i; k <= l; k++) {
615  putchar(itobase36(weight[k]));
616  if (((k % 10) == 0) && ((k % 60) != 0)) putchar(' ');
617  }
618  putchar('\n');
619  }
620 
621  for (j = 1; j <= numsp; j++) {
622  printf("%s ", tr->nodep[j]->name);
623  for (int k = i; k <= l; k++) {
624  ch = y[j][k];
625  if ((j > 1) && (ch == y[1][k])) ch = '.';
626  putchar(ch);
627  if (((k % 10) == 0) && ((k % 60) != 0)) putchar(' ');
628  }
629  putchar('\n');
630  }
631  putchar('\n');
632  }
633  }
634 
635  // Convert characters to meanings
636 
637  for (int i = 1; i <= sites; i++) info[i] = 0;
638 
639  for (int j = 1; j <= numsp; j++) {
640  for (int i = 1; i <= sites; i++) {
641  if ((y[j][i] = meaning[(int)y[j][i]]) != 15) info[i]++;
642  }
643  }
644 
645  for (int i = 1; i <= sites; i++) if (info[i] < MIN_INFO) weight[i] = 0;
646 
647 }
648 
649 
650 static void sitesort() {
651  // Shell sort keeping sites, weights in same order
652  for (int gap = sites/2; gap > 0; gap /= 2) {
653  for (int i = gap + 1; i <= sites; i++) {
654  int j = i - gap;
655  bool flip;
656  do {
657  flip = false;
658 
659  int jj = patsite[j];
660  int jg = patsite[j+gap];
661 
662  bool tied = true;
663 
664  for (int k = 1; tied && (k <= numsp); k++) {
665  flip = (y[k][jj] > y[k][jg]);
666  tied = (y[k][jj] == y[k][jg]);
667  }
668  if (flip) {
669  patsite[j] = jg;
670  patsite[j+gap] = jj;
671  j -= gap;
672  }
673  }
674  while (flip && (j > 0));
675  }
676  }
677 }
678 
679 
680 static void sitecombcrunch() {
681  // combine sites that have identical patterns (and nonzero weight)
682  int i = 0;
683  patsite[0] = patsite[1];
684  patweight[0] = 0;
685 
686  for (int j = 1; j <= sites; j++) {
687  bool tied = true;
688  int sitei = patsite[i];
689  int sitej = patsite[j];
690 
691  for (int k = 1; tied && (k <= numsp); k++) {
692  tied = (y[k][sitei] == y[k][sitej]);
693  }
694 
695  if (tied) {
696  patweight[i] += weight[sitej];
697  }
698  else {
699  if (patweight[i] > 0) i++;
700  patweight[i] = weight[sitej];
701  patsite[i] = sitej;
702  }
703 
704  pattern[sitej] = i;
705  }
706 
707  endsite = i;
708  if (patweight[i] > 0) endsite++;
709 }
710 
711 
712 static void makeweights() {
713  // make up weights vector to avoid duplicate computations
714  for (int i = 1; i <= sites; i++) patsite[i] = i; // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
715  sitesort();
716  sitecombcrunch();
717  if (endsite > maxpatterns) {
718  printf("ERROR: Too many patterns in data\n");
719  printf(" Increase maxpatterns to at least %d\n", endsite);
720  anerror = true;
721  }
722  else {
723  printf("Analyzing %d distinct data patterns (columns)\n\n", endsite);
724  }
725 }
726 
727 
728 static void makevalues(tree *tr) {
729  // set up fractional likelihoods at tips
730  for (int i = 1; i <= numsp; i++) { // Pack and move tip data
731  for (int j = 0; j < endsite; j++) {
732  y[i-1][j] = y[i][patsite[j]];
733  }
734  nodeptr p = tr->nodep[i];
735  p->tip = &(y[i-1][0]);
736  }
737 }
738 
739 
740 static void empiricalfreqs(tree *tr) {
741  // Get empirical base frequencies from the data
742 
743  freqa = 0.25;
744  freqc = 0.25;
745  freqg = 0.25;
746  freqt = 0.25;
747 
748  for (int k = 1; k <= 8; k++) {
749  double suma = 0.0;
750  double sumc = 0.0;
751  double sumg = 0.0;
752  double sumt = 0.0;
753 
754 
755  for (int i = 1; i <= numsp; i++) {
756  char *yptr = tr->nodep[i]->tip;
757  for (int j = 0; j < endsite; j++) {
758  int code = *yptr++;
759 
760  double fa = freqa * (code & 1);
761  double fc = freqc * ((code >> 1) & 1);
762  double fg = freqg * ((code >> 2) & 1);
763  double ft = freqt * ((code >> 3) & 1);
764 
765  double wj = patweight[j] / (fa + fc + fg + ft);
766 
767  suma += wj * fa;
768  sumc += wj * fc;
769  sumg += wj * fg;
770  sumt += wj * ft;
771  }
772  }
773 
774  double sum = suma + sumc + sumg + sumt;
775 
776  freqa = suma / sum;
777  freqc = sumc / sum;
778  freqg = sumg / sum;
779  freqt = sumt / sum;
780  }
781 }
782 
783 
784 static void getinput(tree *tr, FILE *INFILE) {
785  getnums(INFILE); if (anerror) return;
786  getoptions(INFILE); if (anerror) return;
787  if (!freqsfrom) {
788  getbasefreqs(INFILE); if (anerror) return;
789  }
790  getyspace(); if (anerror) return;
791  setuptree(tr, numsp); if (anerror) return;
792  getdata(tr, INFILE); if (anerror) return;
793  makeweights(); if (anerror) return;
794  makevalues (tr); if (anerror) return;
795  if (freqsfrom) {
796  empiricalfreqs (tr); if (anerror) return;
797  getbasefreqs(INFILE);
798  }
799 }
800 
801 
802 static xarray *setupxarray() {
803  xarray *x = (xarray *) malloc((unsigned) sizeof(xarray));
804  if (x) {
805  xtype *data = (xtype *) malloc((unsigned) (4 * endsite * sizeof(xtype)));
806  if (data) {
807  x->a = data;
808  x->c = data += endsite;
809  x->g = data += endsite;
810  x->t = data + endsite;
811  x->prev = x->next = x;
812  x->owner = NULp;
813  }
814  else {
815  free(x);
816  return NULp;
817  }
818  }
819  return x;
820 }
821 
822 
823 static void linkxarray(int req, int min, xarray **freexptr, xarray **usedxptr) {
824  // Link a set of xarrays
825 
826  xarray *first = NULp;
827  xarray *prev = NULp;
828 
829  {
830  int i = 0;
831  xarray *x;
832  do {
833  x = setupxarray();
834  if (x) {
835  if (! first) first = x;
836  else {
837  prev->next = x;
838  x->prev = prev;
839  }
840  prev = x;
841  i++;
842  }
843  else {
844  printf("ERROR: Failure to get xarray memory.\n");
845  if (i < min) anerror = true;
846  }
847  } while ((i < req) && x);
848  }
849 
850  if (first) {
851  first->prev = prev;
852  prev->next = first;
853  }
854 
855  *freexptr = first;
856  *usedxptr = NULp;
857 }
858 
859 
860 static void setupnodex(tree *tr) {
861  for (int i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2) && ! anerror; i++) {
862  nodeptr p = tr->nodep[i];
863  p->x = setupxarray();
864  if (!p->x) { anerror = true; break; }
865  }
866 }
867 
868 static xarray *getxtip(nodeptr p) {
869  xarray *new_xarray = NULp;
870  if (p) {
871  bool splice = false;
872 
873  if (p->x) {
874  new_xarray = p->x;
875  if (new_xarray == new_xarray->prev) ; // linked to self; leave it
876  else if (new_xarray == usedxtip) usedxtip = usedxtip->next; // at head
877  else if (new_xarray == usedxtip->prev) ; // already at tail
878  else { // move to tail of list
879  new_xarray->prev->next = new_xarray->next;
880  new_xarray->next->prev = new_xarray->prev;
881  splice = true;
882  }
883  }
884 
885  else if (freextip) {
886  new_xarray = freextip;
887  p->x = freextip;
888 
889  new_xarray->owner = p;
890  if (new_xarray->prev != new_xarray) { // not only member of freelist
891  new_xarray->prev->next = new_xarray->next;
892  new_xarray->next->prev = new_xarray->prev;
893  freextip = new_xarray->next;
894  }
895  else {
896  freextip = NULp;
897  }
898 
899  splice = true;
900  }
901 
902  else if (usedxtip) {
903  usedxtip->owner->x = NULp;
904  new_xarray = usedxtip;
905  p->x = usedxtip;
906  new_xarray->owner = p;
908  }
909 
910  else {
911  printf ("ERROR: Unable to locate memory for a tip.\n");
912  anerror = true;
913  exit(0);
914  }
915 
916  if (splice) {
917  if (usedxtip) { // list is not empty
918  usedxtip->prev->next = new_xarray;
919  new_xarray->prev = usedxtip->prev;
920  usedxtip->prev = new_xarray;
921  new_xarray->next = usedxtip;
922  }
923  else {
924  usedxtip = new_xarray->prev = new_xarray->next = new_xarray;
925  }
926  }
927  }
928  return new_xarray;
929 }
930 
931 
932 static xarray *getxnode(nodeptr p) {
933  // Ensure that internal node p has memory
934  if (! (p->x)) { // Move likelihood array on this node to sector p
935  nodeptr s;
936  if ((s = p->next)->x || (s = s->next)->x) {
937  p->x = s->x;
938  s->x = NULp;
939  }
940  else {
941  printf("ERROR in getxnode: Unable to locate memory at internal node.");
942  exit(0);
943  }
944  }
945  return p->x;
946 }
947 
948 
949 static void newview(nodeptr p) {
950  // Update likelihoods at node
951 
952  if (p->tip) { // Make sure that data are at tip
953  if (!p->x) { // they are not already there
954  (void) getxtip(p); // they are not, so get memory
955 
956  xtype *x3a = &(p->x->a[0]); // Move tip data to xarray
957  xtype *x3c = &(p->x->c[0]);
958  xtype *x3g = &(p->x->g[0]);
959  xtype *x3t = &(p->x->t[0]);
960 
961  char *yptr = p->tip;
962  for (int i = 0; i < endsite; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
963  int code = *yptr++;
964  *x3a++ = code & 1;
965  *x3c++ = (code >> 1) & 1;
966  *x3g++ = (code >> 2) & 1;
967  *x3t++ = (code >> 3) & 1;
968  }
969  }
970  }
971  else {
972  // Internal node needs update
973 
974  nodeptr q = p->next->back;
975  nodeptr r = p->next->next->back;
976 
977  while ((! p->x) || (! q->x) || (! r->x)) {
978  if (! q->x) newview(q);
979  if (! r->x) newview(r);
980  if (! p->x) (void) getxnode(p);
981  }
982 
983  xtype *x1a = &(q->x->a[0]);
984  xtype *x1c = &(q->x->c[0]);
985  xtype *x1g = &(q->x->g[0]);
986  xtype *x1t = &(q->x->t[0]);
987 
988  double z1 = q->z;
989  double lz1 = (z1 > zmin) ? log(z1) : log(zmin);
990  double xvlz1 = xv * lz1;
991 
992  xtype *x2a = &(r->x->a[0]);
993  xtype *x2c = &(r->x->c[0]);
994  xtype *x2g = &(r->x->g[0]);
995  xtype *x2t = &(r->x->t[0]);
996 
997  double z2 = r->z;
998  double lz2 = (z2 > zmin) ? log(z2) : log(zmin);
999  double xvlz2 = xv * lz2;
1000 
1001  xtype *x3a = &(p->x->a[0]);
1002  xtype *x3c = &(p->x->c[0]);
1003  xtype *x3g = &(p->x->g[0]);
1004  xtype *x3t = &(p->x->t[0]);
1005 
1006  {
1007  double *rptr = &(patrate[0]);
1008  for (int i = 0; i < endsite; i++) {
1009  double ki = *rptr++;
1010 
1011  double zz1 = exp(ki * lz1);
1012  double zv1 = exp(ki * xvlz1);
1013 
1014  double fx1r = freqa * *x1a + freqg * *x1g;
1015  double fx1y = freqc * *x1c + freqt * *x1t;
1016  double fx1n = fx1r + fx1y;
1017 
1018  double tempi = fx1r * invfreqr;
1019  double tempj = zv1 * (tempi-fx1n) + fx1n;
1020 
1021  double suma1 = zz1 * (*x1a++ - tempi) + tempj;
1022  double sumg1 = zz1 * (*x1g++ - tempi) + tempj;
1023 
1024  tempi = fx1y * invfreqy;
1025  tempj = zv1 * (tempi-fx1n) + fx1n;
1026 
1027  double sumc1 = zz1 * (*x1c++ - tempi) + tempj;
1028  double sumt1 = zz1 * (*x1t++ - tempi) + tempj;
1029 
1030  double zz2 = exp(ki * lz2);
1031  double zv2 = exp(ki * xvlz2);
1032 
1033  double fx2r = freqa * *x2a + freqg * *x2g;
1034  double fx2y = freqc * *x2c + freqt * *x2t;
1035  double fx2n = fx2r + fx2y;
1036 
1037  tempi = fx2r * invfreqr;
1038  tempj = zv2 * (tempi-fx2n) + fx2n;
1039 
1040  *x3a++ = suma1 * (zz2 * (*x2a++ - tempi) + tempj);
1041  *x3g++ = sumg1 * (zz2 * (*x2g++ - tempi) + tempj);
1042 
1043  tempi = fx2y * invfreqy;
1044  tempj = zv2 * (tempi-fx2n) + fx2n;
1045 
1046  *x3c++ = sumc1 * (zz2 * (*x2c++ - tempi) + tempj);
1047  *x3t++ = sumt1 * (zz2 * (*x2t++ - tempi) + tempj);
1048  }
1049  }
1050  }
1051 }
1052 
1053 
1054 static void hookup(nodeptr p, nodeptr q, double z) {
1055  p->back = q;
1056  q->back = p;
1057  p->z = q->z = z;
1058 }
1059 
1060 
1061 static void initrav(nodeptr p) {
1062  if (! p->tip) {
1063  initrav(p->next->back);
1064  initrav(p->next->next->back);
1065  newview(p);
1066  }
1067 }
1068 
1069 // =======================================================================
1070 // Read a tree from a file
1071 // =======================================================================
1072 
1073 static int treeFinishCom(FILE *INFILE) {
1074  bool inquote = false;
1075  int ch;
1076  while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) {
1077  if (ch == '[' && ! inquote) { // comment; find its end
1078  if ((ch = treeFinishCom(INFILE)) == EOF) break;
1079  }
1080  else if (ch == '\'') inquote = ! inquote; // start or end of quote
1081  }
1082 
1083  return ch;
1084 }
1085 
1086 
1087 static int treeGetCh(FILE *INFILE) {
1088  // get next nonblank, noncomment character
1089  int ch;
1090 
1091  while ((ch = getc(INFILE)) != EOF) {
1092  if (white(ch)) ;
1093  else if (ch == '[') { // comment; find its end
1094  if ((ch = treeFinishCom(INFILE)) == EOF) break;
1095  }
1096  else break;
1097  }
1098 
1099  return ch;
1100 }
1101 
1102 static void treeFlushLabel(FILE *INFILE) {
1103  int ch = treeGetCh(INFILE);
1104  if (ch == EOF) return;
1105 
1106  bool done = (ch == ':' || ch == ',' || ch == ')' || ch == '[' || ch == ';');
1107  if (!done) {
1108  bool quoted = (ch == '\'');
1109  if (quoted) ch = getc(INFILE);
1110 
1111  while (! done) {
1112  if (quoted) {
1113  if ((ch = findch('\'', INFILE)) == EOF) return; // find close quote
1114  ch = getc(INFILE); // check next char
1115  if (ch != '\'') done = true; // not doubled quote
1116  }
1117  else if (ch == ':' || ch == ',' || ch == ')' || ch == '['
1118  || ch == ';' || ch == '\n' || ch == EOF) {
1119  done = true;
1120  }
1121  if (! done) done = ((ch = getc(INFILE)) == EOF);
1122  }
1123  }
1124 
1125  if (ch != EOF) (void) ungetc(ch, INFILE);
1126 }
1127 
1128 
1129 static int findTipName(tree *tr, int ch, FILE *INFILE) {
1130  bool quoted = (ch == '\'');
1131  if (quoted) ch = getc(INFILE);
1132 
1133  bool done = false;
1134  int i = 0;
1135 
1136  char str[nmlngth+1];
1137  do {
1138  if (quoted) {
1139  if (ch == '\'') {
1140  ch = getc(INFILE);
1141  if (ch != '\'') done = true;
1142  }
1143  else if (ch == EOF) {
1144  done = true;
1145  }
1146  else if (ch == '\n' || ch == '\t') {
1147  ch = ' ';
1148  }
1149  }
1150  else if (ch == ':' || ch == ',' || ch == ')' || ch == '[' || ch == '\n' || ch == EOF) {
1151  done = true;
1152  }
1153  else if (ch == '_' || ch == '\t') {
1154  ch = ' ';
1155  }
1156 
1157  if (! done) {
1158  if (i < nmlngth) str[i++] = ch;
1159  ch = getc(INFILE);
1160  }
1161  } while (! done);
1162 
1163  if (ch == EOF) {
1164  printf("ERROR: End-of-file in tree species name\n");
1165  return 0;
1166  }
1167 
1168  (void) ungetc(ch, INFILE);
1169  while (i < nmlngth) str[i++] = ' '; // Pad name
1170 
1171  int n = 1;
1172  bool found;
1173  do {
1174  nodeptr q = tr->nodep[n];
1175  if (! (q->back)) { // Only consider unused tips
1176  i = 0;
1177  char *nameptr = q->name;
1178  do { found = str[i] == *nameptr++; } while (found && (++i < nmlngth));
1179  }
1180  else {
1181  found = false;
1182  }
1183  } while ((! found) && (++n <= tr->mxtips));
1184 
1185  if (! found) {
1186  i = nmlngth;
1187  do { str[i] = '\0'; } while (i-- && (str[i] <= ' '));
1188  printf("ERROR: Cannot find data for tree species: %s\n", str);
1189  }
1190 
1191  return found ? n : 0;
1192 }
1193 
1194 
1195 static double processLength(FILE *INFILE) {
1196  int ch = treeGetCh(INFILE); // Skip comments
1197  if (ch != EOF) (void) ungetc(ch, INFILE);
1198 
1199  double branch;
1200  if (fscanf(INFILE, "%lf", &branch) != 1) {
1201  printf("ERROR: Problem reading branch length in processLength:\n");
1202 
1203  char str[41];
1204  if (fscanf(INFILE, "%40s", str) == 1) printf("%s\n", str);
1205 
1206  anerror = true;
1207  branch = 0.0;
1208  }
1209 
1210  return branch;
1211 }
1212 
1213 
1214 static void treeFlushLen(FILE *INFILE) {
1215  int ch = treeGetCh(INFILE);
1216 
1217  if (ch == ':') (void) processLength(INFILE);
1218  else if (ch != EOF) (void) ungetc(ch, INFILE);
1219 }
1220 
1221 
1222 static void treeNeedCh(int c1, const char *where, FILE *INFILE) {
1223  int c2 = treeGetCh(INFILE);
1224  if (c2 != c1) {
1225  printf("ERROR: Missing '%c' %s tree; ", c1, where);
1226  if (c2 == EOF) {
1227  printf("End-of-File");
1228  }
1229  else {
1230  putchar('\'');
1231  for (int i = 24; i-- && (c2 != EOF); c2 = getc(INFILE)) putchar(c2);
1232  putchar('\'');
1233  }
1234  printf(" found instead\n");
1235  anerror = true;
1236  }
1237 }
1238 
1239 static void addElementLen(tree *tr, nodeptr p, FILE *INFILE) {
1240  nodeptr q;
1241  int ch = treeGetCh(INFILE);
1242  if (ch == '(') { // A new internal node
1243  int n = (tr->nextnode)++;
1244  if (n > 2*(tr->mxtips) - 2) {
1245  if (tr->rooted || n > 2*(tr->mxtips) - 1) {
1246  printf("ERROR: Too many internal nodes. Is tree rooted?\n");
1247  printf(" Deepest splitting should be a trifurcation.\n");
1248  anerror = true;
1249  return;
1250  }
1251  else {
1252  tr->rooted = true;
1253  }
1254  }
1255  q = tr->nodep[n];
1256  assert(q);
1257  addElementLen(tr, q->next, INFILE); if (anerror) return;
1258  treeNeedCh(',', "in", INFILE); if (anerror) return;
1259  assert(q->next);
1260  addElementLen(tr, q->next->next, INFILE); if (anerror) return;
1261  treeNeedCh(')', "in", INFILE); if (anerror) return;
1262  treeFlushLabel(INFILE); if (anerror) return;
1263  }
1264 
1265  else { // A new tip
1266  int n = findTipName(tr, ch, INFILE);
1267  if (n <= 0) { anerror = true; return; }
1268  q = tr->nodep[n];
1269  if (tr->start->number > n) tr->start = q;
1270  (tr->ntips)++;
1271  }
1272 
1273  treeNeedCh(':', "in", INFILE); if (anerror) return;
1274  double branch = processLength(INFILE); if (anerror) return;
1275  double z = exp(-branch / fracchange);
1276  if (z > zmax) z = zmax;
1277  hookup(p, q, z);
1278 }
1279 
1280 
1281 static void uprootTree(tree *tr, nodeptr p) {
1282  if (p->tip || p->back) {
1283  printf("ERROR: Unable to uproot tree.\n");
1284  printf(" Inappropriate node marked for removal.\n");
1285  anerror = true;
1286  }
1287  else {
1288  int n = --(tr->nextnode); // last internal node added
1289  if (n != tr->mxtips + tr->ntips - 1) {
1290  printf("ERROR: Unable to uproot tree. Inconsistent\n");
1291  printf(" number of tips and nodes for rooted tree.\n");
1292  anerror = true;
1293  }
1294  else {
1295  nodeptr q = p->next->back; // remove p from tree
1296  nodeptr r = p->next->next->back;
1297 
1298  hookup(q, r, q->z * r->z);
1299 
1300  q = tr->nodep[n];
1301  r = q->next;
1302  nodeptr s = q->next->next;
1303 
1304  if (tr->ntips > 2 && p != q && p != r && p != s) {
1305  hookup(p, q->back, q->z); // move connections to p
1306  hookup(p->next, r->back, r->z);
1307  hookup(p->next->next, s->back, s->z);
1308  }
1309 
1310  q->back = r->back = s->back = NULp;
1311  tr->rooted = false;
1312  }
1313  }
1314 }
1315 
1316 
1317 static void treeReadLen(tree *tr, FILE *INFILE) {
1318  for (int i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = NULp;
1319 
1320  tr->start = tr->nodep[tr->mxtips];
1321  tr->ntips = 0;
1322  tr->nextnode = tr->mxtips + 1;
1323  tr->opt_level = 0;
1324  tr->smoothed = false;
1325  tr->rooted = false;
1326 
1327  nodeptr p = tr->nodep[(tr->nextnode)++];
1328 
1329  treeNeedCh('(', "at start of", INFILE); if (anerror) return;
1330  addElementLen(tr, p, INFILE); if (anerror) return;
1331  treeNeedCh(',', "in", INFILE); if (anerror) return;
1332  addElementLen(tr, p->next, INFILE); if (anerror) return;
1333 
1334  if (!tr->rooted) {
1335  int ch = treeGetCh(INFILE);
1336  if (ch == ',') { // An unrooted format
1337  addElementLen(tr, p->next->next, INFILE); if (anerror) return;
1338  }
1339  else { // A rooted format
1340  p->next->next->back = NULp;
1341  tr->rooted = true;
1342  if (ch != EOF) (void) ungetc(ch, INFILE);
1343  }
1344  }
1345  treeNeedCh(')', "in", INFILE);
1346  if (anerror) {
1347  printf("(This error also happens if the last species in the tree is unmarked)\n");
1348  return;
1349  }
1350 
1351 
1352  treeFlushLabel(INFILE); if (anerror) return;
1353  treeFlushLen(INFILE); if (anerror) return;
1354  treeNeedCh(';', "at end of", INFILE); if (anerror) return;
1355 
1356  if (tr->rooted) {
1357  uprootTree(tr, p->next->next); if (anerror) return;
1358  }
1359  tr->start = p->next->next->back; // This is start used by treeString
1360 
1361  initrav(tr->start);
1362  initrav(tr->start->back);
1363 }
1364 
1365 // =======================================================================
1366 // End of Tree Reading
1367 // =======================================================================
1368 
1369 
1370 static double evaluate(tree *tr, nodeptr p) {
1371  nodeptr q = p->back;
1372  while ((! p->x) || (! q->x)) {
1373  if (! (p->x)) newview(p);
1374  if (! (q->x)) newview(q);
1375  }
1376 
1377  xtype *x1a = &(p->x->a[0]);
1378  xtype *x1c = &(p->x->c[0]);
1379  xtype *x1g = &(p->x->g[0]);
1380  xtype *x1t = &(p->x->t[0]);
1381 
1382  xtype *x2a = &(q->x->a[0]);
1383  xtype *x2c = &(q->x->c[0]);
1384  xtype *x2g = &(q->x->g[0]);
1385  xtype *x2t = &(q->x->t[0]);
1386 
1387  double z = p->z;
1388  if (z < zmin) z = zmin;
1389 
1390  double lz = log(z);
1391  double xvlz = xv * lz;
1392 
1393  int *wptr = &(patweight[0]);
1394  double *rptr = &(patrate[0]);
1395  double *log_f = tr->log_f;
1396 
1397  double sum = 0.0;
1398 
1399  for (int i = 0; i < endsite; i++) {
1400  double fx1a = freqa * *x1a++;
1401  double fx1g = freqg * *x1g++;
1402  double fx1c = freqc * *x1c++;
1403  double fx1t = freqt * *x1t++;
1404 
1405  double suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t;
1406 
1407  double fx2r = freqa * *x2a++ + freqg * *x2g++;
1408  double fx2y = freqc * *x2c++ + freqt * *x2t++;
1409  double fx1r = fx1a + fx1g;
1410  double fx1y = fx1c + fx1t;
1411 
1412  double sumc = (fx1r + fx1y) * (fx2r + fx2y);
1413  double sumb = fx1r * fx2r * invfreqr + fx1y * fx2y * invfreqy;
1414 
1415  suma -= sumb;
1416  sumb -= sumc;
1417 
1418  double ki = *rptr++;
1419  double zz = exp(ki * lz);
1420  double zv = exp(ki * xvlz);
1421 
1422  double term = log(zz * suma + zv * sumb + sumc);
1423  sum += *wptr++ * term;
1424  *log_f++ = term;
1425  }
1426 
1427  tr->likelihood = sum;
1428  return sum;
1429 }
1430 
1431 
1432 static void dli_dki(nodeptr p) {
1433  // d(Li)/d(ki)
1434 
1435  nodeptr q = p->back;
1436  while ((! p->x) || (! q->x)) {
1437  if (! p->x) newview(p);
1438  if (! q->x) newview(q);
1439  }
1440 
1441  xtype *x1a = &(p->x->a[0]);
1442  xtype *x1c = &(p->x->c[0]);
1443  xtype *x1g = &(p->x->g[0]);
1444  xtype *x1t = &(p->x->t[0]);
1445 
1446  xtype *x2a = &(q->x->a[0]);
1447  xtype *x2c = &(q->x->c[0]);
1448  xtype *x2g = &(q->x->g[0]);
1449  xtype *x2t = &(q->x->t[0]);
1450 
1451  double z = p->z;
1452  if (z < zmin) z = zmin;
1453 
1454  double lz = log(z);
1455  double xvlz = xv * lz;
1456 
1457  double *rptr = &(patrate[0]);
1458  int *wptr = &(patweight[0]);
1459 
1460  for (int i = 0; i < endsite; i++) {
1461  double fx1a = freqa * *x1a++;
1462  double fx1g = freqg * *x1g++;
1463  double fx1c = freqc * *x1c++;
1464  double fx1t = freqt * *x1t++;
1465 
1466  double suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t;
1467 
1468  double fx2r = freqa * *x2a++ + freqg * *x2g++;
1469  double fx2y = freqc * *x2c++ + freqt * *x2t++;
1470  double fx1r = fx1a + fx1g;
1471  double fx1y = fx1c + fx1t;
1472 
1473  double sumc = (fx1r + fx1y) * (fx2r + fx2y);
1474  double sumb = fx1r * fx2r * invfreqr + fx1y * fx2y * invfreqy;
1475 
1476  suma -= sumb;
1477  sumb -= sumc;
1478 
1479  double ki = *rptr++;
1480 
1481  suma *= exp(ki * lz);
1482  sumb *= exp(ki * xvlz);
1483 
1484  dLidki[i] += *wptr++ * lz * (suma + sumb*xv);
1485  }
1486 }
1487 
1488 static void spanSubtree(nodeptr p) {
1489  dli_dki (p);
1490 
1491  if (! p->tip) {
1492  spanSubtree(p->next->back);
1493  spanSubtree(p->next->next->back);
1494  }
1495 }
1496 
1497 
1498 static void findSiteRates(tree *tr, double ki_min, double ki_max, double d_ki, double max_error) {
1499  if (ki_min <= 0.0 || ki_max <= ki_min) {
1500  printf("ERROR: Bad rate value limits to findSiteRates\n");
1501  anerror = true;
1502  return;
1503  }
1504  else if (d_ki <= 1.0) {
1505  printf("ERROR: Bad rate step to findSiteRates\n");
1506  anerror = true;
1507  return;
1508  }
1509 
1510  for (int i = 0; i < endsite; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
1511  bestki[i] = 1.0; // dummy initial rates
1512  bestLi[i] = unlikely;
1513  }
1514 
1515  for (double ki = ki_min; ki <= ki_max; ki *= d_ki) {
1516  for (int i = 0; i < endsite; i++) patrate[i] = ki; // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
1517  initrav(tr->start);
1518  initrav(tr->start->back);
1519  (void) evaluate(tr, tr->start->back);
1520  for (int i = 0; i < endsite; i++) {
1521  if (tr->log_f[i] > bestLi[i]) {
1522  bestki[i] = ki;
1523  bestLi[i] = tr->log_f[i];
1524  }
1525  }
1526  }
1527 
1528  for (int i = 0; i < endsite; i++) patrate[i] = bestki[i];
1529  initrav(tr->start);
1530  initrav(tr->start->back);
1531 
1532  while (d_ki > 1.0 + max_error) {
1533  for (int i = 0; i < endsite; i++) dLidki[i] = 0.0;
1534  spanSubtree(tr->start->back);
1535  if (! tr->start->tip) {
1536  spanSubtree(tr->start->next->back);
1537  spanSubtree(tr->start->next->next->back);
1538  }
1539 
1540  d_ki = sqrt(d_ki);
1541  double inv_d_ki = 1.0/d_ki;
1542  for (int i = 0; i < endsite; i++) {
1543  if (dLidki[i] > 0.0) {
1544  patrate[i] *= d_ki;
1545  if (patrate[i] > ki_max) patrate[i] = ki_max;
1546  }
1547  else {
1548  patrate[i] *= inv_d_ki;
1549  if (patrate[i] < ki_min) patrate[i] = ki_min;
1550  }
1551  }
1552 
1553  initrav(tr->start);
1554  initrav(tr->start->back);
1555  }
1556 }
1557 
1558 
1559 static double subtreeLength(nodeptr p) {
1560  double sum = -fracchange * log(p->z);
1561  if (! p->tip) {
1562  sum += subtreeLength(p->next->back);
1563  sum += subtreeLength(p->next->next->back);
1564  }
1565 
1566  return sum;
1567 }
1568 
1569 
1570 static double treeLength(tree *tr) {
1571  double sum = subtreeLength(tr->start->back);
1572  if (! tr->start->tip) {
1573  sum += subtreeLength(tr->start->next->back);
1574  sum += subtreeLength(tr->start->next->next->back);
1575  }
1576 
1577  return sum;
1578 }
1579 
1580 
1581 static void categorize(int Sites,
1582  int Categs,
1583  int Weight[], // one based
1584  int Pattern[], // one based
1585  double Patrate[], // zero based
1586  double categrate[], // zero based
1587  int sitecateg[]) // one based
1588 {
1589  double min_1 = 1.0E37;
1590  double min_2 = 1.0E37;
1591  double max_1 = 0.0;
1592  double max_2 = 0.0;
1593 
1594  for (int i = 1; i <= Sites; i++) {
1595  if (Weight[i] > 0) {
1596  double ki = Patrate[Pattern[i]];
1597  if (ki < min_2) {
1598  if (ki < min_1) {
1599  if (ki < 0.995 * min_1) min_2 = min_1;
1600  min_1 = ki;
1601  }
1602  else if (ki > 1.005 * min_1) {
1603  min_2 = ki;
1604  }
1605  }
1606  else if (ki > max_2) {
1607  if (ki > max_1) {
1608  if (ki > 1.005 * max_1) max_2 = max_1;
1609  max_1 = ki;
1610  }
1611  else if (ki < 0.995 * max_1) {
1612  max_2 = ki;
1613  }
1614  }
1615  }
1616  }
1617 
1618  double a = (Categs - 3.0)/log(max_2/min_2);
1619  double b = - a * log(min_2) + 2.0;
1620 
1621  categrate[0] = min_1;
1622  for (int k = 1; k <= Categs-2; k++) categrate[k] = min_2 * exp((k-1)/a);
1623  if (Categs>0) categrate[Categs-1] = max_1;
1624 
1625  for (int i = 1; i <= Sites; i++) {
1626  if (Weight[i] > 0) {
1627  double ki = Patrate[Pattern[i]];
1628  if (ki < 0.99 * min_2) sitecateg[i] = 1;
1629  else if (ki > 1.00 * max_2) sitecateg[i] = Categs;
1630  else sitecateg[i] = nint(a * log(Patrate[Pattern[i]]) + b);
1631  }
1632  else {
1633  sitecateg[i] = Categs;
1634  }
1635  }
1636 }
1637 
1638 
1639 static char *arb_filter;
1640 static char *alignment_name;
1641 static GBDATA *gb_main;
1642 
1643 static void getArbFilter() {
1645  GB_begin_transaction(gb_main);
1646  arb_filter = GBT_read_string(gb_main, AWAR_GDE_EXPORT_FILTER);
1647  alignment_name = GBT_get_default_alignment(gb_main);
1648  GB_commit_transaction(gb_main);
1649 }
1650 
1651 static int get_next_SAI_count() {
1652  GBDATA *gb_sai_count = GB_search(gb_main, "arb_dnarates/sai_count", GB_FIND);
1653  if (!gb_sai_count) {
1654  gb_sai_count = GB_search(gb_main, "arb_dnarates/sai_count", GB_INT);
1655  }
1656  int count = GB_read_int(gb_sai_count);
1657  count++;
1658  GB_write_int(gb_sai_count, count);
1659  return count;
1660 }
1661 
1663  GBDATA *gb_sai = NULp;
1664  char sai_name[100];
1665 
1666  while (1) {
1667  sprintf(sai_name, "POS_VAR_BY_ML_%i", get_next_SAI_count());
1668  gb_sai = GBT_find_SAI(gb_main, sai_name);
1669  if (!gb_sai) { // sai_name is not used yet
1670  gb_sai = GBT_find_or_create_SAI(gb_main, sai_name);
1671  printf("Writing '%s'\n", sai_name);
1672  break;
1673  }
1674  }
1675  return gb_sai;
1676 }
1677 
1678 static bool writeToArb() {
1679  GB_ERROR error = NULp;
1680  GB_begin_transaction(gb_main);
1681 
1682  long ali_len = GBT_get_alignment_len(gb_main, alignment_name);
1683  if (ali_len<=0) {
1684  error = GB_await_error();
1685  }
1686  else {
1687  char *cats = ARB_calloc<char>(ali_len+1); // categories
1688  float *rates = ARB_calloc<float>(ali_len); // rates to export
1689  char category_string[1024];
1690 
1691  // check filter has correct length
1692  {
1693  long filter_len = strlen(arb_filter);
1694  if (filter_len != ali_len) {
1695  error = GBS_global_string("Filter length (%li) does not match alignment length (%li)",
1696  filter_len, ali_len);
1697  }
1698  }
1699 
1700  // fill in rates and categories
1701  if (!error) {
1702  double categrate[maxcategories]; // rate of a given category
1703  int sitecateg[maxsites+1]; // category of a given site
1704 
1705  categorize(sites, categs, weight, pattern, patrate, categrate, sitecateg);
1706 
1707  int i = 1; // thanks to pascal
1708  for (int ali_pos = 0; ali_pos < ali_len; ali_pos++) {
1709  if (arb_filter[ali_pos] == '0') {
1710  cats[ali_pos] = '.';
1711  rates[ali_pos] = KI_MAX;
1712  continue; // filter says not written
1713  }
1714  if (weight[i] > 0) {
1715  rates[ali_pos] = patrate[pattern[i]];
1716  cats[ali_pos] = itobase36(categs - sitecateg[i]);
1717  }
1718  else {
1719  rates[i] = KI_MAX; // infinite rate
1720  cats[ali_pos] = '.';
1721  }
1722  i++;
1723  }
1724 
1725  int unfiltered_sites = i-1;
1726  if (unfiltered_sites != sites) {
1727  error = GBS_global_string("Filter positions (%i) do not match input sequence positions (%i)",
1728  unfiltered_sites, sites);
1729  }
1730 
1731  // write categories
1732  if (!error) {
1733  char *p = category_string;
1734  p[0] = 0; // if no categs
1735 
1736  for (int k = 1; k <= categs; k ++) {
1737  sprintf(p, " %G", categrate[categs-k]);
1738  p += strlen(p);
1739  }
1740  }
1741  }
1742 
1743 
1744  if (!error) {
1745  GBDATA *gb_sai = create_next_SAI();
1746  if (!gb_sai) {
1747  error = GB_await_error();
1748  }
1749  else {
1750  GBDATA *gb_data = GBT_add_data(gb_sai, alignment_name, "rates", GB_FLOATS); // @@@ AFAIK not used anywhere
1751  GB_write_floats(gb_data, rates, ali_len);
1752 
1753  gb_data = GBT_add_data(gb_sai, alignment_name, "data", GB_STRING);
1754  GB_write_string(gb_data, cats);
1755 
1756  gb_data = GBT_add_data(gb_sai, alignment_name, "_CATEGORIES", GB_STRING);
1757  GB_write_string(gb_data, category_string);
1758 
1759  gb_data = GBT_add_data(gb_sai, alignment_name, "_TYPE", GB_STRING);
1760  GB_write_string(gb_data, "PVML: Positional Variability by ML (Olsen)");
1761  }
1762  }
1763 
1764  free(cats);
1765  free(rates);
1766  }
1767 
1768  error = GB_end_transaction(gb_main, error);
1769  if (error) {
1770  fprintf(stderr, "Error in arb_dnarates: %s\n", error);
1771  }
1772 
1773  return !error;
1774 }
1775 
1776 static void openArb(const char *dbname) {
1777  gb_main = GB_open(dbname, "rw");
1778  if (!gb_main) {
1779  if (strcmp(dbname, ":") == 0) {
1780  GB_warning("Cannot find ARB server");
1781  }
1782  else {
1783  GB_warningf("Cannot open DB '%s'", dbname);
1784  }
1785  exit(EXIT_FAILURE);
1786  }
1787 }
1788 
1789 static void saveArb(const char *saveAs) {
1790  GB_ERROR error = GB_save(gb_main, saveAs, "a");
1791  if (error) {
1792  GB_warningf("Error saving '%s': %s", saveAs, error);
1793  exit(EXIT_FAILURE);
1794  }
1795 }
1796 
1797 static void closeArb() {
1798  GB_close(gb_main);
1799 }
1800 
1801 static void wrfile(FILE *outfile,
1802  int Sites,
1803  int Categs,
1804  int Weight[], // one based
1805  double categrate[], // zero based
1806  int sitecateg[]) // one based
1807 {
1808  for (int k = 1; k <= Sites; k += 60) {
1809  int j = min(k + 59, Sites);
1810 
1811  fprintf(outfile, "%s ", k == 1 ? "Weights " : " ");
1812 
1813  for (int i = k; i <= j; i++) {
1814  putc(itobase36(Weight[i]), outfile);
1815  if (((i % 10) == 0) && ((i % 60) != 0)) putc(' ', outfile);
1816  }
1817 
1818  putc('\n', outfile);
1819  }
1820  for (int k = 1; k <= Categs; k += 7) {
1821  int j = min(k + 6, Categs);
1822 
1823  if (k == 1) fprintf(outfile, "C %2d", Categs);
1824  else fprintf(outfile, " ");
1825 
1826  for (int i = k-1; i < j; i++) fprintf(outfile, " %9.5f", categrate[i]);
1827 
1828  putc('\n', outfile);
1829  }
1830 
1831  for (int k = 1; k <= Sites; k += 60) {
1832  int j = min(k + 59, Sites);
1833 
1834  fprintf(outfile, "%s ", k == 1 ? "Categories" : " ");
1835 
1836  for (int i = k; i <= j; i++) {
1837  putc(itobase36(sitecateg[i]), outfile);
1838  if (((i % 10) == 0) && ((i % 60) != 0)) putc(' ', outfile);
1839  }
1840 
1841  putc('\n', outfile);
1842  }
1843 
1844 }
1845 
1846 
1847 static void summarize(int treenum) {
1848  printf(" Site Rate\n");
1849  printf(" ---- ---------\n");
1850 
1851  for (int i = 1; i <= sites; i++) {
1852  if (weight[i] > 0) {
1853  printf("%6d %11.4f\n", i, patrate[pattern[i]]);
1854  }
1855  else {
1856  printf("%6d Undefined\n", i);
1857  }
1858  }
1859 
1860  putchar('\n');
1861 
1862  if (categs > 1) {
1863  double categrate[maxcategories]; // rate of a given category
1864  int sitecateg[maxsites+1]; // category of a given site
1865 
1866  categorize(sites, categs, weight, pattern, patrate, categrate, sitecateg);
1867 
1868  wrfile(stdout, sites, categs, weight, categrate, sitecateg);
1869  putchar('\n');
1870 
1871  if (writefile) {
1872  char filename[512];
1873  if (treenum <= 0) {
1874  (void) sprintf(filename, "%s.%d", "weight_rate", getpid());
1875  }
1876  else {
1877  (void) sprintf(filename, "%s_%2d.%d", "weight_rate", treenum, getpid());
1878  }
1879 
1880  FILE *outfile = fopen(filename, "w");
1881  if (outfile) {
1882  wrfile(outfile, sites, categs, weight, categrate, sitecateg);
1883  (void) fclose(outfile);
1884  printf("Weights and categories also written to %s\n\n", filename);
1885  }
1886  }
1887  }
1888 }
1889 
1890 
1891 static void makeUserRates(tree *tr, FILE *INFILE) {
1892  int numtrees;
1893  if (fscanf(INFILE, "%d", &numtrees) != 1 || findch('\n', INFILE) == EOF) {
1894  printf("ERROR: Problem reading number of user trees\n");
1895  anerror = true;
1896  return;
1897  }
1898 
1899  printf("User-defined %s:\n\n", (numtrees == 1) ? "tree" : "trees");
1900 
1901  for (int which = 1; which <= numtrees; which++) {
1902  for (int i = 0; i < endsite; i++) patrate[i] = 1.0; // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
1903 
1904  treeReadLen(tr, INFILE);
1905  if (anerror) break;
1906 
1907  double tree_length = treeLength(tr);
1908 
1909  printf("%d taxon user-supplied tree read\n\n", tr->ntips);
1910  printf("Total length of tree branches = %8.6f\n\n", tree_length);
1911 
1912  findSiteRates(tr, 0.5/tree_length, KI_MAX, RATE_STEP, MAX_ERROR);
1913  if (anerror) break;
1914 
1915  summarize(numtrees == 1 ? 0 : which); if (anerror) break;
1916  }
1917 
1918 }
1919 
1920 inline bool is_char(const char *name, char c) { return name[0] == c && !name[1]; }
1921 inline bool wantSTDIN(const char *iname) { return is_char(iname, '-'); }
1922 
1923 int ARB_main(int argc, char *argv[]) {
1924  // Maximum Likelihood Site Rate
1925  const char *dbname = ":";
1926  const char *dbsavename = NULp;
1927  bool help = false;
1928  const char *error = NULp;
1929  const char *inputname = NULp;
1930  FILE *infile = NULp;
1931 
1932  switch (argc) {
1933  case 3: error = "'dbname' may only be used together with 'dbsavename'"; break;
1934 
1935  case 4:
1936  dbname = argv[2];
1937  dbsavename = argv[3];
1938  // fall-through
1939  case 2:
1940  inputname = argv[1];
1941  infile = wantSTDIN(inputname) ? stdin : fopen(inputname, "rt");
1942  if (!infile) error = GB_IO_error("reading", inputname);
1943  break;
1944 
1945  case 0:
1946  case 1: error = "missing arguments"; break;
1947 
1948  default : error = "too many arguments"; break;
1949  }
1950 
1951  if (error) {
1952  fprintf(stderr, "arb_dnarates: Error: %s\n", error);
1953  help = true;
1954  }
1955 
1956  if (help) {
1957  fputs("Usage: arb_dnarates input [dbname dbsavename]\n"
1958  " Expects phylip sequence data as 'input'.\n"
1959  " Reads from STDIN if 'input' is '-'.\n"
1960  " (e.g. cat data.phyl | arb_dnarates - ...\n"
1961  " or arb_dnarates data.phyl ...)\n"
1962  " Expects a 'dbname' or a running ARB DB server.\n"
1963  " - Reads " AWAR_GDE_EXPORT_FILTER " from server.\n"
1964  " - Saves calculated SAI to server (POS_VAR_BY_ML_...)\n"
1965  " Using 'dbname' does only make sense for unittests!\n"
1966  , stderr);
1967  exit(EXIT_FAILURE);
1968  }
1969 
1970  // using arb_dnarates only makes sense with a running db server
1971  // (because result is written there)
1972  GB_shell shell;
1973  openArb(dbname);
1974  getArbFilter(); // Note: expects AWAR_GDE_EXPORT_FILTER in running db server
1975 
1976  {
1977  tree curtree;
1978  for (int i = 0; i<MAXNODES; ++i) {
1979  curtree.nodep[i] = NULp;
1980  }
1981 
1982  tree *tr = &curtree;
1983  getinput(tr, infile);
1984  if (!anerror) linkxarray(3, 3, & freextip, & usedxtip);
1985  if (!anerror) setupnodex(tr);
1986  if (!anerror) makeUserRates(tr, infile);
1987  if (!anerror) {
1988  anerror = !writeToArb();
1989  if (!anerror && dbsavename) saveArb(dbsavename);
1990  }
1991  closeArb();
1992  freeTree(tr);
1993  }
1994 
1995  if (wantSTDIN(inputname)) fclose(infile);
1996 
1997  return anerror ? EXIT_FAILURE : EXIT_SUCCESS;
1998 }
bool wantSTDIN(const char *iname)
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
int number
Definition: arbdbt.h:48
static double freqa
const char * GB_ERROR
Definition: arb_core.h:25
double likelihood
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
static void empiricalfreqs(tree *tr)
static double evaluate(tree *tr, nodeptr p)
GB_ERROR GB_commit_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2551
void GB_warning(const char *message)
Definition: arb_msg.cxx:530
double z
static void getbasefreqs(FILE *INFILE)
static double xv
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:729
static void findSiteRates(tree *tr, double ki_min, double ki_max, double d_ki, double max_error)
static char * y[maxsp+1]
nodeptr back
GB_ERROR GB_save(GBDATA *gb, const char *path, const char *savetype)
static int patsite[maxsites+1]
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static double bestki[maxpatterns]
static double freqt
static int inquote
Definition: mkptypes.cxx:72
static bool interleaved
static double dLidki[maxpatterns]
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
char name[nmlngth+1]
static void help()
static bool writeToArb()
static void initrav(nodeptr p)
GB_ERROR GB_IO_error(const char *action, const char *filename)
Definition: arb_msg.cxx:285
static void treeReadLen(tree *tr, FILE *INFILE)
xtype * t
#define programVersion
static void newview(nodeptr p)
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
double xtype
static bool printdata
static double invfreqy
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
static bool freqsfrom
static char * alignment_name
static void hookup(nodeptr p, nodeptr q, double z)
static void treeNeedCh(int c1, const char *where, FILE *INFILE)
static int treeFinishCom(FILE *INFILE)
static double freqc
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
static bool anerror
#define maxsp
xarray * next
static xarray * getxtip(nodeptr p)
xarray * x
#define MAX_ERROR
static int get_next_SAI_count()
static double freqgr
static double freqty
#define AWAR_GDE_EXPORT_FILTER
static double freqar
static double processLength(FILE *INFILE)
static void uppercase(int *chptr)
nodeptr nodep[MAXNODES]
static void openArb(const char *dbname)
static void saveArb(const char *saveAs)
int mxtips
static void getArbFilter()
int ARB_main(int argc, char *argv[])
static xarray * freextip
#define zmin
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:177
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:597
CONSTEXPR_INLINE int leafs_2_nodes(int leafs, TreeModel model)
Definition: arbdbt.h:53
static void getdata(tree *tr, FILE *INFILE)
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
char * tip
static void makevalues(tree *tr)
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
static int categs
static int weight[maxsites+1]
static void setuptree(tree *tr, const int numSp)
bool rooted
#define programName
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:536
static xarray * getxnode(nodeptr p)
static xarray * setupxarray()
static double fracchange
bool smoothed
bool is_char(const char *name, char c)
static void treeFlushLen(FILE *INFILE)
static int endsite
node * nodeptr
static double xi
#define KI_MAX
static double patrate[maxpatterns]
xarray * prev
static int treeGetCh(FILE *INFILE)
static void error(const char *msg)
Definition: mkptypes.cxx:96
static bool writefile
static int mininfo
static void uprootTree(tree *tr, nodeptr p)
static void summarize(int treenum)
static int sites
static void wrfile(FILE *outfile, int Sites, int Categs, int Weight[], double categrate[], int sitecateg[])
const int MAXNODES
static void inputweights(FILE *INFILE)
int opt_level
static void freeTreeNode(nodeptr p)
static void makeUserRates(tree *tr, FILE *INFILE)
nodeptr owner
static double freqy
static int base36(int ch)
static void linkxarray(int req, int min, xarray **freexptr, xarray **usedxptr)
static void sitesort()
#define maxsites
int ntips
static void dli_dki(nodeptr p)
Definition: arbdb.h:86
static GBDATA * create_next_SAI()
#define maxcategories
static GBDATA * gb_main
xtype * g
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1250
#define unlikely
static void categorize(int Sites, int Categs, int Weight[], int Pattern[], double Patrate[], double categrate[], int sitecateg[])
xtype * a
#define nint(x)
xtype * c
static double freqr
static double bestLi[maxpatterns]
static double subtreeLength(nodeptr p)
static void freeTree(tree *tr)
static double freqg
fputs(TRACE_PREFIX, stderr)
static void makeweights()
static int findch(int c, FILE *INFILE)
#define programDate
static xarray * usedxtip
static double ttratio
static void getoptions(FILE *INFILE)
nodeptr next
nodeptr start
static void closeArb()
bool white(int ch)
static int pattern[maxsites+1]
static int itobase36(int i)
static double treeLength(tree *tr)
#define assert(bed)
static void spanSubtree(nodeptr p)
GB_ERROR GB_write_floats(GBDATA *gbd, const float *f, long size)
Definition: arbdb.cxx:1457
#define NULp
Definition: cxxforward.h:116
static void sitecombcrunch()
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
static void treeFlushLabel(FILE *INFILE)
static void getnums(FILE *INFILE)
static int weightsum
int nextnode
double log_f[maxpatterns]
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:65
static void getinput(tree *tr, FILE *INFILE)
static char * arb_filter
int min(int i, int j)
static bool userweights
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
static int patweight[maxsites+1]
#define MIN_INFO
static void setupnodex(tree *tr)
#define maxpatterns
static int info[maxsites+1]
static int findTipName(tree *tr, int ch, FILE *INFILE)
static void addElementLen(tree *tr, nodeptr p, FILE *INFILE)
static void getyspace()
static double invfreqr
static double freqcy
#define nmlngth
#define zmax
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
#define RATE_STEP
static int numsp
Definition: arbdb.h:66
bool digit(int ch)
GB_write_int const char s
Definition: AW_awar.cxx:154