ARB
distanalyse.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : distanalyse.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "di_matr.hxx"
12 
13 using std::min;
14 using std::max;
15 using std::string;
16 
19 
21  if (is_AA) {
22  if (nentries> 100) {
23  msg = "A lot of sequences!\n ==> fast Kimura selected! (instead of PAM)";
24  result = DI_TRANSFORMATION_KIMURA;
25  }
26  else {
27  msg = "Only limited number of sequences!\n ==> slow PAM selected! (instead of Kimura)";
28  result = DI_TRANSFORMATION_PAM;
29  }
30  }
31  else {
32  long mean_len = 0;
33  float min_gc = 9999.9;
34  float max_gc = 0.0;
35  long min_len = 9999999;
36  long max_len = 0;
37 
38  // calculate meanvalue of sequencelength:
39  for (size_t row=0; row<nentries; row++) {
40  const char *sequ = entries[row]->get_nucl_seq()->get_sequence();
41 
42  size_t flen = aliview->get_length();
43 
44  long act_gci = 0;
45  long act_len = 0;
46 
47  for (size_t pos=0; pos<flen; pos++) {
48  char ch = sequ[pos];
49  if (ch == AP_C || ch == AP_G) act_gci++;
50  if (ch == AP_A || ch == AP_C || ch == AP_G || ch == AP_T) act_len++;
51  }
52 
53  mean_len += act_len;
54 
55  float act_gc = ((float) act_gci) / act_len;
56 
57  min_gc = min(min_gc, act_gc);
58  max_gc = max(max_gc, act_gc);
59 
60  min_len = min(min_len, act_len);
61  max_len = max(max_len, act_len);
62  }
63 
64  string warn;
65  if (min_len * 1.3 < max_len) {
66  warn = "Warning: The length of sequences differs significantly!\n"
67  "Be careful: Neighbour Joining is sensitive to this kind of \"error\"";
68  }
69 
70  mean_len /= nentries;
71 
72  if (mean_len < 100) {
73  msg = "Too short sequences!\n ==> No correction selected!";
74  result = DI_TRANSFORMATION_NONE;
75  }
76  else if (mean_len < 300) {
77  msg = "Meanlength shorter than 300\n ==> Jukes Cantor selected!";
79  }
80  else if ((mean_len < 1000) || ((max_gc / min_gc) < 1.2)) {
81  const char *reason;
82  if (mean_len < 1000) reason = "Sequences are too short for Olsen!";
83  else reason = GBS_global_string("Maximal GC (%f) : Minimal GC (%f) < 1.2", max_gc, min_gc);
84 
85  msg = GBS_global_string("%s ==> Felsenstein selected!", reason);
87  }
88  else {
89  msg = "Olsen selected!";
90  result = DI_TRANSFORMATION_OLSEN;
91  }
92 
93  if (!warn.empty()) {
94  di_assert(!msg.empty());
95  msg = warn+'\n'+msg;
96  }
97  }
98 
99  di_assert(!msg.empty());
101 
102  return result;
103 }
104 
string result
return string(buffer, length)
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
AP_sequence_parsimony * get_nucl_seq()
Definition: di_matr.hxx:86
DI_ENTRY ** entries
Definition: di_matr.hxx:151
size_t nentries
Definition: di_matr.hxx:152
#define di_assert(cond)
Definition: DI_clusters.cxx:32
DI_TRANSFORMATION
Definition: di_matr.hxx:43
DI_TRANSFORMATION detect_transformation(std::string &msg)
Definition: distanalyse.cxx:17
size_t get_length() const
Definition: AliView.cxx:66
#define min(a, b)
Definition: f2c.h:153
bool is_AA
Definition: di_matr.hxx:150
#define max(a, b)
Definition: f2c.h:154