21 #define ap_assert(cond) arb_assert(cond)
35 for (
int i=0; i<256; i++) {
36 frequencies[i] =
NULp;
38 char_2_transition[i] = 0;
39 char_2_transition[i] = 0;
49 for (
int i=0; i<256; i++) free(frequencies[i]);
59 size_t seq_len = ali_len;
65 for (
size_t i = 0; i< seq_len; i++) {
66 long L = char_2_freq[sequence[i]];
74 for (
size_t i = 0; i< seq_len; i++) bases[i] = char_2_transition[sequence[i]];
77 for (
size_t i = 0; i< seq_len; i++) tbases[i] = char_2_transversion[sequence[i]];
83 GB_UINT4 *ls = ARB_calloc<GB_UINT4>(ali_len);
84 GB_UINT4 *rs = ARB_calloc<GB_UINT4>(ali_len);
85 GB_UINT4 *lts = ARB_calloc<GB_UINT4>(ali_len);
86 GB_UINT4 *rts = ARB_calloc<GB_UINT4>(ali_len);
88 if (!error) error = this->parsimony(tree->get_leftson(), ls, lts);
89 if (!error) error = this->parsimony(tree->get_rightson(), rs, rts);
91 for (
long i=0; i< ali_len; i++) {
95 if (bases) bases[i] = l&r;
99 if (bases) bases[i] = l|r;
104 if (tbases) tbases[i] = l&r;
108 if (tbases) tbases[i] = l|r;
130 error =
"No tree specified for PVP calculation";
139 char_2_freq[(
unsigned char)
'a'] =
'A';
140 char_2_freq[(
unsigned char)
'A'] =
'A';
141 char_2_freq[(
unsigned char)
'c'] =
'C';
142 char_2_freq[(
unsigned char)
'C'] =
'C';
143 char_2_freq[(
unsigned char)
'g'] =
'G';
144 char_2_freq[(
unsigned char)
'G'] =
'G';
145 char_2_freq[(
unsigned char)
't'] =
'U';
146 char_2_freq[(
unsigned char)
'T'] =
'U';
147 char_2_freq[(
unsigned char)
'u'] =
'U';
148 char_2_freq[(
unsigned char)
'U'] =
'U';
152 for (
int i=0; i<256; i++) {
154 if (i==
'-') j =
'.';
else j = i;
155 long base = char_2_transition[i] = char_2_bitstring[j];
156 char_2_transversion[i] = 0;
157 if (base & (
AP_A |
AP_G)) char_2_transversion[i] = 1;
158 if (base & (
AP_C |
AP_T)) char_2_transversion[i] |= 2;
160 delete [] char_2_bitstring;
165 long char_2_bitstring[256];
167 for (
int i=0; i<256; ++i) {
168 char_2_bitstring[i] = 0;
170 int aa_max = translator->
MaxAA();
171 for (
int i = 0; i<=aa_max; ++i) {
173 unsigned char spro = translator->
index2spro(i);
175 char_2_bitstring[spro] = bs;
179 for (
int i=0; i<256; ++i) {
180 char_2_transversion[i] = 0;
181 long base = char_2_transition[i] = char_2_bitstring[i];
182 if (base) char_2_freq[i] = toupper(i);
188 for (
int i=0; i<256; i++) {
189 int j = char_2_freq[i];
190 if (j && !frequencies[j])
ARB_calloc(frequencies[j], ali_len);
196 error = this->parsimony(tree);
228 const char *description =
229 GBS_global_string(
"PVP: Positional Variability by Parsimony: tree '%s' ntaxa %li",
230 tree_name, treeLeafs);
236 char *data = ARB_calloc<char>(ali_len+1);
237 int *sum = ARB_calloc<int>(ali_len);
239 for (
int j=0; j<256 && !
error; j++) {
240 if (frequencies[j]) {
241 for (
int i=0; i<ali_len; i++) {
242 sum[i] += frequencies[j][i];
245 if (j >=
'A' && j <=
'Z') {
248 else error =
GB_write_ints(gb_freq, frequencies[j], ali_len);
261 else error =
GB_write_ints(gb_transv, transversions, ali_len);
266 double logbase = sqrt(2.0);
267 double lnlogbase = log(logbase);
269 double max_rate = 1.0;
271 int tenPercentOfLeafs = treeLeafs*0.1;
272 if ((10*tenPercentOfLeafs) < treeLeafs) ++tenPercentOfLeafs;
274 for (
int i=0; i<ali_len; i++) {
275 if (sum[i] < tenPercentOfLeafs) {
279 if (transitions[i] == 0) {
283 double rate = transitions[i] / (double)sum[i];
284 if (rate >= b * .95) {
287 rate = -b * log(1-rate/b);
288 if (rate > max_rate) rate = max_rate;
291 double dcat = -log(rate)/lnlogbase;
292 int icat = (
int)dcat;
293 if (icat > 35) icat = 35;
294 if (icat >= max_categ) max_categ = icat + 1;
295 data[i] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[icat];
303 for (
int i = 0; i<max_categ; i++) {
size_t GBT_count_leafs(const TreeNode *tree)
char * AP_create_dna_to_ap_bases()
char * ARB_strdup(const char *str)
const char * GBS_global_string(const char *templat,...)
GB_ERROR GB_delete(GBDATA *&source)
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
CONSTEXPR_INLINE int leafs_2_nodes(int leafs, TreeModel model)
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
GB_ERROR save_aliEntry_to_SAI(const char *sai_name)
GB_ERROR delete_aliEntry_from_SAI(const char *sai_name)
static void error(const char *msg)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
AP_pos_var(GBDATA *gb_main_, const char *ali_name_, long ali_len_, bool is_nuc_, const char *tree_name_)
GB_ERROR retrieve(TreeNode *tree)
AWT_translator * AWT_get_user_translator(GBDATA *gb_main)
TYPE * ARB_calloc(size_t nelem)
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
GB_ERROR GB_write_ints(GBDATA *gbd, const GB_UINT4 *i, long size)
const char * get_data() const
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
unsigned char index2spro(int index) const
void inc_and_check_user_abort(GB_ERROR &error)
long index2bitset(int index) const