28 for (i = 0; i < 256; i++) {
31 char_to_enum_table[i] =
ST_A;
34 char_to_enum_table[i] =
ST_C;
37 char_to_enum_table[i] =
ST_G;
41 char_to_enum_table[i] =
ST_T;
44 char_to_enum_table[i] =
ST_GAP;
58 memset((
char *) &
b[0], 0,
sizeof(
b));
73 b[i] *= inv_frequencies.
b[i];
94 if (
ld_lik> 10000) printf(
"overflow\n");
113 printf(
"%.3G ",
b[i]);
124 diag = k + (1.0 - k) * exp_dist;
125 rest = k - k * exp_dist;
131 printf(
"%.3G ", i == j ? diag : rest);
143 ST_FLOAT diag_rest_diff = diag-rest;
146 out.
b[
ST_A] = in.
b[
ST_A]*diag_rest_diff + sum_rest_prod;
147 out.
b[
ST_C] = in.
b[
ST_C]*diag_rest_diff + sum_rest_prod;
148 out.
b[
ST_G] = in.
b[
ST_G]*diag_rest_diff + sum_rest_prod;
149 out.
b[
ST_T] = in.
b[
ST_T]*diag_rest_diff + sum_rest_prod;
166 color_out_valid_till(
NULp)
201 if (remove_callbacks) {
217 void MostLikelySeq::set(
const char *) {
221 void MostLikelySeq::unset() {
238 size_t data_len =
std::min(range_len, source_sequence_len);
241 for (; pos<data_len; ++pos) dest[pos].setBase(freq[pos], toupper(source_sequence[pos]));
242 for (; pos<range_len; ++pos) dest[pos].setBase(freq[pos],
'.');
306 printf(
"POS %3zu %c ", i, data[i]);
316 hash_2_ap_tree(
NULp),
317 keep_species_hash(
NULp),
321 latest_modification(0),
330 base_frequencies(
NULp),
331 inv_base_frequencies(
NULp),
334 max_rate_matrices(0),
336 is_initialized(
false)
341 free(alignment_name);
344 delete [] base_frequencies;
345 delete [] inv_base_frequencies;
346 delete [] rate_matrices;
355 void ST_ML::create_frequencies() {
363 for (
size_t i = 0; i < filtered_length; i++) {
364 base_frequencies[i].
setTo(1.0);
365 base_frequencies[i].
lik = 1.0;
367 inv_base_frequencies[i].
setTo(1.0);
368 inv_base_frequencies[i].
lik = 1.0;
372 for (
size_t i = 0; i < filtered_length; i++) {
376 base_freq.
setTo(NO_FREQ);
391 for (
int j = 0; toCount[j].c; ++j) {
393 if (freq) base_freq.
b[toCount[j].b] += freq[i];
411 base_freq.
setTo(1.0);
417 inv_base_frequencies[i].
lik = 1.0;
422 void ST_ML::insert_tree_into_hash_rek(
AP_tree *
node) {
428 insert_tree_into_hash_rek(node->get_leftson());
429 insert_tree_into_hash_rek(node->get_rightson());
433 void ST_ML::create_matrices(
double max_disti,
int nmatrices) {
434 delete [] rate_matrices;
437 max_dist = max_disti;
438 max_rate_matrices = nmatrices;
439 step_size = max_dist / max_rate_matrices;
441 for (
int i = 0; i < max_rate_matrices; i++) {
442 rate_matrices[i].
set((i + 1) * step_size, 0);
446 long ST_ML::delete_species(
const char *key,
long val,
void *cd_st_ml) {
464 AP_tree *root = tree_root->get_root_node();
465 if (!root || root->
is_leaf()) {
473 freenull(alignment_name);
475 if (MostLikelySeq::tmp_out) {
477 MostLikelySeq::tmp_out =
NULp;
483 if (hash_2_ap_tree) {
485 hash_2_ap_tree =
NULp;
488 is_initialized =
false;
492 const char *species_names,
int marked_only,
505 column_stat = colstat;
508 if (column_stat_error) fprintf(stderr,
"Column statistic error: %s (using equal rates/tt-ratio for all columns)\n", column_stat_error);
516 else if (ali_len<10) {
517 error =
"alignment too short";
522 if (weighted_filter) {
531 aliview =
new AliView(gb_main, filter, weights, alignment_name);
547 if (!tree_root->get_root_node()) {
564 for (l = (
char *) species_names; l; l = n) {
571 insert_tree_into_hash_rek(tree_root->get_root_node());
574 keep_species_hash =
NULp;
583 if (!error) insert_tree_into_hash_rek(tree_root->get_root_node());
591 progress.
subtitle(
"calculating frequencies");
594 if (!column_stat_error) {
599 float *alloc_rates =
new float[filtered_length];
600 float *alloc_ttratio =
new float[filtered_length];
602 for (
size_t i = 0; i < filtered_length; i++) {
603 alloc_rates[i] = 1.0;
604 alloc_ttratio[i] = 2.0;
607 ttratio = alloc_ttratio;
611 create_frequencies();
613 create_matrices(2.0, 1000);
616 is_initialized =
true;
622 error = ta.
close(error);
652 const MostLikelySeq *leftSeq = get_mostlikely_sequence(node->get_leftson());
653 const MostLikelySeq *rightSeq = get_mostlikely_sequence(node->get_rightson());
664 undo_tree(tree_root->get_root_node());
668 void ST_ML::undo_tree(
AP_tree *node) {
672 undo_tree(node->get_leftson());
673 undo_tree(node->get_rightson());
677 #define GET_ML_VECTORS_BUG_WORKAROUND_INCREMENT (ST_MAX_SEQ_PART-1) // workaround bug in get_ml_vectors
695 if (!hash_2_ap_tree)
return NULp;
697 if (!node)
return NULp;
705 if (start_ali_pos != first_pos || end_ali_pos > last_pos) {
706 undo_tree(tree_root->get_root_node());
707 first_pos = start_ali_pos;
708 last_pos = end_ali_pos;
712 for (pntr = node->get_father(); pntr; pntr = pntr->get_father()) {
739 st_assert(contradicted(species_name, node));
741 if (latest_update < latest_modification) {
745 if (!node)
return false;
755 for (i = 0; i < 4; i++) {
760 for (i = 0; i < 4; i++) {
771 for (
size_t pos = 0; pos < ali_len; pos++) {
776 for (i = 0; i < 4; i++) {
781 double div = 100.0 / sum;
783 for (i = 0; i < 4; i++) {
784 result[i][pos] =
char ((vec.
b[adb[i]] * div) + 0.5);
789 latest_update = latest_modification;
800 if (!hash_2_ap_tree)
return NULp;
802 if (!node)
return NULp;
810 if (end_ali_pos > ali_len) {
811 end_ali_pos = ali_len;
823 for (pos = start_ali_pos; pos <= end_ali_pos; pos +=
ST_BUCKET_SIZE) {
826 if (pos > end_ali_pos) {
835 const char *source_sequence =
NULp;
842 const char *source = source_sequence + start_ali_pos;
844 for (pos = start_ali_pos; pos <= end_ali_pos; pos++) {
851 val = max / (0.0001 + vec->
b[b]);
854 *outs = (
int) (log(val));
872 return tree_root->get_root_node();
877 tree_root->get_root_node()->calcTreeInfo(info);
GBDATA * get_gb_main() const
GB_ERROR calc_st_ml(const char *tree_name, const char *alignment_name, const char *species_names, int marked_only, ColumnStat *colstat, const WeightedFilter *weighted_filter) __ATTR__USERESULT
const AliView * get_aliview() const
long remove_leafs(AWT_RemoveType awt_remove_type)
void setTo(ST_FLOAT freq)
size_t get_first_pos() const
const float * get_frequencies(unsigned char c) const
const TreeNode * get_gbt_tree() const
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
const ST_base_vector & get_base_frequency_at(size_t pos) const
GB_ERROR GB_add_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
void increaseBy(ST_FLOAT inc)
MostLikelySeq(const AliView *aliview, ST_ML *st_ml_)
unsigned char ST_ML_Color
static void st_sequence_del_callback(GBDATA *, MostLikelySeq *seq)
char * ARB_strdup(const char *str)
const char * GBS_global_string(const char *templat,...)
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
static char * alignment_name
const ST_base_vector * get_inv_base_frequencies() const
float get_rate_at(size_t pos) const
void GBS_free_hash(GB_HASH *hs)
void transform(const ST_base_vector &in, ST_base_vector &out) const
void multiplyWith(ST_FLOAT factor)
static ST_base_vector * tmp_out
GB_ERROR GBT_link_tree(TreeNode *tree, GBDATA *gb_main, bool show_status, int *zombies, int *duplicates)
#define DOWNCAST(totype, expr)
const AP_filter * get_filter() const
AP_tree * find_node_by_name(const char *species_name)
virtual AP_tree * REMOVE()
static HelixNrInfo * start
AP_sequence * dup() const OVERRIDE
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
#define GET_ML_VECTORS_BUG_WORKAROUND_INCREMENT
GBDATA * get_bound_species_data() const
const float * get_rates() const
size_t get_filtered_length() const
const char * get_tree_name() const
DNA_Base char_to_enum(char i)
const char * awarname(const char *awarname_template, int idx)
const float * get_ttratio() const
double get_step_size() const
void calculate_ancestor(const MostLikelySeq *lefts, double leftl, const MostLikelySeq *rights, double rightl)
MostLikelySeq * get_ml_vectors(const char *species_name, AP_tree *node, size_t start_ali_pos, size_t end_ali_pos)
static int weights[MAX_BASETYPES][MAX_BASETYPES]
void makeInverseOf(const ST_base_vector &other, ST_FLOAT other_min_freq)
size_t count_species_in_tree() const
static void error(const char *msg)
size_t get_length() const
__ATTR__USERESULT GB_ERROR calculate(AP_filter *filter)
GB_ERROR loadFromDB(const char *name) FINAL_OVERRIDE
AP_sequence * set_seq(AP_sequence *sequence)
ST_rate_matrix & get_matrix_for(int distance)
ST_FLOAT summarize() const
void unbind_from_species()
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
size_t get_filtered_length() const
void calc_out(const MostLikelySeq *sequence_of_brother, double dist)
bool is_up_to_date() const
const AliView * get_aliview() const
TYPE * ARB_calloc(size_t nelem)
AliView * create_aliview(const char *aliname, GB_ERROR &error) const
int * color_out_valid_till
GB_ERROR close(GB_ERROR error)
bool update_ml_likelihood(char *result[4], int &latest_update, const char *species_name, AP_tree *node)
void setBase(const ST_base_vector &frequencies, char base)
const AP_filter * get_filter() const
GB_ERROR tree_size_ok(AP_tree_root *tree_root)
void subtitle(const char *stitle)
void GB_remove_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
GB_ERROR is_invalid() const
GB_ERROR bind_to_species(GBDATA *gb_species)
long GB_read_clock(GBDATA *gbd)
size_t get_last_pos() const
size_t get_alignment_length() const
ST_ML_Color * get_color_string(const char *species_name, AP_tree *node, size_t start_ali_pos, size_t end_ali_pos)
long GBT_get_species_count(GBDATA *gb_main)
GB_transaction ta(gb_var)
void destroy(TreeNode *that)
GB_ERROR bind_to_species(GBDATA *gb_species)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
void set(double dist, double TT_ratio)
~MostLikelySeq() OVERRIDE
const size_t ST_MAX_SEQ_PART
static int info[maxsites+1]
void create_column_statistic(AW_root *awr, const char *awarname, AW_awar *awar_default_alignment)
long GBS_read_hash(const GB_HASH *hs, const char *key)
void unbind_from_species(bool remove_callbacks)
ST_base_vector & operator*=(const ST_base_vector &other)
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
static void st_sequence_callback(GBDATA *, MostLikelySeq *seq)