30 ranges = no_of_ranges;
36 for (
size_t range = 0; range<ranges; ++range) {
37 allRange.
add(likelihood[range]);
44 char *report =
new char[ranges+1];
49 double all_std_deviation = sqrt(all_variance);
51 for (
size_t range = 0; range<ranges; ++range) {
52 size_t count = likelihood[range].
get_count();
56 double standard_error = all_std_deviation / sqrt(count);
57 double median_diff = likelihood[range].
get_median() - all_median;
58 double t_value = median_diff / standard_error;
60 t_values.
add(fabs(t_value));
62 double val = 0.7 * t_value;
76 int ival =
int (val + .5) + 5;
78 if (ival > 9) ival = 9;
79 if (ival < 0) ival = 0;
81 report[range] = (ival == 5) ?
'-' :
'0'+ival;
89 for (
size_t range = 0; range<ranges; ++range) {
105 stat_user(seq_len / bucket_size + 1),
111 const char *species_name,
int seq_len,
int bucket_size,
128 const char *source_sequence =
NULp;
129 int source_sequence_len = 0;
136 if (end > source_sequence_len) end = source_sequence_len;
139 for (pos = start; pos < end; vec++, pos++) {
144 double val = max / (0.0001 + vec->
b[base]);
145 double log_val = log(val);
146 double seq_rel_pos = double(pos)/seq_len;
160 const char *dest_field)
164 if (!gb_species) error =
GBS_global_string(
"Unknown species '%s'", species_name);
167 if (!info) error =
GBS_global_string(
"Statistic missing for species '%s'", species_name);
184 #warning t-value summary disabled for test-purposes
187 buffer.
put(
'a'); buffer.
cat(half_str); buffer.
put(
' ');
188 buffer.
put(
'b'); buffer.
cat(five_str); buffer.
put(
' ');
189 buffer.
put(
'c'); buffer.
cat(user_str);
197 if (!error && report) {
206 report_str =
new char[filtered_len + 1];
208 for (
size_t i = 0; i < filtered_len; i++) {
209 report_str[i] = user_str[i / bucket_size];
211 report_str[filtered_len] = 0;
215 char *blownUp_report = filter->
blowup_string(report_str,
' ');
217 free(blownUp_report);
221 delete [] report_str;
248 size_t species_count;
255 arb_progress add_progress(
"Calculating stat", parts*species_count);
259 if (end > seq_len) end = seq_len;
261 for (
GB_CSTR *pspecies_name = snames; *pspecies_name && !
error; pspecies_name++) {
269 arb_progress res_progress(
"Calculating reports", species_count);
272 for (
GB_CSTR *pspecies_name = snames; *pspecies_name && !
error; pspecies_name++) {
GB_ERROR calc_st_ml(const char *tree_name, const char *alignment_name, const char *species_names, int marked_only, ColumnStat *colstat, const WeightedFilter *weighted_filter) __ATTR__USERESULT
class DNA_Table dna_table
const TreeNode * get_gbt_tree() const
char * blowup_string(const char *filtered_string, char insert) const
size_t overall_range_count()
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
LikelihoodRanges stat_five
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
static void st_ml_add_sequence_part_to_stat(ST_ML *st_ml, ColumnStat *, const char *species_name, int seq_len, int bucket_size, GB_HASH *species_to_info_hash, int start, int end)
void add_relative(double seq_rel_pos, double probability)
ColumnQualityInfo(int seq_len, int bucket_size)
const char * GBS_global_string(const char *templat,...)
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
static char * alignment_name
void GBS_free_hash(GB_HASH *hs)
double get_median() const
void cat(const char *from)
static ST_base_vector * tmp_out
char buffer[MESSAGE_BUFFERSIZE]
static HelixNrInfo * start
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
GBDATA * get_bound_species_data() const
GB_ERROR st_ml_check_sequence_quality(GBDATA *gb_main, const char *tree_name, const char *alignment_name, ColumnStat *colstat, const WeightedFilter *weighted_filter, int bucket_size, int marked_only, st_report_enum report, const char *dest_field)
size_t get_filtered_length() const
DNA_Base char_to_enum(char i)
MostLikelySeq * get_ml_vectors(const char *species_name, AP_tree *node, size_t start_ali_pos, size_t end_ali_pos)
static void error(const char *msg)
size_t get_filtered_length() const
double get_variance(double median) const
static GB_ERROR st_ml_add_quality_string_to_species(GBDATA *gb_main, const AP_filter *filter, const char *alignment_name, const char *species_name, size_t bucket_size, GB_HASH *species_to_info_hash, st_report_enum report, const char *dest_field)
char * generate_string(Sampler &t_values)
GBDATA * GBT_searchOrCreate_itemfield_according_to_changekey(GBDATA *gb_item, const char *field_name, const char *change_key_path)
GB_ERROR GB_set_temporary(GBDATA *gbd) __ATTR__USERESULT
static void destroy_ColumnQualityInfo(long cl_info)
LikelihoodRanges stat_half
const AP_filter * get_filter() const
void nprintf(size_t maxlen, const char *templat,...) __ATTR__FORMAT_MEMBER(2)
ItemSelector & SPECIES_get_selector()
LikelihoodRanges stat_user
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
GB_CSTR * GBT_get_names_of_species_in_tree(const TreeNode *tree, size_t *count)
const char * get_data() const
AP_tree * STAT_find_node_by_name(ST_ML *st_ml, const char *species_name)
long GBT_get_species_count(GBDATA *gb_main)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
VarianceSampler summarize_all_ranges()
const size_t ST_MAX_SEQ_PART
static int info[maxsites+1]
long GBS_read_hash(const GB_HASH *hs, const char *key)
void inc_and_check_user_abort(GB_ERROR &error)
LikelihoodRanges(size_t ranges)