ARB
st_quality.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : st_quality.hxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef ST_QUALITY_HXX
12 #define ST_QUALITY_HXX
13 
14 #ifndef ST_WINDOW_HXX
15 #include "st_window.hxx"
16 #endif
17 
18 #define st_assert(cond) arb_assert(cond)
19 
20 class Sampler {
21  double sum;
22  size_t count;
23 public:
24  Sampler() : sum(0.0) , count(0) {}
25 
26  void add(double val) {
27  sum += val;
28  count++;
29  }
30  void add(const Sampler& other) {
31  sum += other.sum;
32  count += other.count;
33  }
34 
35  size_t get_count() const { return count; }
36  double get_sum() const { return sum; }
37  double get_median() const { return sum / count; }
38 };
39 
40 class VarianceSampler : public Sampler {
47  double square_sum;
48 
49 public:
50  VarianceSampler() : square_sum(0.0) {}
51 
52  void add(double val) {
53  Sampler::add(val);
54  square_sum += val*val;
55  }
56 
57  void add(const VarianceSampler& other) {
58  Sampler::add(other);
59  square_sum += other.square_sum;
60  }
61 
62  double get_variance(double median) const { return square_sum/get_count() - median*median; }
63  double get_variance() const { return get_variance(get_median()); }
64 };
65 
66 class LikelihoodRanges : virtual Noncopyable {
70  size_t ranges;
71  VarianceSampler *likelihood;
72 
73 public:
74 
75  LikelihoodRanges(size_t ranges);
76  ~LikelihoodRanges() { delete [] likelihood; }
77 
78  size_t get_range_count() const { return ranges; }
79 
80  void add(size_t range, double probability) {
81  st_assert(range < ranges);
82  likelihood[range].add(probability);
83  }
84  void add_relative(double seq_rel_pos, double probability) {
85  st_assert(seq_rel_pos>=0.0 && seq_rel_pos<1.0);
86  add(seq_rel_pos*ranges, probability);
87  }
88 
90  char *generate_string(Sampler& t_values);
91 
92  // int is_bad(); // 0 == ok, 1 strange, 2 bad, 3 very bad
93 };
94 
96  LikelihoodRanges stat_half; // two ranges (one for each half)
97  LikelihoodRanges stat_five; // five ranges
98  LikelihoodRanges stat_user; // user defined range size
99  LikelihoodRanges stat_cons; // two ranges (conserved + variable positions) -- @@@ unused
100 
101  ColumnQualityInfo(int seq_len, int bucket_size);
102 
104  return
105  stat_half.get_range_count() +
106  stat_five.get_range_count() +
107  stat_user.get_range_count() +
108  stat_cons.get_range_count();
109  }
110 };
111 
112 class WeightedFilter;
113 class ColumnStat;
114 
115 GB_ERROR st_ml_check_sequence_quality(GBDATA *gb_main, const char *tree_name,
116  const char *alignment_name, ColumnStat *colstat, const WeightedFilter *weighted_filter, int bucket_size,
117  int marked_only, st_report_enum report, const char *dest_field);
118 
119 #else
120 #error st_quality.hxx included twice
121 #endif // ST_QUALITY_HXX
const char * GB_ERROR
Definition: arb_core.h:25
LikelihoodRanges stat_cons
Definition: st_quality.hxx:99
void add(double val)
Definition: st_quality.hxx:52
size_t overall_range_count()
Definition: st_quality.hxx:103
LikelihoodRanges stat_five
Definition: st_quality.hxx:97
#define st_assert(cond)
Definition: st_quality.hxx:18
void add_relative(double seq_rel_pos, double probability)
Definition: st_quality.hxx:84
ColumnQualityInfo(int seq_len, int bucket_size)
Definition: ST_quality.cxx:102
static char * alignment_name
double get_median() const
Definition: st_quality.hxx:37
size_t get_range_count() const
Definition: st_quality.hxx:78
size_t get_count() const
Definition: st_quality.hxx:35
GB_ERROR st_ml_check_sequence_quality(GBDATA *gb_main, const char *tree_name, const char *alignment_name, ColumnStat *colstat, const WeightedFilter *weighted_filter, int bucket_size, int marked_only, st_report_enum report, const char *dest_field)
Definition: ST_quality.cxx:237
void add(const Sampler &other)
Definition: st_quality.hxx:30
st_report_enum
Definition: st_window.hxx:28
void add(double val)
Definition: st_quality.hxx:26
void add(const VarianceSampler &other)
Definition: st_quality.hxx:57
double get_variance(double median) const
Definition: st_quality.hxx:62
char * generate_string(Sampler &t_values)
Definition: ST_quality.cxx:42
LikelihoodRanges stat_half
Definition: st_quality.hxx:96
double get_variance() const
Definition: st_quality.hxx:63
LikelihoodRanges stat_user
Definition: st_quality.hxx:98
GBDATA * gb_main
Definition: adname.cxx:32
void add(size_t range, double probability)
Definition: st_quality.hxx:80
double get_sum() const
Definition: st_quality.hxx:36
VarianceSampler summarize_all_ranges()
Definition: ST_quality.cxx:34
LikelihoodRanges(size_t ranges)
Definition: ST_quality.cxx:29