ARB
AP_filter.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : AP_filter.hxx //
4 // Purpose : Alignment column filter (AP_filter) and per-column weights (AP_weights) //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef AP_FILTER_HXX
12 #define AP_FILTER_HXX
13 
14 #ifndef ARBDB_BASE_H
15 #include <arbdb_base.h>
16 #endif
17 #ifndef ARB_ASSERT_H
18 #include <arb_assert.h>
19 #endif
20 #ifndef ARBTOOLS_H
21 #include <arbtools.h>
22 #endif
23 
// Assertion macro for this header: forwards its condition unchanged to arb_assert.
24 #define af_assert(cond) arb_assert(cond)
25 
// Shorthand for unsigned char; used below as the element type of AP_filter's simplify table.
26 typedef unsigned char uchar;
27 
33 };
34 
// Tag enum: its single value NOT selects the AP_filter(AF_Not, const AP_filter&) constructor.
// NOTE(review): presumably that constructor builds the inverted filter -- confirm in AP_filter.cxx.
35 enum AF_Not { NOT };
// Operator argument of the AP_filter(f1, AF_Combine, f2) constructor.
// NOTE(review): presumably a position-wise boolean combination of both masks -- confirm in AP_filter.cxx.
36 enum AF_Combine { AND, OR, XOR };
37 
// Column filter: a boolean mask over 'filter_len' positions where true means
// "use position". Additionally carries a base-simplification table, an optional
// bootstrap table and a lazily built filterpos -> sequencepos lookup.
// NOTE(review): this listing skips several original lines (63, 79, 103, 123-128, 133);
// the gaps are marked below.
38 class AP_filter {
39  bool *filter_mask; // true means "use position"
40  size_t filter_len; // length of 'filter_mask'
41  size_t real_len; // how many 'true's are in 'filter_mask'
42  long update; // timestamp
43 
44  uchar simplify[256]; // base -> simplified base
    // state of 'simplify'; AWT_FILTER_SIMPLIFY_NOT_INITIALIZED is tested in get_simplify_table()
45  AWT_FILTER_SIMPLIFY simplify_type;
46 
47  size_t *bootstrap; // bootstrap[i] points to random filter positions [0..real_len[
48 
    // built on demand by calc_filterpos_2_seqpos(); a null pointer means "not calculated yet"
49  size_t *filterpos_2_seqpos; // filterpos -> sequencepos
50 
51 #if defined(ASSERTION_USED)
    // set by the validity check further down; mutable because that check is a const member
52  mutable bool checked_for_validity;
53 #endif
54 
55  void calc_filterpos_2_seqpos();
56 
57  // ctor-helper-functions:
58  void init(size_t size);
59  void make_permeable(size_t size);
60  void init_from_string(const char *ifilter, const char *zerobases, size_t size);
61 
    // Maps bootstrap index 'bpos' to a filter position via the 'bootstrap' table.
    // Both the index and the looked-up position are asserted to be < real_len.
    // NOTE(review): original line 63 is not part of this listing.
62  size_t bootstrapped_filterpos(size_t bpos) const {
64  af_assert(bpos<real_len);
65  size_t fpos = bootstrap[bpos];
66  af_assert(fpos<real_len);
67  return fpos;
68  }
69 
70 public:
71  AP_filter(size_t size); // permeable filter (passes all columns)
72  AP_filter(const char *filter, const char *zerobases, size_t size);
73  AP_filter(const AP_filter& other);
74 
75  AP_filter(AF_Not, const AP_filter& other);
76  AP_filter(const AP_filter& f1, AF_Combine comb, const AP_filter& f2);
77 
78  ~AP_filter();
    // NOTE(review): original line 79 is not part of this listing.
80 
81  long get_timestamp() const { return update; }
82  size_t get_filtered_length() const { return real_len; }
83  size_t get_length() const { return filter_len; }
84 
    // Asserts that the filter was checked for validity and that 'pos' is inside the mask.
    // 'checked_for_validity' is only declared when ASSERTION_USED is defined.
85  bool use_position(size_t pos) const { // returns true if filter is set for position 'pos'
86  af_assert(checked_for_validity);
87  af_assert(pos<filter_len);
88  return filter_mask[pos];
89  }
90 
    // Returns the filterpos -> sequencepos table, calculating it on first use.
91  const size_t *get_filterpos_2_seqpos() const {
92  if (!filterpos_2_seqpos) {
93  // this is no modification, it's lazy initialization:
94  const_cast<AP_filter*>(this)->calc_filterpos_2_seqpos();
95  }
96  return filterpos_2_seqpos;
97  }
98 
99  void enable_simplify(AWT_FILTER_SIMPLIFY type); // default is AWT_FILTER_SIMPLIFY_NONE
    // Returns the 256-entry 'simplify' table.
    // NOTE(review): original line 103 (the statement inside the 'if') is not part of this
    // listing, so the lazy-initialization call itself is not visible here.
100  const uchar *get_simplify_table() const {
101  if (simplify_type == AWT_FILTER_SIMPLIFY_NOT_INITIALIZED) {
102  // this is no modification, it's lazy initialization:
104  }
105  return simplify;
106  }
107 
108  void enable_bootstrap();
    // true if the 'bootstrap' pointer is non-null
109  bool does_bootstrap() const { return bootstrap; }
110 
    // Maps bootstrap index 'bpos' to a sequence position: first to a filter position,
    // then through the filterpos -> sequencepos table; the result is asserted < filter_len.
111  size_t bootstrapped_seqpos(size_t bpos) const {
112  size_t fpos = bootstrapped_filterpos(bpos);
113  size_t spos = (get_filterpos_2_seqpos())[fpos];
114  af_assert(spos<filter_len);
115  return spos;
116  }
117 
118  char *to_string() const; // convert to 0/1 string
119 
120  char *blowup_string(const char *filtered_string, char insert) const;
121  char *filter_string(const char *fulllen_string) const;
122 
    // NOTE(review): original lines 123-128 (signature and any leading comment of this
    // function) and line 133 are not part of this listing; the cross-reference index
    // below names it 'GB_ERROR is_invalid() const'.
    // Visible body: records that the validity check ran (assertion builds only), returns
    // NULp if at least one column passes the filter, otherwise a message that
    // distinguishes "all columns filtered out" from "length is zero".
129 #if defined(ASSERTION_USED)
130  checked_for_validity = true;
131 #endif
132  if (get_filtered_length()) {
134  return NULp;
135  }
136  if (get_length()) return "Sequence completely filtered out (no columns left)";
137  return "No alignment selected";
138  }
139 #if defined(ASSERTION_USED)
140  bool was_checked_for_validity() const { return checked_for_validity; }
141 #endif
142 };
143 
144 
145 
// Weight table holding 'len' entries of GB_UINT4. A null 'weights' pointer means
// "unweighted": weight() then reports 1 for every index.
// NOTE(review): original line 156 is not part of this listing.
146 class AP_weights {
147  size_t len;
    // NOTE(review): in this position the aligned attribute applies to the pointer member
    // itself, not to the memory it points to -- confirm whether the array was meant to be aligned.
148  GB_UINT4 *weights __attribute__((__aligned__(16)));
149 
150 public:
151 
152  AP_weights(const AP_filter *fil); // dummy weights (all columns weighted equal)
153  AP_weights(const GB_UINT4 *w, size_t wlen, const AP_filter *fil);
154  AP_weights(const AP_weights& other);
155  ~AP_weights();
157 
    // Returns the raw weight pointer; this is null when is_unweighted() is true.
158  const GB_UINT4* get_weights() const {
159  return weights;
160  }
161 
    // Weight at 'idx' (asserted < len); 1 if no weight table is set.
162  GB_UINT4 weight(size_t idx) const {
163  af_assert(idx<len);
164  return is_unweighted() ? 1 : weights[idx];
165  }
166 
167  size_t length() const { return len; }
    // true if no weight table is set ('weights' is null)
168  bool is_unweighted() const { return !weights; }
169 };
170 
// Declaration only; the cross-reference index at the end of this listing places the
// definition in AP_filter.cxx.
// NOTE(review): presumably the source of the 'update' timestamp stored in AP_filter -- confirm.
171 long AP_timer();
172 
173 #else
174 #error AP_filter.hxx included twice
175 #endif // AP_FILTER_HXX
long AP_timer()
Definition: AP_filter.cxx:299
const size_t * get_filterpos_2_seqpos() const
Definition: AP_filter.hxx:91
GB_TYPES type
char * blowup_string(const char *filtered_string, char insert) const
Definition: AP_filter.cxx:222
Definition: AP_filter.hxx:36
long get_timestamp() const
Definition: AP_filter.hxx:81
void enable_bootstrap()
Definition: AP_filter.cxx:206
const GB_UINT4 * get_weights() const
Definition: AP_filter.hxx:158
const uchar * get_simplify_table() const
Definition: AP_filter.hxx:100
bool is_unweighted() const
Definition: AP_filter.hxx:168
size_t bootstrapped_seqpos(size_t bpos) const
Definition: AP_filter.hxx:111
DECLARE_ASSIGNMENT_OPERATOR(AP_filter)
AP_weights(const AP_filter *fil)
Definition: AP_filter.cxx:261
unsigned int GB_UINT4
Definition: arbdb_base.h:37
DECLARE_ASSIGNMENT_OPERATOR(AP_weights)
char * to_string() const
Definition: AP_filter.cxx:146
bool does_bootstrap() const
Definition: AP_filter.hxx:109
char * filter_string(const char *fulllen_string) const
Definition: AP_filter.cxx:239
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
unsigned char uchar
Definition: AP_filter.hxx:26
size_t get_length() const
Definition: AP_filter.hxx:83
#define af_assert(cond)
Definition: AP_filter.hxx:24
AWT_FILTER_SIMPLIFY
Definition: AP_filter.hxx:28
size_t get_filtered_length() const
Definition: AP_filter.hxx:82
void enable_simplify(AWT_FILTER_SIMPLIFY type)
Definition: AP_filter.cxx:160
AF_Not
Definition: AP_filter.hxx:35
GB_ERROR is_invalid() const
Definition: AP_filter.hxx:123
unsigned char uchar
Definition: gde.hxx:21
#define NULp
Definition: cxxforward.h:116
AF_Combine
Definition: AP_filter.hxx:36
AP_filter(size_t size)
Definition: AP_filter.cxx:63
bool use_position(size_t pos) const
Definition: AP_filter.hxx:85
GB_UINT4 weight(size_t idx) const
Definition: AP_filter.hxx:162
size_t length() const
Definition: AP_filter.hxx:167
bool was_checked_for_validity() const
Definition: AP_filter.hxx:140