ARB
AP_sequence.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : AP_sequence.hxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef AP_SEQUENCE_HXX
12 #define AP_SEQUENCE_HXX
13 
14 #ifndef ALIVIEW_HXX
15 #include <AliView.hxx>
16 #endif
17 #ifndef ARBTOOLS_H
18 #include <arbtools.h>
19 #endif
20 #ifndef ARB_ASSERT_H
21 #include <arb_assert.h>
22 #endif
23 #ifndef _STDINT_H
24 #include <stdint.h>
25 #endif
26 
// Assertion alias used throughout this header; forwards its condition unchanged to arb_assert().
27 #define ap_assert(cond) arb_assert(cond)
28 
// Floating point type used in this header, e.g. for the cached weighted base count.
29 typedef double AP_FLOAT;
30 
// Declared here, defined elsewhere. In this header its result is stored as the
// 'update' stamp of an AP_sequence whenever a sequence gets marked as set.
31 long AP_timer();
32 
// Abstract base class for a sequence that belongs to an alignment view (AliView)
// and may optionally be bound to the sequence data of a database species.
// Derived classes have to implement set(), unset() and dup().
//
// NOTE(review): this listing carries embedded source line numbers; a gap in that
// numbering means a line was dropped from the listing (see ensure_sequence_loaded()).
33 class AP_sequence : virtual Noncopyable {
34  const AliView *ali; // supplies length, filter and weights (see accessors at the end of the class)
35 
36  GBDATA *gb_sequence; // points to species/ali_xxx/data (or NULp if unbound, e.g. inner nodes in tree)
37  bool has_sequence; // true -> sequence was set()
38  long update; // AP_timer() stamp taken when the sequence got marked as set; 0 when marked as unset
39 
40 protected:
41  mutable AP_FLOAT cached_wbc; // result for weighted_base_count(); <0.0 means "not initialized"
42 
 // Records whether a sequence is present. Does nothing unless the state really changes;
 // on a change it refreshes the 'update' stamp and invalidates the cached weighted base count.
43  void mark_sequence_set(bool is_set) {
44  if (is_set != has_sequence) {
45  update = is_set ? AP_timer() : 0;
46  has_sequence = is_set;
47  cached_wbc = -1.0; // any change of state invalidates the cache
48  }
49  }
50 
 // Store / drop the sequence data; implemented by derived classes.
 // NOTE(review): presumably implementations call mark_sequence_set() - not visible here, verify.
51  virtual void set(const char *sequence) = 0;
52  virtual void unset() = 0;
53 
 // Performs the actual load for lazy_load_sequence(); defined outside this header.
54  void do_lazy_load() const;
55 
56 public:
57  AP_sequence(const AliView *aliview);
58  virtual ~AP_sequence() {}
59 
60  virtual AP_sequence *dup() const = 0; // used to dup derived class
61 
 // Binding to database species data; both functions are defined outside this header.
62  GB_ERROR bind_to_species(GBDATA *gb_species);
63  void unbind_from_species();
64  bool is_bound_to_species() const { return gb_sequence; } // true if gb_sequence is not NULp
65  GBDATA *get_bound_species_data() const { return gb_sequence; } // NULp if unbound
66 
 // Triggers do_lazy_load() only if no sequence is present yet AND this object is bound to a species.
67  void lazy_load_sequence() const {
68  if (!has_sequence && is_bound_to_species()) do_lazy_load();
69  }
70  void ensure_sequence_loaded() const {
 // NOTE(review): the embedded numbering jumps from 70 to 72, i.e. one line of this body is
 // missing from the listing - presumably a call to lazy_load_sequence(); confirm against the
 // original header. As shown here, the body only asserts that a sequence is present.
72  ap_assert(has_sequence);
73  }
74 
75  bool hasSequence() const { return has_sequence; }
76  void forget_sequence() { if (has_sequence) unset(); } // no-op if no sequence is present
77 
 // The following accessors simply forward to the AliView.
78  size_t get_sequence_length() const { return ali->get_length(); } // filtered length
79  const AP_filter *get_filter() const { return ali->get_filter(); }
80  const AP_weights *get_weights() const { return ali->get_weights(); }
81 
82  const AliView *get_aliview() const { return ali; }
83 
84 };
85 
86 // ----------------------------------------------------------------------
87 // estimation of upper limit for Mutations = MPB * BP * (2*SP) * W
88 //
89 // meaning limits
90 // ------- ------
91 // MPB = max.mutation per basepos 3 for aa; 1 for nucs
92 // BP = base positions ~1500 for nucs (=> ~1200 (4/5) theoretical max. for worst possible tree)
93 // SP = number of species 600k for silva (2*SP = leafs+inner nodes)
94 // W = weight 1M theoretically; ~6k seen
95 //
96 // -> 1 * 1200 * 2*600k * 6k => ~8.6*10^12 (~1/1M of the range of a 64-bit long)
97 // -> 1 * 1200 * 2*1M * 1M => 2.4*10^15 (~1/3800 of the range of a 64-bit long)
98 // ----------------------------------------------------------------------
99 typedef long Mutations; // Note: equal to min. mutations only for nucs w/o weights
100 // ----------------------------------------------------------------------
101 
// Interior of class AP_combinableSeq (the constructor below forwards to the AP_sequence constructor).
// NOTE(review): the embedded numbering skips 102, 126 and 130, i.e. the class header and two
// further lines are missing from this listing; see the notes at the affected places.
103  static long global_combineCount; // counter shared by all instances; see inc_combine_count() / combine_count()
104 
105 protected:
 // Computes the weighted base count; implemented by derived classes.
106  virtual AP_FLOAT count_weighted_bases() const = 0;
107 
108  static void inc_combine_count() { global_combineCount++; }
109 
110 public:
111  AP_combinableSeq(const AliView *aliview) : AP_sequence(aliview) {}
112  virtual ~AP_combinableSeq() {}
113 
114  virtual AP_combinableSeq *dup() const = 0; // used to dup derived class
115  virtual int cmp_combined(const AP_combinableSeq *other) const = 0; // 0 is treated as "equal" by combinedEquals()
116 
117  virtual Mutations combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights, char *mutation_per_site = NULp) = 0;
118  virtual Mutations mutations_if_combined_with(const AP_combinableSeq *other) = 0;
119 
120  virtual void partial_match(const AP_combinableSeq *part, long *overlap, long *penalty) const = 0;
121  virtual uint32_t checksum() const = 0;
122 
123  static long combine_count() { return global_combineCount; } // current value of the shared counter
124 
125  AP_FLOAT weighted_base_count() const { // returns < 0.0 if no sequence!
 // NOTE(review): line 126 is missing from the listing - presumably it fills cached_wbc via
 // count_weighted_bases() while the cache is still < 0.0; confirm against the original header.
127  return cached_wbc;
128  }
129 
 // NOTE(review): line 130 (the function header) is missing from the listing. The lines below are
 // the body of a function that uses 'lefts' and 'rights' and returns Mutations - presumably
 // noncounting_combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights); confirm.
 // It calls combine_seq() and afterwards decrements the shared counter again - presumably to
 // compensate an increment done inside combine_seq() implementations (not visible here).
131  Mutations res = combine_seq(lefts, rights);
132  global_combineCount--;
133  return res;
134  }
135 
 // Convenience wrapper: true if cmp_combined() reports 0 for 'other'.
136  bool combinedEquals(const AP_combinableSeq *other) const {
137  return cmp_combined(other) == 0;
138  }
140 
141 
142 #else
143 #error AP_sequence.hxx included twice
144 #endif // AP_SEQUENCE_HXX
AP_FLOAT cached_wbc
Definition: AP_sequence.hxx:41
double AP_FLOAT
Definition: AP_sequence.hxx:29
const char * GB_ERROR
Definition: arb_core.h:25
AP_combinableSeq(const AliView *aliview)
virtual ~AP_sequence()
Definition: AP_sequence.hxx:58
virtual ~AP_combinableSeq()
virtual AP_FLOAT count_weighted_bases() const =0
const AP_weights * get_weights() const
Definition: AliView.hxx:45
double AP_FLOAT
Definition: AP_matrix.hxx:30
static long combine_count()
GBDATA * get_bound_species_data() const
Definition: AP_sequence.hxx:65
virtual int cmp_combined(const AP_combinableSeq *other) const =0
void ensure_sequence_loaded() const
Definition: AP_sequence.hxx:70
AP_sequence(const AliView *aliview)
Definition: AP_sequence.cxx:16
void do_lazy_load() const
Definition: AP_sequence.cxx:46
const AP_filter * get_filter() const
Definition: AP_sequence.hxx:79
bool hasSequence() const
Definition: AP_sequence.hxx:75
virtual AP_sequence * dup() const =0
virtual AP_combinableSeq * dup() const =0
virtual void unset()=0
long Mutations
Definition: AP_sequence.hxx:99
bool is_bound_to_species() const
Definition: AP_sequence.hxx:64
virtual void partial_match(const AP_combinableSeq *part, long *overlap, long *penalty) const =0
void mark_sequence_set(bool is_set)
Definition: AP_sequence.hxx:43
void forget_sequence()
Definition: AP_sequence.hxx:76
virtual uint32_t checksum() const =0
const AP_weights * get_weights() const
Definition: AP_sequence.hxx:80
Mutations noncounting_combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights)
void unbind_from_species()
Definition: AP_sequence.cxx:40
const AP_filter * get_filter() const
Definition: AliView.hxx:42
bool combinedEquals(const AP_combinableSeq *other) const
const AliView * get_aliview() const
Definition: AP_sequence.hxx:82
size_t get_sequence_length() const
Definition: AP_sequence.hxx:78
AP_FLOAT weighted_base_count() const
#define NULp
Definition: cxxforward.h:116
void lazy_load_sequence() const
Definition: AP_sequence.hxx:67
virtual Mutations combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights, char *mutation_per_site=NULp)=0
GB_ERROR bind_to_species(GBDATA *gb_species)
Definition: AP_sequence.cxx:24
size_t get_length() const
Definition: AliView.cxx:66
virtual void set(const char *sequence)=0
virtual Mutations mutations_if_combined_with(const AP_combinableSeq *other)=0
static void inc_combine_count()
#define ap_assert(cond)
Definition: AP_sequence.hxx:27
long AP_timer()
Definition: AP_filter.cxx:299