ARB
di_clustertree.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : di_clustertree.hxx //
4 // Purpose : Tree structure used for cluster detection //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in October 2009 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 #ifndef DI_CLUSTERTREE_HXX
13 #define DI_CLUSTERTREE_HXX
14 
15 #ifndef ARB_TREE_HXX
16 #include <ARB_Tree.hxx>
17 #endif
18 #ifndef AP_SEQUENCE_HXX
19 #include <AP_sequence.hxx>
20 #endif
21 #ifndef _GLIBCXX_MAP
22 #include <map>
23 #endif
24 
25 
26 #define cl_assert(cond) arb_assert(cond)
27 
28 class ClusterTree;
29 class arb_progress;
30 
32  CS_UNKNOWN = 0, // initial state
33  CS_TOO_SMALL = 1, // cluster is too small
34  CS_MAYBE_CLUSTER = 2, // need to test whether this is a cluster
35  CS_NO_CLUSTER = 4, // not a cluster (only worstKnownDistance is known)
36  CS_IS_CLUSTER = 8, // subtree is cluster (all sequence distances are known)
37  CS_SUB_CLUSTER = 16, // like CS_IS_CLUSTER, but father is cluster as well
38 };
39 
40 // ------------------------
41 // ClusterTreeRoot
42 
43 class ClusterTreeRoot FINAL_TYPE : public ARB_seqtree_root {
44  AP_FLOAT maxDistance; // max. allowed distance inside cluster
45  unsigned minClusterSize; // min. size of cluster (number of leafs)
46 
47 public:
48  ClusterTreeRoot(AliView *aliview, AP_sequence *seqTemplate_, AP_FLOAT maxDistance_, size_t minClusterSize_);
50 
51  inline TreeNode *makeNode() const OVERRIDE;
52  inline void destroyNode(TreeNode *node) const OVERRIDE;
53 
54  DEFINE_DOWNCAST_ACCESSORS(ClusterTree, get_root_node, ARB_seqtree_root::get_root_node());
55 
56  GB_ERROR find_clusters();
57  unsigned get_minClusterSize() const { return minClusterSize; }
58  AP_FLOAT get_maxDistance() const { return maxDistance; }
59 };
60 
61 // ------------------
62 // LeafPairs
63 
64 class TwoLeafs {
65  ClusterTree *ct1, *ct2; // ct1<ct2!
66 
67 public:
68  TwoLeafs(ClusterTree *c1, ClusterTree *c2) :
69  ct1(c1<c2 ? c1 : c2),
70  ct2(c1<c2 ? c2 : c1)
71  {}
72 
73  const ClusterTree *first() const { return ct1; }
74  const ClusterTree *second() const { return ct2; }
75 
76  bool operator<(const TwoLeafs& other) const {
77  return ct1 == other.ct1 ? ct2<other.ct2 : ct1<other.ct1;
78  }
79 };
80 
81 class LeafRelation {
82  const TwoLeafs *pair;
83  const AP_FLOAT value;
84 public:
85  LeafRelation(const TwoLeafs& pair_, AP_FLOAT value_) :
86  pair(&pair_),
87  value(value_)
88  {}
89 
90  bool operator<(const LeafRelation& other) const {
91  if (value == other.value) return *pair < *other.pair;
92  return value < other.value;
93  }
94 
95  const TwoLeafs& get_pair() const { return *pair; }
96 };
97 
98 typedef std::map<ClusterTree*, AP_FLOAT> NodeValues;
99 typedef std::map<TwoLeafs, AP_FLOAT> LeafRelations;
100 typedef LeafRelations::const_iterator LeafRelationCIter;
101 
102 // --------------------
103 // ClusterTree
104 
105 
106 #if defined(DEBUG)
107 #define TRACE_DIST_CALC
108 #endif // DEBUG
109 
110 class ClusterTree FINAL_TYPE : public ARB_countedTree { // derived from a Noncopyable
111  ClusterState state;
112 
113  unsigned leaf_count; // number of leafs in subtree
114  unsigned clus_count; // number of clusters at and in subtree
115  unsigned depth; // depth of node ( 1 == root )
116  AP_FLOAT min_bases; // min. bases used for comparing two members
117 
118  NodeValues *branchDepths; // leaf-depths (distance from this each leaf)
119  LeafRelations *branchDists; // distance (branch) between two leafs
120  LeafRelations *sequenceDists; // real distance between sequences of two leafs
121 
122  TwoLeafs *worstKnownDistance;
123 
124  void calc_branch_depths();
125  void calc_branch_dists();
126 
127 #if defined(TRACE_DIST_CALC)
128  unsigned calculatedDistances;
129 #endif // TRACE_DIST_CALC
130 
131  unsigned get_depth() const { return depth; }
132  bool knows_seqDists() const { return state & (CS_IS_CLUSTER|CS_SUB_CLUSTER); }
133  unsigned possible_relations() const { return (leaf_count*(leaf_count-1)) / 2; }
134  unsigned known_seqDists() const { return knows_seqDists() ? possible_relations() : 0; }
135 
136  const NodeValues *get_branch_depths() {
137  if (!branchDepths) calc_branch_depths();
138  return branchDepths;
139  }
140 
141  const LeafRelations *get_branch_dists() {
142  if (!branchDists) calc_branch_dists();
143  return branchDists;
144  }
145 
146  AP_FLOAT get_seqDist(const TwoLeafs& pair);
147  const AP_FLOAT *has_seqDist(const TwoLeafs& pair) const;
148  const ClusterTree *commonFatherWith(const ClusterTree *other) const;
149 
150  void oblivion(bool forgetDistances); // forget unneeded data
151 
152 protected:
154  delete worstKnownDistance;
155  delete sequenceDists;
156  delete branchDists;
157  delete branchDepths;
158  }
159  friend class ClusterTreeRoot;
160 public:
161  explicit ClusterTree(ClusterTreeRoot *tree_root_) :
162  ARB_countedTree(tree_root_),
163  state(CS_UNKNOWN),
164  leaf_count(0),
165  clus_count(0),
166  depth(0),
167  min_bases(-1.0),
168  branchDepths(NULp),
169  branchDists(NULp),
170  sequenceDists(NULp),
171  worstKnownDistance(NULp)
172  {}
173 
174  DEFINE_TREE_ACCESSORS(ClusterTreeRoot, ClusterTree);
176 
177  unsigned get_cluster_count() const { return clus_count; }
178  unsigned get_leaf_count() const OVERRIDE { return leaf_count; }
179 
180 #if defined(TRACE_DIST_CALC)
181  unsigned get_calculated_distances() const { return calculatedDistances; }
182 #endif // TRACE_DIST_CALC
183 
184  ClusterState get_state() const { return state; }
185 
186  void init_tree() OVERRIDE;
187  void detect_clusters(arb_progress& progress);
188 
189  const LeafRelations *get_sequence_dists() const { return sequenceDists; }
190 
191  AP_FLOAT get_min_bases() const { return min_bases; }
192 };
193 
194 inline TreeNode *ClusterTreeRoot::makeNode() const { return new ClusterTree(const_cast<ClusterTreeRoot*>(this)); }
195 inline void ClusterTreeRoot::destroyNode(TreeNode *node) const { delete DOWNCAST(ClusterTree*, node); }
196 
198  bool selects(const ARB_seqtree&) const OVERRIDE { return true; }
199 };
200 
201 #else
202 #error di_clustertree.hxx included twice
203 #endif // DI_CLUSTERTREE_HXX
TwoLeafs(ClusterTree *c1, ClusterTree *c2)
ClusterState
ClusterTree(ClusterTreeRoot *tree_root_)
const char * GB_ERROR
Definition: arb_core.h:25
const ClusterTree * second() const
~ClusterTreeRoot() OVERRIDE
#define DEFINE_DOWNCAST_ACCESSORS(CLASS, NAME, VALUE)
Definition: downcast.h:147
AP_FLOAT get_maxDistance() const
const ClusterTree * first() const
ClusterState get_state() const
~ClusterTree() OVERRIDE
LeafRelations::const_iterator LeafRelationCIter
#define OVERRIDE_SEQ_ACCESSORS(SEQTYPE, BASETREETYPE)
Definition: ARB_Tree.hxx:181
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
double AP_FLOAT
Definition: AP_matrix.hxx:30
unsigned get_cluster_count() const
virtual TreeNode * makeNode() const =0
DEFINE_TREE_ACCESSORS(ARB_seqtree_root, ARB_countedTree)
AP_FLOAT get_min_bases() const
const TwoLeafs & get_pair() const
std::map< TwoLeafs, AP_FLOAT > LeafRelations
bool operator<(const LeafRelation &other) const
LeafRelation(const TwoLeafs &pair_, AP_FLOAT value_)
void predelete()
Definition: TreeNode.h:59
virtual void init_tree()=0
xml element
#define OVERRIDE
Definition: cxxforward.h:112
#define NULp
Definition: cxxforward.h:116
std::map< ClusterTree *, AP_FLOAT > NodeValues
bool operator<(const TwoLeafs &other) const
unsigned get_leaf_count() const OVERRIDE