ARB
di_foundclusters.hxx
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : di_foundclusters.hxx //
4 // Purpose : Store results of cluster detection //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in November 2009 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ================================================================ //
11 
12 #ifndef DI_FOUNDCLUSTERS_HXX
13 #define DI_FOUNDCLUSTERS_HXX
14 
15 #ifndef GUI_ALIVIEW_HXX
16 #include <gui_aliview.hxx>
17 #endif
18 #ifndef SMARTPTR_H
19 #include <smartptr.h>
20 #endif
21 
22 #ifndef _GLIBCXX_MAP
23 #include <map>
24 #endif
25 #ifndef _GLIBCXX_VECTOR
26 #include <vector>
27 #endif
28 #ifndef _GLIBCXX_ALGORITHM
29 #include <algorithm>
30 #endif
31 #ifndef _GLIBCXX_STRING
32 #include <string>
33 #endif
34 #ifndef DBITEM_SET_H
35 #include <dbitem_set.h>
36 #endif
37 
38 #define cl_assert(cond) arb_assert(cond)
39 
40 class ClusterTree;
41 class ARB_tree_predicate;
42 struct ARB_countedTree;
44 
45 // ---------------------
46 // Cluster
47 
48 typedef int32_t ID;
49 
51  UNSORTED = 0,
58 
59  SORT_REVERSE = 1<<3, // bitflag!
60 };
61 
62 class DisplayFormat;
63 
64 enum ClusterMarkMode { // what to mark (REP=representative)
66  CMM_ALL = 1,
68 };
69 
// Cluster: one detected cluster. Holds distance statistics, the member set
// together with its representative, the current description plus an optional
// pending ("proposed") description, and an ID.
//
// NOTE(review): this listing carries the original source line numbers as a
// prefix on every line and skips source lines 119 and 130, so one declaration
// and the header of one inline method are not visible here.
70 class Cluster : virtual Noncopyable {
71  double min_dist; // min. distance between species inside Cluster
72  double max_dist; // ditto, but max.
73  double mean_dist; // ditto, but mean
74 
75  double min_bases; // min bases used for sequence distance
76  double rel_tree_pos; // relative position in tree [0.0 .. 1.0]
77 
78  GBDATA *representative; // cluster member with lowest mean distance
79  DBItemSet members; // all members (including representative)
80 
81  std::string desc; // cluster description
82  std::string *next_desc; // proposed new description (call accept_proposed() to accept it)
83 
84  ID id; // unique id for this cluster (used in AWAR_CLUSTER_SELECTED)
85 
86  static ID unused_id;
87 
88  std::string create_description(const ARB_countedTree *ct);
    // Replaces any pending proposal with a heap-allocated copy of newDesc.
    // The old proposal (if any) is deleted first.
89  void propose_description(const std::string& newDesc) {
90  delete next_desc;
91  next_desc = new std::string(newDesc);
92  }
93 
    // Strict "less than" between *this and other on the single criterion
    // sortBy. UNSORTED imposes no order (the result stays false).
    // SORT_REVERSE on its own is a flag, not a criterion, and trips cl_assert;
    // callers are expected to strip it first (lessByOrder does so).
94  bool lessByOrder_forward(const Cluster& other, ClusterOrder sortBy) const {
95  bool less = false;
96  switch (sortBy) {
97  case UNSORTED: break;
98  case SORT_BY_MEANDIST: less = mean_dist < other.mean_dist; break;
99  case SORT_BY_MIN_BASES: less = min_bases < other.min_bases; break;
100  case SORT_BY_CLUSTERSIZE: less = members.size() < other.members.size(); break;
101  case SORT_BY_TREEPOSITION: less = rel_tree_pos < other.rel_tree_pos; break;
102  case SORT_BY_MIN_DIST: less = min_dist < other.min_dist; break;
103  case SORT_BY_MAX_DIST: less = max_dist < other.max_dist; break;
104 
105  case SORT_REVERSE:
106  cl_assert(0);
107  break;
108  }
109  return less;
110  }
111 
112 public:
113  Cluster(ClusterTree *ct);
    // Frees a still-pending proposed description.
114  ~Cluster() { delete next_desc; }
115 
116  ID get_ID() const { return id; }
117 
118  size_t get_member_count() const { return members.size(); }
120  const char *get_list_display(const DisplayFormat *format) const; // only valid after calling scan_display_widths
121 
122  const DBItemSet& get_members() const { return members; }
123 
124  void mark_all_members(ClusterMarkMode mmode) const;
125  GBDATA *get_representative() const { return representative; }
126 
127  std::string get_upgroup_info(const ARB_countedTree *ct, const ARB_tree_predicate& keep_group_name, const std::string& separator);
128  double get_mean_distance() const { return mean_dist; }
129 
    // NOTE(review): the header of this inline method (source line 130) is
    // missing from the listing; presumably it is update_description(ct), which
    // the symbol index at the end of this page names - confirm against the
    // real header. The visible body stores create_description(ct) as the
    // pending proposal.
131  propose_description(create_description(ct));
132  }
    // Ends a pending proposal: when accept is true and a proposal exists it is
    // copied into desc. The proposal is deleted and reset in either case.
133  void accept_proposed(bool accept) {
134  if (accept && next_desc) {
135  desc = *next_desc;
136  }
137 
138  delete next_desc;
139  next_desc = NULp;
140  }
141 
    // Ordering predicate that honours the SORT_REVERSE bit. With the bit set,
    // the operands are swapped and the bit is cleared before delegating to
    // lessByOrder_forward; otherwise the call is forwarded unchanged.
142  bool lessByOrder(const Cluster& other, ClusterOrder sortBy) const {
143  bool less;
144  if (sortBy&SORT_REVERSE) {
145  less = other.lessByOrder_forward(*this, ClusterOrder(sortBy^SORT_REVERSE));
146  }
147  else {
148  less = lessByOrder_forward(other, sortBy);
149  }
150  return less;
151  }
152 
153 };
154 
156 typedef std::map<ID, ClusterPtr> KnownClusters;
157 typedef KnownClusters::const_iterator KnownClustersIter;
158 typedef std::vector<ID> ClusterIDs;
159 typedef ClusterIDs::const_iterator ClusterIDsIter;
160 
161 // --------------------
162 // global data
163 
167 };
168 
// ClustersData: holds every known cluster (keyed by ID), two lists of cluster
// IDs ('shown' and 'stored') and the sort criteria for 'shown'.
//
// NOTE(review): this listing carries the original source line numbers as a
// prefix on every line and skips source lines 178, 225 and 245. The
// constructor initialises a member 'clusterList' whose declaration is
// therefore not visible here (the symbol index at the end of this page lists
// it as 'AW_selection_list * clusterList').
169 class ClustersData : virtual Noncopyable {
170  KnownClusters known_clusters; // contains all known clusters
171  ClusterIDs shown; // clusters shown in selection list
172  ClusterIDs stored; // stored clusters
173  ClusterOrder criteria[2]; // order of 'shown'
174  bool sort_needed; // need to sort 'shown'
175 
176 public:
177  WeightedFilter &weighted_filter; // @@@ make private
179 
180 private:
    // Returns the ID list belonging to subset: 'shown' for SHOWN_CLUSTERS,
    // 'stored' for anything else. No sorting happens here (see the @@@ note).
181  ClusterIDs& get_subset(ClusterSubset subset) {
182  if (subset == SHOWN_CLUSTERS) {
183  // @@@ sort here if needed
184  return shown;
185  }
186  return stored;
187  }
    // Position of id inside the list of the given subset.
188  int get_pos(ID id, ClusterSubset subset) {
189  // returns -1 if not a member of subset
190  ClusterIDs& ids = get_subset(subset);
191  ClusterIDsIter begin = ids.begin();
192  ClusterIDsIter end = ids.end();
193  ClusterIDsIter found = find(begin, end, id);
194 
195  return found == end ? -1 : distance(begin, found);
196  }
197 
198 public:
    // ID stored at index pos of the subset's list, or 0 when pos is out of
    // range. A negative pos converts to a very large size_t and is therefore
    // rejected by the same bounds test.
199  ID idAtPos(int pos, ClusterSubset subset) {
200  ClusterIDs& ids = get_subset(subset);
201  return size_t(pos)<ids.size() ? ids.at(pos) : 0;
202  }
203 
204 
    // Binds the filter reference, flags 'shown' as needing a sort and sets the
    // initial criteria: primary SORT_BY_MEANDIST, secondary UNSORTED.
205  ClustersData(WeightedFilter& weighted_filter_)
206  : sort_needed(true),
207  weighted_filter(weighted_filter_),
208  clusterList(NULp)
209  {
210  criteria[0] = SORT_BY_MEANDIST;
211  criteria[1] = UNSORTED;
212  }
213 
    // Updates the sort criteria. SORT_REVERSE flips the reverse bit of the
    // primary criterion. Any other order that differs from the current primary
    // becomes the new primary, and the old primary moves to the secondary
    // slot. A re-sort is flagged in every case.
214  void changeSortOrder(ClusterOrder newOrder) {
215  if (newOrder == SORT_REVERSE) { // toggle reverse
216  criteria[0] = ClusterOrder(criteria[0]^SORT_REVERSE);
217  }
218  else if (newOrder != criteria[0]) {
219  criteria[1] = criteria[0];
220  criteria[0] = newOrder;
221  }
222  sort_needed = true;
223  }
224 
    // NOTE(review): the header of this inline method (source line 225) is
    // missing from the listing; presumably it is 'ClusterPtr clusterWithID(ID
    // id) const', which the symbol index at the end of this page names -
    // confirm against the real header. The visible body looks id up in
    // known_clusters and yields a default-constructed ClusterPtr when it is
    // not found.
226  KnownClustersIter found = known_clusters.find(id);
227  return found == known_clusters.end() ? ClusterPtr() : found->second;
228  }
229 
    // Number of IDs in, and read-only access to, the list of the given subset.
230  size_t count(ClusterSubset subset) { return get_subset(subset).size(); }
231  const ClusterIDs& get_clusterIDs(ClusterSubset subset) { return get_subset(subset); }
232 
    // Convenience overload: looks the cluster up by its ID (-1 if not a member).
233  int get_pos(ClusterPtr cluster, ClusterSubset subset) { return get_pos(cluster->get_ID(), subset); }
234 
235  void add(ClusterPtr clus, ClusterSubset subset);
236  void remove(ClusterPtr clus, ClusterSubset subset);
237  void clear(ClusterSubset subset);
238 
239  void store(ID id);
240 
241  void store_all();
242  void restore_all();
243  void swap_all();
244 
246 
    // Both accessors simply forward to the bound WeightedFilter.
247  GBDATA *get_gb_main() const { return weighted_filter.get_gb_main(); }
248  AW_root *get_aw_root() const { return weighted_filter.get_aw_root(); }
249 
250  void free();
251 };
252 
253 char *originalGroupName(const char *groupname);
254 
255 #else
256 #error di_foundclusters.hxx included twice
257 #endif // DI_FOUNDCLUSTERS_HXX
GBDATA * get_representative() const
GBDATA * get_gb_main() const
const DBItemSet & get_members() const
size_t count(ClusterSubset subset)
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
Definition: insdel.cxx:615
const char * get_list_display(const DisplayFormat *format) const
double get_mean_distance() const
return string(buffer, length)
KnownClusters::const_iterator KnownClustersIter
void changeSortOrder(ClusterOrder newOrder)
void update_cluster_selection_list()
ID idAtPos(int pos, ClusterSubset subset)
AW_root * get_aw_root() const
Definition: GUI_aliview.cxx:91
void update_description(const ARB_countedTree *ct)
Cluster(ClusterTree *ct)
SmartPtr< Cluster > ClusterPtr
void add(ClusterPtr clus, ClusterSubset subset)
std::string get_upgroup_info(const ARB_countedTree *ct, const ARB_tree_predicate &keep_group_name, const std::string &separator)
void scan_display_widths(DisplayFormat &format) const
std::set< GBDATA * > DBItemSet
Definition: dbitem_set.h:22
AW_root * get_aw_root() const
#define true
Definition: ureadseq.h:14
const ClusterIDs & get_clusterIDs(ClusterSubset subset)
std::vector< ID > ClusterIDs
int get_pos(ClusterPtr cluster, ClusterSubset subset)
bool less(const copy< T > &t1, const copy< T > &t2)
Definition: test_unit.h:645
int32_t ID
void store(ID id)
ClusterMarkMode
GBDATA * get_gb_main() const
Definition: GUI_aliview.cxx:87
ID get_ID() const
#define cl_assert(cond)
void clear(ClusterSubset subset)
std::map< ID, ClusterPtr > KnownClusters
void accept_proposed(bool accept)
char * originalGroupName(const char *groupname)
ClusterSubset
ClusterIDs::const_iterator ClusterIDsIter
ClusterPtr clusterWithID(ID id) const
ClustersData(WeightedFilter &weighted_filter_)
#define NULp
Definition: cxxforward.h:116
size_t get_member_count() const
bool lessByOrder(const Cluster &other, ClusterOrder sortBy) const
WeightedFilter & weighted_filter
AW_selection_list * clusterList
ClusterOrder
void mark_all_members(ClusterMarkMode mmode) const