28 maxDistance(maxDistance_),
29 minClusterSize(minClusterSize_)
36 size_t loadedSequences;
45 static void collectStats(ClusterTree *
node, ClusterStats *stats) {
46 switch (node->get_state()) {
51 if (node->is_leaf()) {
52 stats->loadedSequences += node->hasSequence();
55 collectStats(node->get_leftson(), stats);
56 collectStats(node->get_rightson(), stats);
61 GB_ERROR ClusterTreeRoot::find_clusters() {
62 ClusterTree *root = get_root_node();
66 printf(
"----------------------------------------\n");
67 printf(
"maxDistance: %f\n", maxDistance);
68 printf(
"minClusterSize: %u\n", minClusterSize);
69 printf(
"Leafs in tree: %u\n", root->get_leaf_count());
70 printf(
"Possible clusters: %u\n", root->get_cluster_count());
73 arb_progress cluster_progress(
long(root->get_cluster_count()));
74 cluster_progress.auto_subtitles(
"Cluster");
79 root->detect_clusters(cluster_progress);
81 catch (
const char *msg) { error = msg; }
86 change_root(root,
NULp);
93 collectStats(root, &stats);
95 printf(
"Found clusters:::::::: %zu\n", stats.clusters);
96 printf(
"Found subclusters::::: %zu\n", stats.subclusters);
97 printf(
"Loaded sequences:::::: %zu (%5.2f%%)\n",
98 stats.loadedSequences,
99 (100.0*stats.loadedSequences)/root->get_leaf_count());
101 #if defined(TRACE_DIST_CALC)
102 size_t distances = root->get_calculated_distances();
103 size_t leafs = root->get_leaf_count();
104 size_t existingDistances = (leafs*leafs)/2-1;
106 printf(
"Calculated distances:: %zu (%5.2f%%)\n",
108 (100.0*distances)/existingDistances);
109 #endif // TRACE_DIST_CALC
120 void ClusterTree::init_tree() {
123 #if defined(TRACE_DIST_CALC)
124 calculatedDistances = 0;
125 #endif // TRACE_DIST_CALC
141 ClusterTree *lson = get_leftson();
142 ClusterTree *rson = get_rightson();
147 leaf_count = lson->get_leaf_count() + rson->get_leaf_count();
149 ClusterTreeRoot *troot = get_tree_root();
150 if (leaf_count<troot->get_minClusterSize()) {
156 clus_count = lson->get_cluster_count() + rson->get_cluster_count() + 1;
157 min_bases = numeric_limits<AP_FLOAT>::infinity();
164 void ClusterTree::detect_clusters(
arb_progress& progress) {
168 ClusterTree *lson = get_leftson();
169 ClusterTree *rson = get_rightson();
171 lson->detect_clusters(progress);
172 rson->detect_clusters(progress);
174 #if defined(TRACE_DIST_CALC)
175 calculatedDistances += get_leftson()->calculatedDistances;
176 calculatedDistances += get_rightson()->calculatedDistances;
177 #endif // TRACE_DIST_CALC
193 pairsByBranchDistance.insert(
LeafRelation(bd->first, bd->second));
197 cl_assert(pairsByBranchDistance.size() == possible_relations());
204 AP_FLOAT maxDistance = get_tree_root()->get_maxDistance();
210 SortedPairValues::const_reverse_iterator pair_end = pairsByBranchDistance.rend();
211 for (SortedPairValues::const_reverse_iterator pair = pairsByBranchDistance.rbegin(); pair != pair_end; ++pair) {
212 AP_FLOAT realDist = get_seqDist(pair->get_pair());
214 if (realDist>worstDistanceSeen) {
215 worstDistanceSeen = realDist;
217 delete worstKnownDistance;
218 worstKnownDistance =
new TwoLeafs(pair->get_pair());
220 if (realDist>maxDistance) {
227 #if defined(TRACE_DIST_CALC)
228 calculatedDistances += sequenceDists->size();
229 #endif // TRACE_DIST_CALC
235 size_t knownDistances = sequenceDists->size() +
236 get_leftson()->known_seqDists() +
237 get_rightson()->known_seqDists();
239 cl_assert(knownDistances == possible_relations());
250 lson->oblivion(
false);
251 rson->oblivion(
false);
254 if (progress.
aborted())
throw "aborted on userrequest";
260 void ClusterTree::oblivion(
bool forgetDistances) {
268 if (forgetDistances) {
269 delete sequenceDists;
270 sequenceDists =
NULp;
272 forgetDistances =
true;
279 get_leftson()->oblivion(forgetDistances);
280 get_rightson()->oblivion(forgetDistances);
284 void ClusterTree::calc_branch_depths() {
288 (*branchDepths)[
this] = 0.0;
291 for (
int s = 0;
s<2;
s++) {
292 const NodeValues *sonDepths =
s ? get_leftson()->get_branch_depths() : get_rightson()->get_branch_depths();
293 AP_FLOAT lenToSon =
s ? leftlen : rightlen;
295 NodeValues::const_iterator sd_end = sonDepths->end();
296 for (NodeValues::const_iterator sd = sonDepths->begin(); sd != sd_end; ++sd) {
297 (*branchDepths)[sd->first] = sd->second+lenToSon;
303 void ClusterTree::calc_branch_dists() {
308 for (
int s = 0;
s<2;
s++) {
309 ClusterTree *son =
s ? get_leftson() : get_rightson();
313 if (sonDists) branchDists->insert(sonDists->begin(), sonDists->end());
316 const NodeValues *ldepths = get_leftson()->get_branch_depths();
317 const NodeValues *rdepths = get_rightson()->get_branch_depths();
319 NodeValues::const_iterator l_end = ldepths->end();
320 NodeValues::const_iterator r_end = rdepths->end();
322 for (NodeValues::const_iterator ld = ldepths->begin(); ld != l_end; ++ld) {
324 for (NodeValues::const_iterator rd = rdepths->begin(); rd != r_end; ++rd) {
325 AP_FLOAT rlen = rd->second+rightlen;
327 (*branchDists)[
TwoLeafs(ld->first, rd->first)] = llen+rlen;
331 cl_assert(branchDists->size() == possible_relations());
338 const AP_FLOAT *found = has_seqDist(pair);
340 const ClusterTree *commonFather = pair.
first()->commonFatherWith(pair.
second());
342 while (commonFather !=
this) {
343 found = commonFather->has_seqDist(pair);
346 commonFather = commonFather->get_father();
366 AP_FLOAT minBaseCount = wbc1 < wbc2 ? wbc1 : wbc2;
369 if (minBaseCount>0) {
370 dist = mutations / minBaseCount;
371 if (minBaseCount < min_bases) min_bases = minBaseCount;
375 min_bases = minBaseCount;
378 #if defined(DEBUG) && 0
379 const char *name1 = pair.
first()->name;
380 const char *name2 = pair.
second()->name;
381 printf(
"%s <-?-> %s: mutations=%5.2f wbc1=%5.2f wbc2=%5.2f mbc=%5.2f dist=%5.2f\n",
382 name1, name2, mutations, wbc1, wbc2, minBaseCount, dist);
387 (*sequenceDists)[pair] = dist;
391 found = has_seqDist(pair);
402 if (found != sequenceDists->end()) {
403 return &(found->second);
409 const ClusterTree *ClusterTree::commonFatherWith(
const ClusterTree *other)
const {
410 int depth_diff = get_depth() - other->get_depth();
413 return other->get_father()->commonFatherWith(
this);
424 return get_father()->commonFatherWith(other->get_father());
const ClusterTree * second() const
const ClusterTree * first() const
LeafRelations::const_iterator LeafRelationCIter
POS_TREE1 * get_father() const
virtual AP_combinableSeq * dup() const =0
static void error(const char *msg)
std::map< TwoLeafs, AP_FLOAT > LeafRelations
AP_FLOAT weighted_base_count() const
std::set< LeafRelation > SortedPairValues
void lazy_load_sequence() const
virtual Mutations combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights, char *mutation_per_site=NULp)=0
void destroy(TreeNode *that)
std::map< ClusterTree *, AP_FLOAT > NodeValues
GB_write_int const char s