ARB
SyncRoot.cxx
Go to the documentation of this file.
1 // ========================================================= //
2 // //
3 // File : SyncRoot.cxx //
4 // Purpose : Sync roots of trees //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in May 20 //
7 // http://www.arb-home.de/ //
8 // //
9 // ========================================================= //
10 
11 #include "CT_part.hxx"
12 #include "SyncRoot.hxx"
13 
14 #include <TreeRead.h>
15 
16 using namespace std;
17 
19  arb_assert(!deconstructionPhase());
20 
21  get_species_names(species_names);
22  speciesSpacePtr = new SpeciesSpace(species_names);
23  treePartsPtr = new TreeParts(*speciesSpacePtr, *this);
24 
25  arb_assert(deconstructionPhase());
26 }
27 
28 GB_ERROR RootSynchronizer::deconstructTree(int treeIdx, bool provideProgress) {
29  if (!deconstructionPhase()) beginDeconstructionPhase();
30 
32  if (!valid_tree_index(treeIdx)) {
33  error = GBS_global_string("invalid tree index %i (valid 0-%i)", treeIdx, int(get_tree_count())-1);
34  }
35  else {
36  if (dtree.size() <= size_t(treeIdx)) dtree.resize(get_tree_count());
37  arb_assert(dtree.size()>size_t(treeIdx));
38 
39  if (dtree[treeIdx].isNull()) {
40  const SizeAwareTree *tree = get_tree(treeIdx);
41  if (!tree) {
42  error = GBS_global_string("tree at index #%i vanished (internal error)", treeIdx);
43  }
44  else {
45  dtree[treeIdx] = new DeconstructedTree(*speciesSpacePtr);
46  error = dtree[treeIdx]->deconstruct_weighted(tree, treePartsPtr->get_tree_PART(treeIdx), get_tree_info(treeIdx).species_count(), 1.0, provideProgress, speciesSpacePtr->get_allSpecies(), DMODE_ROOTSYNC);
47  if (!error) dtree[treeIdx]->start_sorted_retrieval();
48  }
49  }
50  }
51  return error;
52 }
53 
54 inline void showDeconstructingSubtitle(arb_progress& progress, int treeNr) {
55  progress.subtitle(GBS_global_string("Deconstructing tree #%i", treeNr+1));
56 }
57 
58 ErrorOrSizeAwareTreePtr RootSynchronizer::find_best_root_candidate(int inTree, int accordingToTree, int& best_dist, bool provideProgress) {
59  GB_ERROR error = NULp;
60  const SizeAwareTree *result = NULp;
61 
62  best_dist = INT_MAX;
63 
64  const bool deconInTree = !has_deconstructed_tree(inTree);
65  const bool deconAcTree = !has_deconstructed_tree(accordingToTree);
66 
67  SmartPtr<arb_progress> progress;
68  if (provideProgress) progress = new arb_progress(2UL); // 50% deconstruct + 50% search
69 
70  // deconstruct involved trees:
71  {
72  SmartPtr<arb_progress> decon_progress;
73  if (provideProgress) {
74  const size_t steps = deconInTree + deconAcTree;
75  if (steps) decon_progress = new arb_progress(steps);
76  }
77  if (!error) {
78  const bool update = deconAcTree && decon_progress.isSet();
79  if (update) showDeconstructingSubtitle(*progress, accordingToTree);
80  error = deconstructTree(accordingToTree, provideProgress);
81  if (update) decon_progress->inc_and_check_user_abort(error);
82  }
83  if (!error) {
84  const bool update = deconInTree && decon_progress.isSet();
85  if (update) showDeconstructingSubtitle(*decon_progress, inTree);
86  error = deconstructTree(inTree, provideProgress);
87  if (update) decon_progress->inc_and_check_user_abort(error);
88  }
89 
90  if (provideProgress) progress->inc_and_check_user_abort(error);
91  }
92 
93  if (!error) {
94  if (provideProgress) progress->subtitle("Searching best matching root");
95 
96  const SizeAwareTree *accordingRoot = get_tree(accordingToTree);
97  const PART *accordingRootPART = dtree[accordingToTree]->find_part(accordingRoot->get_leftson());
98  arb_assert(accordingRootPART);
99 
100  int best_idx;
101  find_best_matching_PART_in(best_dist, best_idx, accordingRootPART, *dtree[inTree], get_tree_PART(accordingToTree), get_tree_PART(inTree), provideProgress);
102 
103  arb_assert(best_idx != -1); // always expect some "best" match
104 
105  result = DOWNCAST(const SizeAwareTree*, PART_FWD::get_origin(dtree[inTree]->peek_part(best_idx)));
106  arb_assert(result);
107 
108  if (provideProgress) progress->inc_and_check_user_abort(error);
109  }
110 
111  if (error && provideProgress) progress->done();
112 
113  return ErrorOrSizeAwareTreePtr(error, result);
114 }
115 
116 void RootSynchronizer::find_best_matching_PART_in(int& best_dist, int &best_idx, const PART *part, const DeconstructedTree& in, const PART *tree_part, const PART *tree_in, bool provideProgress) {
117  // reset result params:
118  best_dist = INT_MAX;
119  best_idx = -1;
120 
121  SmartPtr<arb_progress> findBestProgress;
122  if (provideProgress) {
123  findBestProgress = new arb_progress(in.get_part_count());
124  }
125 
126  for (size_t idx = 0; idx<in.get_part_count(); ++idx) {
127  const PART *pin = in.peek_part(idx);
128  arb_assert(pin);
129 
130  if (represents_existing_edge(pin)) {
131  int dist = PART_FWD::calcDistance(part, pin, tree_part, tree_in);
132  if (dist<best_dist) {
133  best_idx = idx;
134  best_dist = dist;
135  }
136  else if (best_dist == 0) {
137  arb_assert(dist>best_dist); // multiple perfect matches should not occur
138  }
139  }
140  if (provideProgress) {
141  ++*findBestProgress;
142  if (findBestProgress->aborted()) break;
143  }
144  }
145 }
146 
147 void RootSynchronizer::find_worst_matching_PART_in(int& worst_dist, int &worst_idx, const PART *part, const DeconstructedTree& in, const PART *tree_part, const PART *tree_in) {
148  // reset result params:
149  worst_dist = INT_MIN;
150  worst_idx = -1;
151 
152  for (size_t idx = 0; idx<in.get_part_count(); ++idx) {
153  const PART *pin = in.peek_part(idx);
154  arb_assert(pin);
155 
156  if (!represents_existing_edge(pin)) continue;
157 
158  int dist = PART_FWD::calcDistance(part, pin, tree_part, tree_in);
159  if (dist>worst_dist) {
160  worst_idx = idx;
161  worst_dist = dist;
162  }
163  }
164 }
165 
166 // #define DUMP_AGAIN
167 
168 MultirootPtr RootSynchronizer::find_better_multiroot(const Multiroot& start, int best_distSum, int best_centerDist, int *movesPerTree, arb_progress *progress) {
169  // best_distSum should be start.distanceSum() or better
170 
171  Multiroot modified(start);
172  MultirootPtr best;
173  const int nodes = start.size();
174 
175  int leftMoves = 0;
176  for (int t = 0; t<nodes; ++t) {
177  leftMoves += movesPerTree[t];
178  }
179 
180  for (int t = 0; t<nodes && best.isNull(); ++t) {
181  if (movesPerTree[t]>0) {
182  --movesPerTree[t];
183 
185  ConstSizeAwareTreeVector neighbors;
186 
187  // store (up to) 4 neighbors nodes (representing adjacent edges):
188  {
189  if (!node->is_leaf()) { // try branches to both sons
190  neighbors.push_back(node->get_leftson());
191  neighbors.push_back(node->get_rightson());
192  }
193 
194  ConstSizeAwareTreePtr brother = node->get_brother();
195  arb_assert(brother);
196 
197  if (node->is_son_of_root()) {
198  if (!brother->is_leaf()) { // try branches to both sons of brother
199  neighbors.push_back(brother->get_leftson());
200  neighbors.push_back(brother->get_rightson());
201  }
202  }
203  else { // try branches from father to brother and grandpa (or uncle at root)
204  neighbors.push_back(brother);
205  neighbors.push_back(node->get_father());
206  }
207 
208  arb_assert(neighbors.size()>0);
209  arb_assert(neighbors.size()<=4);
210  }
211 
212  // iterate all neighbors:
213  for (ConstSizeAwareTreeVector::const_iterator n = neighbors.begin(); n != neighbors.end() && best.isNull(); ++n) {
214  ConstSizeAwareTreePtr next_node = *n;
215  modified.replace_node(t, next_node);
216 
217  // calc current distance and keep best found Multiroot:
218  int mod_distSum = modified.distanceSum(*this);
219  if (mod_distSum<=best_distSum) {
220  bool takeModified = mod_distSum<best_distSum;
221  if (!takeModified) {
222  arb_assert(mod_distSum == best_distSum);
223  const int mod_centerDist = modified.distanceToCenterSum(*this);
224  if (mod_centerDist<best_centerDist) {
225 #if defined(DUMP_AGAIN)
226  fprintf(stderr, "- again found mod_distSum=%i (center dist: %i -> %i)\n", mod_distSum, best_centerDist, mod_centerDist);
227 #endif
228  best_centerDist = mod_centerDist;
229  takeModified = true;
230  }
231  }
232  if (takeModified) {
233  best_distSum = mod_distSum;
234  best = new Multiroot(modified);
235  }
236  }
237 
238  if (progress && progress->aborted()) {
239  break;
240  }
241 
242  if (leftMoves>1 && best.isNull()) {
243  MultirootPtr recursed = find_better_multiroot(modified, best_distSum, best_centerDist, movesPerTree, progress);
244  if (recursed.isSet()) {
245  int recursed_distSum = recursed->distanceSum(*this);
246  if (recursed_distSum<=best_distSum) {
247  bool takeRecursed = recursed_distSum<best_distSum;
248  if (!takeRecursed) {
249  arb_assert(recursed_distSum == best_distSum);
250  const int rec_centerDist = recursed->distanceToCenterSum(*this);
251  if (rec_centerDist<best_centerDist) {
252 #if defined(DUMP_AGAIN)
253  fprintf(stderr, "- again found recursed_distSum=%i (center dist: %i -> %i)\n", recursed_distSum, best_centerDist, rec_centerDist);
254 #endif
255  best_centerDist = rec_centerDist;
256  takeRecursed = true;
257  }
258  }
259  if (takeRecursed) {
260  best_distSum = recursed_distSum;
261  best = recursed;
262  }
263  }
264  }
265  }
266  }
267 
268  ++movesPerTree[t];
269  }
270  }
271  return best;
272 }
273 
275  SmartPtr<arb_progress> progress;
276  GB_ERROR error = NULp;
277 
278  if (provideProgress) {
279  progress = new arb_progress("Deconstructing trees", get_tree_count());
280  }
281 
282  const int treeCount = get_tree_count();
283 
284  for (int t = 0; t<treeCount && !error; ++t) {
285  if (provideProgress) showDeconstructingSubtitle(*progress, t);
286  error = deconstructTree(t, provideProgress);
287  if (provideProgress) progress->inc_and_check_user_abort(error);
288  }
289 
290  if (error && provideProgress) progress->done();
291 
292  return error;
293 }
294 
295 #define DUMP_DEPTH
296 
298  GB_ERROR error = deconstruct_all_trees(false);
299  arb_assert(deconstructionPhase());
300 
301  if (error) {
303  return ErrorOrMultirootPtr(error, none);
304  }
305 
306  int depth = 0;
307 
308  ErrorOrMultirootPtr emr = get_current_roots();
309  if (!emr.hasError()) {
310  const int CANDIDATES = 2;
311  MultirootPtr mr[CANDIDATES];
312  int mr_dist[CANDIDATES];
313  int mr_centerDist[CANDIDATES];
314 
315  mr[0] = emr.getValue();
316  mr[1] = get_innermost_edges(); // add second, speculative multiroot (at centermost branches)!
317 
318  int best_c = -1;
319  {
320  int best_dist = INT_MAX;
321  int best_centerDist = INT_MAX;
322 
323  for (int c = 0; c<CANDIDATES; ++c) {
324  arb_assert(mr[c].isSet());
325  mr_dist[c] = mr[c]->distanceSum(*this);
326  mr_centerDist[c] = mr[c]->distanceToCenterSum(*this);
327 
328  if (mr_dist[c]<best_dist || (mr_dist[c] == best_dist && mr_centerDist[c]<best_centerDist)) {
329  best_c = c;
330  best_dist = mr_dist[c];
331  best_centerDist = mr_centerDist[c];
332  }
333  }
334  }
335  arb_assert(best_c != -1);
336 
337  bool done = false;
338  while (!done) {
339  if (progress) {
340  progress->subtitle(GBS_global_string("distance=%i / center distance=%i", mr_dist[best_c], mr_centerDist[best_c]));
341  if (progress->aborted()) {
342 #if defined(DUMP_DEPTH)
343  fprintf(stderr, "Aborting recursion (user abort)\n");
344 #endif
345  break;
346  }
347  }
348 
349  int cand_checked = 0;
350  for (int pass = 1; pass<=2 && !done; ++pass) { // pass1 = optimize best_c; pass2=optimize rest
351  for (int c = 0; c<CANDIDATES && !done; ++c) {
352  bool search = pass == 1 ? (c == best_c) : (c != best_c);
353  if (search) {
354  const int nodes = mr[c]->size();
355  int movesPerTree[nodes];
356  for (int n = 0; n<nodes; ++n) {
357  movesPerTree[n] = depth+1;
358  }
359  MultirootPtr better_mr = find_better_multiroot(*(mr[c]), mr_dist[c], mr_centerDist[c], movesPerTree, progress);
360  ++cand_checked;
361  if (better_mr.isNull()) {
362 #if defined(DUMP_DEPTH)
363  fprintf(stderr, "Found no better multiroot[%i] at depth=%i (dist=%i; center-dist=%i)\n", c, depth, mr_dist[c], mr_centerDist[c]);
364 #endif
365  if (cand_checked == CANDIDATES) { // do not increase depth if not all candidates checked yet
366  if (depth == MAX_DEPTH) {
367  done = true; // no improvement -> done
368  }
369  else {
370  ++depth; // search deeper
371 #if defined(DUMP_DEPTH)
372  fprintf(stderr, "Increasing depth to %i\n", depth);
373 #endif
374  }
375  }
376  }
377  else {
378  mr[c] = better_mr;
379  mr_dist[c] = better_mr->distanceSum(*this);
380  mr_centerDist[c] = better_mr->distanceToCenterSum(*this);
381 
382 #if defined(DUMP_DEPTH)
383  fprintf(stderr, "Found better multiroot[%i] at depth=%i (dist=%i; center-dist=%i)\n", c, depth, mr_dist[c], mr_centerDist[c]);
384 #endif
385  if (c != best_c) {
386  if (mr_dist[c]<mr_dist[best_c] || (mr_dist[c] == mr_dist[best_c] && mr_centerDist[c]<mr_centerDist[best_c])) {
387  best_c = c;
388  }
389  }
390 
391  // decrement depth again after better root-combi was found:
392  if (depth>0) --depth;
393 #if defined(DUMP_DEPTH)
394  fprintf(stderr, "[continuing with depth=%i]\n", depth);
395 #endif
396  }
397  }
398  }
399  }
400  }
401 
402  return ErrorOrMultirootPtr(NULp, mr[best_c]);
403  }
404  return emr;
405 }
406 
409  GB_ERROR error = NULp;
410  if (get_tree_count()<2) {
411  error = "Need at least two trees";
412  }
413  else {
414  result = new Multiroot(*this);
415  }
416  return ErrorOrMultirootPtr(error, result);
417 }
418 
420  arb_assert(allTreesDeconstructed());
421 
422  MultirootPtr mr = new Multiroot(*this);
423 
424  // set nodes to innermost edges:
425  for (size_t i = 0; i<get_tree_count(); ++i) {
426  const PART *innerPart = dtree[i]->find_innermost_part();
427  arb_assert(innerPart);
428 
429  const SizeAwareTree *innerNode = DOWNCAST(const SizeAwareTree*, PART_FWD::get_origin(innerPart));
430  mr->replace_node(i, innerNode);
431  }
432 
433  return mr;
434 }
435 
436 int RootSynchronizer::calcEdgeDistance(int i1, const SizeAwareTree *n1, int i2, const SizeAwareTree *n2) const {
437  arb_assert(deconstructionPhase());
438 
439  arb_assert(valid_tree_index(i1));
440  arb_assert(valid_tree_index(i2));
441 
442  arb_assert(!n1->is_root_node());
443  arb_assert(!n2->is_root_node());
444 
445  const PART *p1 = dtree[i1]->find_part(n1);
446  const PART *p2 = dtree[i2]->find_part(n2);
447 
448  arb_assert(p1);
449  arb_assert(p2);
450 
451  const PART *t1 = get_tree_PART(i1);
452  const PART *t2 = get_tree_PART(i2);
453 
454  return PART_FWD::calcDistance(p1, p2, t1, t2);
455 }
456 
457 int RootSynchronizer::calcTreeDistance(int i1, int i2) const {
458  const PART *t1 = get_tree_PART(i1);
459  const PART *t2 = get_tree_PART(i2);
460 
461  return PART_FWD::calcDistance(t1, t2, t1, t2);
462 }
463 
465  int sum = 0;
466  for (size_t i = 0; i<get_tree_count(); ++i) {
467  for (size_t j = 0; j<i; ++j) {
468  sum += calcTreeDistance(i, j);
469  }
470  }
471  return sum;
472 }
473 
474 int Multiroot::lazy_eval_distance(const RootSynchronizer& rsync, int i, int j) const {
475  int dist = distance.get(i, j);
476  if (dist == UNKNOWN_DISTANCE) {
477  dist = rsync.calcEdgeDistance(i, get_node(i), j, get_node(j));
478  distance.set(i, j, dist);
479  }
480  arb_assert(dist >= 0); // distance should be up-to-date now!
481  return dist;
482 }
483 
484 int Multiroot::distanceSum(const RootSynchronizer& rsync) const {
486 
487  int sum = 0;
488  for (int i = 0; i<size(); ++i) {
489  for (int j = 0; j<i; ++j) {
490  sum += lazy_eval_distance(rsync, i, j);
491  }
492  }
493  return sum;
494 }
495 
497  int sum = 0;
498  for (int i = 0; i<size(); ++i) {
499  const PART *part = rsync.get_edge_PART(i, get_node(i));
500  sum += part->distance_to_tree_center();
501  }
502  return sum;
503 }
504 
505 
507  arb_assert(idx>=0 && idx<size());
508  int sum = 0;
509  for (int i = 0; i<size(); ++i) {
510  if (i != idx) {
511  sum += lazy_eval_distance(rsync, i, idx);
512  }
513  }
514  return sum;
515 }
516 
518  arb_assert(newNode); // missing node
519  arb_assert(idx<size());
520 
521  node[idx] = newNode;
522  // invalidate distances affected by replaced node:
523  for (int i = 0; i<size(); ++i) {
524  if (i != idx) {
525  distance.set(i, idx, UNKNOWN_DISTANCE);
526  }
527  }
528 }
529 
const PART * get_edge_PART(int treeIdx, const SizeAwareTree *sonOfEdge) const
Definition: SyncRoot.hxx:161
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
const PART * peek_part(int idx) const
Definition: CT_ctree.cxx:58
void beginDeconstructionPhase()
Definition: SyncRoot.cxx:18
string result
void showDeconstructingSubtitle(arb_progress &progress, int treeNr)
Definition: SyncRoot.cxx:54
int distanceSum(const RootSynchronizer &rsync) const
Definition: SyncRoot.cxx:484
static void find_best_matching_PART_in(int &best_dist, int &best_idx, const PART *part, const DeconstructedTree &in, const PART *tree_part, const PART *tree_in, bool provideProgress)
Definition: SyncRoot.cxx:116
const TreeNode * get_origin(const PART *part)
Definition: CT_part.cxx:523
bool deconstructionPhase() const
Definition: SyncRoot.hxx:131
static void find_worst_matching_PART_in(int &worst_dist, int &worst_idx, const PART *part, const DeconstructedTree &in, const PART *tree_part, const PART *tree_in)
Definition: SyncRoot.cxx:147
bool hasError() const
Definition: ErrorOrType.h:50
int calcEdgeDistance(int i1, const SizeAwareTree *n1, int i2, const SizeAwareTree *n2) const
Definition: SyncRoot.cxx:436
TYPE getValue() const
Definition: ErrorOrType.h:58
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
ErrorOrSizeAwareTreePtr find_best_root_candidate(int inTree, int accordingToTree, int &best_dist, bool provideProgress)
Definition: SyncRoot.cxx:58
STL namespace.
ErrorOrMultirootPtr find_good_roots_for_trees(const int MAX_DEPTH, arb_progress *progress=NULp)
Definition: SyncRoot.cxx:297
int singleTreeDistanceSum(const RootSynchronizer &rsync, int idx)
Definition: SyncRoot.cxx:506
bool isNull() const
test if SmartPtr is NULp
Definition: smartptr.h:248
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
static HelixNrInfo * start
int calcDistance(const PART *e1, const PART *e2, const PART *t1, const PART *t2)
Definition: CT_part.cxx:510
int distanceToCenterSum(const RootSynchronizer &rsync) const
Definition: SyncRoot.cxx:496
bool isSet() const
test if SmartPtr is not NULp
Definition: smartptr.h:245
Generic smart pointer.
Definition: smartptr.h:149
bool aborted()
Definition: arb_progress.h:335
static void error(const char *msg)
Definition: mkptypes.cxx:96
size_t get_part_count() const
Definition: CT_ctree.cxx:56
GB_ERROR deconstruct_all_trees(bool provideProgress)
Definition: SyncRoot.cxx:274
ErrorOr< MultirootPtr > ErrorOrMultirootPtr
Definition: SyncRoot.hxx:98
group_matcher none()
Definition: test_unit.h:1012
ErrorOr< ConstSizeAwareTreePtr > ErrorOrSizeAwareTreePtr
Definition: SyncRoot.hxx:32
Definition: CT_part.hxx:62
MultirootPtr get_innermost_edges() const
Definition: SyncRoot.cxx:419
ErrorOrMultirootPtr get_current_roots() const
Definition: SyncRoot.cxx:407
int size() const
Definition: SyncRoot.hxx:82
bool represents_existing_edge(const PART *p)
Definition: CT_common.hxx:241
std::vector< ConstSizeAwareTreePtr > ConstSizeAwareTreeVector
Definition: SyncRoot.hxx:35
void subtitle(const char *stitle)
Definition: arb_progress.h:321
void replace_node(int idx, ConstSizeAwareTreePtr newNode)
Definition: SyncRoot.cxx:517
#define NULp
Definition: cxxforward.h:116
int minDistanceSum() const
Definition: SyncRoot.cxx:464
int distance_to_tree_center() const
Definition: CT_part.hxx:229
ConstSizeAwareTreePtr get_node(int idx) const
Definition: SyncRoot.hxx:87
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
int calcTreeDistance(int i1, int i2) const
Definition: SyncRoot.cxx:457