ARB
NT_taxonomy.cxx
Go to the documentation of this file.
1 // =========================================================== //
2 // //
3 // File : NT_taxonomy.cxx //
4 // Purpose : Compare two trees by taxonomy //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in May 2015 //
7 // http://www.arb-home.de/ //
8 // //
9 // =========================================================== //
10 
11 #include "ad_trees.h"
12 #include "NT_local.h"
13 
14 #include <aw_window.hxx>
15 #include <aw_root.hxx>
16 #include <aw_awar.hxx>
17 #include <aw_msg.hxx>
18 
19 #include <awt_sel_boxes.hxx>
20 #include <TreeCallbacks.hxx>
21 #include <TreeAdmin.h>
22 
23 #include <arb_progress.h>
24 #include <arb_global_defs.h>
25 #include <item_sel_list.h>
26 
27 #include <set>
28 #include <map>
29 
30 #define TREE_COMPARE_PREFIX "ad_tree/compare/"
31 
32 #define AWAR_TREE_COMPARE_ACTION TREE_COMPARE_PREFIX "action"
33 #define AWAR_TREE_COMPARE_MIN_TAX_LEVELS TREE_COMPARE_PREFIX "taxdist"
34 #define AWAR_TREE_COMPARE_WRITE_FIELD TREE_COMPARE_PREFIX "field"
35 
36 enum Action { // uses same values as NT_mark_all_cb; see ../SL/TREEDISP/TreeCallbacks.cxx@MARK_MODE_LOWER_BITS
37  UNMARK = 0,
38  MARK = 1,
39  INVERT = 2,
40 };
41 
42 enum Target {
43  ALL,
44  TAX,
48 };
49 
50 static TreeNode *findParentGroup(TreeNode *node) { // @@@ DRY vs/using TreeNode::find_parent_with_groupInfo?
51  TreeNode *parent_group = NULp;
52 
53  while (!parent_group && node->father) {
54  node = node->father;
55  if (node->is_normal_group()) {
56  parent_group = node;
57  }
58  }
59 
60  return parent_group;
61 }
62 
63 static int countTaxLevel(TreeNode *node) { // @@@ DRY vs TreeNode::calc_clade_level????
64  int taxlevel = node->is_leaf() ? 0 : node->is_normal_group();
65  TreeNode *parent_group = findParentGroup(node);
66  while (parent_group) {
67  ++taxlevel;
68  parent_group = findParentGroup(parent_group);
69  }
70  return taxlevel;
71 }
72 
73 static int calcTaxDifference(TreeNode *g1, TreeNode *g2) {
74  // returns difference in taxonomy-levels
75  //
76  // difference defined such that:
77  // diff("/A/B", "/A/B") == 0
78  // diff("/A/B", "/A" ) == 1
79  // diff("/A/B", "/A/C") == 2
80  // diff("/A/B/C", "/A/D/E") == 4
81  // diff("/A/B/C", "/A/D/C") == 4
82  // diff("/A/B/C", "/A/C") == 3
83 
84  nt_assert(g1->is_normal_group() && g2->is_normal_group()); // has to be called with root-nodes of groups!
85 
86  TreeNode *p1 = findParentGroup(g1);
87  TreeNode *p2 = findParentGroup(g2);
88 
89  int taxdiff = 0;
90  if (p1) {
91  if (p2) {
92  int pdiff = calcTaxDifference(p1, p2);
93  int p1diff = calcTaxDifference(p1, g2) + 1;
94  int p2diff = calcTaxDifference(g1, p2) + 1;
95 
96  if (pdiff || strcmp(g1->name, g2->name) != 0) {
97  // if parent-taxonomy differs -> ignore names of g1/g2 -> diff=2
98  // if parent-taxonomy matches -> if names of g1 and g2 match -> no diff
99  // if names of g1 and g2 differ -> diff=2
100  pdiff += 2;
101  }
102 
103  taxdiff = std::min(pdiff, std::min(p1diff, p2diff));
104  }
105  else {
106  taxdiff = calcTaxDifference(p1, g2) + 1;
107  }
108  }
109  else {
110  if (p2) {
111  taxdiff = calcTaxDifference(g1, p2) + 1;
112  }
113  else {
114  if (strcmp(g1->name, g2->name) != 0) { // logic similar to (p1 && p2) above
115  taxdiff = 2;
116  }
117  }
118  }
119 
120  return taxdiff;
121 }
122 
124  TreeNode *tree1;
125  TreeNode *tree2;
126 
127 public:
129  : tree1(NULp),
130  tree2(NULp)
131  {}
132 
133  void setSpecies(TreeNode *species, bool first) {
134  nt_assert(species->is_leaf());
135  if (first) {
136  nt_assert(!tree1);
137  tree1 = species;
138  }
139  else {
140  nt_assert(!tree2);
141  tree2 = species;
142  }
143  }
144 
145  bool occursInBothTrees() const { return tree1 && tree2; }
146  int calcTaxDiffLevel() const {
147  TreeNode *parent_group1 = findParentGroup(tree1);
148  TreeNode *parent_group2 = findParentGroup(tree2);
149 
150  int taxdiff = 0;
151 
152  if (parent_group1) {
153  if (parent_group2) {
154  taxdiff = calcTaxDifference(parent_group1, parent_group2);
155  }
156  else {
157  taxdiff = countTaxLevel(parent_group1);
158  }
159  }
160  else {
161  if (parent_group2) {
162  taxdiff = countTaxLevel(parent_group2);
163  }
164  // else -> both outside any group -> no diff
165  }
166 
167  return taxdiff;
168  }
169 };
170 
171 typedef std::set<const char *, charpLess> NameSet;
172 typedef std::map<const char *, SpeciesInTwoTrees, charpLess> TwoTreeMap;
173 
174 static void mapTree(TreeNode *node, TwoTreeMap& tmap, bool first) {
175  if (node->is_leaf()) {
176  nt_assert(node->name);
177  tmap[node->name].setSpecies(node, first);
178  }
179  else {
180  mapTree(node->leftson, tmap, first);
181  mapTree(node->rightson, tmap, first);
182  }
183 }
184 
185 static void mark_action(AW_window *aws, TREE_canvas *ntw, Target target) {
186  AW_root *aw_root = aws->get_root();
187 
188  Action action = Action(aw_root->awar(AWAR_TREE_COMPARE_ACTION)->read_int());
189 
190  arb_progress progress("Mark species");
191  if (target == ALL) {
192  NT_mark_all_cb(NULp, ntw, action);
193  }
194  else {
195  progress.subtitle("Loading trees");
196 
197  const char *treename_left = TreeAdmin::source_tree_awar(aw_root)->read_char_pntr();
198  const char *treename_right = TreeAdmin::dest_tree_awar(aw_root)->read_char_pntr();
199 
200  GBDATA *gb_main = ntw->gb_main;
201  GB_transaction ta(gb_main);
202 
204 
205  TreeNode *tree_left = GBT_read_tree(gb_main, treename_left, new SimpleRoot);
206  TreeNode *tree_right = NULp;
207 
208  GB_ERROR load_error = NULp;
209  if (!tree_left) {
210  load_error = GB_await_error();
211  }
212  else {
213  tree_right = GBT_read_tree(gb_main, treename_right, new SimpleRoot);
214  if (!tree_right) load_error = GB_await_error();
215  }
216 
217  size_t missing = 0;
218  size_t targetted = 0;
219  bool had_error = false;
220 
221  if (load_error) {
222  aw_message(load_error);
223  had_error = load_error;
224  }
225  else {
226  nt_assert(tree_left && tree_right);
227  if (target == TAX) {
228  int min_tax_levels = atoi(aw_root->awar(AWAR_TREE_COMPARE_MIN_TAX_LEVELS)->read_char_pntr());
229 
230  TwoTreeMap tmap;
231  mapTree(tree_left, tmap, true);
232  mapTree(tree_right, tmap, false);
233 
234  size_t commonSpeciesCount = 0;
235  for (TwoTreeMap::iterator s = tmap.begin(); s != tmap.end(); ++s) {
236  if (s->second.occursInBothTrees()) commonSpeciesCount++;
237  }
238 
240  bool writeToField = fieldName;
242 
243  if (!error) {
244  arb_progress subprogress("Comparing taxonomy info", commonSpeciesCount);
245  for (TwoTreeMap::iterator s = tmap.begin(); s != tmap.end() && !error; ++s) {
246  const SpeciesInTwoTrees& species = s->second;
247 
248  if (species.occursInBothTrees()) {
249  int taxDiffLevel = species.calcTaxDiffLevel();
250  if (taxDiffLevel>min_tax_levels) {
251  ++targetted;
252  GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, s->first);
253  if (!gb_species) {
254  ++missing;
255  }
256  else {
257  switch (action) {
258  case UNMARK: GB_write_flag(gb_species, 0); break;
259  case MARK: GB_write_flag(gb_species, 1); break;
260  case INVERT: GB_write_flag(gb_species, !GB_read_flag(gb_species)); break;
261  }
262  if (writeToField) {
263  GBDATA *gb_field = GBT_searchOrCreate_itemfield_according_to_changekey(gb_species, fieldName, SPECIES_get_selector().change_key_path);
264  if (!gb_field) error = GB_await_error();
265  if (!error) error = GB_write_lossless_int(gb_field, taxDiffLevel);
266  }
267  }
268  }
269  subprogress.inc_and_check_user_abort(error);
270  }
271  }
272  }
273  aw_message_if(error);
274  had_error = error;
275  }
276  else {
277  progress.subtitle("Intersecting tree members");
278 
279  NameSet in_left;
280  NameSet in_right;
281  {
282  size_t count_left, count_right;
283  GB_CSTR *names_left = GBT_get_names_of_species_in_tree(tree_left, &count_left);
284  GB_CSTR *names_right = GBT_get_names_of_species_in_tree(tree_right, &count_right);
285 
286  for(size_t i= 0; i<count_left; ++i) in_left .insert(names_left[i]);
287  for(size_t i= 0; i<count_right; ++i) in_right.insert(names_right[i]);
288 
289  free(names_right);
290  free(names_left);
291  }
292 
293  {
294  NameSet& in_one = target == MISSING_LEFT ? in_right : in_left;
295  NameSet& in_other = target == MISSING_LEFT ? in_left : in_right;
296 
297  for (NameSet::const_iterator i = in_one.begin(); i != in_one.end(); ++i) {
298  bool is_in_other = in_other.find(*i) != in_other.end();
299  bool is_target = is_in_other == (target == COMMON);
300 
301  if (is_target) {
302  ++targetted;
303  GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, *i);
304  if (!gb_species) {
305  ++missing;
306  }
307  else {
308  switch (action) {
309  case UNMARK: GB_write_flag(gb_species, 0); break;
310  case MARK: GB_write_flag(gb_species, 1); break;
311  case INVERT: GB_write_flag(gb_species, !GB_read_flag(gb_species)); break;
312  }
313  }
314  }
315  }
316  }
317  }
318  }
319 
320  if (!had_error) {
321  if (!targetted) {
322  aw_message("Warning: no species targetted");
323  }
324  else if (missing) {
325  aw_message(GBS_global_string("Warning: %zu targeted species could not be found\n"
326  "(might be caused by zombies in your trees)", missing));
327  }
328  }
329 
330  destroy(tree_right);
331  destroy(tree_left);
332  }
333 }
334 
336  char *currTree = aw_root->awar(AWAR_TREE_NAME)->read_string();
337 
338  aw_root->awar_int (AWAR_TREE_COMPARE_ACTION, MARK, props);
339  aw_root->awar_string(AWAR_TREE_COMPARE_MIN_TAX_LEVELS, "0", props);
341 
342  free(currTree);
343 }
344 
346  AW_window_simple *aws = new AW_window_simple;
347  aws->init(aw_root, "COMPARE_TAXONOMY", "Compare taxonomy");
348  aws->load_xfig("compare_taxonomy.fig");
349 
350  aws->at("close");
351  aws->callback(AW_POPDOWN);
352  aws->create_button("CLOSE", "CLOSE", "C");
353 
354  aws->at("help");
355  aws->callback(makeHelpCallback("compare_taxonomy.hlp"));
356  aws->create_button("HELP", "HELP", "H");
357 
358  aws->at("action");
359  aws->create_toggle_field(AWAR_TREE_COMPARE_ACTION, NULp, "");
360  aws->insert_default_toggle("mark", "m", MARK);
361  aws->insert_toggle ("unmark", "u", UNMARK);
362  aws->insert_toggle ("invert", "i", INVERT);
363  aws->update_toggle_field();
364 
365  aws->at("all"); aws->callback(makeWindowCallback(mark_action, ntw, ALL)); aws->create_autosize_button("all", "all species");
366  aws->at("tax"); aws->callback(makeWindowCallback(mark_action, ntw, TAX)); aws->create_autosize_button("tax", "species with taxonomy changed");
367  aws->at("common"); aws->callback(makeWindowCallback(mark_action, ntw, COMMON)); aws->create_autosize_button("common", "common species");
368  aws->at("missleft"); aws->callback(makeWindowCallback(mark_action, ntw, MISSING_LEFT)); aws->create_autosize_button("missleft", "species missing in left tree");
369  aws->at("missright"); aws->callback(makeWindowCallback(mark_action, ntw, MISSING_RIGHT)); aws->create_autosize_button("missright", "species missing in right tree");
370 
371  aws->at("levels");
372  aws->create_input_field(AWAR_TREE_COMPARE_MIN_TAX_LEVELS, 5);
373 
375 
377 
378  return aws;
379 }
380 
381 
const char * GB_ERROR
Definition: arb_core.h:25
static void mapTree(TreeNode *node, TwoTreeMap &tmap, bool first)
AW_window * NT_create_compare_taxonomy_window(AW_root *aw_root, TREE_canvas *ntw)
GB_ERROR GB_incur_error()
Definition: arb_msg.h:49
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:717
void NT_mark_all_cb(UNFIXED, TREE_canvas *ntw, int mark_mode)
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:791
long read_int() const
Definition: AW_awar.cxx:187
Action
Definition: NT_taxonomy.cxx:36
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
int calcTaxDiffLevel() const
void setSpecies(TreeNode *species, bool first)
#define NO_FIELD_SELECTED
TreeNode * rightson
Definition: TreeNode.h:131
bool occursInBothTrees() const
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
const char * read_char_pntr() const
Definition: AW_awar.cxx:171
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
#define AWAR_TREE_COMPARE_ACTION
Definition: NT_taxonomy.cxx:32
AW_awar * dest_tree_awar(AW_root *root)
Definition: TreeAdmin.cxx:38
static int calcTaxDifference(TreeNode *g1, TreeNode *g2)
Definition: NT_taxonomy.cxx:73
GBDATA * gb_species_data
Definition: adname.cxx:34
#define AWAR_TREE_NAME
Definition: ad_trees.h:18
TreeNode * father
Definition: TreeNode.h:131
static void error(const char *msg)
Definition: mkptypes.cxx:96
static TreeNode * findParentGroup(TreeNode *node)
Definition: NT_taxonomy.cxx:50
void NT_create_twoTreeSelection(AW_window *aws)
Definition: ad_trees.cxx:566
#define AWAR_TREE_COMPARE_WRITE_FIELD
Definition: NT_taxonomy.cxx:34
GB_ERROR GB_write_lossless_int(GBDATA *gbd, int32_t i)
Definition: arbdb.cxx:1497
const char * prepare_and_get_selected_itemfield(AW_root *awr, const char *awar_name, GBDATA *gb_main, const ItemSelector &itemtype, FailIfField failIf)
int GB_read_flag(GBDATA *gbd)
Definition: arbdb.cxx:2760
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
char * read_string() const
Definition: AW_awar.cxx:201
void NT_create_compare_taxonomy_awars(AW_root *aw_root, AW_default props)
TreeNode * leftson
Definition: TreeNode.h:131
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
std::map< const char *, SpeciesInTwoTrees, charpLess > TwoTreeMap
#define nt_assert(cond)
Definition: NT_local.h:27
#define AWAR_TREE_COMPARE_MIN_TAX_LEVELS
Definition: NT_taxonomy.cxx:33
GBDATA * GBT_searchOrCreate_itemfield_according_to_changekey(GBDATA *gb_item, const char *field_name, const char *change_key_path)
Definition: adChangeKey.cxx:62
bool is_leaf() const
Definition: TreeNode.h:171
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2737
AW_awar * source_tree_awar(AW_root *root)
Definition: TreeAdmin.cxx:35
char * name
Definition: TreeNode.h:134
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:139
void aw_message(const char *msg)
Definition: AW_status.cxx:932
static void mark_action(AW_window *aws, TREE_canvas *ntw, Target target)
AW_root * get_root()
Definition: aw_window.hxx:348
#define NULp
Definition: cxxforward.h:97
GB_CSTR * GBT_get_names_of_species_in_tree(const TreeNode *tree, size_t *count)
Definition: adtree.cxx:1291
static int countTaxLevel(TreeNode *node)
Definition: NT_taxonomy.cxx:63
CONSTEXPR long FIELD_FILTER_INT_WRITEABLE
Definition: item_sel_list.h:43
GB_transaction ta(gb_var)
void destroy(TreeNode *that)
Definition: TreeNode.h:559
GBDATA * gb_main
Definition: adname.cxx:33
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
std::set< const char *, charpLess > NameSet
#define min(a, b)
Definition: f2c.h:153
const char * GB_CSTR
Definition: arbdb_base.h:25
bool is_normal_group() const
Definition: TreeNode.h:429
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
void aw_message_if(GB_ERROR error)
Definition: aw_msg.hxx:21
Target
Definition: NT_taxonomy.cxx:42
GBDATA * gb_main
Definition: awt_canvas.hxx:336
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105
GB_write_int const char s
Definition: AW_awar.cxx:156