ARB
NT_tree_cmp.h
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : NT_tree_cmp.h //
4 // Purpose : Compare tree topologies and transfer groups between trees //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef NT_TREE_CMP_H
12 #define NT_TREE_CMP_H
13 
14 #ifndef ARBDB_BASE_H
15 #include <arbdb_base.h>
16 #endif
17 #ifndef NT_LOCAL_H
18 #include "NT_local.h"
19 #endif
20 
// Mode selector passed as 'mode' to NTREE_move_tree_info().
// (The 'enum GroupTransferMode {' opener was missing; without it the enumerators below do not parse.)
enum GroupTransferMode {
    COMPARE_TOPOLOGY,       // compare inner nodes + write differences to tree remarks
    REMOVE_EXISTING_GROUPS, // transfer groups (removes existing groups)
    KEEP_OLD_NAMES,         // transfer groups (keep old groups; combines mismatching names)
};
26 
30 };
31 
32 class RSpecSet;
33 class TSpecSet;
34 class GroupMatchScorer;
35 
// Closed interval [minRatio, maxRatio] used to accept or reject a ratio value.
class RatioLimits {
    double minRatio; // lower bound (inclusive)
    double maxRatio; // upper bound (inclusive)

public:
    // default: accept the complete range [0.0, 1.0]
    RatioLimits()
        : minRatio(0.0),
          maxRatio(1.0)
    {}

    // custom range; the bounds are stored unchecked (use isValid() to verify them)
    RatioLimits(double lower, double upper)
        : minRatio(lower),
          maxRatio(upper)
    {}

    // true if 'ratio' lies inside the limits (both bounds included)
    bool insideLimits(double ratio) const {
        const bool notBelow = ratio >= minRatio;
        const bool notAbove = maxRatio >= ratio;
        return notBelow && notAbove;
    }

    // true if 0.0 <= minRatio <= maxRatio <= 1.0
    bool isValid() const {
        const bool lowerOk = 0.0 <= minRatio;
        const bool ordered = maxRatio >= minRatio;
        const bool upperOk = 1.0 >= maxRatio;
        return lowerOk && ordered && upperOk;
    }
};
47 
48 class GroupPenalty {
49  double penalty; // 0 = perfect match; >0 otherwise
50 
51  double ingroup_ratio; // how many percent of original groups members are members of new group [0..1]
52  double outgroup_ratio; // how many percent of new group are no original group members [0..1]
53 
54  bool keeled; // @@@ shall be used for #451
55 
56  // data copied from unregistered set (TSpecSet; i.e. from source tree when moving groups):
57  int groupSize; // size of source group (known members)
58  int unknown; // unknown members in source group
59 
60  static double no_match_penalty() { return LONG_MAX; }
61 
62 #if defined(ASSERTION_USED)
63  bool isRatio(double r) const { return r>=0.0 && r<=1.0; }
64 #endif
65 
66 public:
68  penalty(no_match_penalty()),
69  ingroup_ratio(-1),
70  outgroup_ratio(-1),
71  keeled(false),
72  groupSize(-1),
73  unknown(-1)
74  {}
75  GroupPenalty(double penalty_, double ingroup_ratio_, double outgroup_ratio_, int unreg_groupsize) :
76  penalty(penalty_),
77  ingroup_ratio(ingroup_ratio_),
78  outgroup_ratio(outgroup_ratio_),
79  keeled(false),
80  groupSize(unreg_groupsize),
81  unknown(-1)
82  {
83  nt_assert(isRatio(ingroup_ratio));
84  nt_assert(isRatio(outgroup_ratio));
85  nt_assert(groupSize>0);
86  }
87 
88  bool doesMatch() const { return penalty != no_match_penalty(); }
89  bool betterThan(const GroupPenalty& other) const { return penalty < other.penalty; }
90 
91  double get_penalty() const { return penalty; }
92  bool isPerfectMatch() const { return penalty == 0.0; }
93  double get_ingroup_ratio() const { return ingroup_ratio; }
94  double get_outgroup_ratio() const { return outgroup_ratio; }
95 
96  int get_groupsize() const { nt_assert(groupSize>0); return groupSize; }
97  int get_unknown() const { nt_assert(unknown>=0); return unknown; }
98 
99  bool shouldHaveBeenKeeled() const { return keeled; }
100 
101  static GroupPenalty NoMatch() { return GroupPenalty(); } // syntactic sugar
102 
103  void mark_as_keeled() { keeled = true; }
104  void addPenalty(double p) {
105  nt_assert(p>=0.0);
106  nt_assert(doesMatch()); // avoid "!doesMatch()"-condition gets destroyed
107  penalty += p;
108  }
109  void registerUnfound(const GroupMatchScorer& scorer, const TSpecSet& tset);
110 };
111 
113  RatioLimits ingroup; // only need lower limit (upper should always be 100%)
114  RatioLimits outgroup; // only need upper limit (lower should always be 0%)
115 
116  // Pep: per error penalty (=absolute penalty=traditional penalty scoring)
117  double ingroupPep; // penalty for each former member of group (when moved out of group, but still member of tree)
118  double outgroupPep; // penalty for each former non-member (when moved into group)
119  double unfoundPep; // penalty for each unknown species (when moved into group and not member of source tree)
120 
121  double keelPenalty; // penalty added for using keeled group
122 
123  // RelPen: ratio penalty (=relative penalty)
124  double ingroupInvRelPen; // factor for (1-GroupPenalty::ingroup_ratio), i.e. penalty rel.to ratio of non-included former group members
125  double outgroupRelPen; // factor for ( GroupPenalty::outgroup_ratio), i.e. penalty rel.to ratio of former non-member in new group
126 
127  bool insideLimits(double ingroupRatio, double outgroupRatio) const {
128  return
129  ingroup.insideLimits(ingroupRatio) &&
130  outgroup.insideLimits(outgroupRatio);
131  }
132  GroupPenalty calcPenalty(long removed, long added, long commonSpecies);
133 
134 public:
136  ingroupPep(1.0),
137  outgroupPep(1.0),
138  unfoundPep(0.0001),
139  keelPenalty(0.01),
140  ingroupInvRelPen(0.0),
141  outgroupRelPen(0.0)
142  {}
143 
144  GB_ERROR check_validity() const;
145 
146  // setup:
147  void setLimits(const RatioLimits& ingroupLimits, const RatioLimits& outgroupLimits) {
148  nt_assert(ingroupLimits.insideLimits(1.0)); // upper should always be 100%
149  nt_assert(outgroupLimits.insideLimits(0.0)); // lower should always be 0%
150 
151  ingroup = ingroupLimits;
152  outgroup = outgroupLimits;
153  }
154  void setPerErrorPenalties(double ingroup_pep, double outgroup_pep, double unfound_pep) {
155  ingroupPep = ingroup_pep;
156  outgroupPep = outgroup_pep;
157  unfoundPep = unfound_pep;
158  }
159  void setRelativePenalties(double ingroup_inv_relpen, double outgroup_relpen) {
160  ingroupInvRelPen = ingroup_inv_relpen;
161  outgroupRelPen = outgroup_relpen;
162  }
163 
164  // scoring:
165  GroupPenalty matchGroups(const TSpecSet& sourceSet, const RSpecSet& targetSet, long commonSpecies, long overallSpecies);
166  double calcUnknownMembersPenalty(const TSpecSet& sourceSet) const;
167 };
168 
169 GB_ERROR NTREE_move_tree_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, GroupTransferMode mode, GroupsToTransfer what, const GroupMatchScorer& scorer, const char *aci);
170 
171 #else
172 #error NT_tree_cmp.h included twice
173 #endif // NT_TREE_CMP_H
const char * GB_ERROR
Definition: arb_core.h:25
bool insideLimits(double ratio) const
Definition: NT_tree_cmp.h:44
double get_outgroup_ratio() const
Definition: NT_tree_cmp.h:94
double get_ingroup_ratio() const
Definition: NT_tree_cmp.h:93
void addPenalty(double p)
Definition: NT_tree_cmp.h:104
bool betterThan(const GroupPenalty &other) const
Definition: NT_tree_cmp.h:89
int get_groupsize() const
Definition: NT_tree_cmp.h:96
bool isPerfectMatch() const
Definition: NT_tree_cmp.h:92
static GroupPenalty NoMatch()
Definition: NT_tree_cmp.h:101
void registerUnfound(const GroupMatchScorer &scorer, const TSpecSet &tset)
bool doesMatch() const
Definition: NT_tree_cmp.h:88
void setPerErrorPenalties(double ingroup_pep, double outgroup_pep, double unfound_pep)
Definition: NT_tree_cmp.h:154
bool shouldHaveBeenKeeled() const
Definition: NT_tree_cmp.h:99
void setRelativePenalties(double ingroup_inv_relpen, double outgroup_relpen)
Definition: NT_tree_cmp.h:159
#define false
Definition: ureadseq.h:13
void setLimits(const RatioLimits &ingroupLimits, const RatioLimits &outgroupLimits)
Definition: NT_tree_cmp.h:147
GroupsToTransfer
Definition: NT_tree_cmp.h:27
int get_unknown() const
Definition: NT_tree_cmp.h:97
#define nt_assert(cond)
Definition: NT_local.h:27
GroupPenalty matchGroups(const TSpecSet &sourceSet, const RSpecSet &targetSet, long commonSpecies, long overallSpecies)
RatioLimits(double lower, double upper)
Definition: NT_tree_cmp.h:42
GroupPenalty(double penalty_, double ingroup_ratio_, double outgroup_ratio_, int unreg_groupsize)
Definition: NT_tree_cmp.h:75
GB_ERROR NTREE_move_tree_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, GroupTransferMode mode, GroupsToTransfer what, const GroupMatchScorer &scorer, const char *aci)
GroupTransferMode
Definition: NT_tree_cmp.h:21
double calcUnknownMembersPenalty(const TSpecSet &sourceSet) const
double get_penalty() const
Definition: NT_tree_cmp.h:91
GB_ERROR check_validity() const
bool isValid() const
Definition: NT_tree_cmp.h:45
GBDATA * gb_main
Definition: adname.cxx:32
void mark_as_keeled()
Definition: NT_tree_cmp.h:103