ARB
di_matr.hxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : di_matr.hxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #ifndef DI_MATR_HXX
12 #define DI_MATR_HXX
13 
14 #ifndef AP_PRO_A_NUCS_HXX
15 #include <AP_pro_a_nucs.hxx>
16 #endif
17 #ifndef AP_TREE_HXX
18 #include <AP_Tree.hxx>
19 #endif
20 #ifndef AP_MATRIX_HXX
21 #include <AP_matrix.hxx>
22 #endif
23 #ifndef AP_SEQ_DNA_HXX
24 #include <AP_seq_dna.hxx>
25 #endif
26 #ifndef AP_SEQ_SIMPLE_PRO_HXX
27 #include <AP_seq_simple_pro.hxx>
28 #endif
29 #ifndef _GLIBCXX_STRING
30 #include <string>
31 #endif
32 
// Module-local assertion macro: di_assert(cond) simply expands to arb_assert(cond).
33 #define di_assert(cond) arb_assert(cond)
34 
// String constants built by adjacent-string-literal concatenation.
// AWAR_DIST_PREFIX ("dist/") is the common component of all names below.
35 #define AWAR_DIST_PREFIX "dist/"
// expands to "dist/correction/trans"
36 #define AWAR_DIST_CORR_TRANS AWAR_DIST_PREFIX "correction/trans"
// expands to "tmp/dist/save_matrix" (note the additional leading "tmp/")
37 #define AWAR_DIST_SAVE_MATRIX_BASE "tmp/" AWAR_DIST_PREFIX "save_matrix"
38 
// expands to "dist/dist/"
39 #define AWAR_DIST_DIST_PREFIX AWAR_DIST_PREFIX "dist/"
// expands to "dist/dist/lower"
40 #define AWAR_DIST_MIN_DIST AWAR_DIST_DIST_PREFIX "lower"
// expands to "dist/dist/upper"
41 #define AWAR_DIST_MAX_DIST AWAR_DIST_DIST_PREFIX "upper"
42 
48 
53 
59 
61 
62  // -------------------- real transformations are above
63 
64  DI_TRANSFORMATION_COUNT, // amount of real transformations
65  DI_TRANSFORMATION_NONE_DETECTED, // nothing was auto-detected
66 };
67 
71 };
72 
73 class DI_MATRIX;
74 
// DI_ENTRY: record used together with DI_MATRIX. It stores a pointer to a
// DI_MATRIX, two name strings and a group number, and inherits virtually
// from Noncopyable.
//
// NOTE(review): listing lines 84 and 87 are absent from this extract. The
// symbol index of this page lists 'AP_sequence *sequence' (line 84) and
// 'AP_sequence_simple_protein *get_prot_seq()' (line 87) at those positions;
// 'sequence' is what get_nucl_seq() below refers to. Confirm against the
// original header.
75 class DI_ENTRY : virtual Noncopyable {
 // matrix passed to the constructors (presumably stored here - confirm in DI_matr.cxx)
76  DI_MATRIX *phmatrix;
 // NOTE(review): ownership of this string is decided in the out-of-line ctor/dtor - confirm there
77  char *full_name;
78 
79 public:
 // Both constructors and the destructor are defined out of line (DI_matr.cxx per the symbol index).
80  DI_ENTRY(GBDATA *gbd, DI_MATRIX *phmatrix_);
81  DI_ENTRY(const char *name_, DI_MATRIX *phmatrix_);
82  ~DI_ENTRY();
83 
85 
 // Returns the member 'sequence' converted to AP_sequence_parsimony* via the DOWNCAST macro.
86  AP_sequence_parsimony *get_nucl_seq() { return DOWNCAST(AP_sequence_parsimony*, sequence); }
88 
 // public data members
89  char *name;
90  int group_nr; // species belongs to group number xxxx
91 };
92 
97 };
98 
100 
101 class MatrixOrder : virtual Noncopyable {
102  GB_HASH *name2pos; // key = species name, value = order in sort_tree [1..n]
103  // if no sort tree was specified, name2pos is NULp
104  int leafs; // number of leafs
105 
106  bool tree_contains_dups; // unused (if true, matrix sorting works partly wrong)
107 
108  void insert_in_hash(TreeNode *tree) {
109  if (tree->is_leaf()) {
110  arb_assert(tree->name);
111  if (GBS_write_hash(name2pos, tree->name, ++leafs) != 0) {
112  tree_contains_dups = true;
113  }
114  }
115  else {
116  insert_in_hash(tree->get_rightson());
117  insert_in_hash(tree->get_leftson());
118  }
119  }
120 
121 public:
122  MatrixOrder(GBDATA *gb_main, GB_CSTR sort_tree_name);
123  ~MatrixOrder() { if (name2pos) GBS_free_hash(name2pos); }
124 
125  bool defined() const { return leafs; }
126  int get_index(const char *name) const {
127  // return 1 for lowest and 'leafs' for highest species in sort-tee
128  // return 0 for all species missing in sort-tree
129  return defined() ? GBS_read_hash(name2pos, name) : -1;
130  }
131  void applyTo(struct TreeOrderedSpecies **gb_species_array, size_t array_size) const;
132 };
133 
// Callback type: pointer to a function that takes no arguments and returns nothing.
134 typedef void (*DI_MATRIX_CB)();
135 
// DI_MATRIX: matrix object bound to an AliView. It is loaded via load(),
// filled by one of the calculate*/extract_from_tree members and written
// out via save(). Inherits virtually from Noncopyable.
//
// NOTE(review): listing lines 151, 153, 154 and 170 are absent from this
// extract. The symbol index of this page lists 'DI_ENTRY **entries' (151),
// 'AP_smatrix *matrix' (153) and 'DI_MATRIX_TYPE matrix_type' (154) at those
// positions (i.e. inside the public section); line 170 presumably declares
// 'DI_TRANSFORMATION detect_transformation(std::string &msg)' - confirm
// against the original header.
136 class DI_MATRIX : virtual Noncopyable {
137  GBDATA *gb_species_data;
138  long seq_len;
139  char cancel_columns[256];
140  size_t allocated_entries;
141  AliView *aliview;
142 
 // Forwards to the AliView: returns aliview->get_gb_main().
143  GBDATA *get_gb_main() const { return aliview->get_gb_main(); }
 // 'sigma' is passed by non-const reference (presumably an output - confirm in the definition).
144  double corr(double dist, double b, double & sigma);
 // Note: the parameter 'cancel_columns' hides the data member of the same name inside this function.
145  char *calculate_overall_freqs(double rel_frequencies[AP_MAX], char *cancel_columns);
 // 'groupcnt' is passed by non-const reference.
146  int search_group(TreeNode *node, GB_HASH *hash, size_t& groupcnt, char *groupname, DI_ENTRY **groups);
147 
148 public:
149  // @@@ make members private:
150  bool is_AA;
152  size_t nentries;
155 
 // Constructor and destructor are defined out of line (DI_matr.cxx per the symbol index).
156  explicit DI_MATRIX(const AliView& aliview);
157  ~DI_MATRIX();
158 
 // Forwards to the AliView: returns aliview->get_aliname().
159  const char *get_aliname() const { return aliview->get_aliname(); }
 // Read-only access to the AliView pointer held by this matrix.
160  const AliView *get_aliview() const { return aliview; }
161 
 // Declared with __ATTR__USERESULT (presumably: the returned GB_ERROR must not be ignored - confirm in attributes.h).
162  GB_ERROR load(LoadWhat what, const MatrixOrder& order, bool show_warnings, GBDATA **species_list) __ATTR__USERESULT;
163  char *unload();
164  const char *save(const char *filename, enum DI_SAVE_TYPE type);
165 
 // The following three members return a GB_ERROR and take an 'aborted_flag' pointer
 // (presumably set when the operation was aborted - confirm in DI_matr.cxx).
166  GB_ERROR calculate(const char *cancel, DI_TRANSFORMATION transformation, bool *aborted_flag, AP_matrix *userdef_matrix);
167  GB_ERROR calculate_pro(DI_TRANSFORMATION transformation, bool *aborted_flag);
168  GB_ERROR extract_from_tree(const char *treename, bool *aborted_flag);
169 
171 
172  char *compress(TreeNode *tree);
173 };
174 
175 class DI_GLOBAL_MATRIX : virtual Noncopyable {
176  DI_MATRIX *matrix;
177  DI_MATRIX_CB changed_cb;
178 
179  void announce_change() { if (changed_cb) changed_cb(); }
180 
181  void forget_no_announce() {
182  delete matrix;
183  matrix = NULp;
184  }
185 
186  void set(DI_MATRIX *new_global) { di_assert(!matrix); matrix = new_global; announce_change(); }
187 
188 public:
189  DI_GLOBAL_MATRIX() : matrix(NULp), changed_cb(NULp) {}
191 
192  DI_MATRIX *get() { return matrix; }
193  void forget() {
194  if (matrix) {
195  forget_no_announce();
196  announce_change();
197  }
198  }
199  void replaceBy(DI_MATRIX *new_global) { forget_no_announce(); set(new_global); }
200 
201  bool exists() const { return matrix; }
202 
204  // announce_change(); // do by caller if really needed
205  changed_cb = cb;
206  }
207 
209  DI_MATRIX *prev = matrix;
210  matrix = other;
211  announce_change();
212  return prev;
213  }
214 
216  return matrix && matrix->matrix_type == type;
217  }
219  if (matrix && matrix->matrix_type != wanted_type) {
220  forget();
221  }
222  }
223 };
224 
226 
227 class WeightedFilter;
229  const char *awar_base;
231 };
232 
234 
235 #else
236 #error di_matr.hxx included twice
237 #endif // DI_MATR_HXX
AW_window * DI_create_save_matrix_window(AW_root *aw_root, save_matrix_params *save_params)
Definition: DI_matr.cxx:1122
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
GB_TYPES type
const char * get_aliname() const
Definition: AliView.hxx:40
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
return string(buffer, length)
void show_warnings(const string &helpfile)
GB_ERROR extract_from_tree(const char *treename, bool *aborted_flag)
Definition: DI_matr.cxx:841
~DI_MATRIX()
Definition: DI_matr.cxx:343
bool has_type(DI_MATRIX_TYPE type) const
Definition: di_matr.hxx:215
DI_ENTRY(GBDATA *gbd, DI_MATRIX *phmatrix_)
Definition: DI_matr.cxx:280
int get_index(const char *name) const
Definition: di_matr.hxx:126
GB_ERROR calculate_pro(DI_TRANSFORMATION transformation, bool *aborted_flag)
Definition: DI_matr.cxx:753
GB_ERROR calculate(const char *cancel, DI_TRANSFORMATION transformation, bool *aborted_flag, AP_matrix *userdef_matrix)
Definition: DI_matr.cxx:548
AP_sequence_parsimony * get_nucl_seq()
Definition: di_matr.hxx:86
~DI_ENTRY()
Definition: DI_matr.cxx:314
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
void set_changed_cb(DI_MATRIX_CB cb)
Definition: di_matr.hxx:203
AP_sequence * sequence
Definition: di_matr.hxx:84
char * unload()
Definition: DI_matr.cxx:334
#define cb(action)
#define DOWNCAST(totype, expr)
Definition: downcast.h:141
void forget_if_not_has_type(DI_MATRIX_TYPE wanted_type)
Definition: di_matr.hxx:218
void applyTo(struct TreeOrderedSpecies **gb_species_array, size_t array_size) const
Definition: DI_matr.cxx:390
DI_SAVE_TYPE
Definition: di_matr.hxx:93
const AliView * get_aliview() const
Definition: di_matr.hxx:160
Definition: di_matr.hxx:75
DI_MATRIX(const AliView &aliview)
Definition: DI_matr.cxx:320
char * name
Definition: di_matr.hxx:89
DI_MATRIX_TYPE matrix_type
Definition: di_matr.hxx:154
int group_nr
Definition: di_matr.hxx:90
DI_GLOBAL_MATRIX GLOBAL_MATRIX
Definition: DI_matr.cxx:63
bool exists() const
Definition: di_matr.hxx:201
const WeightedFilter * weighted_filter
Definition: di_matr.hxx:230
MatrixOrder(GBDATA *gb_main, GB_CSTR sort_tree_name)
Definition: DI_matr.cxx:359
const char * awar_base
Definition: di_matr.hxx:229
void(* DI_MATRIX_CB)()
Definition: di_matr.hxx:134
AP_smatrix * matrix
Definition: di_matr.hxx:153
DI_MATRIX_TYPE
Definition: di_matr.hxx:68
bool is_leaf() const
Definition: TreeNode.h:211
DI_ENTRY ** entries
Definition: di_matr.hxx:151
AP_sequence_simple_protein * get_prot_seq()
Definition: di_matr.hxx:87
size_t nentries
Definition: di_matr.hxx:152
#define __ATTR__USERESULT
Definition: attributes.h:58
char * name
Definition: TreeNode.h:174
void replaceBy(DI_MATRIX *new_global)
Definition: di_matr.hxx:199
DI_TRANSFORMATION
Definition: di_matr.hxx:43
#define NULp
Definition: cxxforward.h:116
DI_TRANSFORMATION detect_transformation(std::string &msg)
Definition: distanalyse.cxx:17
const char * get_aliname() const
Definition: di_matr.hxx:159
const char * save(const char *filename, enum DI_SAVE_TYPE type)
#define di_assert(cond)
Definition: di_matr.hxx:33
GBDATA * gb_main
Definition: adname.cxx:32
char * compress(TreeNode *tree)
bool defined() const
Definition: di_matr.hxx:125
DI_MATRIX * swap(DI_MATRIX *other)
Definition: di_matr.hxx:208
GB_ERROR load(LoadWhat what, const MatrixOrder &order, bool show_warnings, GBDATA **species_list) __ATTR__USERESULT
Definition: DI_matr.cxx:415
GBDATA * get_gb_main() const
Definition: AliView.hxx:46
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
bool is_AA
Definition: di_matr.hxx:150
LoadWhat
Definition: di_matr.hxx:99