ARB
DI_matr.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : DI_matr.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "di_protdist.hxx"
12 #include "di_clusters.hxx"
13 #include "di_view_matrix.hxx"
14 #include "di_awars.hxx"
15 
16 #include <neighbourjoin.hxx>
17 #include <AP_filter.hxx>
18 #include <CT_ctree.hxx>
19 #include <ColumnStat.hxx>
20 
21 #include <awt.hxx>
22 #include <awt_sel_boxes.hxx>
23 #include <awt_filter.hxx>
24 
25 #include <aw_preset.hxx>
26 #include <aw_awars.hxx>
27 #include <aw_file.hxx>
28 #include <aw_msg.hxx>
29 #include <aw_root.hxx>
30 
31 #include <gui_aliview.hxx>
32 
33 #include <climits>
34 #include <ctime>
35 #include <cmath>
36 #include <arb_sort.h>
37 #include <arb_global_defs.h>
38 #include <macros.hxx>
39 #include <ad_cb.h>
40 #include <awt_TreeAwars.hxx>
41 #include <arb_defs.h>
42 
43 using std::string;
44 
45 // --------------------------------------------------------------------------------
46 
47 #define AWAR_DIST_BOOTSTRAP_COUNT AWAR_DIST_PREFIX "bootstrap/count"
48 #define AWAR_DIST_CANCEL_CHARS AWAR_DIST_PREFIX "cancel/chars"
49 #define AWAR_DIST_MATRIX_AUTO_RECALC AWAR_DIST_PREFIX "recalc"
50 
51 #define AWAR_DIST_COLUMN_STAT_NAME AWAR_DIST_COLUMN_STAT_PREFIX "name"
52 
53 #define AWAR_DIST_TREE_SORT_NAME "tmp/" AWAR_DIST_TREE_PREFIX "sort_tree_name"
54 #define AWAR_DIST_TREE_COMP_NAME "tmp/" AWAR_DIST_TREE_PREFIX "compr_tree_name"
55 #define AWAR_DIST_TREE_STD_NAME AWAR_DIST_TREE_PREFIX "tree_name"
56 #define AWAR_DIST_MATRIX_AUTO_CALC_TREE AWAR_DIST_TREE_PREFIX "autocalc"
57 
58 #define AWAR_DIST_SAVE_MATRIX_TYPE AWAR_DIST_SAVE_MATRIX_BASE "/type"
59 #define AWAR_DIST_SAVE_MATRIX_FILENAME AWAR_DIST_SAVE_MATRIX_BASE "/file_name"
60 
61 // --------------------------------------------------------------------------------
62 
64 
67 
71  return NULp;
72  }
75  return &userdef_DNA_matrix;
76 }
77 
79  AW_window *aww;
80  WindowCallback cb;
81 public:
82  BoundWindowCallback(AW_window *aww_, const WindowCallback& cb_)
83  : aww(aww_),
84  cb(cb_)
85  {}
86  void operator()() { cb(aww); }
87 };
88 
91 
93 
94 static void matrix_changed_cb() {
95  if (matrixDisplay) {
96  matrixDisplay->mark(MatrixDisplay::NEED_SETUP);
97  matrixDisplay->update_display();
98  }
99 }
100 
101 struct RecalcNeeded {
102  bool matrix;
103  bool tree;
104 
105  RecalcNeeded() : matrix(false), tree(false) { }
106 };
107 
109 
110 CONSTEXPR unsigned UPDATE_DELAY = 200;
111 
112 static unsigned update_cb(AW_root *aw_root);
113 inline void add_update_cb() {
115 }
116 
117 inline void matrix_needs_recalc_cb() {
118  need_recalc.matrix = true;
119  add_update_cb();
120 }
121 inline void tree_needs_recalc_cb() {
122  need_recalc.tree = true;
123  add_update_cb();
124 }
125 
127  if (GLOBAL_MATRIX.has_type(DI_MATRIX_COMPRESSED)) {
129  }
130 }
131 
132 static unsigned update_cb(AW_root *aw_root) {
133  if (need_recalc.matrix) {
134  GLOBAL_MATRIX.forget();
135  need_recalc.matrix = false; // because it's forgotten
136 
137  int matrix_autocalc = aw_root->awar(AWAR_DIST_MATRIX_AUTO_RECALC)->read_int();
138  if (matrix_autocalc) {
139  bool recalc_now = true;
140  int tree_autocalc = aw_root->awar(AWAR_DIST_MATRIX_AUTO_CALC_TREE)->read_int();
141  if (!tree_autocalc) recalc_now = matrixDisplay ? matrixDisplay->willShow() : false;
142 
143  if (recalc_now) {
144  di_assert(recalculate_matrix_cb.isSet());
145  (*recalculate_matrix_cb)();
146  di_assert(need_recalc.tree == true);
147  }
148  }
149  di_assert(need_recalc.matrix == false);
150  }
151 
152  if (need_recalc.tree) {
153  int tree_autocalc = aw_root->awar(AWAR_DIST_MATRIX_AUTO_CALC_TREE)->read_int();
154  if (tree_autocalc) {
155  di_assert(recalculate_tree_cb.isSet());
156  (*recalculate_tree_cb)();
157  need_recalc.matrix = false; // otherwise endless loop, e.g. if output-tree is used for sorting
158  }
159  }
160 
161  return 0; // do not call again
162 }
163 
164 static void auto_calc_changed_cb(AW_root *aw_root) {
165  int matrix_autocalc = aw_root->awar(AWAR_DIST_MATRIX_AUTO_RECALC)->read_int();
166  int tree_autocalc = aw_root->awar(AWAR_DIST_MATRIX_AUTO_CALC_TREE)->read_int();
167 
168  if (matrix_autocalc && !GLOBAL_MATRIX.exists()) matrix_needs_recalc_cb();
169  if (tree_autocalc && (matrix_autocalc || GLOBAL_MATRIX.exists())) tree_needs_recalc_cb();
170 }
171 
173  AW_window_simple *aws = new AW_window_simple;
174  aws->init(aw_root, "SET_DNA_MATRIX", "SET MATRIX");
175  aws->auto_increment(50, 50);
176  aws->button_length(10);
177  aws->callback(AW_POPDOWN);
178  aws->create_button("CLOSE", "CLOSE");
179 
180  aws->callback(makeHelpCallback("user_matrix.hlp"));
181  aws->create_button("HELP", "HELP");
182 
183  aws->at_newline();
184 
186  aws->window_fit();
187  return aws;
188 }
189 
193  }
194 }
195 
197  GB_transaction ta(db);
198 
204 
206 
207  RootCallback matrix_needs_recalc_callback = makeRootCallback(matrix_needs_recalc_cb);
208  aw_root->awar_int(AWAR_DIST_MATRIX_DNA_ENABLED, 0)->add_callback(matrix_needs_recalc_callback); // user matrix disabled by default
209  {
211  GB_add_callback(gbd, GB_CB_CHANGED, makeDatabaseCallback(matrix_needs_recalc_cb));
212  }
213 
214  aw_root->awar_string(AWAR_DIST_WHICH_SPECIES, "marked", def)->add_callback(matrix_needs_recalc_callback);
215  {
216  char *default_ali = GBT_get_default_alignment(db);
217  aw_root->awar_string(AWAR_DIST_ALIGNMENT, "", def)->add_callback(matrix_needs_recalc_callback)->write_string(default_ali);
218  free(default_ali);
219  }
220  aw_root->awar_string(AWAR_DIST_FILTER_ALIGNMENT, "none", def);
221  aw_root->awar_string(AWAR_DIST_FILTER_NAME, "none", def);
222  aw_root->awar_string(AWAR_DIST_FILTER_FILTER, "", def)->add_callback(matrix_needs_recalc_callback);
223  aw_root->awar_int (AWAR_DIST_FILTER_SIMPLIFY, 0, def)->add_callback(matrix_needs_recalc_callback);
224 
225  aw_root->awar_string(AWAR_DIST_CANCEL_CHARS, ".", def)->add_callback(matrix_needs_recalc_callback);
226  aw_root->awar_int(AWAR_DIST_CORR_TRANS, (int)DI_TRANSFORMATION_SIMILARITY, def)->add_callback(matrix_needs_recalc_callback)->set_minmax(0, DI_TRANSFORMATION_COUNT-1);
227 
229 
230  AW_create_fileselection_awars(aw_root, AWAR_DIST_SAVE_MATRIX_BASE, ".", "", "infile");
231  aw_root->awar_int(AWAR_DIST_SAVE_MATRIX_TYPE, 0, def);
232 
233  enum treetype { CURR, SORT, COMPRESS, TREEAWARCOUNT };
234  AW_awar *tree_awar[TREEAWARCOUNT] = { NULp, NULp, NULp };
235 
236  aw_root->awar_string(AWAR_DIST_TREE_STD_NAME, "tree_nj", def);
237  {
238  char *currentTree = aw_root->awar_string(AWAR_TREE, "", db)->read_string();
239  tree_awar[CURR] = aw_root->awar_string(AWAR_DIST_TREE_CURR_NAME, currentTree, def);
240  tree_awar[SORT] = aw_root->awar_string(AWAR_DIST_TREE_SORT_NAME, currentTree, def);
241  free(currentTree);
242  }
243  tree_awar[COMPRESS] = aw_root->awar_string(AWAR_DIST_TREE_COMP_NAME, NO_TREE_SELECTED, def);
244 
245  aw_root->awar_int(AWAR_DIST_BOOTSTRAP_COUNT, 1000, def);
248 
249  aw_root->awar_float(AWAR_DIST_MIN_DIST, 0.0)->set_minmax(0.0, 4.0);
250  aw_root->awar_float(AWAR_DIST_MAX_DIST, 0.0)->set_minmax(0.0, 4.0);
251 
252  aw_root->awar_string(AWAR_SPECIES_NAME, "", db);
253 
254  DI_create_cluster_awars(aw_root, def, db);
255 
256 #if defined(DEBUG)
257  AWT_create_db_browser_awars(aw_root, def);
258 #endif // DEBUG
259 
260  {
262 
264  GB_add_callback(gb_species_data, GB_CB_CHANGED, makeDatabaseCallback(matrix_needs_recalc_cb));
265 
266  GB_pop_transaction(db);
267  }
268 
269  AWT_registerTreeAwarCallback(tree_awar[CURR], makeTreeAwarCallback(selected_tree_changed_cb), true);
270  AWT_registerTreeAwarCallback(tree_awar[SORT], makeTreeAwarCallback(matrix_needs_recalc_cb), true);
271  AWT_registerTreeAwarCallback(tree_awar[COMPRESS], makeTreeAwarCallback(compressed_matrix_needs_recalc_cb), true);
272 
273  auto_calc_changed_cb(aw_root);
274 }
275 
277  : phmatrix(phmatrix_),
278  full_name(NULp),
279  sequence(NULp),
280  name(NULp),
281  group_nr(0)
282 {
283  GBDATA *gb_ali = GB_entry(gbd, phmatrix->get_aliname());
284  if (gb_ali) {
285  GBDATA *gb_data = GB_entry(gb_ali, "data");
286  if (gb_data) {
287  if (phmatrix->is_AA) {
289  }
290  else {
291  sequence = new AP_sequence_parsimony(phmatrix->get_aliview());
292  }
294  sequence->lazy_load_sequence(); // load sequence
295 
296  name = GBT_read_string(gbd, "name");
297  full_name = GBT_read_string(gbd, "full_name");
298  }
299  }
300 }
301 
302 DI_ENTRY::DI_ENTRY(const char *name_, DI_MATRIX *phmatrix_)
303  : phmatrix(phmatrix_),
304  full_name(NULp),
305  sequence(NULp),
306  name(ARB_strdup(name_)),
307  group_nr(0)
308 {}
309 
311  delete sequence;
312  free(name);
313  free(full_name);
314 }
315 
318  seq_len(0),
319  allocated_entries(0),
320  aliview(new AliView(aliview_)),
321  is_AA(false),
322  entries(NULp),
323  nentries(0),
324  matrix(NULp),
325  matrix_type(DI_MATRIX_FULL)
326 {
327  memset(cancel_columns, 0, sizeof(cancel_columns));
328 }
329 
331  for (size_t i=0; i<nentries; i++) {
332  delete entries[i];
333  }
334  freenull(entries);
335  nentries = 0;
336  return NULp;
337 }
338 
340  unload();
341  delete matrix;
342  delete aliview;
343 }
344 
348 
349  TreeOrderedSpecies(const MatrixOrder& order, GBDATA *gb_spec)
350  : gbd(gb_spec),
351  order_index(order.get_index(GBT_get_name_or_description(gbd)))
352  {}
353 };
354 
356  : name2pos(NULp),
357  leafs(0)
358 {
359  if (sort_tree_name) {
360  int size;
361  TreeNode *sort_tree = GBT_read_tree_and_size(gb_main, sort_tree_name, new SimpleRoot, &size);
362 
363  if (sort_tree) {
364  leafs = size+1;
365  name2pos = GBS_create_hash(leafs, GB_IGNORE_CASE);
366 
367  IF_ASSERTION_USED(int leafsLoaded = leafs);
368  leafs = 0;
369  insert_in_hash(sort_tree);
370 
371  arb_assert(leafsLoaded == leafs);
372  destroy(sort_tree);
373  }
374  else {
375  GB_clear_error();
376  }
377  }
378 }
379 static int TreeOrderedSpecies_cmp(const void *p1, const void *p2, void *) {
382 
383  return s2->order_index - s1->order_index;
384 }
385 
386 void MatrixOrder::applyTo(TreeOrderedSpecies **species_array, size_t array_size) const {
387  GB_sort((void**)species_array, 0, array_size, TreeOrderedSpecies_cmp, NULp);
388 }
389 
390 static size_t get_load_count(LoadWhat what, GBDATA *gb_main, GBDATA **species_list) {
391  size_t no_of_species = -1U;
392 
393  switch (what) {
394  case DI_LOAD_ALL:
395  no_of_species = GBT_get_species_count(gb_main);
396  break;
397  case DI_LOAD_MARKED:
398  no_of_species = GBT_count_marked_species(gb_main);
399  break;
400  case DI_LOAD_LIST:
401  di_assert(species_list);
402  for (no_of_species = 0; species_list[no_of_species]; ++no_of_species) ;
403  break;
404  }
405 
406  di_assert(no_of_species != -1U);
407 
408  return no_of_species;
409 }
410 
411 GB_ERROR DI_MATRIX::load(LoadWhat what, const MatrixOrder& order, bool show_warnings, GBDATA **species_list) {
412  GBDATA *gb_main = get_gb_main();
413  const char *use = get_aliname();
414 
415  GB_transaction ta(gb_main);
416 
417  seq_len = GBT_get_alignment_len(gb_main, use);
418  is_AA = GBT_is_alignment_protein(gb_main, use);
419  gb_species_data = GBT_get_species_data(gb_main);
420 
421  allocated_entries = 1000;
422  ARB_calloc(entries, allocated_entries);
423 
424  nentries = 0;
425 
426  size_t no_of_species = get_load_count(what, gb_main, species_list);
427  if (no_of_species<2) {
428  return GBS_global_string("Not enough input species (%zu)", no_of_species);
429  }
430 
431  TreeOrderedSpecies *species_to_load[no_of_species];
432 
433  {
434  size_t i = 0;
435  switch (what) {
436  case DI_LOAD_ALL: {
437  for (GBDATA *gb_species = GBT_first_species_rel_species_data(gb_species_data); gb_species; gb_species = GBT_next_species(gb_species), ++i) {
438  species_to_load[i] = new TreeOrderedSpecies(order, gb_species);
439  }
440  break;
441  }
442  case DI_LOAD_MARKED: {
443  for (GBDATA *gb_species = GBT_first_marked_species_rel_species_data(gb_species_data); gb_species; gb_species = GBT_next_marked_species(gb_species), ++i) {
444  species_to_load[i] = new TreeOrderedSpecies(order, gb_species);
445  }
446  break;
447  }
448  case DI_LOAD_LIST: {
449  for (i = 0; species_list[i]; ++i) {
450  species_to_load[i] = new TreeOrderedSpecies(order, species_list[i]);
451  }
452  break;
453  }
454  }
455  arb_assert(i == no_of_species);
456  }
457 
458  if (order.defined()) {
459  order.applyTo(species_to_load, no_of_species);
460  if (show_warnings) {
461  int species_not_in_sort_tree = 0;
462  for (size_t i = 0; i<no_of_species; ++i) {
463  if (!species_to_load[i]->order_index) {
464  species_not_in_sort_tree++;
465  }
466  }
467  if (species_not_in_sort_tree) {
468  GBT_message(gb_main, GBS_global_string("Warning: %i of the affected species are not in sort-tree", species_not_in_sort_tree));
469  }
470  }
471  }
472  else {
473  if (show_warnings) {
474  static bool shown = false;
475  if (!shown) { // showing once is enough
476  GBT_message(gb_main, "Warning: No valid tree given to sort matrix (using default database order)");
477  shown = true;
478  }
479  }
480  }
481 
482  if (no_of_species>allocated_entries) {
483  allocated_entries = no_of_species;
484  ARB_realloc(entries, allocated_entries);
485  }
486 
487  GB_ERROR error = NULp;
488  arb_progress progress("Preparing sequence data", no_of_species);
489  for (size_t i = 0; i<no_of_species && !error; ++i) {
490  DI_ENTRY *phentry = new DI_ENTRY(species_to_load[i]->gbd, this);
491  if (phentry->sequence) { // a species found
492  arb_assert(nentries<allocated_entries);
493  entries[nentries++] = phentry;
494  }
495  else {
496  delete phentry;
497  }
498  delete species_to_load[i];
499  species_to_load[i] = NULp;
500 
501  progress.inc_and_check_user_abort(error);
502  }
503 
504  return error;
505 }
506 
507 char *DI_MATRIX::calculate_overall_freqs(double rel_frequencies[AP_MAX], char *cancel) {
508  di_assert(is_AA == false);
509 
510  long hits2[AP_MAX];
511  long sum = 0;
512  int i;
513  int pos;
514  int b;
515  long s_len = aliview->get_length();
516 
517  memset((char *) &hits2[0], 0, sizeof(hits2));
518  for (size_t row = 0; row < nentries; row++) {
519  const char *seq1 = entries[row]->get_nucl_seq()->get_sequence();
520  for (pos = 0; pos < s_len; pos++) {
521  b = *(seq1++);
522  if (cancel[b]) continue;
523  hits2[b]++;
524  }
525  }
526  for (i = 0; i < AP_MAX; i++) { // LOOP_VECTORIZED
527  sum += hits2[i];
528  }
529  for (i = 0; i < AP_MAX; i++) {
530  rel_frequencies[i] = hits2[i] / (double) sum;
531  }
532  return NULp;
533 }
534 
535 double DI_MATRIX::corr(double dist, double b, double & sigma) {
536  const double eps = 0.01;
537  double ar = 1.0 - dist/b;
538  sigma = 1000.0;
539  if (ar< eps) return 3.0;
540  sigma = b/ar;
541  return - b * log(1-dist/b);
542 }
543 
544 GB_ERROR DI_MATRIX::calculate(const char *cancel, DI_TRANSFORMATION transformation, bool *aborted_flag, AP_matrix *userdef_matrix) {
545  di_assert(is_AA == false);
546 
547  if (userdef_matrix) {
548  switch (transformation) {
552  break;
553  default:
554  aw_message("Sorry: this kind of distance correction does not support a user defined matrix - it will be ignored");
555  userdef_matrix = NULp;
556  break;
557  }
558  }
559 
560  matrix = new AP_smatrix(nentries);
561 
562  long s_len = aliview->get_length();
563  long hits[AP_MAX][AP_MAX];
564  size_t i;
565 
566  if (nentries<=1) {
567  return "Not enough species selected to calculate matrix";
568  }
569  memset(&cancel_columns[0], 0, 256);
570 
571  for (i=0; cancel[i]; i++) {
572  cancel_columns[safeCharIndex(AP_sequence_parsimony::table[safeCharIndex(cancel[i])])] = 1;
573  UNCOVERED(); // @@@ cover
574  }
575 
576  long columns;
577  double b;
578  long frequencies[AP_MAX];
579  double rel_frequencies[AP_MAX];
580  double S_square = 0;
581 
582  switch (transformation) {
584  this->calculate_overall_freqs(rel_frequencies, cancel_columns);
585  S_square = 0.0;
586  for (i=0; i<AP_MAX; i++) { // LOOP_VECTORIZED[!<8.1]
587  S_square += rel_frequencies[i]*rel_frequencies[i];
588  }
589  break;
590  default: break;
591  };
592 
593  arb_progress progress("Calculating distance matrix", matrix_halfsize(nentries, true));
594  GB_ERROR error = NULp;
595  for (size_t row = 0; row<nentries && !error; row++) {
596  for (size_t col=0; col<=row && !error; col++) {
597  columns = 0;
598 
599  const unsigned char *seq1 = entries[row]->get_nucl_seq()->get_usequence();
600  const unsigned char *seq2 = entries[col]->get_nucl_seq()->get_usequence();
601 
602  b = 0.0;
603  switch (transformation) {
605  di_assert(0);
606  break;
608  b = 0.75;
609  // fall-through
612  double dist = 0.0;
613  if (userdef_matrix) {
614  memset((char *)hits, 0, sizeof(long) * AP_MAX * AP_MAX);
615  int pos;
616  for (pos = s_len; pos >= 0; pos--) {
617  hits[*(seq1++)][*(seq2++)]++;
618  }
619  int x, y;
620  double diffsum = 0.0;
621  double all_sum = 0.001;
622  for (x = AP_A; x < AP_MAX; x*=2) {
623  for (y = AP_A; y < AP_MAX; y*=2) {
624  if (x==y) {
625  all_sum += hits[x][y];
626  }
627  else {
628  UNCOVERED(); // @@@ cover
629  diffsum += hits[x][y] * userdef_matrix->get(x, y);
630  all_sum += hits[x][y] * userdef_matrix->get(x, y);
631  }
632  }
633  }
634  dist = diffsum / all_sum;
635  }
636  else {
637  for (int pos = s_len; pos >= 0; pos--) {
638  int b1 = *(seq1++);
639  int b2 = *(seq2++);
640  if (cancel_columns[b1]) continue;
641  if (cancel_columns[b2]) continue;
642  columns++;
643  if (b1&b2) continue;
644  dist+=1.0;
645  }
646  if (columns == 0) columns = 1;
647  dist /= columns;
648  }
649  if (transformation==DI_TRANSFORMATION_SIMILARITY) {
650  dist = (1.0-dist);
651  }
652  else if (b) {
653  double sigma;
654  dist = this->corr(dist, b, sigma);
655  }
656  matrix->set(row, col, dist);
657  break;
658  }
662  int pos;
663  double dist = 0.0;
664  long N, P, Q, M;
665  double p, q;
666 
667  memset((char *)hits, 0, sizeof(long) * AP_MAX * AP_MAX);
668  for (pos = s_len; pos >= 0; pos--) {
669  hits[*(seq1++)][*(seq2++)]++;
670  }
671  switch (transformation) {
673  P = hits[AP_A][AP_G] +
674  hits[AP_G][AP_A] +
675  hits[AP_C][AP_T] +
676  hits[AP_T][AP_C];
677  Q = hits[AP_A][AP_C] +
678  hits[AP_A][AP_T] +
679  hits[AP_C][AP_A] +
680  hits[AP_T][AP_A] +
681  hits[AP_G][AP_C] +
682  hits[AP_G][AP_T] +
683  hits[AP_C][AP_G] +
684  hits[AP_T][AP_G];
685  M = hits[AP_A][AP_A] +
686  hits[AP_C][AP_C] +
687  hits[AP_G][AP_G] +
688  hits[AP_T][AP_T];
689  N = P+Q+M;
690  if (N==0) N=1;
691  p = (double)P/(double)N;
692  q = (double)Q/(double)N;
693  dist = - .5 * log(
694  (1.0-2.0*p-q)*sqrt(1.0-2.0*q)
695  );
696  break;
697 
700 
701  memset((char *)frequencies, 0,
702  sizeof(long) * AP_MAX);
703 
704  N = 0;
705  M = 0;
706 
707  for (i=0; i<AP_MAX; i++) {
708  if (cancel_columns[i]) continue;
709  unsigned int j;
710  for (j=0; j<i; j++) {
711  if (cancel_columns[j]) continue;
712  frequencies[i] +=
713  hits[i][j]+
714  hits[j][i];
715  }
716  frequencies[i] += hits[i][i];
717  N += frequencies[i];
718  M += hits[i][i];
719  }
720  if (N==0) N=1;
721  if (transformation == DI_TRANSFORMATION_OLSEN) { // Calc sum square freq individually for each line
722  S_square = 0.0;
723  for (i=0; i<AP_MAX; i++) S_square += frequencies[i]*frequencies[i];
724  b = 1.0 - S_square/((double)N*(double)N);
725  }
726  else {
727  b = 1.0 - S_square;
728  }
729 
730  dist = ((double)(N-M)) / (double) N;
731  double sigma;
732  dist = this->corr(dist, b, sigma);
733  break;
734 
735  default: return "Sorry: Transformation not implemented";
736  }
737  matrix->set(row, col, dist);
738  break;
739  }
740  default:;
741  } // switch
742  progress.inc_and_check_user_abort(error);
743  }
744  }
745  if (aborted_flag && progress.aborted()) *aborted_flag = true;
746  return error;
747 }
748 
749 GB_ERROR DI_MATRIX::calculate_pro(DI_TRANSFORMATION transformation, bool *aborted_flag) {
750  di_assert(is_AA == true);
751 
752  di_cattype catType;
753  switch (transformation) {
754  case DI_TRANSFORMATION_NONE: catType = NONE; break;
755  case DI_TRANSFORMATION_SIMILARITY: catType = SIMILARITY; break;
756  case DI_TRANSFORMATION_KIMURA: catType = KIMURA; break;
757  case DI_TRANSFORMATION_PAM: catType = PAM; break;
758  case DI_TRANSFORMATION_CATEGORIES_HALL: catType = HALL; break;
759  case DI_TRANSFORMATION_CATEGORIES_BARKER: catType = GEORGE; break;
760  case DI_TRANSFORMATION_CATEGORIES_CHEMICAL: catType = CHEMICAL; break;
761  default:
762  return "This correction is not available for protein data";
763  }
764  matrix = new AP_smatrix(nentries);
765 
766  di_protdist prodist(UNIVERSAL, catType, nentries, entries, aliview->get_length(), matrix);
767  return prodist.makedists(aborted_flag);
768 }
769 
770 typedef std::map<const char*, TreeNode*, charpLess> NamedNodes;
771 
772 GB_ERROR link_to_tree(NamedNodes& named, TreeNode *node) {
773  GB_ERROR error = NULp;
774  if (node->is_leaf()) {
775  NamedNodes::iterator found = named.find(node->name);
776  if (found != named.end()) {
777  if (found->second) {
778  error = GBS_global_string("Invalid tree (two nodes named '%s')", node->name);
779  }
780  else {
781  found->second = node;
782  }
783  }
784  // otherwise, we do not care about the node (e.g. because it is not marked)
785  }
786  else {
787  error = link_to_tree(named, node->get_leftson());
788  if (!error) error = link_to_tree(named, node->get_rightson());
789  }
790  return error;
791 }
792 
793 #if defined(ASSERTION_USED)
794 static TreeNode *findNode(TreeNode *node, const char *name) {
795  if (node->is_leaf()) {
796  return strcmp(node->name, name) == 0 ? node : NULp;
797  }
798 
799  TreeNode *found = findNode(node->get_leftson(), name);
800  if (!found) found = findNode(node->get_rightson(), name);
801  return found;
802 }
803 #endif
804 
805 static GB_ERROR init(NamedNodes& node, TreeNode *tree, const DI_ENTRY*const*const entries, size_t nentries) {
806  GB_ERROR error = NULp;
807  for (size_t n = 0; n<nentries; ++n) {
808  node[entries[n]->name] = NULp;
809  }
810  error = link_to_tree(node, tree);
811  if (!error) { // check for missing species (needed but not in tree)
812  size_t missing = 0;
813  const char *exampleName = NULp;
814 
815  for (size_t n = 0; n<nentries; ++n) {
816  NamedNodes::iterator found = node.find(entries[n]->name);
817  if (found == node.end()) {
818  ++missing;
819  exampleName = entries[n]->name;
820  }
821  else {
822  di_assert(node[entries[n]->name] == findNode(tree, entries[n]->name));
823  if (!node[entries[n]->name]) {
824  ++missing;
825  exampleName = entries[n]->name;
826  }
827  }
828  }
829 
830  if (missing) {
831  error = GBS_global_string("Tree is missing %zu required species (e.g. '%s')", missing, exampleName);
832  }
833  }
834  return error;
835 }
836 
837 GB_ERROR DI_MATRIX::extract_from_tree(const char *treename, bool *aborted_flag) {
838  GB_ERROR error = NULp;
839  if (nentries<=1) error = "Not enough species selected to calculate matrix";
840  else {
841  TreeNode *tree;
842  {
843  GB_transaction ta(get_gb_main());
844  tree = GBT_read_tree(get_gb_main(), treename, new SimpleRoot);
845  }
846  if (!tree) error = GB_await_error();
847  else {
848  arb_progress progress("Extracting distances from tree", matrix_halfsize(nentries, true));
849  NamedNodes node;
850 
851  error = init(node, tree, entries, nentries);
852  matrix = new AP_smatrix(nentries);
853 
854  for (size_t row = 0; row<nentries && !error; row++) {
855  TreeNode *rnode = node[entries[row]->name];
856  for (size_t col=0; col<=row && !error; col++) {
857  double dist;
858  if (col != row) {
859  TreeNode *cnode = node[entries[col]->name];
860  dist = rnode->intree_distance_to(cnode);
861  }
862  else {
863  dist = 0.0;
864  }
865  matrix->set(row, col, dist);
866  progress.inc_and_check_user_abort(error);
867  }
868  }
869  UNCOVERED();
870  destroy(tree);
871  if (aborted_flag && progress.aborted()) *aborted_flag = true;
872  if (error) progress.done();
873  }
874  }
875  return error;
876 }
877 
879  const char *load_what = AW_root::SINGLETON->awar(AWAR_DIST_WHICH_SPECIES)->read_char_pntr();
880  return (strcmp(load_what, "all") == 0) ? DI_LOAD_ALL : DI_LOAD_MARKED;
881 }
882 
883 __ATTR__USERESULT static GB_ERROR di_calculate_matrix(AW_root *aw_root, const WeightedFilter *weighted_filter, bool bootstrap_flag, bool show_warnings, bool *aborted_flag) {
884  // sets 'aborted_flag' to true, if it is non-NULp and the calculation has been aborted
885  GB_ERROR error = NULp;
886 
887  if (GLOBAL_MATRIX.exists()) {
888  di_assert(!need_recalc.matrix);
889  }
890  else {
891  GBDATA *gb_main = weighted_filter->get_gb_main();
892  GB_transaction ta(gb_main);
893 
894  char *use = aw_root->awar(AWAR_DIST_ALIGNMENT)->read_string();
895  long ali_len = GBT_get_alignment_len(gb_main, use);
896 
897  if (ali_len<=0) {
898  error = "Please select a valid alignment";
899  GB_clear_error();
900  }
901  else {
902  const LoadWhat what = whatToLoad();
903  size_t species_count = get_load_count(what, gb_main, NULp);
904  double phase1_fraction;
905  {
906  // keep sync with .@MatrixSteps
907  double O_loadData = 10*species_count;
908  double O_calcMatrix = triangular_number(species_count);
909  phase1_fraction = O_loadData / (O_loadData+O_calcMatrix);
910  }
911 
912  arb_progress progress(WEIGHTED, "Calculating matrix", phase1_fraction); // two steps: 1x load data + 1x calc matrix
913 
914  char *cancel = aw_root->awar(AWAR_DIST_CANCEL_CHARS)->read_string();
915  AliView *aliview = weighted_filter->create_aliview(use, error);
916 
917  if (!error) {
918  if (bootstrap_flag) aliview->get_filter()->enable_bootstrap();
919 
920  const char *sort_tree_name = aw_root->awar(AWAR_DIST_TREE_SORT_NAME)->read_char_pntr();
921  {
922  DI_MATRIX *phm = new DI_MATRIX(*aliview);
924 
925  static SmartCharPtr last_sort_tree_name;
926  static SmartPtr<MatrixOrder> last_order;
927 
928  if (last_sort_tree_name.isNull() || !sort_tree_name || strcmp(&*last_sort_tree_name, sort_tree_name) != 0) {
929  last_sort_tree_name = nulldup(sort_tree_name);
930  last_order = new MatrixOrder(gb_main, sort_tree_name);
931  }
932  di_assert(last_order.isSet());
933  error = phm->load(what, *last_order, show_warnings, NULp); // step1 of progress
934  progress.inc_and_check_user_abort(error);
935  bool aborted = error ? progress.aborted() : false;
936  error = ta.close(error);
937 
938  if (!error) {
940 
941  if (trans == DI_TRANSFORMATION_FROM_TREE) {
942  const char *treename = aw_root->awar(AWAR_DIST_TREE_CURR_NAME)->read_char_pntr();
943  error = phm->extract_from_tree(treename, &aborted);
944  }
945  else {
946  if (phm->is_AA) error = phm->calculate_pro(trans, &aborted);
947  else error = phm->calculate(cancel, trans, &aborted, get_user_matrix());
948  }
949  }
950 
951  if (aborted) {
952  di_assert(error);
953  if (aborted_flag) *aborted_flag = true;
954  }
955  if (error) {
956  delete phm;
957  GLOBAL_MATRIX.forget();
958  }
959  else {
960  GLOBAL_MATRIX.replaceBy(phm);
962  need_recalc.matrix = false;
963  }
964  }
965  }
966 
967  free(cancel);
968  delete aliview;
969  }
970  free(use);
971 
972  di_assert(contradicted(error, GLOBAL_MATRIX.exists()));
973  }
974  return error;
975 }
976 
977 static void di_mark_by_distance(AW_window *aww, WeightedFilter *weighted_filter) {
978  AW_root *aw_root = aww->get_root();
979  float lowerBound = aw_root->awar(AWAR_DIST_MIN_DIST)->read_float();
980  float upperBound = aw_root->awar(AWAR_DIST_MAX_DIST)->read_float();
981 
982  GB_ERROR error = NULp;
983  if (lowerBound >= upperBound) {
984  error = GBS_global_string("Lower bound (%f) has to be smaller than upper bound (%f)", lowerBound, upperBound);
985  }
986  else if (lowerBound<0.0 || lowerBound > 1.0) {
987  error = GBS_global_string("Lower bound (%f) is not in allowed range [0.0 .. 1.0]", lowerBound);
988  }
989  else if (upperBound<0.0 || upperBound > 1.0) {
990  error = GBS_global_string("Upper bound (%f) is not in allowed range [0.0 .. 1.0]", upperBound);
991  }
992  else {
993  GBDATA *gb_main = weighted_filter->get_gb_main();
994  GB_transaction ta(gb_main);
995 
996  char *selected = aw_root->awar(AWAR_SPECIES_NAME)->read_string();
997  if (!selected[0]) {
998  error = "Please select a species";
999  }
1000  else {
1001  GBDATA *gb_selected = GBT_find_species(gb_main, selected);
1002  if (!gb_selected) {
1003  error = GBS_global_string("Couldn't find species '%s'", selected);
1004  }
1005  else {
1006  char *use = aw_root->awar(AWAR_DIST_ALIGNMENT)->read_string();
1007  char *cancel = aw_root->awar(AWAR_DIST_CANCEL_CHARS)->read_string();
1009  AliView *aliview = weighted_filter->create_aliview(use, error);
1010 
1011  if (!error) {
1012  DI_MATRIX *prev_global = GLOBAL_MATRIX.swap(NULp);
1013 
1014  size_t speciesCount = GBT_get_species_count(gb_main);
1015  bool markedSelected = false;
1016 
1017  arb_progress progress("Mark species by distance", speciesCount);
1018  MatrixOrder order(gb_main, NULp);
1019 
1020  for (GBDATA *gb_species = GBT_first_species(gb_main);
1021  gb_species && !error;
1022  gb_species = GBT_next_species(gb_species))
1023  {
1024  DI_MATRIX *phm = new DI_MATRIX(*aliview);
1025  phm->matrix_type = DI_MATRIX_FULL;
1026  GBDATA *species_pair[] = { gb_selected, gb_species, NULp };
1027 
1028  error = phm->load(DI_LOAD_LIST, order, false, species_pair);
1029 
1030  if (phm->nentries == 2) { // if species has no alignment -> nentries<2
1031  if (!error) {
1032  if (phm->is_AA) error = phm->calculate_pro(trans, NULp);
1033  else error = phm->calculate(cancel, trans, NULp, get_user_matrix());
1034  }
1035 
1036  if (!error) {
1037  double dist_value = phm->matrix->get(0, 1); // distance or conformance
1038  bool mark = (lowerBound <= dist_value && dist_value <= upperBound);
1039  GB_write_flag(gb_species, mark);
1040 
1041  if (!markedSelected) {
1042  dist_value = phm->matrix->get(0, 0); // distance or conformance to self
1043  mark = (lowerBound <= dist_value && dist_value <= upperBound);
1044  GB_write_flag(gb_selected, mark);
1045 
1046  markedSelected = true;
1047  }
1048  }
1049  }
1050 
1051  delete phm;
1052  if (!error) progress.inc_and_check_user_abort(error);
1053  }
1054 
1055  di_assert(!GLOBAL_MATRIX.exists());
1056  ASSERT_RESULT(DI_MATRIX*, NULp, GLOBAL_MATRIX.swap(prev_global));
1057 
1058  if (error) progress.done();
1059  }
1060 
1061  delete aliview;
1062  free(cancel);
1063  free(use);
1064  }
1065  }
1066 
1067  free(selected);
1068  error = ta.close(error);
1069  }
1070 
1071  if (error) {
1072  aw_message(error);
1073  }
1074 }
1075 
1077  // recalculate matrix
1079  if (need_recalc.matrix && GLOBAL_MATRIX.exists()) {
1080  GLOBAL_MATRIX.forget();
1081  }
1082  di_assert(recalculate_matrix_cb.isSet());
1083  (*recalculate_matrix_cb)();
1085 }
1086 
1087 static void di_view_matrix_cb(AW_window *aww, save_matrix_params *sparam) {
1089  if (!error) {
1090  if (!matrixDisplay) matrixDisplay = new MatrixDisplay(sparam->weighted_filter->get_gb_main());
1091 
1092  static AW_window *viewer = NULp;
1093  if (!viewer) viewer = DI_create_view_matrix_window(aww->get_root(), matrixDisplay, sparam);
1094 
1095  matrixDisplay->mark(MatrixDisplay::NEED_SETUP);
1096  matrixDisplay->update_display();
1097 
1098  GLOBAL_MATRIX.set_changed_cb(matrix_changed_cb);
1099 
1100  viewer->activate();
1101  }
1102 }
1103 
1104 static void di_save_matrix_cb(AW_window *aww) {
1105  // save the matrix
1107  if (!error) {
1108  char *filename = aww->get_root()->awar(AWAR_DIST_SAVE_MATRIX_FILENAME)->read_string();
1110 
1111  GLOBAL_MATRIX.get()->save(filename, type);
1112  free(filename);
1113  }
1115  aww->hide_or_notify(error);
1116 }
1117 
1119  static AW_window_simple *aws = NULp;
1120  if (!aws) {
1121  aws = new AW_window_simple;
1122  aws->init(aw_root, "SAVE_MATRIX", "Save Matrix");
1123  aws->load_xfig("sel_box_user.fig");
1124 
1125  aws->at("close");
1126  aws->callback(AW_POPDOWN);
1127  aws->create_button("CLOSE", "CANCEL", "C");
1128 
1129 
1130  aws->at("help"); aws->callback(makeHelpCallback("save_matrix.hlp"));
1131  aws->create_button("HELP", "HELP", "H");
1132 
1133  aws->at("user");
1134  aws->create_option_menu(AWAR_DIST_SAVE_MATRIX_TYPE);
1135  aws->insert_default_option("Phylip Format (Lower Triangular Matrix)", "P", DI_SAVE_PHYLIP_COMP);
1136  aws->insert_option("Readable (using NDS)", "R", DI_SAVE_READABLE);
1137  aws->insert_option("Tabbed (using NDS)", "R", DI_SAVE_TABBED);
1138  aws->update_option_menu();
1139 
1140  AW_create_standard_fileselection(aws, save_params->awar_base);
1141 
1142  aws->at("save2");
1143  aws->callback(makeWindowCallback(di_save_matrix_cb));
1144  aws->create_button("SAVE", "SAVE", "S");
1145 
1146  aws->at("cancel2");
1147  aws->callback(AW_POPDOWN);
1148  aws->create_button("CLOSE", "CANCEL", "C");
1149  }
1150  return aws;
1151 }
1152 
1154  AW_window_simple *aws = new AW_window_simple;
1155  aws->init(aw_root, "SELECT_CHARS_TO_CANCEL_COLUMN", "CANCEL SELECT");
1156  aws->load_xfig("di_cancel.fig");
1157 
1158  aws->at("close");
1159  aws->callback(AW_POPDOWN);
1160  aws->create_button("CLOSE", "CLOSE", "C");
1161 
1162  aws->at("cancel");
1163  aws->create_input_field(AWAR_DIST_CANCEL_CHARS, 12);
1164 
1165  return aws;
1166 }
1167 
1168 static const char *enum_trans_to_string[] = {
1169  "none",
1170  "similarity",
1171  "jukes_cantor",
1172  "felsenstein",
1173 
1174  "pam",
1175  "hall",
1176  "barker",
1177  "chemical",
1178 
1179  "kimura",
1180  "olsen",
1181  "felsenstein voigt",
1182  "olsen voigt",
1183  "max ml",
1184 
1185  NULp, // treedist
1186 };
1187 
1188 STATIC_ASSERT(ARRAY_ELEMS(enum_trans_to_string) == DI_TRANSFORMATION_COUNT);
1189 
1190 static void di_calculate_tree_cb(AW_window *aww, WeightedFilter *weighted_filter, bool bootstrap_flag) {
1191  recalculate_tree_cb = new BoundWindowCallback(aww, makeWindowCallback(di_calculate_tree_cb, weighted_filter, bootstrap_flag));
1192 
1193  AW_root *aw_root = aww->get_root();
1194  GB_ERROR error = NULp;
1195  StrArray *all_names = NULp;
1196 
1197  long loop_count = 0;
1198  long bootstrap_count = aw_root->awar(AWAR_DIST_BOOTSTRAP_COUNT)->read_int();
1199 
1200  {
1201  char *tree_name = aw_root->awar(AWAR_DIST_TREE_STD_NAME)->read_string();
1202  error = GBT_check_tree_name(tree_name);
1203  free(tree_name);
1204  }
1205 
1206  SmartPtr<arb_progress> progress, tree_progress;
1208 
1209  GBDATA *gb_main = weighted_filter->get_gb_main();
1210 
1211  double phase1_fraction;
1212  {
1213  const size_t species_count = get_load_count(whatToLoad(), gb_main, NULp);
1214 
1215  double O_calcMatrix = 60 * (triangular_number(species_count) + 10*species_count); // keep sync with .@MatrixSteps
1216  double O_joinTree = 1 * sum_of_triangular_numbers(species_count); // keep sync with ../SL/NEIGHBOURJOIN/NJ.cxx@NJsteps
1217 
1218  phase1_fraction = O_calcMatrix / (O_calcMatrix+O_joinTree);
1219  }
1220 
1221  if (!error) {
1222  if (bootstrap_flag) {
1223  if (bootstrap_count) {
1224  progress = new arb_progress("Calculating bootstrap trees", bootstrap_count+1); // 1 for each bootstrap-tree + 1 for final consensus tree
1225  }
1226  else {
1227  progress = new arb_progress("Calculating bootstrap trees (KILL to stop)", long(INT_MAX));
1228  }
1229  progress->auto_subtitles("tree");
1230  }
1231  else {
1232  progress = new arb_progress("Calculating tree");
1233  }
1234 
1235  // @@@ handle case where matrix is already up-to-date?
1236  // @@@ if need_recalc.matrix -> use simple wrapping progress (using 1 step); do the same below at 2nd construction point
1237  tree_progress = new arb_progress(WEIGHTED, NULp, phase1_fraction); // each tree calculation consists of 2 steps (matrix+neighbourjoining)
1238 
1239  if (bootstrap_flag) {
1240  GLOBAL_MATRIX.forget();
1241  GLOBAL_MATRIX.set_changed_cb(NULp); // otherwise matrix window will repeatedly pop up/down
1242 
1243  error = di_calculate_matrix(aw_root, weighted_filter, bootstrap_flag, true, NULp);
1244  if (!error) {
1245  DI_MATRIX *matr = GLOBAL_MATRIX.get();
1246  if (!matr) {
1247  error = "unexpected error in di_calculate_matrix_cb (data missing)";
1248  }
1249  else {
1250  all_names = new StrArray;
1251  all_names->reserve(matr->nentries+2);
1252 
1253  for (size_t i=0; i<matr->nentries; i++) {
1254  all_names->put(ARB_strdup(matr->entries[i]->name));
1255  }
1256  ctree = new ConsensusTree(*all_names);
1257  }
1258  }
1259  }
1260  }
1261 
1262  TreeNode *tree = NULp;
1263  bool aborted = false;
1264 
1265  do {
1266  if (error) break;
1267 
1268  aborted = false;
1269 
1270  if (bootstrap_flag) {
1271  if (loop_count>0) { // in first loop we already have a valid matrix -> no need to recalculate
1272  GLOBAL_MATRIX.forget();
1273  }
1274  }
1275  else if (need_recalc.matrix) {
1276  GLOBAL_MATRIX.forget();
1277  }
1278 
1279  if (tree_progress.isNull()) tree_progress = new arb_progress(WEIGHTED, NULp, phase1_fraction); // each tree calculation consists of 2 steps (matrix+neighbourjoining)
1280 
1281  error = di_calculate_matrix(aw_root, weighted_filter, bootstrap_flag, !bootstrap_flag, &aborted); // [implicit progress increment only if matrix is recalculated]
1282  if (error && aborted) {
1283  error = NULp; // clear error (otherwise no tree will be read below)
1284  break; // end of bootstrap
1285  }
1286  tree_progress->inc();
1287 
1288  if (!GLOBAL_MATRIX.exists()) {
1289  error = "unexpected error in di_calculate_matrix_cb (data missing)";
1290  break;
1291  }
1292 
1293  DI_MATRIX *matr = GLOBAL_MATRIX.get();
1294  char **names = ARB_calloc<char*>(matr->nentries+2);
1295 
1296  for (size_t i=0; i<matr->nentries; i++) {
1297  names[i] = matr->entries[i]->name;
1298  }
1299  di_assert(matr->nentries == matr->matrix->size());
1300  tree = neighbourjoining(names, *matr->matrix, new SimpleRoot, &aborted); // [implicit progress increment]
1301  tree_progress->inc();
1302 
1303  if (bootstrap_flag && !aborted) {
1304  error = ctree->insert_tree_weighted(tree, matr->nentries, 1, false);
1305  UNCOVERED();
1306  destroy(tree); tree = NULp;
1307  // Warning: nodes stored as origin in deconstructed PARTs now point to deleted nodes.
1308 
1309  if (!error) {
1310  ++loop_count;
1311  progress->inc();
1312  }
1313  if (!bootstrap_count) { // when waiting for kill
1314  time_t t = time(NULp);
1315  static time_t tlast = 0;
1316 
1317  double diff_seconds = difftime(t, tlast);
1318  if (diff_seconds>3) {
1319  progress->force_update();
1320  tlast = t;
1321  }
1322  }
1323  }
1324  free(names);
1325 
1326  if (aborted || error) {
1327  break;
1328  }
1329 
1330  tree_progress.setNull();
1331 
1332  } while (bootstrap_flag && loop_count != bootstrap_count);
1333 
1334  if (aborted || error) tree_progress->done();
1335  tree_progress.setNull();
1336 
1337  if (!error) {
1338  if (bootstrap_flag) {
1339  if (aborted) {
1340  // continuing to use progress would result in instant abort.
1341  // (abort state is already set in progress-implementation)
1342  // @@@L would need a mechanism to reset that 'abort' flag!
1343 
1344  progress->done();
1345  progress.setNull();
1346  progress = new arb_progress("Collect consensus of bootstrapped trees", 1UL);
1347  }
1348 
1349  tree = ctree->get_consensus_tree(error);
1350  progress->inc();
1351  if (!error) {
1352  error = GBT_is_invalid(tree);
1353  di_assert(!error);
1354  }
1355  }
1356  else {
1357  error = progress->error_if_aborted();
1358  }
1359 
1360  if (!error) {
1361  char *tree_name = aw_root->awar(AWAR_DIST_TREE_STD_NAME)->read_string();
1362  GB_begin_transaction(gb_main);
1363  error = GBT_write_tree(gb_main, tree_name, tree);
1364 
1365  if (!error) {
1366  char *filter_name = AWT_get_combined_filter_name(aw_root, "dist");
1367  int transr = aw_root->awar(AWAR_DIST_CORR_TRANS)->read_int();
1368 
1369  {
1370  const char *comment;
1371  if (enum_trans_to_string[transr]) {
1372  comment = GBS_global_string("PRG=dnadist CORR=%s FILTER=%s PKG=ARB", enum_trans_to_string[transr], filter_name);
1373  }
1374  else {
1376  const char *treename = aw_root->awar(AWAR_DIST_TREE_CURR_NAME)->read_char_pntr();
1377  comment = GBS_global_string("PRG=treedist (from '%s') PKG=ARB", treename);
1378  }
1379 
1380  error = GBT_write_tree_remark(gb_main, tree_name, comment);
1381  }
1382 
1383  if (!error) {
1384  const char *log_message;
1385  if (bootstrap_flag) {
1386  log_message = GBS_global_string("generated consensus tree from %li bootstrap trees%s.",
1387  loop_count,
1388  aborted ? " (aborted by user)" : "");
1389  }
1390  else {
1391  log_message = "calculation finished";
1392  }
1393  error = GBT_log_to_named_trees_remark(gb_main, tree_name, log_message, true);
1394  }
1395 
1396  free(filter_name);
1397  }
1398  error = GB_end_transaction(gb_main, error);
1399  free(tree_name);
1400  }
1401  }
1402 
1403  UNCOVERED();
1404  destroy(tree);
1405 
1406  // aw_status(); // remove 'abort' flag (@@@ got no equiv for arb_progress yet. really needed?)
1407 
1408  if (bootstrap_flag) {
1409  if (all_names) delete all_names;
1410  GLOBAL_MATRIX.forget();
1411  }
1412 #if defined(DEBUG)
1413  else {
1414  di_assert(!all_names);
1415  }
1416 #endif // DEBUG
1417 
1418  if (progress.isSet()) {
1419  progress->done();
1420  }
1421 
1422  if (error) {
1423  aw_message(error);
1424  }
1425  else {
1426  need_recalc.tree = false;
1427  aw_root->awar(AWAR_TREE_REFRESH)->touch();
1428  }
1429 }
1430 
1431 
1433  GB_push_transaction(gb_main);
1434 
1435  GLOBAL_MATRIX.forget();
1436 
1437  AW_root *aw_root = aww->get_root();
1438  char *use = aw_root->awar(AWAR_DIST_ALIGNMENT)->read_string();
1439  long ali_len = GBT_get_alignment_len(gb_main, use);
1440  GB_ERROR error = NULp;
1441 
1442  if (ali_len<=0) {
1443  GB_pop_transaction(gb_main);
1444  error = "Please select a valid alignment";
1445  GB_clear_error();
1446  }
1447  else {
1448  arb_progress progress("Analyzing data");
1449 
1450  char *filter_str = aw_root->awar(AWAR_DIST_FILTER_FILTER)->read_string();
1451  // char *cancel = aw_root->awar(AWAR_DIST_CANCEL_CHARS)->read_string();
1452 
1453  AliView *aliview = NULp;
1454  {
1455  AP_filter *ap_filter = NULp;
1456  long flen = strlen(filter_str);
1457 
1458  if (flen == ali_len) {
1459  ap_filter = new AP_filter(filter_str, "0", ali_len);
1460  }
1461  else {
1462  if (flen) {
1463  aw_message("Warning: your filter len is not equal to the alignment len\nfilter got truncated with zeros or cutted");
1464  ap_filter = new AP_filter(filter_str, "0", ali_len);
1465  }
1466  else {
1467  ap_filter = new AP_filter(ali_len); // unfiltered
1468  }
1469  }
1470 
1471  error = ap_filter->is_invalid();
1472  if (!error) {
1473  AP_weights ap_weights(ap_filter);
1474  aliview = new AliView(gb_main, *ap_filter, ap_weights, use);
1475  }
1476  delete ap_filter;
1477  }
1478 
1479  if (error) {
1480  GB_pop_transaction(gb_main);
1481  }
1482  else {
1483  DI_MATRIX phm(*aliview);
1484 
1485  {
1486  const char *sort_tree_name = aw_root->awar(AWAR_DIST_TREE_SORT_NAME)->read_char_pntr();
1487 
1488  GLOBAL_MATRIX.forget();
1489 
1490  MatrixOrder order(gb_main, sort_tree_name);
1491  error = phm.load(whatToLoad(), order, true, NULp);
1492  }
1493 
1494  GB_pop_transaction(gb_main);
1495 
1496  if (!error) {
1497  progress.subtitle("Search Correction");
1498 
1499  string msg;
1500  DI_TRANSFORMATION detected = phm.detect_transformation(msg);
1501  aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(detected);
1502  aw_message(msg.c_str());
1503  }
1504  }
1505 
1506  // free(cancel);
1507  delete aliview;
1508 
1509  free(filter_str);
1510  }
1511 
1512  if (error) aw_message(error);
1513 
1514  free(use);
1515 }
1516 
1518  if (gb_main) {
1519  AW_root *aw_root = aww->get_root();
1520  shutdown_macro_recording(aw_root);
1521  aw_root->unlink_awars_from_DB(gb_main);
1522  GB_close(gb_main);
1523  }
1524  GLOBAL_MATRIX.set_changed_cb(NULp);
1525  exit(EXIT_SUCCESS);
1526 }
1527 
1528 static void di_calculate_full_matrix_cb(AW_window *aww, const WeightedFilter *weighted_filter) {
1529  recalculate_matrix_cb = new BoundWindowCallback(aww, makeWindowCallback(di_calculate_full_matrix_cb, weighted_filter));
1530 
1531  GLOBAL_MATRIX.forget_if_not_has_type(DI_MATRIX_FULL);
1532  GB_ERROR error = di_calculate_matrix(aww->get_root(), weighted_filter, 0, true, NULp);
1533  aw_message_if(error);
1535  if (!error) tree_needs_recalc_cb();
1536 }
1537 
1538 static void di_calculate_compressed_matrix_cb(AW_window *aww, WeightedFilter *weighted_filter) {
1539  recalculate_matrix_cb = new BoundWindowCallback(aww, makeWindowCallback(di_calculate_compressed_matrix_cb, weighted_filter));
1540 
1541  GBDATA *gb_main = weighted_filter->get_gb_main();
1542  GB_transaction ta(gb_main);
1543 
1544  AW_root *aw_root = aww->get_root();
1545  char *treename = aw_root->awar(AWAR_DIST_TREE_COMP_NAME)->read_string();
1546  GB_ERROR error = NULp;
1547  TreeNode *tree = GBT_read_tree(gb_main, treename, new SimpleRoot);
1548 
1549  if (!tree) {
1550  error = GB_await_error();
1551  }
1552  else {
1553  {
1554  LocallyModify<MatrixDisplay*> skipRefresh(matrixDisplay, NULp); // skip refresh, until matrix has been compressed
1555 
1556  GLOBAL_MATRIX.forget(); // always forget (as tree might have changed)
1557  error = di_calculate_matrix(aw_root, weighted_filter, 0, true, NULp);
1558  if (!error && !GLOBAL_MATRIX.exists()) {
1559  error = "Failed to calculate your matrix (bug?)";
1560  }
1561  if (!error) {
1562  error = GLOBAL_MATRIX.get()->compress(tree);
1563  }
1564  }
1565  UNCOVERED();
1566  destroy(tree);
1567 
1568  // now force refresh
1569  if (matrixDisplay) {
1570  matrixDisplay->mark(MatrixDisplay::NEED_SETUP);
1571  matrixDisplay->update_display();
1572  }
1573  }
1574  free(treename);
1575  aw_message_if(error);
1577  if (!error) tree_needs_recalc_cb();
1578 }
1579 
1581  AW_root *aw_root = aww->get_root();
1582  char *tree_name = aw_root->awar(AWAR_DIST_TREE_CURR_NAME)->read_string();
1583  aw_root->awar(AWAR_DIST_TREE_SORT_NAME)->write_string(tree_name);
1584  free(tree_name);
1585 }
1587  AW_root *aw_root = aww->get_root();
1588  char *tree_name = aw_root->awar(AWAR_DIST_TREE_CURR_NAME)->read_string();
1589  aw_root->awar(AWAR_DIST_TREE_COMP_NAME)->write_string(tree_name);
1590  free(tree_name);
1591 }
1592 
1594  AW_root *aw_root = aww->get_root();
1595  char *tree_name = aw_root->awar(AWAR_DIST_TREE_CURR_NAME)->read_string();
1596  aw_root->awar(AWAR_DIST_TREE_STD_NAME)->write_string(tree_name);
1597  free(tree_name);
1598 }
1599 
1600 
1603  aws->init(aw_root, "NEIGHBOUR JOINING", "NEIGHBOUR JOINING [ARB_DIST]");
1604  aws->load_xfig("di_ge_ma.fig");
1605  aws->button_length(10);
1606 
1607  WindowCallback close_cb = makeWindowCallback(di_exit, gb_main);
1608 
1609  aws->at("close");
1610  aws->callback(close_cb);
1611  aws->create_button("CLOSE", "CLOSE", "C");
1612 
1613  aws->at("help");
1614  aws->callback(makeHelpCallback("dist.hlp"));
1615  aws->create_button("HELP", "HELP", "H");
1616 
1617  GB_push_transaction(gb_main);
1618 
1619 #if defined(DEBUG)
1620  AWT_create_debug_menu(aws);
1621 #endif // DEBUG
1622 
1623  aws->create_menu("File", "F", AWM_ALL);
1624  insert_macro_menu_entry(aws, false);
1625  aws->insert_menu_topic("quit", "Quit", "Q", "quit.hlp", AWM_ALL, close_cb);
1626 
1627  aws->create_menu("Properties", "P", AWM_ALL);
1628  aws->insert_menu_topic("frame_props", "Frame settings ...", "F", "props_frame.hlp", AWM_ALL, AW_preset_window);
1629  aws->sep______________();
1631  aws->sep______________();
1632  aws->insert_menu_topic("save_props", "Save Properties (dist.arb)", "S", "savedef.hlp", AWM_ALL, AW_save_properties);
1633 
1634  aws->insert_help_topic("ARB_DIST help", "D", "dist.hlp", AWM_ALL, makeHelpCallback("dist.hlp"));
1635 
1636  // ------------------
1637  // left side
1638 
1639  aws->at("which_species");
1641  aws->insert_option("all", "a", "all");
1642  aws->insert_default_option("marked", "m", "marked");
1643  aws->update_option_menu();
1644 
1645  aws->at("which_alignment");
1647 
1648  // filter & weights
1649 
1650  AW_awar *awar_dist_alignment = aws->get_root()->awar_string(AWAR_DIST_ALIGNMENT);
1651  WeightedFilter *weighted_filter = // do NOT free (bound to callbacks)
1652  new WeightedFilter(gb_main, aws->get_root(), AWAR_DIST_FILTER_NAME, AWAR_DIST_COLUMN_STAT_NAME, awar_dist_alignment);
1653 
1654  aws->at("filter_select");
1655  aws->callback(makeCreateWindowCallback(awt_create_select_filter_win, weighted_filter->get_adfiltercbstruct()));
1656  aws->create_button("SELECT_FILTER", AWAR_DIST_FILTER_NAME);
1657 
1658  aws->at("weights_select");
1659  aws->sens_mask(AWM_EXP);
1660  aws->callback(makeCreateWindowCallback(COLSTAT_create_selection_window, weighted_filter->get_column_stat()));
1661  aws->create_button("SELECT_COL_STAT", AWAR_DIST_COLUMN_STAT_NAME);
1662  aws->sens_mask(AWM_ALL);
1663 
1664  aws->at("which_cancel");
1666 
1667  aws->at("cancel_select");
1669  aws->create_button("SELECT_CANCEL_CHARS", "Info", "C");
1670 
1671  aws->at("change_matrix");
1673  aws->create_button("EDIT_MATRIX", "Edit Matrix");
1674 
1675  aws->at("enable");
1677 
1678  aws->at("which_correction");
1680  aws->insert_option("none", "n", (int)DI_TRANSFORMATION_NONE);
1681  aws->insert_option("similarity", "n", (int)DI_TRANSFORMATION_SIMILARITY);
1682  aws->insert_option("jukes-cantor (dna)", "c", (int)DI_TRANSFORMATION_JUKES_CANTOR);
1683  aws->insert_option("felsenstein (dna)", "f", (int)DI_TRANSFORMATION_FELSENSTEIN);
1684  aws->insert_option("olsen (dna)", "o", (int)DI_TRANSFORMATION_OLSEN);
1685  aws->insert_option("felsenstein/voigt (exp)", "1", (int)DI_TRANSFORMATION_FELSENSTEIN_VOIGT);
1686  aws->insert_option("olsen/voigt (exp)", "2", (int)DI_TRANSFORMATION_OLSEN_VOIGT);
1687  aws->insert_option("kimura (pro)", "k", (int)DI_TRANSFORMATION_KIMURA);
1688  aws->insert_option("PAM (protein)", "c", (int)DI_TRANSFORMATION_PAM);
1689  aws->insert_option("Cat. Hall(exp)", "c", (int)DI_TRANSFORMATION_CATEGORIES_HALL);
1690  aws->insert_option("Cat. Barker(exp)", "c", (int)DI_TRANSFORMATION_CATEGORIES_BARKER);
1691  aws->insert_option("Cat.Chem (exp)", "c", (int)DI_TRANSFORMATION_CATEGORIES_CHEMICAL);
1692  aws->insert_option("from selected tree", "t", (int)DI_TRANSFORMATION_FROM_TREE);
1693  aws->insert_default_option("unknown", "u", (int)DI_TRANSFORMATION_NONE);
1694 
1695  aws->update_option_menu();
1696 
1697  aws->at("autodetect"); // auto
1698  aws->callback(makeWindowCallback(di_autodetect_callback, gb_main));
1699  aws->sens_mask(AWM_EXP);
1700  aws->create_button("AUTODETECT_CORRECTION", "AUTODETECT", "A");
1701  aws->sens_mask(AWM_ALL);
1702 
1703  // -------------------
1704  // right side
1705 
1706 
1707  aws->at("mark_distance");
1708  aws->callback(makeWindowCallback(di_mark_by_distance, weighted_filter));
1709  aws->create_autosize_button("MARK_BY_DIST", "Mark all species");
1710 
1711  aws->at("mark_lower");
1713 
1714  aws->at("mark_upper");
1716 
1717  // -----------------
1718 
1719  // tree selection
1720 
1721  aws->at("tree_list");
1723 
1724  aws->at("detect_clusters");
1725  aws->callback(makeCreateWindowCallback(DI_create_cluster_detection_window, weighted_filter));
1726  aws->create_autosize_button("DETECT_CLUSTERS", "Detect homogenous clusters in tree", "D");
1727 
1728  // matrix calculation
1729 
1730  aws->button_length(18);
1731 
1732  aws->at("calculate");
1733  aws->callback(makeWindowCallback(di_calculate_full_matrix_cb, weighted_filter));
1734  aws->create_button("CALC_FULL_MATRIX", "Calculate\nFull Matrix", "F");
1735 
1736  aws->at("compress");
1737  aws->callback(makeWindowCallback(di_calculate_compressed_matrix_cb, weighted_filter));
1738  aws->create_button("CALC_COMPRESSED_MATRIX", "Calculate\nCompressed Matrix", "C");
1739 
1740  recalculate_matrix_cb = new BoundWindowCallback(aws, makeWindowCallback(di_calculate_full_matrix_cb, weighted_filter));
1741 
1742  aws->button_length(13);
1743 
1744  {
1745  static save_matrix_params sparams;
1746 
1748  sparams.weighted_filter = weighted_filter;
1749 
1750  aws->at("save_matrix");
1751  aws->callback(makeCreateWindowCallback(DI_create_save_matrix_window, &sparams));
1752  aws->create_button("SAVE_MATRIX", "Save matrix", "M");
1753 
1754  aws->at("view_matrix");
1755  aws->callback(makeWindowCallback(di_view_matrix_cb, &sparams));
1756  aws->create_button("VIEW_MATRIX", "View matrix", "V");
1757  }
1758 
1759  aws->button_length(22);
1760  aws->at("use_compr_tree");
1762  aws->create_button("USE_COMPRESSION_TREE", "Use to compress", "");
1763  aws->at("use_sort_tree");
1765  aws->create_button("USE_SORT_TREE", "Use to sort", "");
1766 
1767  aws->at("compr_tree_name"); aws->create_input_field(AWAR_DIST_TREE_COMP_NAME, 12);
1768  aws->at("sort_tree_name"); aws->create_input_field(AWAR_DIST_TREE_SORT_NAME, 12);
1769 
1770  // tree calculation
1771 
1772  aws->button_length(18);
1773 
1774  aws->at("t_calculate");
1775  aws->callback(makeWindowCallback(di_calculate_tree_cb, weighted_filter, false));
1776  aws->create_button("CALC_TREE", "Calculate \ntree", "C");
1777 
1778  aws->at("bootstrap");
1779  aws->callback(makeWindowCallback(di_calculate_tree_cb, weighted_filter, true));
1780  aws->create_button("CALC_BOOTSTRAP_TREE", "Calculate \nbootstrap tree");
1781 
1782  recalculate_tree_cb = new BoundWindowCallback(aws, makeWindowCallback(di_calculate_tree_cb, weighted_filter, false));
1783 
1784  aws->button_length(22);
1785  aws->at("use_existing");
1787  aws->create_button("USE_NAME", "Use as new tree name", "");
1788 
1789  aws->at("calc_tree_name");
1791 
1792  aws->at("bcount");
1794 
1795  {
1796  aws->sens_mask(AWM_EXP);
1797 
1798  aws->at("auto_calc_tree");
1799  aws->label("Auto calculate tree");
1801 
1802  aws->at("auto_recalc");
1803  aws->label("Auto recalculate");
1805 
1806  aws->sens_mask(AWM_ALL);
1807  }
1808 
1809  bool disable_autocalc = !ARB_in_expert_mode(aw_root);
1810  if (disable_autocalc) {
1813  }
1814 
1815  GB_pop_transaction(gb_main);
1816  return aws;
1817 }
1818 
1819 // --------------------------------------------------------------------------------
1820 
1821 #ifdef UNIT_TESTS
1822 #include <arb_diff.h>
1823 #include <arb_file.h>
1824 
1825 #ifndef TEST_UNIT_H
1826 #include <test_unit.h>
1827 #endif
1828 
1829 class DIST_testenv : virtual Noncopyable {
1830  GB_shell shell;
1831  GBDATA *gb_main;
1832  AliView *ali_view;
1833 
1834 public:
1835  DIST_testenv(const char *dbname, const char *aliName)
1836  : ali_view(NULp)
1837  {
1838  gb_main = GB_open(dbname, "r");
1839  TEST_REJECT_NULL(gb_main);
1840 
1841  GB_transaction ta(gb_main);
1842  size_t aliLength = GBT_get_alignment_len(gb_main, aliName);
1843 
1844  AP_filter filter(aliLength);
1845  if (!filter.is_invalid()) {
1846  AP_weights weights(&filter);
1847  ali_view = new AliView(gb_main, filter, weights, aliName);
1848  }
1849  }
1850  ~DIST_testenv() {
1851  delete ali_view;
1852  GB_close(gb_main);
1853  }
1854 
1855  const AliView& aliview() const { return *ali_view; }
1856  GBDATA *gbmain() const { return gb_main; }
1857 };
1858 
1859 void TEST_matrix() {
1860  for (int iat = GB_AT_RNA; iat<=GB_AT_AA; ++iat) {
1862  // ---------------
1863  // setup
1864  // GB_AT_RNA GB_AT_DNA GB_AT_AA
1865  const char *db_name[]= { NULp, "TEST_trees.arb", "TEST_realign.arb", "TEST_realign.arb", };
1866  const char *ali_name[]= { NULp, "ali_5s", "ali_dna", "ali_pro", };
1867 
1868  TEST_ANNOTATE(GBS_global_string("ali_name=%s", ali_name[at]));
1869  DIST_testenv env(db_name[at], ali_name[at]);
1870 
1871  DI_MATRIX matrix(env.aliview());
1872  MatrixOrder order(env.gbmain(), "tree_abc"); // no such tree!
1873  TEST_EXPECT_NO_ERROR(matrix.load(DI_LOAD_MARKED, order, true, NULp));
1874 
1875  // -------------------------------
1876  // detect_transformation
1877  DI_TRANSFORMATION detected_trans;
1878  {
1879  string msg;
1880 
1881  detected_trans = matrix.detect_transformation(msg);
1883  switch (at) {
1884  case GB_AT_RNA: expected = DI_TRANSFORMATION_NONE; break;
1885  case GB_AT_DNA: expected = DI_TRANSFORMATION_JUKES_CANTOR; break;
1886  case GB_AT_AA: expected = DI_TRANSFORMATION_PAM; break;
1887  case GB_AT_UNKNOWN: di_assert(0); break;
1888  }
1889  TEST_EXPECT_EQUAL(detected_trans, expected);
1890  }
1891 
1892  // ------------------------------
1893  // calculate the matrix
1894 
1895  // @@@ does not test user-defined transformation-matrix!
1896  if (at == GB_AT_AA) {
1897  matrix.calculate_pro(detected_trans, NULp);
1898  }
1899  else {
1900  if (at == GB_AT_RNA) detected_trans = DI_TRANSFORMATION_FELSENSTEIN; // force calculate_overall_freqs
1901  matrix.calculate("", detected_trans, NULp, NULp);
1902  }
1903 
1904  // -----------------------------------
1905  // save in available formats
1906 
1907  for (DI_SAVE_TYPE saveType = DI_SAVE_PHYLIP_COMP; saveType<=DI_SAVE_TABBED; saveType = DI_SAVE_TYPE(saveType+1)) {
1908  const char *savename = "distance/matrix.out";
1909  matrix.save(savename, saveType);
1910 
1911  const char *suffixAT[] = { NULp, "rna", "dna", "pro" };
1912  const char *suffixST[] = { "phylipComp", "readable", "tabbed" };
1913  char *expected = GBS_global_string_copy("distance/matrix.%s.%s.expected", suffixAT[at], suffixST[saveType]);
1914 
1915 // #define TEST_AUTO_UPDATE // uncomment to auto-update expected matrices
1916 
1917 #if defined(TEST_AUTO_UPDATE)
1918  TEST_COPY_FILE(savename, expected);
1919 #else
1920  TEST_EXPECT_TEXTFILES_EQUAL(savename, expected);
1921 #endif // TEST_AUTO_UPDATE
1923 
1924  free(expected);
1925  }
1926  }
1927 }
1928 
1929 #endif // UNIT_TESTS
1930 
1931 // --------------------------------------------------------------------------------
1932 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
#define AWAR_DIST_SAVE_MATRIX_FILENAME
Definition: DI_matr.cxx:59
void insert_option(AW_label choice_label, const char *mnemonic, const char *var_value, const char *name_of_color=NULp)
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
BoundWindowCallback(AW_window *aww_, const WindowCallback &cb_)
Definition: DI_matr.cxx:82
#define NONE
Definition: GDE_def.h:34
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
GB_TYPES type
#define AWAR_DIST_SAVE_MATRIX_TYPE
Definition: DI_matr.cxx:58
ColumnStat * get_column_stat()
Definition: gui_aliview.hxx:42
SizeAwareTree * get_consensus_tree(GB_ERROR &error)
Definition: CT_ctree.cxx:150
adfiltercbstruct * get_adfiltercbstruct()
Definition: gui_aliview.hxx:41
void button_length(int length)
Definition: AW_at.cxx:283
void force_update()
Definition: arb_progress.h:371
void sens_mask(AW_active mask)
Definition: AW_window.cxx:127
GBDATA * GBT_first_marked_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:109
static char * y[maxsp+1]
void matrix_needs_recalc_cb()
Definition: DI_matr.cxx:117
return string(buffer, length)
void insert_menu_topic(const char *id, const char *name, const char *mnemonic, const char *help_text_, AW_active mask, const WindowCallback &wcb)
Definition: AW_window.cxx:595
void show_warnings(const string &helpfile)
CONSTEXPR_INLINE unsigned char safeCharIndex(char c)
Definition: dupstr.h:73
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:720
#define AWAR_DIST_FILTER_ALIGNMENT
Definition: di_awars.hxx:23
void tree_needs_recalc_cb()
Definition: DI_matr.cxx:121
GB_ERROR GB_add_callback(GBDATA *gbd, GB_CB_TYPE type, const DatabaseCallback &dbcb)
Definition: ad_cb.cxx:356
GB_ERROR extract_from_tree(const char *treename, bool *aborted_flag)
Definition: DI_matr.cxx:837
void set(size_t i, size_t j, T val)
Definition: matrix.h:49
~DI_MATRIX()
Definition: DI_matr.cxx:339
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
bool has_type(DI_MATRIX_TYPE type) const
Definition: di_matr.hxx:215
#define AWAR_DIST_MATRIX_AUTO_CALC_TREE
Definition: DI_matr.cxx:56
AW_window * COLSTAT_create_selection_window(AW_root *aw_root, ColumnStat *column_stat)
Definition: ColumnStat.cxx:373
CONSTEXPR unsigned UPDATE_DELAY
Definition: DI_matr.cxx:110
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void at(int x, int y)
Definition: AW_at.cxx:93
char name[nmlngth+1]
DI_ENTRY(GBDATA *gbd, DI_MATRIX *phmatrix_)
Definition: DI_matr.cxx:276
void AW_create_standard_fileselection(AW_window *aws, const char *awar_prefix)
Definition: aw_file.hxx:30
#define ASSERT_RESULT(Type, Expected, Expr)
Definition: arb_assert.h:336
#define NO_TREE_SELECTED
GB_ERROR calculate_pro(DI_TRANSFORMATION transformation, bool *aborted_flag)
Definition: DI_matr.cxx:749
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
#define AWAR_DIST_WHICH_SPECIES
Definition: di_awars.hxx:20
void create_toggle(const char *awar_name)
Definition: AW_button.cxx:844
void enable_bootstrap()
Definition: AP_filter.cxx:206
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:791
long read_int() const
Definition: AW_awar.cxx:184
AW_awar * set_minmax(float min, float max)
Definition: AW_awar.cxx:530
AW_window * DI_create_save_matrix_window(AW_root *aw_root, save_matrix_params *save_params)
Definition: DI_matr.cxx:1118
#define AWAR_DIST_ALIGNMENT
Definition: di_awars.hxx:21
static void selected_tree_changed_cb()
Definition: DI_matr.cxx:190
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:202
static GB_ERROR last_matrix_calculation_error
Definition: DI_matr.cxx:92
CONSTEXPR_INLINE long sum_of_triangular_numbers(const long L)
Definition: triangular.h:30
GB_ERROR calculate(const char *cancel, DI_TRANSFORMATION transformation, bool *aborted_flag, AP_matrix *userdef_matrix)
Definition: DI_matr.cxx:544
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:716
#define AWAR_DIST_TREE_SORT_NAME
Definition: DI_matr.cxx:53
AW_DB_selection * awt_create_TREE_selection_list(GBDATA *gb_main, AW_window *aws, const char *varname)
AP_sequence_parsimony * get_nucl_seq()
Definition: di_matr.hxx:86
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
~DI_ENTRY()
Definition: DI_matr.cxx:310
void AW_insert_common_property_menu_entries(AW_window_menu_modes *awmm)
Definition: AW_preset.cxx:1445
void AWT_registerTreeAwarCallback(AW_awar *awar, const TreeAwarCallback &tacb, bool triggerIfTreeDataChanges)
void auto_subtitles(const char *prefix)
Definition: arb_progress.h:344
void set_changed_cb(DI_MATRIX_CB cb)
Definition: di_matr.hxx:203
void add_timed_callback(int ms, const TimedCallback &tcb)
Definition: AW_root.cxx:538
bool isNull() const
test if SmartPtr is NULp
Definition: smartptr.h:248
#define AWAR_DIST_FILTER_SIMPLIFY
Definition: di_awars.hxx:26
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
#define AWAR_DIST_TREE_COMP_NAME
Definition: DI_matr.cxx:54
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
Definition: trnsprob.h:20
AP_sequence * sequence
Definition: di_matr.hxx:84
static size_t get_load_count(LoadWhat what, GBDATA *gb_main, GBDATA **species_list)
Definition: DI_matr.cxx:390
char * unload()
Definition: DI_matr.cxx:330
void mark(UpdateFlag needed)
void create_input_fields(AW_window *aww)
Definition: AP_matrix.cxx:82
bool GBT_is_alignment_protein(GBDATA *gb_main, const char *alignment_name)
Definition: adali.cxx:767
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
void setNull()
set SmartPtr to NULp
Definition: smartptr.h:251
CONSTEXPR_INLINE long triangular_number(const long N)
Definition: triangular.h:18
void update_option_menu()
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
void add_update_cb()
Definition: DI_matr.cxx:113
GB_ERROR error_if_aborted()
Definition: arb_progress.h:323
void forget_if_not_has_type(DI_MATRIX_TYPE wanted_type)
Definition: di_matr.hxx:218
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:231
#define AWAR_DIST_MATRIX_DNA_ENABLED
Definition: di_awars.hxx:31
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:121
static int TreeOrderedSpecies_cmp(const void *p1, const void *p2, void *)
Definition: DI_matr.cxx:379
GB_ERROR GBT_write_tree_remark(GBDATA *gb_main, const char *tree_name, const char *remark)
Definition: adtree.cxx:487
static FullNameMap names
const char * read_char_pntr() const
Definition: AW_awar.cxx:168
GB_ERROR link_to_tree(NamedNodes &named, TreeNode *node)
Definition: DI_matr.cxx:772
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:341
static AW_root * SINGLETON
Definition: aw_root.hxx:102
STATIC_ASSERT(ARRAY_ELEMS(enum_trans_to_string)==DI_TRANSFORMATION_COUNT)
static void matrix_changed_cb()
Definition: DI_matr.cxx:94
void applyTo(struct TreeOrderedSpecies **gb_species_array, size_t array_size) const
Definition: DI_matr.cxx:386
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
__ATTR__USERESULT GB_ERROR insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress)
Definition: CT_ctree.cxx:145
static void di_calculate_full_matrix_cb(AW_window *aww, const WeightedFilter *weighted_filter)
Definition: DI_matr.cxx:1528
#define AWAR_DIST_COLUMN_STAT_NAME
Definition: DI_matr.cxx:51
static SmartPtr< BoundWindowCallback > recalculate_tree_cb
Definition: DI_matr.cxx:90
GB_ERROR GBT_check_tree_name(const char *tree_name)
Definition: adtree.cxx:1039
bool isSet() const
test if SmartPtr is not NULp
Definition: smartptr.h:245
AW_awar * awar_float(const char *var_name, float default_value=0.0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:560
GBDATA * gb_species_data
Definition: adname.cxx:34
Generic smart pointer.
Definition: smartptr.h:149
AW_window * DI_create_cluster_detection_window(AW_root *aw_root, WeightedFilter *weightedFilter)
bool aborted()
Definition: arb_progress.h:335
DI_SAVE_TYPE
Definition: di_matr.hxx:93
DI_MATRIX * get()
Definition: di_matr.hxx:192
void AW_save_properties(AW_window *aw)
Definition: AW_preset.cxx:1452
void GB_clear_error()
Definition: arb_msg.cxx:353
AW_window * DI_create_matrix_window(AW_root *aw_root, GBDATA *gb_main)
Definition: DI_matr.cxx:1601
#define false
Definition: ureadseq.h:13
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
const AliView * get_aliview() const
Definition: di_matr.hxx:160
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, TreeNode *tree)
Definition: adtree.cxx:477
#define AWAR_DIST_MIN_DIST
Definition: di_matr.hxx:40
void create_menu(const char *name, const char *mnemonic, AW_active mask=AWM_ALL)
Definition: AW_window.cxx:472
void DI_create_matrix_variables(AW_root *aw_root, AW_default def, AW_default db)
Definition: DI_matr.cxx:196
AP_FLOAT get(int i, int j)
Definition: AP_matrix.hxx:47
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1310
TreeNode * neighbourjoining(const char *const *names, const AP_smatrix &smatrix, TreeRoot *troot, bool *aborted_flag)
Definition: NJ.cxx:204
void touch()
Definition: AW_awar.cxx:207
static MatrixDisplay * matrixDisplay
Definition: DI_matr.cxx:65
static __ATTR__USERESULT GB_ERROR di_calculate_matrix(AW_root *aw_root, const WeightedFilter *weighted_filter, bool bootstrap_flag, bool show_warnings, bool *aborted_flag)
Definition: DI_matr.cxx:883
static void error(const char *msg)
Definition: mkptypes.cxx:96
#define AWAR_DIST_MATRIX_AUTO_RECALC
Definition: DI_matr.cxx:49
#define AWAR_DIST_MATRIX_DNA_BASE
Definition: di_awars.hxx:30
#define AWAR_TREE_REFRESH
void AW_create_fileselection_awars(AW_root *awr, const char *awar_base, const char *directories, const char *filter, const char *file_name)
Definition: AW_file.cxx:72
static void di_calculate_tree_cb(AW_window *aww, WeightedFilter *weighted_filter, bool bootstrap_flag)
Definition: DI_matr.cxx:1190
Definition: di_matr.hxx:75
static AW_window * create_dna_matrix_window(AW_root *aw_root)
Definition: DI_matr.cxx:172
AW_window * AW_preset_window(AW_root *root)
Definition: AW_preset.cxx:1920
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
#define AWAR_TREE
void label(const char *label)
Definition: AW_window.cxx:102
void insert_help_topic(const char *labeli, const char *mnemonic, const char *helpText, AW_active mask, const WindowCallback &cb)
Definition: AW_window.cxx:569
static void di_define_sort_tree_name_cb(AW_window *aww)
Definition: DI_matr.cxx:1580
TreeNode * GBT_read_tree_and_size(GBDATA *gb_main, const char *tree_name, TreeRoot *troot, int *tree_size)
Definition: adtree.cxx:724
DI_MATRIX(const AliView &aliview)
Definition: DI_matr.cxx:316
di_cattype
Definition: di_protdist.hxx:26
void set_descriptions(int idx, const char *desc)
Definition: AP_matrix.hxx:72
#define AWAR_SPECIES_NAME
GBDATA * get_gb_main() const
Definition: GUI_aliview.cxx:87
AW_window * awt_create_select_filter_win(AW_root *aw_root, adfiltercbstruct *acbs)
Definition: AWT_filter.cxx:364
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
Definition: test_unit.h:1079
static TreeNode * findNode(TreeNode *node, const char *name)
Definition: DI_matr.cxx:794
#define AWAR_DIST_BOOTSTRAP_COUNT
Definition: DI_matr.cxx:47
void compressed_matrix_needs_recalc_cb()
Definition: DI_matr.cxx:126
static unsigned update_cb(AW_root *aw_root)
Definition: DI_matr.cxx:132
static SearchTree * tree[SEARCH_PATTERNS]
Definition: ED4_search.cxx:629
char * name
Definition: di_matr.hxx:89
static GB_ERROR init(NamedNodes &node, TreeNode *tree, const DI_ENTRY *const *const entries, size_t nentries)
Definition: DI_matr.cxx:805
char * read_string() const
Definition: AW_awar.cxx:198
static void di_define_compression_tree_name_cb(AW_window *aww)
Definition: DI_matr.cxx:1586
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
DI_MATRIX_TYPE matrix_type
Definition: di_matr.hxx:154
GB_ERROR GB_pop_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2524
Definition: arbdb.h:86
bool exists() const
Definition: di_matr.hxx:201
const AP_filter * get_filter() const
Definition: AliView.hxx:42
static const char * enum_trans_to_string[]
Definition: DI_matr.cxx:1168
#define AWAR_DIST_CANCEL_CHARS
Definition: DI_matr.cxx:48
static void * M
Definition: mem.cxx:18
void AW_refresh_fileselection(AW_root *awr, const char *awar_prefix)
Definition: AW_file.cxx:944
const WeightedFilter * weighted_filter
Definition: di_matr.hxx:230
MatrixOrder(GBDATA *gb_main, GB_CSTR sort_tree_name)
Definition: DI_matr.cxx:355
static void di_mark_by_distance(AW_window *aww, WeightedFilter *weighted_filter)
Definition: DI_matr.cxx:977
void create_autosize_button(const char *macro_name, AW_label label, const char *mnemonic=NULp, unsigned xtraSpace=1)
Definition: AW_button.cxx:421
GB_ERROR makedists(bool *aborted_flag)
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:372
#define AWAR_DIST_MAX_DIST
Definition: di_matr.hxx:41
float read_float() const
Definition: AW_awar.cxx:177
AW_window * DI_create_view_matrix_window(AW_root *awr, MatrixDisplay *disp, save_matrix_params *sparam)
static void auto_calc_changed_cb(AW_root *aw_root)
Definition: DI_matr.cxx:164
void unlink_awars_from_DB(GBDATA *gb_main)
Definition: AW_root.cxx:635
GB_alignment_type
Definition: arbdb_base.h:61
void insert_default_option(AW_label choice_label, const char *mnemonic, const char *var_value, const char *name_of_color=NULp)
AW_DB_selection * awt_create_ALI_selection_list(GBDATA *gb_main, AW_window *aws, const char *varname, const char *ali_type_match)
DI_GLOBAL_MATRIX GLOBAL_MATRIX
Definition: DI_matr.cxx:63
const char * awar_base
Definition: di_matr.hxx:229
AP_smatrix * matrix
Definition: di_matr.hxx:153
void create_input_field(const char *awar_name, int columns=0)
Definition: AW_button.cxx:857
#define AWAR_DIST_TREE_CURR_NAME
Definition: di_awars.hxx:28
bool is_leaf() const
Definition: TreeNode.h:171
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
DI_ENTRY ** entries
Definition: di_matr.hxx:151
TYPE * ARB_calloc(size_t nelem)
Definition: arb_mem.h:81
#define IF_ASSERTION_USED(x)
Definition: arb_assert.h:308
AliView * create_aliview(const char *aliname, GB_ERROR &error) const
Definition: GUI_aliview.cxx:66
static SmartPtr< BoundWindowCallback > recalculate_matrix_cb
Definition: DI_matr.cxx:89
void DI_create_cluster_awars(AW_root *aw_root, AW_default def, AW_default db)
GBT_LEN intree_distance_to(const TreeNode *other) const
Definition: TreeNode.h:248
GB_ERROR close(GB_ERROR error)
Definition: arbdbpp.cxx:32
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2773
GB_ERROR GBT_is_invalid(const TreeNode *tree)
Definition: adtree.cxx:834
LoadWhat whatToLoad()
Definition: DI_matr.cxx:878
#define AWAR_DIST_SAVE_MATRIX_BASE
Definition: di_matr.hxx:37
void update_from_awars(AW_root *awr)
Definition: AP_matrix.cxx:67
TreeOrderedSpecies(const MatrixOrder &order, GBDATA *gb_spec)
Definition: DI_matr.cxx:349
size_t nentries
Definition: di_matr.hxx:152
static AP_matrix * get_user_matrix()
Definition: DI_matr.cxx:68
#define __ATTR__USERESULT
Definition: attributes.h:58
char * name
Definition: TreeNode.h:134
CONSTEXPR_INLINE long matrix_halfsize(long entries, bool inclusive_diagonal)
Definition: AP_matrix.hxx:26
static void di_save_matrix_cb(AW_window *aww)
Definition: DI_matr.cxx:1104
AW_awar * map(const char *awarn)
Definition: AW_awar.cxx:521
bool ARB_in_expert_mode(AW_root *awr)
void ARB_realloc(TYPE *&tgt, size_t nelem)
Definition: arb_mem.h:43
void subtitle(const char *stitle)
Definition: arb_progress.h:321
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
GB_ERROR is_invalid() const
Definition: AP_filter.hxx:123
void GBT_message(GBDATA *gb_main, const char *msg)
Definition: adtools.cxx:238
static GB_ERROR di_recalc_matrix()
Definition: DI_matr.cxx:1076
#define di_assert(cond)
Definition: DI_clusters.cxx:31
void replaceBy(DI_MATRIX *new_global)
Definition: di_matr.hxx:199
void create_awars(AW_root *awr)
Definition: AP_matrix.cxx:47
void auto_increment(int dx, int dy)
Definition: AW_at.cxx:270
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
static void di_calculate_compressed_matrix_cb(AW_window *aww, WeightedFilter *weighted_filter)
Definition: DI_matr.cxx:1538
void aw_message(const char *msg)
Definition: AW_status.cxx:1144
AW_option_menu_struct * create_option_menu(const char *awar_name)
GB_ERROR GBT_log_to_named_trees_remark(GBDATA *gb_main, const char *tree_name, const char *log_entry, bool stamp)
Definition: adtree.cxx:510
void insert_macro_menu_entry(AW_window *awm, bool prepend_separator)
Definition: macro_gui.cxx:171
static AP_userdef_matrix userdef_DNA_matrix(AP_MAX, AWAR_DIST_MATRIX_DNA_BASE)
AW_root * get_root()
Definition: aw_window.hxx:350
DI_TRANSFORMATION
Definition: di_matr.hxx:43
void shutdown_macro_recording(AW_root *aw_root)
Definition: trackers.cxx:470
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:114
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
DI_TRANSFORMATION detect_transformation(std::string &msg)
Definition: distanalyse.cxx:17
const char * get_aliname() const
Definition: di_matr.hxx:159
#define __ATTR__NORETURN
Definition: attributes.h:56
GB_ERROR write_string(const char *aw_string)
#define AWAR_DIST_CORR_TRANS
Definition: di_matr.hxx:36
void lazy_load_sequence() const
Definition: AP_sequence.hxx:67
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:687
void sep______________()
Definition: AW_window.cxx:753
size_t size() const
Definition: matrix.h:66
T get(size_t i, size_t j) const
Definition: matrix.h:52
const char * save(const char *filename, enum DI_SAVE_TYPE type)
long GBT_get_species_count(GBDATA *gb_main)
Definition: aditem.cxx:207
#define AWAR_DIST_TREE_STD_NAME
Definition: DI_matr.cxx:55
char * AWT_get_combined_filter_name(AW_root *aw_root, GB_CSTR prefix)
Definition: AWT_filter.cxx:333
GB_transaction ta(gb_var)
static void di_view_matrix_cb(AW_window *aww, save_matrix_params *sparam)
Definition: DI_matr.cxx:1087
void callback(const WindowCallback &cb)
Definition: AW_window.cxx:133
void destroy(TreeNode *that)
Definition: TreeNode.h:560
GB_ERROR bind_to_species(GBDATA *gb_species)
Definition: AP_sequence.cxx:24
static RecalcNeeded need_recalc
Definition: DI_matr.cxx:108
GBDATA * gb_main
Definition: adname.cxx:33
size_t get_length() const
Definition: AliView.cxx:66
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
char * compress(TreeNode *tree)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
void hide_or_notify(const char *error)
Definition: AW_window.cxx:1832
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:460
bool defined() const
Definition: di_matr.hxx:125
#define TEST_EXPECT_TEXTFILES_EQUAL(fgot, fwant)
Definition: test_unit.h:1396
DI_MATRIX * swap(DI_MATRIX *other)
Definition: di_matr.hxx:208
std::map< const char *, TreeNode *, charpLess > NamedNodes
Definition: DI_matr.cxx:770
#define AWAR_DIST_FILTER_NAME
Definition: di_awars.hxx:24
#define CONSTEXPR
Definition: cxxforward.h:106
static Params P
Definition: arb_probe.cxx:81
#define AW_ROOT_DEFAULT
Definition: aw_base.hxx:106
GB_ERROR load(LoadWhat what, const MatrixOrder &order, bool show_warnings, GBDATA **species_list) __ATTR__USERESULT
Definition: DI_matr.cxx:411
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
GB_ERROR write_int(long aw_int)
#define AWAR_DIST_FILTER_FILTER
Definition: di_awars.hxx:25
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
void init(AW_root *root, const char *wid, const char *windowname)
Definition: AW_window.cxx:2832
void aw_message_if(GB_ERROR error)
Definition: aw_msg.hxx:21
bool is_AA
Definition: di_matr.hxx:150
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:193
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
static Score ** U
Definition: align.cxx:67
static AW_window * awt_create_select_cancel_window(AW_root *aw_root)
Definition: DI_matr.cxx:1153
static void di_autodetect_callback(AW_window *aww, GBDATA *gb_main)
Definition: DI_matr.cxx:1432
static __ATTR__NORETURN void di_exit(AW_window *aww, GBDATA *gb_main)
Definition: DI_matr.cxx:1517
LoadWhat
Definition: di_matr.hxx:99
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
static void di_define_save_tree_name_cb(AW_window *aww)
Definition: DI_matr.cxx:1593
void create_button(const char *macro_name, AW_label label, const char *mnemonic=NULp, const char *color=NULp)
Definition: AW_button.cxx:448
#define UNCOVERED()
Definition: arb_assert.h:380
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105