ARB
MG_preserves.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : MG_preserves.cxx //
4 // Purpose : find candidates for alignment preservation //
5 // //
6 // //
7 // Coded by Ralf Westram (coder@reallysoft.de) in July 2003 //
8 // Copyright Department of Microbiology (Technical University Munich) //
9 // //
10 // Visit our web site at: http://www.arb-home.de/ //
11 // //
12 // //
13 // ==================================================================== //
14 
15 #include "merge.hxx"
16 #include "MG_adapt_ali.hxx"
17 
18 #include <aw_awar.hxx>
19 #include <aw_root.hxx>
20 #include <aw_msg.hxx>
21 #include <aw_select.hxx>
22 #include <arb_progress.h>
23 #include <arb_global_defs.h>
24 #include <arbdbt.h>
25 
26 #include <set>
27 #include <string>
28 
29 using namespace std;
30 
31 // find species/SAIs to preserve alignment
32 
33 #define AWAR_REMAP_CANDIDATE AWAR_MERGE_TMP "remap_candidates"
34 #define AWAR_REMAP_ALIGNMENT AWAR_MERGE_TMP "remap_alignment"
35 #define AWAR_REMAP_SEL_REFERENCE AWAR_MERGE_TMP "remap_reference"
36 
37 struct preserve_para {
39  AW_selection_list *refCandidatesList; // reference candidates
41 };
42 
43 static void get_global_alignments(ConstStrArray& ali_names) {
44  // get all alignment names available in both databases
46  GBDATA *gb_presets = GBT_get_presets(GLOBAL_gb_dst);
47 
48  int i;
49  for (i = 0; ali_names[i]; ++i) {
50  GBDATA *gb_ali_name = GB_find_string(gb_presets, "alignment_name", ali_names[i], GB_IGNORE_CASE, SEARCH_GRANDCHILD);
51  if (!gb_ali_name) ali_names.remove(i--);
52  }
53 }
54 
55 static void init_alignments(preserve_para *para) {
56  // initialize the alignment selection list
57  ConstStrArray ali_names;
58  get_global_alignments(ali_names);
59  para->alignmentList->init_from_array(ali_names, "All", "All");
60 }
61 
62 static void clear_candidates(preserve_para *para) {
63  // clear the candidate list
64  AW_selection_list *candList = para->refCandidatesList;
65 
66  candList->clear();
68  candList->update();
69 }
70 
71 static long count_bases(const char *data, long len) {
72  long count = 0;
73  for (long i = 0; i<len; ++i) { // LOOP_VECTORIZED=* [!<5.0] (8 loops reported with gcc5+6; 2 loops with gcc7)
74  if (!GAP::is_std_gap(data[i])) {
75  ++count;
76  }
77  }
78  return count;
79 }
80 
81 // count real bases
82 // (gb_data should point to ali_xxx/data)
83 static long count_bases(GBDATA *gb_data) {
84  long count = 0;
85  switch (GB_read_type(gb_data)) {
86  case GB_STRING:
87  count = count_bases(GB_read_char_pntr(gb_data), GB_read_count(gb_data));
88  break;
89  case GB_BITS: {
90  char *bitstring = GB_read_as_string(gb_data);
91  long len = strlen(bitstring);
92  count = count_bases(bitstring, len);
93  free(bitstring);
94  break;
95  }
96  default:
97  GB_export_errorf("Data type %s is not supported", GB_get_type_name(gb_data));
98  break;
99  }
100  return count;
101 }
102 
103 // -------------------------
104 // class Candidate
105 
106 class Candidate {
107  string name; // species/SAI name
108  double score;
109  int found_alignments; // counts alignments with data in both databases
110  long base_count_diff;
111 
112 public:
113  Candidate(bool is_species, const char *name_, GBDATA *gb_src, GBDATA *gb_dst, const CharPtrArray& ali_names)
114  : name(is_species ? name_ : string("SAI:")+name_)
115  {
116  found_alignments = 0;
117  score = 0.0;
118  base_count_diff = 0;
119  bool valid = true;
120 
122 
123  for (int i = 0; valid && ali_names[i]; ++i) {
124  if (GBDATA *gb_src_data = GBT_find_sequence(gb_src, ali_names[i])) {
125  if (GBDATA *gb_dst_data = GBT_find_sequence(gb_dst, ali_names[i])) {
126  ++found_alignments;
127 
128  long src_bases = count_bases(gb_src_data);
129  long dst_bases = count_bases(gb_dst_data);
130  base_count_diff = labs(src_bases-dst_bases);
131 
132  if (GB_have_error()) valid = false;
133 
134  double factor;
135  if (base_count_diff>5) factor = 0.2;
136  else factor = (6-base_count_diff)*(1.0/6);
137 
138  long min_bases = src_bases<dst_bases ? src_bases : dst_bases;
139  score += min_bases*factor;
140  }
141  }
142  }
143 
144  if (!valid) found_alignments = 0;
145  }
147 
148  int has_alignments() const { return found_alignments; }
149  double get_score() const { return score; }
150 
151  const char *get_name() const { return name.c_str(); }
152  const char *get_entry() const {
153  return GBS_global_string("%24s %i %li %f", get_name(), found_alignments, base_count_diff, score);
154  }
155 
156  bool operator < (const Candidate& other) const { // sort highest score first
157  int ali_diff = found_alignments-other.found_alignments;
158  if (ali_diff) return ali_diff>0;
159  return score > other.score;
160  }
161 };
162 
163 // use SmartPtr to memory-manage Candidate's
164 static bool operator < (const SmartPtr<Candidate>& c1, const SmartPtr<Candidate>& c2) {
165  return c1->operator<(*c2);
166 }
167 typedef set< SmartPtr<Candidate> > Candidates;
168 
169 static void find_species_candidates(Candidates& candidates, const CharPtrArray& ali_names) {
170  // collect names of all species in source database
172  long src_count = GBS_hash_elements(src_species);
173  arb_progress progress("Examining species", src_count);
174  bool aborted = false;
175 
176  // find existing species in destination database
177  for (GBDATA *gb_dst_species = GBT_first_species(GLOBAL_gb_dst);
178  gb_dst_species && !aborted;
179  gb_dst_species = GBT_next_species(gb_dst_species))
180  {
181  const char *dst_name = GBT_get_name_or_description(gb_dst_species);
182 
183  if (GBDATA *gb_src_species = (GBDATA*)GBS_read_hash(src_species, dst_name)) {
184  Candidate *cand = new Candidate(true, dst_name, gb_src_species, gb_dst_species, ali_names);
185 
186  if (cand->has_alignments() && cand->get_score()>0.0) {
187  candidates.insert(cand);
188  }
189  else {
190  if (GB_have_error()) {
191  aw_message(GBS_global_string("Invalid adaption candidate '%s' (%s)", dst_name, GB_await_error()));
192  }
193  delete cand;
194  }
195 
196  progress.inc();
197  aborted = progress.aborted();
198  }
199  }
200 
201  GBS_free_hash(src_species);
202  progress.done();
203 }
204 
205 static void find_SAI_candidates(Candidates& candidates, const CharPtrArray& ali_names) {
206  // add all candidate SAIs to 'candidates'
208  long src_count = GBS_hash_elements(src_SAIs);
209  arb_progress progress("Examining SAIs", src_count);
210 
211  // find existing SAIs in destination database
212  for (GBDATA *gb_dst_SAI = GBT_first_SAI(GLOBAL_gb_dst);
213  gb_dst_SAI;
214  gb_dst_SAI = GBT_next_SAI(gb_dst_SAI))
215  {
216  const char *dst_name = GBT_get_name_or_description(gb_dst_SAI);
217 
218  if (GBDATA *gb_src_SAI = (GBDATA*)GBS_read_hash(src_SAIs, dst_name)) {
219  Candidate *cand = new Candidate(false, dst_name, gb_src_SAI, gb_dst_SAI, ali_names);
220 
221  if (cand->has_alignments() && cand->get_score()>0.0) {
222  candidates.insert(cand);
223  }
224  else {
225  if (GB_have_error()) {
226  aw_message(GBS_global_string("Invalid adaption candidate 'SAI:%s' (%s)", dst_name, GB_await_error()));
227  }
228  delete cand;
229  }
230 
231  progress.inc();
232  }
233  }
234 
235  GBS_free_hash(src_SAIs);
236 }
237 
239  // FIND button (rebuild candidates list)
240 
243 
244  clear_candidates(para);
245 
246  const char *ali = aww->get_root()->awar(AWAR_REMAP_ALIGNMENT)->read_char_pntr();
247  Candidates candidates;
248 
249  arb_progress("Searching candidates");
250 
251  // add candidates
252  {
253  ConstStrArray ali_names;
254  if (0 == strcmp(ali, "All")) {
255  get_global_alignments(ali_names);
256  }
257  else {
258  ali_names.put(ali);
259  }
260  find_SAI_candidates(candidates, ali_names);
261  find_species_candidates(candidates, ali_names);
262  }
263 
264  int count = 0;
265  Candidates::iterator e = candidates.end();
266  AW_selection_list *refCandList = para->refCandidatesList;
267 
268  for (Candidates::iterator i = candidates.begin();
269  i != e && count<5000;
270  ++i, ++count)
271  {
272  string name = (*i)->get_name();
273  string shown = (*i)->get_entry();
274 
275  refCandList->insert(shown.c_str(), name.c_str());
276  }
277 
278  refCandList->update();
279 }
280 
281 
282 
283 static void read_references(ConstStrArray& refs, AW_root *aw_root) {
284  char *ref_string = aw_root->awar(AWAR_REMAP_SPECIES_LIST)->read_string();
285  GBT_splitNdestroy_string(refs, ref_string, " \n,;", true);
286 }
287 static void write_references(AW_root *aw_root, const CharPtrArray& ref_array) {
288  char *ref_string = GBT_join_strings(ref_array, '\n');
289  aw_root->awar(AWAR_REMAP_SPECIES_LIST)->write_string(ref_string);
290  aw_root->awar(AWAR_REMAP_ENABLE)->write_int(ref_string[0] != 0);
291  free(ref_string);
292 }
293 static void select_reference(AW_root *aw_root, const char *ref_to_select) {
294  aw_root->awar(AWAR_REMAP_SEL_REFERENCE)->write_string(ref_to_select);
295 }
296 static char *get_selected_reference(AW_root *aw_root) {
297  return aw_root->awar(AWAR_REMAP_SEL_REFERENCE)->read_string();
298 }
299 
300 static void refresh_reference_list_cb(AW_root *aw_root, preserve_para *para) {
301  ConstStrArray refs;
302  read_references(refs, aw_root);
303  para->usedRefsList->init_from_array(refs, "", "");
304 }
305 
306 static void add_selected_cb(AW_window *aww, preserve_para *para) {
307  // ADD button (add currently selected candidate to references)
308  AW_root *aw_root = aww->get_root();
309  ConstStrArray refs;
310  read_references(refs, aw_root);
311 
312  char *candidate = aw_root->awar(AWAR_REMAP_CANDIDATE)->read_string();
313  char *selected = get_selected_reference(aw_root);
314  int cand_index = refs.index_of(candidate);
315  int sel_index = refs.index_of(selected);
316 
317  if (cand_index == -1) refs.put_before(sel_index+1, candidate);
318  else refs.move(cand_index, sel_index);
319 
320  write_references(aw_root, refs);
321  select_reference(aw_root, candidate);
322 
323  free(selected);
324  free(candidate);
325 
327 }
328 
329 static void clear_references_cb(AW_window *aww) {
330  // CLEAR button (clear references)
332 }
333 
334 static void del_reference_cb(AW_window *aww) {
335  AW_root *aw_root = aww->get_root();
336  ConstStrArray refs;
337  read_references(refs, aw_root);
338 
339  char *selected = get_selected_reference(aw_root);
340  int sel_index = refs.index_of(selected);
341 
342  if (sel_index >= 0) {
343  select_reference(aw_root, refs[sel_index+1]);
344  refs.safe_remove(sel_index);
345  write_references(aw_root, refs);
346  }
347 
348  free(selected);
349 }
350 
351 static void lower_reference_cb(AW_window *aww) {
352  AW_root *aw_root = aww->get_root();
353  ConstStrArray refs;
354  read_references(refs, aw_root);
355 
356  char *selected = get_selected_reference(aw_root);
357  int sel_index = refs.index_of(selected);
358 
359  if (sel_index >= 0) {
360  refs.move(sel_index, sel_index+1);
361  write_references(aw_root, refs);
362  }
363 
364  free(selected);
365 }
366 static void raise_reference_cb(AW_window *aww) {
367  AW_root *aw_root = aww->get_root();
368  ConstStrArray refs;
369  read_references(refs, aw_root);
370 
371  char *selected = get_selected_reference(aw_root);
372  int sel_index = refs.index_of(selected);
373 
374  if (sel_index > 0) {
375  refs.move(sel_index, sel_index-1);
376  write_references(aw_root, refs);
377  }
378 
379  free(selected);
380 }
381 
382 static void test_references_cb(AW_window *aww) {
383  char *reference_species_names = aww->get_root()->awar(AWAR_REMAP_SPECIES_LIST)->read_string();
386 
387  MG_remaps test_mapping(GLOBAL_gb_src, GLOBAL_gb_dst, true, reference_species_names); // will raise aw_message's in case of problems
388 
389  free(reference_species_names);
390 }
391 
392 static void init_preserve_awars(AW_root *aw_root) {
396 }
397 
399  // SELECT PRESERVES window
400  init_preserve_awars(aw_root);
401 
402  AW_window_simple *aws = new AW_window_simple;
403 
404  aws->init(aw_root, "SELECT_PRESERVES", "Select adaption candidates");
405  aws->load_xfig("merge/preserves.fig");
406 
407  aws->at("close");
408  aws->callback(AW_POPDOWN);
409  aws->create_button("CLOSE", "CLOSE", "C");
410 
411  aws->at("help");
412  aws->callback(makeHelpCallback("mg_preserve.hlp"));
413  aws->create_button("HELP", "HELP", "H");
414 
415  // ----------
416 
417  preserve_para *para = new preserve_para; // do not free (is passed to callback)
418 
419  aws->at("ali");
420  para->alignmentList = aws->create_selection_list(AWAR_REMAP_ALIGNMENT, 10, 30, true);
421 
422  // ----------
423 
424  aws->at("adapt");
425  aws->label("Adapt alignments");
426  aws->create_toggle(AWAR_REMAP_ENABLE);
427 
428  aws->at("reference");
429  para->usedRefsList = aws->create_selection_list(AWAR_REMAP_SEL_REFERENCE, 10, 30, true);
430 
431  aws->button_length(8);
432 
433  aws->at("clear");
434  aws->callback(clear_references_cb);
435  aws->create_button("CLEAR", "Clear", "C");
436 
437  aws->at("del");
438  aws->callback(del_reference_cb);
439  aws->create_button("DEL", "Del", "L");
440 
441  aws->at("up");
442  aws->callback(raise_reference_cb);
443  aws->create_button("UP", "Up", "U");
444 
445  aws->at("down");
446  aws->callback(lower_reference_cb);
447  aws->create_button("DOWN", "Down", "D");
448 
449  aws->at("test");
450  aws->callback(test_references_cb);
451  aws->create_button("TEST", "Test", "T");
452 
453  // ----------
454 
455  aws->at("find");
456  aws->callback(makeWindowCallback(calculate_preserves_cb, para));
457  aws->create_autosize_button("FIND", "Find candidates", "F", 1);
458 
459  aws->at("add");
460  aws->callback(makeWindowCallback(add_selected_cb, para));
461  aws->create_button("ADD", "Add", "A");
462 
463  aws->at("candidate");
464  para->refCandidatesList = aws->create_selection_list(AWAR_REMAP_CANDIDATE, 10, 30, true);
465 
466  {
469 
470  init_alignments(para);
471  clear_candidates(para);
472  }
473 
474  {
476  awar_list->add_callback(makeRootCallback(refresh_reference_list_cb, para));
477  awar_list->touch(); // trigger callback
478  }
479 
480  return aws;
481 }
482 
483 
484 
485 
GBDATA * GBT_first_SAI(GBDATA *gb_main)
Definition: aditem.cxx:162
static void calculate_preserves_cb(AW_window *aww, preserve_para *para)
void put(const char *elem)
Definition: arb_strarray.h:199
#define AWAR_REMAP_SEL_REFERENCE
int has_alignments() const
return string(buffer, length)
#define AWAR_REMAP_ALIGNMENT
Definition: arbdb.h:69
AW_selection_list * usedRefsList
void insert_default(const char *displayed, const AW_scalar &value)
Definition: AW_select.cxx:385
static void lower_reference_cb(AW_window *aww)
#define AWAR_REMAP_SPECIES_LIST
Definition: merge.hxx:76
static void init_preserve_awars(AW_root *aw_root)
#define AWAR_REMAP_CANDIDATE
const char * GB_get_type_name(GBDATA *gbd)
Definition: arbdb.cxx:66
void GBT_get_alignment_names(ConstStrArray &names, GBDATA *gbd)
Definition: adali.cxx:316
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1054
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
GB_HASH * GBT_create_SAI_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:44
bool GB_have_error()
Definition: arb_msg.cxx:349
STL namespace.
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
GB_HASH * GBT_create_species_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:36
const char * get_entry() const
#define DISPLAY_NONE
GBDATA * GLOBAL_gb_dst
Definition: MG_main.cxx:32
AW_selection_list * alignmentList
AW_selection_list * refCandidatesList
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:234
void insert(const char *displayed, const AW_scalar &value)
Definition: AW_select.cxx:380
const char * read_char_pntr() const
Definition: AW_awar.cxx:171
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
long GB_read_count(GBDATA *gbd)
Definition: arbdb.cxx:752
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1641
int index_of(const char *search_for) const
bool aborted()
Definition: arb_progress.h:277
static void find_species_candidates(Candidates &candidates, const CharPtrArray &ali_names)
#define mg_assert(bed)
Definition: merge.hxx:24
static void clear_references_cb(AW_window *aww)
static void init_alignments(preserve_para *para)
AW_window * MG_create_preserves_selection_window(AW_root *aw_root)
void touch()
Definition: AW_awar.cxx:210
set< SmartPtr< Candidate > > Candidates
void remove(int i)
Definition: arb_strarray.h:103
double get_score() const
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, bool dropEmptyTokens)
static void add_selected_cb(AW_window *aww, preserve_para *para)
size_t GBS_hash_elements(const GB_HASH *hs)
Definition: adhash.cxx:573
static void raise_reference_cb(AW_window *aww)
Definition: align.cxx:35
void safe_remove(int i)
Definition: arb_strarray.h:114
char * read_string() const
Definition: AW_awar.cxx:201
CONSTEXPR_INLINE bool valid(SpeciesCreationMode m)
Definition: ed4_class.hxx:2246
void put_before(int insert_before, const char *elem)
Definition: arb_strarray.h:208
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:670
static void test_references_cb(AW_window *aww)
static void read_references(ConstStrArray &refs, AW_root *aw_root)
GBDATA * GLOBAL_gb_src
Definition: MG_main.cxx:31
static void clear_candidates(preserve_para *para)
static void find_SAI_candidates(Candidates &candidates, const CharPtrArray &ali_names)
char * GBT_join_strings(const CharPtrArray &strings, char separator)
GBDATA * GBT_next_SAI(GBDATA *gb_sai)
Definition: aditem.cxx:166
GB_ERROR GB_export_errorf(const char *templat,...)
Definition: arb_msg.cxx:264
static void get_global_alignments(ConstStrArray &ali_names)
static char * get_selected_reference(AW_root *aw_root)
void move_selection(int offset)
Definition: AW_select.cxx:415
void move(int from, int to)
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
void aw_message(const char *msg)
Definition: AW_status.cxx:932
static void write_references(AW_root *aw_root, const CharPtrArray &ref_array)
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
AW_root * get_root()
Definition: aw_window.hxx:348
static void del_reference_cb(AW_window *aww)
bool is_std_gap(const char c)
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NO_ALI_SELECTED
GB_ERROR write_string(const char *aw_string)
static long count_bases(const char *data, long len)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
void init_from_array(const CharPtrArray &entries, const char *default_displayed, const char *default_value)
Definition: AW_select.cxx:322
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
GBDATA * GBT_get_presets(GBDATA *gb_main)
Definition: adali.cxx:29
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:441
#define AWAR_REMAP_ENABLE
Definition: merge.hxx:77
static bool operator<(const SmartPtr< Candidate > &c1, const SmartPtr< Candidate > &c2)
const char * get_name() const
GB_ERROR write_int(long aw_int)
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
static void refresh_reference_list_cb(AW_root *aw_root, preserve_para *para)
static void select_reference(AW_root *aw_root, const char *ref_to_select)
static struct SearchAwarList awar_list[SEARCH_PATTERNS]
Definition: ED4_search.cxx:601
Candidate(bool is_species, const char *name_, GBDATA *gb_src, GBDATA *gb_dst, const CharPtrArray &ali_names)