ARB
MG_preserves.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : MG_preserves.cxx //
4 // Purpose : find candidates for alignment preservation //
5 // //
6 // //
7 // Coded by Ralf Westram (coder@reallysoft.de) in July 2003 //
8 // Copyright Department of Microbiology (Technical University Munich) //
9 // //
10 // Visit our web site at: http://www.arb-home.de/ //
11 // //
12 // //
13 // ==================================================================== //
14 
15 #include "merge.hxx"
16 #include "MG_adapt_ali.hxx"
17 
18 #include <aw_awar.hxx>
19 #include <aw_root.hxx>
20 #include <aw_msg.hxx>
21 #include <aw_select.hxx>
22 #include <arb_progress.h>
23 #include <arb_global_defs.h>
24 #include <arbdbt.h>
25 
26 #include <set>
27 #include <string>
28 
29 using namespace std;
30 
31 // find species/SAIs to preserve alignment
32 
33 #define AWAR_REMAP_CANDIDATE AWAR_MERGE_TMP "remap_candidates"
34 #define AWAR_REMAP_ALIGNMENT AWAR_MERGE_TMP "remap_alignment"
35 #define AWAR_REMAP_SEL_REFERENCE AWAR_MERGE_TMP "remap_reference"
36 
37 struct preserve_para {
39  AW_selection_list *refCandidatesList; // reference candidates
41 };
42 
43 static void get_global_alignments(ConstStrArray& ali_names) {
44  // get all alignment names available in both databases
46  GBDATA *gb_presets = GBT_get_presets(GLOBAL_gb_dst);
47 
48  int i;
49  for (i = 0; ali_names[i]; ++i) {
50  GBDATA *gb_ali_name = GB_find_string(gb_presets, "alignment_name", ali_names[i], GB_IGNORE_CASE, SEARCH_GRANDCHILD);
51  if (!gb_ali_name) ali_names.remove(i--);
52  }
53 }
54 
55 static void init_alignments(preserve_para *para) {
56  // initialize the alignment selection list
57  ConstStrArray ali_names;
58  get_global_alignments(ali_names);
59  para->alignmentList->init_from_array(ali_names, "All", "All");
60 }
61 
62 static void clear_candidates(preserve_para *para) {
63  // clear the candidate list
64  AW_selection_list *candList = para->refCandidatesList;
65 
66  candList->clear();
68  candList->update();
69 }
70 
71 static long count_bases(const char *data, long len) {
72  long count = 0;
73  for (long i = 0; i<len; ++i) { // LOOP_VECTORIZED =0[<5] =8[<7] =2 (failed before 5.0; 8 loops reported with 5.x + 6.x; 2 loops since 7.x)
74  if (!GAP::is_std_gap(data[i])) {
75  ++count;
76  }
77  }
78  return count;
79 }
80 
81 // count real bases
82 // (gb_data should point to ali_xxx/data)
83 static long count_bases(GBDATA *gb_data) {
84  long count = 0;
85  switch (GB_read_type(gb_data)) {
86  case GB_STRING:
87  count = count_bases(GB_read_char_pntr(gb_data), GB_read_count(gb_data));
88  break;
89  case GB_BITS: {
90  char *bitstring = GB_read_as_string(gb_data);
91  long len = strlen(bitstring);
92  count = count_bases(bitstring, len);
93  free(bitstring);
94  break;
95  }
96  default:
97  GB_export_errorf("Data type %s is not supported", GB_get_type_name(gb_data));
98  break;
99  }
100  return count;
101 }
102 
103 // -------------------------
104 // class Candidate
105 
106 namespace preserve { // using namespace to avoid gdb-confusion with other local Candidate class
107  class Candidate {
108  string name; // species/SAI name
109  double score;
110  int found_alignments; // counts alignments with data in both databases
111  long base_count_diff;
112 
113  public:
114  Candidate(bool is_species, const char *name_, GBDATA *gb_src, GBDATA *gb_dst, const CharPtrArray& ali_names)
115  : name(is_species ? name_ : string("SAI:")+name_)
116  {
117  found_alignments = 0;
118  score = 0.0;
119  base_count_diff = 0;
120  bool valid = true;
121 
123 
124  for (int i = 0; valid && ali_names[i]; ++i) {
125  if (GBDATA *gb_src_data = GBT_find_sequence(gb_src, ali_names[i])) {
126  if (GBDATA *gb_dst_data = GBT_find_sequence(gb_dst, ali_names[i])) {
127  ++found_alignments;
128 
129  long src_bases = count_bases(gb_src_data);
130  long dst_bases = count_bases(gb_dst_data);
131  base_count_diff = labs(src_bases-dst_bases);
132 
133  if (GB_have_error()) valid = false;
134 
135  double factor;
136  if (base_count_diff>5) factor = 0.2;
137  else factor = (6-base_count_diff)*(1.0/6);
138 
139  long min_bases = src_bases<dst_bases ? src_bases : dst_bases;
140  score += min_bases*factor;
141  }
142  }
143  }
144 
145  if (!valid) found_alignments = 0;
146  }
148 
149  int has_alignments() const { return found_alignments; }
150  double get_score() const { return score; }
151 
152  const char *get_name() const { return name.c_str(); }
153  const char *get_entry() const {
154  return GBS_global_string("%24s %i %li %f", get_name(), found_alignments, base_count_diff, score);
155  }
156 
157  bool operator < (const Candidate& other) const { // sort highest score first
158  int ali_diff = found_alignments-other.found_alignments;
159  if (ali_diff) return ali_diff>0;
160  return score > other.score;
161  }
162  };
163 }
164 using namespace preserve;
165 
166 // use SmartPtr to memory-manage Candidate's
167 static bool operator < (const SmartPtr<Candidate>& c1, const SmartPtr<Candidate>& c2) {
168  return c1->operator<(*c2);
169 }
170 typedef set< SmartPtr<Candidate> > Candidates;
171 
172 static void find_species_candidates(Candidates& candidates, const CharPtrArray& ali_names) {
173  // collect names of all species in source database
175  long src_count = GBS_hash_elements(src_species);
176  arb_progress progress("Examining species", src_count);
177  bool aborted = false;
178 
179  // find existing species in destination database
180  for (GBDATA *gb_dst_species = GBT_first_species(GLOBAL_gb_dst);
181  gb_dst_species && !aborted;
182  gb_dst_species = GBT_next_species(gb_dst_species))
183  {
184  const char *dst_name = GBT_get_name_or_description(gb_dst_species);
185 
186  if (GBDATA *gb_src_species = (GBDATA*)GBS_read_hash(src_species, dst_name)) {
187  Candidate *cand = new Candidate(true, dst_name, gb_src_species, gb_dst_species, ali_names);
188 
189  if (cand->has_alignments() && cand->get_score()>0.0) {
190  candidates.insert(cand);
191  }
192  else {
193  if (GB_have_error()) {
194  aw_message(GBS_global_string("Invalid adaption candidate '%s' (%s)", dst_name, GB_await_error()));
195  }
196  delete cand;
197  }
198 
199  progress.inc();
200  aborted = progress.aborted();
201  }
202  }
203 
204  GBS_free_hash(src_species);
205  progress.done();
206 }
207 
208 static void find_SAI_candidates(Candidates& candidates, const CharPtrArray& ali_names) {
209  // add all candidate SAIs to 'candidates'
211  long src_count = GBS_hash_elements(src_SAIs);
212  arb_progress progress("Examining SAIs", src_count);
213 
214  // find existing SAIs in destination database
215  for (GBDATA *gb_dst_SAI = GBT_first_SAI(GLOBAL_gb_dst);
216  gb_dst_SAI;
217  gb_dst_SAI = GBT_next_SAI(gb_dst_SAI))
218  {
219  const char *dst_name = GBT_get_name_or_description(gb_dst_SAI);
220 
221  if (GBDATA *gb_src_SAI = (GBDATA*)GBS_read_hash(src_SAIs, dst_name)) {
222  Candidate *cand = new Candidate(false, dst_name, gb_src_SAI, gb_dst_SAI, ali_names);
223 
224  if (cand->has_alignments() && cand->get_score()>0.0) {
225  candidates.insert(cand);
226  }
227  else {
228  if (GB_have_error()) {
229  aw_message(GBS_global_string("Invalid adaption candidate 'SAI:%s' (%s)", dst_name, GB_await_error()));
230  }
231  delete cand;
232  }
233 
234  progress.inc();
235  }
236  }
237 
238  GBS_free_hash(src_SAIs);
239 }
240 
242  // FIND button (rebuild candidates list)
243 
246 
247  clear_candidates(para);
248 
249  const char *ali = aww->get_root()->awar(AWAR_REMAP_ALIGNMENT)->read_char_pntr();
250  Candidates candidates;
251 
252  arb_progress("Searching candidates");
253 
254  // add candidates
255  {
256  ConstStrArray ali_names;
257  if (0 == strcmp(ali, "All")) {
258  get_global_alignments(ali_names);
259  }
260  else {
261  ali_names.put(ali);
262  }
263  find_SAI_candidates(candidates, ali_names);
264  find_species_candidates(candidates, ali_names);
265  }
266 
267  int count = 0;
268  Candidates::iterator e = candidates.end();
269  AW_selection_list *refCandList = para->refCandidatesList;
270 
271  for (Candidates::iterator i = candidates.begin();
272  i != e && count<5000;
273  ++i, ++count)
274  {
275  string name = (*i)->get_name();
276  string shown = (*i)->get_entry();
277 
278  refCandList->insert(shown.c_str(), name.c_str());
279  }
280 
281  refCandList->update();
282 }
283 
284 
285 
286 static void read_references(ConstStrArray& refs, AW_root *aw_root) {
287  char *ref_string = aw_root->awar(AWAR_REMAP_SPECIES_LIST)->read_string();
288  GBT_splitNdestroy_string(refs, ref_string, " \n,;", SPLIT_DROPEMPTY);
289 }
290 static void write_references(AW_root *aw_root, const CharPtrArray& ref_array) {
291  char *ref_string = GBT_join_strings(ref_array, '\n');
292  aw_root->awar(AWAR_REMAP_SPECIES_LIST)->write_string(ref_string);
293  aw_root->awar(AWAR_REMAP_ENABLE)->write_int(ref_string[0] != 0);
294  free(ref_string);
295 }
296 static void select_reference(AW_root *aw_root, const char *ref_to_select) {
297  aw_root->awar(AWAR_REMAP_SEL_REFERENCE)->write_string(ref_to_select);
298 }
299 static char *get_selected_reference(AW_root *aw_root) {
300  return aw_root->awar(AWAR_REMAP_SEL_REFERENCE)->read_string();
301 }
302 
303 static void refresh_reference_list_cb(AW_root *aw_root, preserve_para *para) {
304  ConstStrArray refs;
305  read_references(refs, aw_root);
306  para->usedRefsList->init_from_array(refs, "", "");
307 }
308 
309 static void add_selected_cb(AW_window *aww, preserve_para *para) {
310  // ADD button (add currently selected candidate to references)
311  AW_root *aw_root = aww->get_root();
312  ConstStrArray refs;
313  read_references(refs, aw_root);
314 
315  char *candidate = aw_root->awar(AWAR_REMAP_CANDIDATE)->read_string();
316  char *selected = get_selected_reference(aw_root);
317  int cand_index = refs.index_of(candidate);
318  int sel_index = refs.index_of(selected);
319 
320  if (cand_index == -1) refs.put_before(sel_index+1, candidate);
321  else refs.move(cand_index, sel_index);
322 
323  write_references(aw_root, refs);
324  select_reference(aw_root, candidate);
325 
326  free(selected);
327  free(candidate);
328 
330 }
331 
332 static void clear_references_cb(AW_window *aww) {
333  // CLEAR button (clear references)
335 }
336 
337 static void del_reference_cb(AW_window *aww) {
338  AW_root *aw_root = aww->get_root();
339  ConstStrArray refs;
340  read_references(refs, aw_root);
341 
342  char *selected = get_selected_reference(aw_root);
343  int sel_index = refs.index_of(selected);
344 
345  if (sel_index >= 0) {
346  select_reference(aw_root, refs[sel_index+1]);
347  refs.safe_remove(sel_index);
348  write_references(aw_root, refs);
349  }
350 
351  free(selected);
352 }
353 
354 static void lower_reference_cb(AW_window *aww) {
355  AW_root *aw_root = aww->get_root();
356  ConstStrArray refs;
357  read_references(refs, aw_root);
358 
359  char *selected = get_selected_reference(aw_root);
360  int sel_index = refs.index_of(selected);
361 
362  if (sel_index >= 0) {
363  refs.move(sel_index, sel_index+1);
364  write_references(aw_root, refs);
365  }
366 
367  free(selected);
368 }
369 static void raise_reference_cb(AW_window *aww) {
370  AW_root *aw_root = aww->get_root();
371  ConstStrArray refs;
372  read_references(refs, aw_root);
373 
374  char *selected = get_selected_reference(aw_root);
375  int sel_index = refs.index_of(selected);
376 
377  if (sel_index > 0) {
378  refs.move(sel_index, sel_index-1);
379  write_references(aw_root, refs);
380  }
381 
382  free(selected);
383 }
384 
385 static void test_references_cb(AW_window *aww) {
386  char *reference_species_names = aww->get_root()->awar(AWAR_REMAP_SPECIES_LIST)->read_string();
389 
390  MG_remaps test_mapping(GLOBAL_gb_src, GLOBAL_gb_dst, true, reference_species_names); // will raise aw_message's in case of problems
391 
392  free(reference_species_names);
393 }
394 
395 static void init_preserve_awars(AW_root *aw_root) {
399 }
400 
402  // SELECT PRESERVES window
403  init_preserve_awars(aw_root);
404 
405  AW_window_simple *aws = new AW_window_simple;
406 
407  aws->init(aw_root, "SELECT_PRESERVES", "Select adaption candidates");
408  aws->load_xfig("merge/preserves.fig");
409 
410  aws->at("close");
411  aws->callback(AW_POPDOWN);
412  aws->create_button("CLOSE", "CLOSE", "C");
413 
414  aws->at("help");
415  aws->callback(makeHelpCallback("mg_preserve.hlp"));
416  aws->create_button("HELP", "HELP", "H");
417 
418  // ----------
419 
420  preserve_para *para = new preserve_para; // do not free (is passed to callback)
421 
422  aws->at("ali");
423  para->alignmentList = aws->create_selection_list(AWAR_REMAP_ALIGNMENT, 10, 30);
424 
425  // ----------
426 
427  aws->at("adapt");
428  aws->label("Adapt alignments");
429  aws->create_toggle(AWAR_REMAP_ENABLE);
430 
431  aws->at("reference");
432  para->usedRefsList = aws->create_selection_list(AWAR_REMAP_SEL_REFERENCE, 10, 30);
433 
434  aws->button_length(8);
435 
436  aws->at("clear");
437  aws->callback(clear_references_cb);
438  aws->create_button("CLEAR", "Clear", "C");
439 
440  aws->at("del");
441  aws->callback(del_reference_cb);
442  aws->create_button("DEL", "Del", "L");
443 
444  aws->at("up");
445  aws->callback(raise_reference_cb);
446  aws->create_button("UP", "Up", "U");
447 
448  aws->at("down");
449  aws->callback(lower_reference_cb);
450  aws->create_button("DOWN", "Down", "D");
451 
452  aws->at("test");
453  aws->callback(test_references_cb);
454  aws->create_button("TEST", "Test", "T");
455 
456  // ----------
457 
458  aws->at("find");
459  aws->callback(makeWindowCallback(calculate_preserves_cb, para));
460  aws->create_autosize_button("FIND", "Find candidates", "F", 1);
461 
462  aws->at("add");
463  aws->callback(makeWindowCallback(add_selected_cb, para));
464  aws->create_button("ADD", "Add", "A");
465 
466  aws->at("candidate");
467  para->refCandidatesList = aws->create_selection_list(AWAR_REMAP_CANDIDATE, 10, 30);
468 
469  {
472 
473  init_alignments(para);
474  clear_candidates(para);
475  }
476 
477  {
479  awar_list->add_callback(makeRootCallback(refresh_reference_list_cb, para));
480  awar_list->touch(); // trigger callback
481  }
482 
483  return aws;
484 }
485 
486 
487 
488 
GBDATA * GBT_first_SAI(GBDATA *gb_main)
Definition: aditem.cxx:162
static void calculate_preserves_cb(AW_window *aww, preserve_para *para)
void put(const char *elem)
Definition: arb_strarray.h:188
#define AWAR_REMAP_SEL_REFERENCE
const char * get_entry() const
return string(buffer, length)
#define AWAR_REMAP_ALIGNMENT
Definition: arbdb.h:69
AW_selection_list * usedRefsList
void insert_default(const char *displayed, const AW_scalar &value)
Definition: AW_select.cxx:385
static void lower_reference_cb(AW_window *aww)
#define AWAR_REMAP_SPECIES_LIST
Definition: merge.hxx:76
static void init_preserve_awars(AW_root *aw_root)
#define AWAR_REMAP_CANDIDATE
double get_score() const
const char * GB_get_type_name(GBDATA *gbd)
Definition: arbdb.cxx:72
void GBT_get_alignment_names(ConstStrArray &names, GBDATA *gbd)
Definition: adali.cxx:317
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1060
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
GB_HASH * GBT_create_SAI_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:44
bool GB_have_error()
Definition: arb_msg.cxx:338
STL namespace.
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
GB_HASH * GBT_create_species_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:36
#define DISPLAY_NONE
GBDATA * GLOBAL_gb_dst
Definition: MG_main.cxx:31
AW_selection_list * alignmentList
AW_selection_list * refCandidatesList
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:231
void insert(const char *displayed, const AW_scalar &value)
Definition: AW_select.cxx:380
const char * read_char_pntr() const
Definition: AW_awar.cxx:168
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
long GB_read_count(GBDATA *gbd)
Definition: arbdb.cxx:758
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
int index_of(const char *search_for) const
bool aborted()
Definition: arb_progress.h:335
static void find_species_candidates(Candidates &candidates, const CharPtrArray &ali_names)
#define mg_assert(bed)
Definition: merge.hxx:24
static void clear_references_cb(AW_window *aww)
static void init_alignments(preserve_para *para)
AW_window * MG_create_preserves_selection_window(AW_root *aw_root)
void touch()
Definition: AW_awar.cxx:207
set< SmartPtr< Candidate > > Candidates
void remove(int i)
static void add_selected_cb(AW_window *aww, preserve_para *para)
size_t GBS_hash_elements(const GB_HASH *hs)
Definition: adhash.cxx:570
static void raise_reference_cb(AW_window *aww)
Definition: align.cxx:35
void safe_remove(int i)
Definition: arb_strarray.h:103
char * read_string() const
Definition: AW_awar.cxx:198
CONSTEXPR_INLINE bool valid(SpeciesCreationMode m)
Definition: ed4_class.hxx:2248
void put_before(int insert_before, const char *elem)
Definition: arb_strarray.h:197
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:568
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:718
static void test_references_cb(AW_window *aww)
static void read_references(ConstStrArray &refs, AW_root *aw_root)
GBDATA * GLOBAL_gb_src
Definition: MG_main.cxx:30
static void clear_candidates(preserve_para *para)
static void find_SAI_candidates(Candidates &candidates, const CharPtrArray &ali_names)
Candidate(bool is_species, const char *name_, GBDATA *gb_src, GBDATA *gb_dst, const CharPtrArray &ali_names)
char * GBT_join_strings(const CharPtrArray &strings, char separator)
GBDATA * GBT_next_SAI(GBDATA *gb_sai)
Definition: aditem.cxx:166
GB_ERROR GB_export_errorf(const char *templat,...)
Definition: arb_msg.cxx:262
static void get_global_alignments(ConstStrArray &ali_names)
static char * get_selected_reference(AW_root *aw_root)
void move_selection(int offset)
Definition: AW_select.cxx:415
void move(int from, int to)
const char * get_name() const
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
static void write_references(AW_root *aw_root, const CharPtrArray &ref_array)
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
AW_root * get_root()
Definition: aw_window.hxx:359
static void del_reference_cb(AW_window *aww)
bool is_std_gap(const char c)
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NO_ALI_SELECTED
GB_ERROR write_string(const char *aw_string)
static long count_bases(const char *data, long len)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
void init_from_array(const CharPtrArray &entries, const char *default_displayed, const char *default_value)
Definition: AW_select.cxx:322
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:584
GBDATA * GBT_get_presets(GBDATA *gb_main)
Definition: adali.cxx:30
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
#define AWAR_REMAP_ENABLE
Definition: merge.hxx:77
static bool operator<(const SmartPtr< Candidate > &c1, const SmartPtr< Candidate > &c2)
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, SplitMode mode)
GB_ERROR write_int(long aw_int)
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
int has_alignments() const
static void refresh_reference_list_cb(AW_root *aw_root, preserve_para *para)
static void select_reference(AW_root *aw_root, const char *ref_to_select)
static struct SearchAwarList awar_list[SEARCH_PATTERNS]
Definition: ED4_search.cxx:601