ARB
ED4_dots.cxx
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : ED4_dots.cxx //
4 // Purpose : Insert dots where bases may be missing //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in December 2008 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ================================================================ //
11 
12 #include "ed4_dots.hxx"
13 #include "ed4_class.hxx"
14 
15 #include <awt_sel_boxes.hxx>
16 #include <aw_awar.hxx>
17 #include <aw_msg.hxx>
18 #include <aw_root.hxx>
19 #include <arbdbt.h>
20 #include <cctype>
21 #include <awt_config_manager.hxx>
22 
23 using namespace std;
24 
25 #define AWAR_DOT_BASE "dotmiss/"
26 #define AWAR_DOT_SAI AWAR_DOT_BASE "sainame" // selected SAI
27 #define AWAR_DOT_SAI_CHARS AWAR_DOT_BASE "chars" // handle columns where SAI contains one of these chars
28 #define AWAR_DOT_MARKED AWAR_DOT_BASE "marked" // handle marked only?
29 
31  size_t pos_count;
32  size_t *position; // contains 'pos_count' positions, where dots get inserted if sequence contains '-'
33 
35 
36  // statistics:
37  size_t dots_inserted;
38  size_t already_there;
40 };
41 
44 
45  if (base->is_sequence_info_terminal()) {
46  ED4_sequence_info_terminal *seq_term = base->to_sequence_info_terminal();
47  if (seq_term) {
48  GBDATA *gb_ali = seq_term->data();
49  if (gb_ali) {
50  GBDATA *gb_species = GB_get_grandfather(gb_ali);
51  bool marked = GB_read_flag(gb_species);
52  dot_insert_stat& stat = *statPtr;
53 
54  if (marked || !stat.marked_only) {
55  char *sequence = GB_read_string(gb_ali);
56 
57  if (!sequence) {
58  GB_ERROR err = GB_await_error();
59  error = GBS_global_string("No sequence found for '%s'\n(Reason: %s)",
60  GBT_get_name_or_description(gb_species), err);
61  }
62  else {
63  size_t length = GB_read_string_count(gb_ali);
64  size_t old_dots_inserted = stat.dots_inserted;
65 
66  for (size_t p = 0; p<stat.pos_count; p++) {
67  size_t pos = stat.position[p];
68 
69  if (pos<length) {
70  switch (sequence[pos]) {
71  case '-':
72  sequence[pos] = '.';
73  stat.dots_inserted++;
74  break;
75 
76  case '.':
77  stat.already_there++;
78  break;
79 
80  default:
81  break;
82  }
83  }
84  }
85 
86  if (stat.dots_inserted > old_dots_inserted) { // did sequence change?
87  error = GB_write_string(gb_ali, sequence);
88  }
89 
90  free(sequence);
91  stat.sequences_checked++;
92  }
93  }
94  }
95  }
96  }
97 
98  return error;
99 }
100 
101 static void dot_missing_bases(AW_window *aww) {
102  ED4_MostRecentWinContext context;
103 
104  ED4_cursor *cursor = &current_cursor();
105  ARB_ERROR error = NULp;
106 
107  if (!cursor->in_consensus_terminal()) {
108  error = "No consensus selected";
109  }
110  else {
111  AW_root *aw_root = aww->get_root();
112 
113  dot_insert_stat stat;
114  stat.dots_inserted = 0;
115  stat.already_there = 0;
116  stat.position = NULp;
117  stat.sequences_checked = 0;
118  stat.marked_only = aw_root->awar(AWAR_DOT_MARKED)->read_int();
119 
120  ED4_group_manager *group_manager = cursor->owner_of_cursor->get_parent(LEV_GROUP)->to_group_manager();
121  {
122  // build list of positions where consensus contains upper case characters:
123  char *consensus = group_manager->build_consensus_string();
124  for (int pass = 1; pass <= 2; pass++) {
125  stat.pos_count = 0;
126  for (int pos = 0; consensus[pos]; pos++) {
127  if (isupper(consensus[pos])) {
128  if (pass == 2) stat.position[stat.pos_count] = pos;
129  stat.pos_count++;
130  }
131  }
132 
133  if (pass == 1) ARB_alloc(stat.position, stat.pos_count);
134  }
135  free(consensus);
136  }
137 
139  if (!stat.pos_count) {
140  error = "No consensus column contains upper case characters";
141  }
142  else {
143  // if SAI is selected, reduce list of affected positions
144  char *sai = NULp;
145  size_t sai_len = -1;
146  {
147  GB_transaction ta(gb_main);
148  char *sai_name = aw_root->awar(AWAR_DOT_SAI)->read_string();
149 
150  if (sai_name && sai_name[0]) {
151  GBDATA *gb_sai = GBT_expect_SAI(gb_main, sai_name);
152  if (!gb_sai) error = GB_await_error();
153  else {
154  GBDATA *gb_ali = GBT_find_sequence(gb_sai, ED4_ROOT->get_alignment_name());
155  if (!gb_ali) {
156  error = GBS_global_string("SAI '%s' has no data in '%s'", sai_name, ED4_ROOT->get_alignment_name());
157  }
158  else {
159  sai = GB_read_string(gb_ali); // @@@ NOT_ALL_SAI_HAVE_DATA
160  sai_len = GB_read_string_count(gb_ali);
161  }
162  }
163  }
164  free(sai_name);
165  error = ta.close(error);
166  }
167 
168  if (sai) { // SAI is selected
169  if (!error) {
170  char *sai_chars = aw_root->awar(AWAR_DOT_SAI_CHARS)->read_string();
171  if (sai_chars[0] == 0) error = "No SAI characters given -> no column selectable";
172  else {
173  size_t k = 0;
174  size_t p;
175  for (p = 0; p<stat.pos_count && stat.position[p]<sai_len; p++) {
176  size_t pos = stat.position[p];
177  if (strchr(sai_chars, sai[pos])) { // SAI contains one of the 'sai_chars'
178  stat.position[k++] = pos; // use current position
179  }
180  }
181  stat.pos_count = k;
182 
183  if (!stat.pos_count) error = "SAI selects other columns than consensus. Nothing to do.";
184  }
185  free(sai_chars);
186  }
187  free(sai);
188  }
189  }
190 
191  if (!error) {
192  e4_assert(stat.pos_count);
193  GB_transaction ta(gb_main);
194  error = group_manager->route_down_hierarchy(makeED4_route_cb(dot_sequence_by_consensus, &stat));
195 
196  if (stat.sequences_checked == 0 && !error) {
197  error = GBS_global_string("Group contains no %ssequences", stat.marked_only ? "marked " : "");
198  }
199 
200  if (!error) {
201  const char *present = "";
202  if (stat.already_there) {
203  present = GBS_global_string("Dots already present: %zu ", stat.already_there);
204  }
205 
206  const char *changed = stat.dots_inserted
207  ? GBS_global_string("Gaps changed into dots: %zu", stat.dots_inserted)
208  : "No gaps were changed into dots.";
209 
210  aw_message(GBS_global_string("%s%s", present, changed));
211  }
212 
213  error = ta.close(error);
214  }
215  }
216 
217  aw_message_if(error);
218 }
219 
221  aw_root->awar_int(AWAR_DOT_MARKED, 0, aw_def);
222  aw_root->awar_string(AWAR_DOT_SAI, "", aw_def);
223  aw_root->awar_string(AWAR_DOT_SAI_CHARS, "", aw_def);
224 }
225 
227  { AWAR_DOT_MARKED, "marked" },
228  { AWAR_DOT_SAI, "sai" },
229  { AWAR_DOT_SAI_CHARS, "saichars" },
230 
231  { NULp, NULp }
232 };
233 
235  AW_root *aw_root = editor_window->get_root();
236  static AW_window_simple *aws = NULp;
237 
238  ED4_LocalWinContext uses(editor_window);
239 
240  if (!aws) {
241  aws = new AW_window_simple;
242 
243  aws->init(aw_root, "DOT_MISS_BASES", "Dot potentially missing bases");
244  aws->load_xfig("edit4/missbase.fig");
245 
246  aws->button_length(10);
247 
248  aws->at("close");
249  aws->callback(AW_POPDOWN);
250  aws->create_button("CLOSE", "CLOSE", "C");
251 
252  aws->at("help");
253  aws->callback(makeHelpCallback("missbase.hlp"));
254  aws->create_button("HELP", "HELP", "H");
255 
256  aws->at("marked");
257  aws->label("Marked species only");
258  aws->create_toggle(AWAR_DOT_MARKED);
259 
260  aws->at("SAI");
261  aws->button_length(30);
263 
264  aws->at("SAI_chars");
265  aws->label("contains one of");
266  aws->create_input_field(AWAR_DOT_SAI_CHARS, 20);
267 
268  aws->button_length(10);
269 
270  aws->at("cons_def");
271  aws->label("Change definition of");
273  aws->create_button("CONS_DEF", "Consensus", "C");
274 
275  aws->at("go");
276  aws->callback(dot_missing_bases);
277  aws->create_button("GO", "GO", "G");
278 
279  aws->at("config");
280  AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, "dotbases", dotbases_config_mapping);
281  }
282 
283  e4_assert(aws);
284 
285  aws->activate();
286 }
287 
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static AWT_config_mapping_def dotbases_config_mapping[]
Definition: ED4_dots.cxx:226
void AWT_insert_config_manager(AW_window *aww, AW_default default_file_, const char *id, const StoreConfigCallback &store_cb, const RestoreConfigCallback &load_or_reset_cb, const char *macro_id, const AWT_predefined_config *predef)
long read_int() const
Definition: AW_awar.cxx:184
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
ED4_root * ED4_ROOT
Definition: ED4_main.cxx:49
STL namespace.
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:184
ED4_terminal * owner_of_cursor
Definition: ed4_class.hxx:644
void awt_create_SAI_selection_button(GBDATA *gb_main, AW_window *aws, const char *varname, const SaiSelectionlistFilterCallback &fcb)
GBDATA * GB_get_grandfather(GBDATA *gbd)
Definition: arbdb.cxx:1728
#define e4_assert(bed)
Definition: ed4_class.hxx:14
void ED4_create_dot_missing_bases_awars(AW_root *aw_root, AW_default aw_def)
Definition: ED4_dots.cxx:220
char * build_consensus_string(PosRange range) const
Definition: ed4_class.hxx:1655
size_t GB_read_string_count(GBDATA *gbd)
Definition: arbdb.cxx:916
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
GBDATA * get_gb_main() const
Definition: ed4_class.hxx:1422
size_t dots_inserted
Definition: ED4_dots.cxx:37
bool in_consensus_terminal() const
Definition: ed4_class.hxx:1853
static ARB_ERROR dot_sequence_by_consensus(ED4_base *base, dot_insert_stat *statPtr)
Definition: ED4_dots.cxx:42
static void error(const char *msg)
Definition: mkptypes.cxx:96
#define AWAR_DOT_MARKED
Definition: ED4_dots.cxx:28
ARB_ERROR route_down_hierarchy(const ED4_route_cb &cb) FINAL_OVERRIDE
Definition: ED4_base.cxx:392
int GB_read_flag(GBDATA *gbd)
Definition: arbdb.cxx:2796
char * read_string() const
Definition: AW_awar.cxx:198
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
int is_sequence_info_terminal() const
Definition: ed4_class.hxx:1082
static void dot_missing_bases(AW_window *aww)
Definition: ED4_dots.cxx:101
void ED4_popup_dot_missing_bases_window(AW_window *editor_window)
Definition: ED4_dots.cxx:234
ED4_cursor & current_cursor()
Definition: ed4_class.hxx:1400
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
GB_ERROR close(GB_ERROR error)
Definition: arbdbpp.cxx:35
const char * get_alignment_name() const
Definition: ed4_class.hxx:1458
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:909
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
size_t already_there
Definition: ED4_dots.cxx:38
AW_root * get_root()
Definition: aw_window.hxx:359
#define NULp
Definition: cxxforward.h:116
#define AWAR_DOT_SAI_CHARS
Definition: ED4_dots.cxx:27
GB_transaction ta(gb_var)
size_t sequences_checked
Definition: ED4_dots.cxx:39
GBDATA * gb_main
Definition: adname.cxx:32
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
size_t * position
Definition: ED4_dots.cxx:32
size_t length
AW_window * ED4_create_consensus_definition_window(AW_root *root)
ED4_manager * get_parent(ED4_level lev) const
Definition: ed4_class.hxx:1821
#define AWAR_DOT_SAI
Definition: ED4_dots.cxx:26
size_t pos_count
Definition: ED4_dots.cxx:31
#define AW_ROOT_DEFAULT
Definition: aw_base.hxx:106
void aw_message_if(GB_ERROR error)
Definition: aw_msg.hxx:21