ARB
MG_checkfield.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : MG_checkfield.cxx //
4 // Purpose : Compare (and optionally correct) a field between the two merge DBs //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "merge.hxx"
12 
13 #include <item_sel_list.h>
14 
15 #include <aw_awar.hxx>
16 #include <aw_root.hxx>
17 #include <aw_msg.hxx>
18 #include <arb_progress.h>
19 
20 #include <arbdbt.h>
21 #include <arb_strbuf.h>
22 #include <arb_global_defs.h>
23 
24 #include <cctype>
25 
// Awar names used by the field check; all of them live below AWAR_MERGE_TMP "chk/".
26 #define AWAR_CHECK AWAR_MERGE_TMP "chk/"
27 
// name of the field whose content is compared between both databases
28 #define AWAR_COMPARE_FIELD AWAR_CHECK "compare"
// name of the field into which the positions of differences are written
29 #define AWAR_REPORT_FIELD AWAR_CHECK "report"
// flag (read as int): treat 'a'..'z' like 'A'..'Z' while comparing
30 #define AWAR_TOUPPER AWAR_CHECK "ToUpper"
// characters that are skipped while comparing (the first one is also used as gap character)
31 #define AWAR_EXCLUDE AWAR_CHECK "exclude"
// flag (read as int): try to correct the content in the destination database
32 #define AWAR_CORRECT AWAR_CHECK "correct"
// tag handed to GB_read_as_tagged_string() when reading the compared field
33 #define AWAR_ETAG AWAR_CHECK "tag"
34 
35 
/* Compare the next (up to 10) significant characters of two strings.
 *
 * 'tab' is a 256-entry translation table indexed by character code:
 *   - a negative entry marks a character that is skipped,
 *   - tab[0] has to be non-negative, so that the terminating NUL stops skipping,
 *   - two characters are considered equal if their table entries are equal.
 *
 * Comparison stops after 10 significant characters or at the end of either string.
 *
 * Returns 0 if no difference was found, 1 otherwise.
 */
static int gbs_cmp_strings(char *str1, char *str2, int *tab) { // returns 0 if strings are equal
    // read through 'unsigned char': plain 'char' may be signed and bytes >= 0x80
    // would otherwise produce a negative (out-of-bounds) index into 'tab'
    const unsigned char *s1 = (const unsigned char *)str1;
    const unsigned char *s2 = (const unsigned char *)str2;

    int c1, c2;
    int count = 10;
    do {
        do { c1 = *(s1++); } while (tab[c1] < 0); // skip ignored characters
        do { c2 = *(s2++); } while (tab[c2] < 0);
        if (tab[c1] != tab[c2]) { // difference found
            return 1;
        }
        count--;
    } while (count && c1 && c2);
    return 0;
}
52 
53 
54 static char *MG_diff_strings(char *str1, char * &str2, char *exclude, bool ToUpper, bool correct,
55  char **res1, char **res2, bool& corrrected) {
56 
57  char buffer1[256];
58  char buffer2[256];
59  char *dest1 = buffer1;
60  char *dest2 = buffer2;
61  char *s1, *s2;
62  int c1, c2;
63  int count = 3;
64  int tab[256];
65  int i;
66 
67  s1 = str1;
68  s2 = str2;
69  *dest1 = 0;
70  *dest2 = 0;
71  tab[0] = 0;
72  char gapchar = '#';
73  if (strlen(exclude)) gapchar = exclude[0];
74  else exclude = NULp;
75 
76  for (i=1; i<256; i++) { // LOOP_VECTORIZED[!<6.0,!>=8.0]
77  tab[i] = i;
78  if (exclude && strchr(exclude, i)) {
79  tab[i] = -1;
80  continue;
81  }
82  if (ToUpper && i >= 'a' && i <= 'z') {
83  tab[i] = i-'a'+'A';
84  }
85  }
86 
87  do {
88  do { c1 = *(s1++); } while (tab[c1] < 0);
89  do { c2 = *(s2++); } while (tab[c2] < 0);
90  if (tab[c1] != tab[c2]) { // difference found
91  if (correct) {
92  // check substitution
93  {
94  int c = s2[-1];
95  s2[-1] = s1[-1];
96  if (toupper(c1) == toupper(c2) || !gbs_cmp_strings(s1, s2, &tab[0])) {
97  corrrected = true;
98  continue;
99  }
100  s2[-1] = c;
101  }
102 
103  // check insertion in s2
104  if (!gbs_cmp_strings(s1-1, s2, &tab[0])) {
105  s2[-1] = gapchar;
106  do { c2 = *(s2++); } while (tab[c2] < 0); // eat s2
107  corrrected = true;
108  continue;
109  }
110  // check deletion in s2
111  if (!gbs_cmp_strings(s1, s2-1, &tab[0])) {
112  int toins = c1;
113  char *toinspos = s2-1;
114  if (toinspos > str2) toinspos--;
115  if (tab[(unsigned char)toinspos[0]]> 0) { // real insertion
116  GBS_strstruct *str = GBS_stropen(strlen(str2+10));
117  int pos = s2-str2-1;
118  GBS_strncat(str, str2, pos);
119  GBS_chrcat(str, toins);
120  GBS_strcat(str, str2+pos);
121  delete str2;
122  str2 = GBS_strclose(str);
123  s2 = str2+pos+1;
124  corrrected = true;
125  continue;
126  }
127  int side=1; // 0 = left 1= right
128  if (tab[(unsigned char)s1[0]]<0) side = 0;
129  if (! side) {
130  while (toinspos > str2 &&
131  tab[(unsigned char)toinspos[-1]] < 0) toinspos--;
132  }
133  toinspos[0] = toins;
134  corrrected = true;
135  do { c1 = *(s1++); } while (tab[c1] < 0); // eat s1
136  continue;
137  }
138 
139  // one correction rejected -> don't try further
140  corrrected = false;
141  correct = false;
142  }
143  if (count >= 0) {
144  sprintf(dest1, "%ti ", s1-str1-1);
145  sprintf(dest2, "%ti ", s2-str2-1);
146  dest1 += strlen(dest1);
147  dest2 += strlen(dest2);
148  }
149  count --;
150  }
151  } while (c1 && c2);
152 
153  if (c1 || c2) {
154  sprintf(dest1, "... %ti ", s1-str1-1);
155  sprintf(dest2, "... %ti ", s2-str2-1);
156  dest1 += strlen(dest1);
157  dest2 += strlen(dest2);
158  }
159  if (count<0) {
160  sprintf(dest1, "and %i more", 1-count);
161  sprintf(dest2, "and %i more", 1-count);
162  dest1 += strlen(dest1);
163  dest2 += strlen(dest2);
164  }
165  if (strlen(buffer1)) {
166  *res1 = ARB_strdup(buffer1);
167  *res2 = ARB_strdup(buffer2);
168  }
169  else {
170  *res1 = NULp;
171  *res2 = NULp;
172  }
173  return NULp;
174 }
175 
177  long queried = 0;
178  for (GBDATA *gb_spec = GBT_first_species(gb_main);
179  gb_spec;
180  gb_spec = GBT_next_species(gb_spec))
181  {
182  if (IS_QUERIED_SPECIES(gb_spec)) queried++;
183  }
184  return queried;
185 }
186 
187 static void mg_check_field_cb(AW_window *aww) {
188  AW_root *root = aww->get_root();
189  GB_ERROR error = NULp;
190  char *compareField = root->awar(AWAR_COMPARE_FIELD)->read_string();
191  char *exclude = root->awar(AWAR_EXCLUDE)->read_string();
192  bool ToUpper = root->awar(AWAR_TOUPPER)->read_int();
193  bool correct = root->awar(AWAR_CORRECT)->read_int();
194  char *tag = root->awar(AWAR_ETAG)->read_string();
195  int correctCount = 0;
196 
197  if (strcmp(compareField, NO_FIELD_SELECTED) == 0) {
198  error = "Please select a field to compare";
199  }
200 
201  if (!error) error = GB_begin_transaction(GLOBAL_gb_src);
202  if (!error) error = GB_begin_transaction(GLOBAL_gb_dst);
203 
204  const char *reportField = NULp;
205  if (!error) {
207  if (!reportField) {
208  error = GB_await_error();
209  }
210  else {
211  const char *otherdb_reportField = prepare_and_get_selected_itemfield(root, AWAR_REPORT_FIELD, GLOBAL_gb_dst, SPECIES_get_selector());
212  if (!otherdb_reportField) {
213  error = GB_await_error();
214  }
215  else {
216  mg_assert(strcmp(reportField, otherdb_reportField) == 0);
217  }
218  }
219  }
220 
221  if (!error) {
222  GBDATA *gb_src_species_data = GBT_get_species_data(GLOBAL_gb_src);
223  GBDATA *gb_dst_species_data = GBT_get_species_data(GLOBAL_gb_dst);
224 
225  GBDATA *gb_src_species;
226  GBDATA *gb_dst_species;
227 
228  // First step: count selected species
229  arb_progress progress("Checking fields", mg_count_queried(GLOBAL_gb_src));
230 
231  // Delete all 'report' fields in target database
232  for (gb_dst_species = GBT_first_species_rel_species_data(gb_dst_species_data);
233  gb_dst_species && !error;
234  gb_dst_species = GBT_next_species(gb_dst_species))
235  {
236  GBDATA *gbd = GB_search(gb_dst_species, reportField, GB_FIND);
237  if (gbd) error = GB_delete(gbd);
238  }
239 
240  bool seenQueried = false;
241  for (gb_src_species = GBT_first_species_rel_species_data(gb_src_species_data);
242  gb_src_species && !error;
243  gb_src_species = GBT_next_species(gb_src_species))
244  {
245  { // Delete all 'report' fields in source database
246  GBDATA *gbd = GB_search(gb_src_species, reportField, GB_FIND);
247  if (gbd) error = GB_delete(gbd);
248  }
249 
250  if (!error) {
251  if (IS_QUERIED_SPECIES(gb_src_species)) {
252  seenQueried = true;
253  const char *src_name = GBT_get_name_or_description(gb_src_species);
254  gb_dst_species = GB_find_string(gb_dst_species_data, "name", src_name, GB_IGNORE_CASE, SEARCH_GRANDCHILD);
255  if (!gb_dst_species) {
256  aw_message(GBS_global_string("WARNING: Species %s not found in target DB", src_name));
257  }
258  else {
259  gb_dst_species = GB_get_father(gb_dst_species);
260 
261  GBDATA *gb_src_field = GB_search(gb_src_species, compareField, GB_FIND);
262  GBDATA *gb_dst_field = GB_search(gb_dst_species, compareField, GB_FIND);
263 
264  char *src_val = gb_src_field ? GB_read_as_tagged_string(gb_src_field, tag) : NULp;
265  char *dst_val = gb_dst_field ? GB_read_as_tagged_string(gb_dst_field, tag) : NULp;
266 
267  if (src_val || dst_val) {
268  char *src_positions = NULp;
269  char *dst_positions = NULp;
270 
271  if (src_val && dst_val) {
272  bool corrected = false;
273  MG_diff_strings(src_val, dst_val, exclude, ToUpper, correct, &src_positions, &dst_positions, corrected);
274  if (corrected) {
275  error = GB_write_autoconv_string(gb_dst_field, dst_val);
276  if (!error) {
277  GB_write_flag(gb_dst_species, 1);
278  correctCount++;
279  }
280  }
281  }
282  else {
283  src_positions = GBS_global_string_copy("field missing in %s DB", src_val ? "other" : "this");
284  dst_positions = GBS_global_string_copy("field missing in %s DB", dst_val ? "other" : "this");
285  }
286 
287  if (src_positions && !error) {
288  error = GBT_write_string(gb_dst_species, reportField, dst_positions);
289  if (!error) error = GBT_write_string(gb_src_species, reportField, src_positions);
290  }
291 
292  free(dst_positions);
293  free(src_positions);
294  }
295 
296  free(dst_val);
297  free(src_val);
298  }
299  progress.inc_and_check_user_abort(error);
300  }
301  }
302  }
303 
304  if (!seenQueried && !error) {
305  error = "Empty hitlist in source database (nothing to do)";
306  }
307  }
308 
309  error = GB_end_transaction(GLOBAL_gb_src, error);
310  error = GB_end_transaction(GLOBAL_gb_dst, error);
311 
312  if (error) {
313  aw_message(error);
314  }
315  else if (correctCount) {
316  aw_message(GBS_global_string("Corrected field content of %i species\n"
317  "(species %s been marked in target database).",
318  correctCount, correctCount == 1 ? "has" : "have"));
319  }
320 
321  free(tag);
322  free(exclude);
323  free(compareField);
324 }
325 
326 
// Body of the window factory for the field check (the index at the end of this listing names it
// 'AW_window *create_mg_check_fields_window(AW_root *aw_root)').
// NOTE(review): the function header and the statements of listing lines 327-329 and 359-360
// are not contained in this listing -- restore them from the original file before compiling.

// create the awars used by the window (defaults: exclude ".-", empty tag)
330  aw_root->awar_string(AWAR_EXCLUDE, ".-", AW_ROOT_DEFAULT);
331  aw_root->awar_string(AWAR_ETAG, "");
332  aw_root->awar_int(AWAR_TOUPPER);
333  aw_root->awar_int(AWAR_CORRECT);
334 
// window layout is loaded from the xfig file; aws->at() selects a named position in it
335  AW_window_simple *aws = new AW_window_simple;
336  aws->init(aw_root, "MERGE_COMPARE_FIELD", "COMPARE DATABASE FIELD");
337  aws->load_xfig("merge/seqcheck.fig");
338 
// NOTE(review): no aws->at() call precedes the CLOSE button in this listing -- confirm placement
339  aws->callback(AW_POPDOWN);
340  aws->create_button("CLOSE", "CLOSE", "C");
341 
342  aws->at("help");
343  aws->callback(makeHelpCallback("checkfield.hlp"));
344  aws->create_button("HELP", "HELP", "H");
345 
346 
// input elements bound to the option awars read by mg_check_field_cb()
347  aws->at("exclude");
348  aws->create_input_field(AWAR_EXCLUDE);
349 
350  aws->at("toupper");
351  aws->create_toggle(AWAR_TOUPPER);
352 
353  aws->at("correct");
354  aws->create_toggle(AWAR_CORRECT);
355 
356  aws->at("tag");
357  aws->create_input_field(AWAR_ETAG, 6);
358 
361 
// the GO button starts the comparison
362  aws->at("go");
363  aws->highlight();
364  aws->callback(mg_check_field_cb);
365  aws->create_button("GO", "GO");
366 
367  return aws;
368 }
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2516
const char * GB_ERROR
Definition: arb_core.h:25
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
Definition: adlang1.cxx:913
long mg_count_queried(GBDATA *gb_main)
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2549
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
long read_int() const
Definition: AW_awar.cxx:187
char * GB_read_as_tagged_string(GBDATA *gbd, const char *tagi)
Definition: adstring.cxx:779
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void GBS_strncat(GBS_strstruct *strstr, const char *ptr, size_t len)
Definition: arb_strbuf.cxx:101
#define AWAR_CORRECT
#define AWAR_ETAG
GBDATA * GB_get_father(GBDATA *gbd)
Definition: arbdb.cxx:1720
static int gbs_cmp_strings(char *str1, char *str2, int *tab)
GBDATA * GLOBAL_gb_dst
Definition: MG_main.cxx:32
#define NO_FIELD_SELECTED
#define AWAR_COMPARE_FIELD
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1904
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:121
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
#define AWAR_EXCLUDE
#define mg_assert(bed)
Definition: merge.hxx:24
CONSTEXPR long FIELD_FILTER_STRING_WRITEABLE
Definition: item_sel_list.h:42
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
Definition: arb_strbuf.cxx:108
static void error(const char *msg)
Definition: mkptypes.cxx:96
const char * prepare_and_get_selected_itemfield(AW_root *awr, const char *awar_name, GBDATA *gb_main, const ItemSelector &itemtype, FailIfField failIf)
char * read_string() const
Definition: AW_awar.cxx:201
#define AWAR_REPORT_FIELD
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
GBDATA * GLOBAL_gb_src
Definition: MG_main.cxx:31
static void mg_check_field_cb(AW_window *aww)
void GBS_chrcat(GBS_strstruct *strstr, char ch)
Definition: arb_strbuf.cxx:119
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2761
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:139
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
#define IS_QUERIED_SPECIES(gb_species)
Definition: merge.hxx:79
void aw_message(const char *msg)
Definition: AW_status.cxx:932
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
AW_window * create_mg_check_fields_window(AW_root *aw_root)
AW_root * get_root()
Definition: aw_window.hxx:348
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:97
GBDATA * gb_main
Definition: adname.cxx:33
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:441
#define AWAR_TOUPPER
static char * MG_diff_strings(char *str1, char *&str2, char *exclude, bool ToUpper, bool correct, char **res1, char **res2, bool &corrrected)
#define AW_ROOT_DEFAULT
Definition: aw_base.hxx:106
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
CONSTEXPR long FIELD_FILTER_STRING_READABLE
Definition: item_sel_list.h:47
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105
GB_ERROR GB_write_autoconv_string(GBDATA *gbd, const char *val)
Definition: arbdb.cxx:1477