ARB
MG_checkfield.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : MG_checkfield.cxx //
4 // Purpose : Compare a species field between source and target database //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "merge.hxx"
12 
13 #include <item_sel_list.h>
14 
15 #include <aw_awar.hxx>
16 #include <aw_root.hxx>
17 #include <aw_msg.hxx>
18 #include <arb_progress.h>
19 
20 #include <arbdbt.h>
21 #include <arb_strbuf.h>
22 #include <arb_global_defs.h>
23 
24 #include <cctype>
25 
26 #define AWAR_CHECK AWAR_MERGE_TMP "chk/"
27 
28 #define AWAR_COMPARE_FIELD AWAR_CHECK "compare"
29 #define AWAR_REPORT_FIELD AWAR_CHECK "report"
30 #define AWAR_TOUPPER AWAR_CHECK "ToUpper"
31 #define AWAR_EXCLUDE AWAR_CHECK "exclude"
32 #define AWAR_CORRECT AWAR_CHECK "correct"
33 #define AWAR_ETAG AWAR_CHECK "tag"
34 
35 
36 static int gbs_cmp_strings(char *str1, char *str2, int *tab) { // returns 0 if strings are equal
37  char *s1, *s2;
38  int c1, c2;
39  s1 = str1;
40  s2 = str2;
41  int count = 10;
42  do {
43  do { c1 = *(s1++); } while (tab[c1] < 0);
44  do { c2 = *(s2++); } while (tab[c2] < 0);
45  if (tab[c1] != tab[c2]) { // difference found
46  return 1;
47  }
48  count --;
49  } while (count && c1 && c2);
50  return 0;
51 }
52 
53 
54 static char *MG_diff_strings(char *str1, char * &str2, char *exclude, bool ToUpper, bool correct,
55  char **res1, char **res2, bool& corrrected) {
56 
57  char buffer1[256];
58  char buffer2[256];
59  char *dest1 = buffer1;
60  char *dest2 = buffer2;
61  char *s1, *s2;
62  int c1, c2;
63  int count = 3;
64  int tab[256];
65  int i;
66 
67  s1 = str1;
68  s2 = str2;
69  *dest1 = 0;
70  *dest2 = 0;
71  tab[0] = 0;
72  char gapchar = '#';
73  if (strlen(exclude)) gapchar = exclude[0];
74  else exclude = NULp;
75 
76  for (i=1; i<256; i++) { // LOOP_VECTORIZED[!<6.0,!>=8.0]
77  tab[i] = i;
78  if (exclude && strchr(exclude, i)) {
79  tab[i] = -1;
80  continue;
81  }
82  if (ToUpper && i >= 'a' && i <= 'z') {
83  tab[i] = i-'a'+'A';
84  }
85  }
86 
87  do {
88  do { c1 = *(s1++); } while (tab[c1] < 0);
89  do { c2 = *(s2++); } while (tab[c2] < 0);
90  if (tab[c1] != tab[c2]) { // difference found
91  if (correct) {
92  // check substitution
93  {
94  int c = s2[-1];
95  s2[-1] = s1[-1];
96  if (toupper(c1) == toupper(c2) || !gbs_cmp_strings(s1, s2, &tab[0])) {
97  corrrected = true;
98  continue;
99  }
100  s2[-1] = c;
101  }
102 
103  // check insertion in s2
104  if (!gbs_cmp_strings(s1-1, s2, &tab[0])) {
105  s2[-1] = gapchar;
106  do { c2 = *(s2++); } while (tab[c2] < 0); // eat s2
107  corrrected = true;
108  continue;
109  }
110  // check deletion in s2
111  if (!gbs_cmp_strings(s1, s2-1, &tab[0])) {
112  int toins = c1;
113  char *toinspos = s2-1;
114  if (toinspos > str2) toinspos--;
115  if (tab[(unsigned char)toinspos[0]]> 0) { // real insertion
116  GBS_strstruct buf(strlen(str2+10));
117  int pos = s2-str2-1;
118 
119  buf.ncat(str2, pos);
120  buf.put(toins);
121  buf.cat(str2+pos);
122 
123  freeset(str2, buf.release());
124 
125  s2 = str2+pos+1;
126  corrrected = true;
127 
128  continue;
129  }
130  int side=1; // 0 = left 1= right
131  if (tab[(unsigned char)s1[0]]<0) side = 0;
132  if (! side) {
133  while (toinspos > str2 &&
134  tab[(unsigned char)toinspos[-1]] < 0) toinspos--;
135  }
136  toinspos[0] = toins;
137  corrrected = true;
138  do { c1 = *(s1++); } while (tab[c1] < 0); // eat s1
139  continue;
140  }
141 
142  // one correction rejected -> don't try further
143  corrrected = false;
144  correct = false;
145  }
146  if (count >= 0) {
147  sprintf(dest1, "%ti ", s1-str1-1);
148  sprintf(dest2, "%ti ", s2-str2-1);
149  dest1 += strlen(dest1);
150  dest2 += strlen(dest2);
151  }
152  count --;
153  }
154  } while (c1 && c2);
155 
156  if (c1 || c2) {
157  sprintf(dest1, "... %ti ", s1-str1-1);
158  sprintf(dest2, "... %ti ", s2-str2-1);
159  dest1 += strlen(dest1);
160  dest2 += strlen(dest2);
161  }
162  if (count<0) {
163  sprintf(dest1, "and %i more", 1-count);
164  sprintf(dest2, "and %i more", 1-count);
165  dest1 += strlen(dest1);
166  dest2 += strlen(dest2);
167  }
168  if (strlen(buffer1)) {
169  *res1 = ARB_strdup(buffer1);
170  *res2 = ARB_strdup(buffer2);
171  }
172  else {
173  *res1 = NULp;
174  *res2 = NULp;
175  }
176  return NULp;
177 }
178 
180  long queried = 0;
181  for (GBDATA *gb_spec = GBT_first_species(gb_main);
182  gb_spec;
183  gb_spec = GBT_next_species(gb_spec))
184  {
185  if (IS_QUERIED_SPECIES(gb_spec)) queried++;
186  }
187  return queried;
188 }
189 
190 static void mg_check_field_cb(AW_window *aww) {
191  AW_root *root = aww->get_root();
192  GB_ERROR error = NULp;
193  char *compareField = root->awar(AWAR_COMPARE_FIELD)->read_string();
194  char *exclude = root->awar(AWAR_EXCLUDE)->read_string();
195  bool ToUpper = root->awar(AWAR_TOUPPER)->read_int();
196  bool correct = root->awar(AWAR_CORRECT)->read_int();
197  char *tag = root->awar(AWAR_ETAG)->read_string();
198  int correctCount = 0;
199 
200  if (strcmp(compareField, NO_FIELD_SELECTED) == 0) {
201  error = "Please select a field to compare";
202  }
203 
204  if (!error) error = GB_begin_transaction(GLOBAL_gb_src);
205  if (!error) error = GB_begin_transaction(GLOBAL_gb_dst);
206 
207  const char *reportField = NULp;
208  if (!error) {
210  if (!reportField) {
211  error = GB_await_error();
212  }
213  else {
214  const char *otherdb_reportField = prepare_and_get_selected_itemfield(root, AWAR_REPORT_FIELD, GLOBAL_gb_dst, SPECIES_get_selector());
215  if (!otherdb_reportField) {
216  error = GB_await_error();
217  }
218  else {
219  mg_assert(strcmp(reportField, otherdb_reportField) == 0);
220  }
221  }
222  }
223 
224  if (!error) {
225  GBDATA *gb_src_species_data = GBT_get_species_data(GLOBAL_gb_src);
226  GBDATA *gb_dst_species_data = GBT_get_species_data(GLOBAL_gb_dst);
227 
228  GBDATA *gb_src_species;
229  GBDATA *gb_dst_species;
230 
231  // First step: count selected species
232  arb_progress progress("Checking fields", mg_count_queried(GLOBAL_gb_src));
233 
234  // Delete all 'report' fields in target database
235  for (gb_dst_species = GBT_first_species_rel_species_data(gb_dst_species_data);
236  gb_dst_species && !error;
237  gb_dst_species = GBT_next_species(gb_dst_species))
238  {
239  GBDATA *gbd = GB_search(gb_dst_species, reportField, GB_FIND);
240  if (gbd) error = GB_delete(gbd);
241  }
242 
243  bool seenQueried = false;
244  for (gb_src_species = GBT_first_species_rel_species_data(gb_src_species_data);
245  gb_src_species && !error;
246  gb_src_species = GBT_next_species(gb_src_species))
247  {
248  { // Delete all 'report' fields in source database
249  GBDATA *gbd = GB_search(gb_src_species, reportField, GB_FIND);
250  if (gbd) error = GB_delete(gbd);
251  }
252 
253  if (!error) {
254  if (IS_QUERIED_SPECIES(gb_src_species)) {
255  seenQueried = true;
256  const char *src_name = GBT_get_name_or_description(gb_src_species);
257  gb_dst_species = GB_find_string(gb_dst_species_data, "name", src_name, GB_IGNORE_CASE, SEARCH_GRANDCHILD);
258  if (!gb_dst_species) {
259  aw_message(GBS_global_string("WARNING: Species %s not found in target DB", src_name));
260  }
261  else {
262  gb_dst_species = GB_get_father(gb_dst_species);
263 
264  GBDATA *gb_src_field = GB_search(gb_src_species, compareField, GB_FIND);
265  GBDATA *gb_dst_field = GB_search(gb_dst_species, compareField, GB_FIND);
266 
267  char *src_val = gb_src_field ? GB_read_as_tagged_string(gb_src_field, tag) : NULp;
268  char *dst_val = gb_dst_field ? GB_read_as_tagged_string(gb_dst_field, tag) : NULp;
269 
270  if (src_val || dst_val) {
271  char *src_positions = NULp;
272  char *dst_positions = NULp;
273 
274  if (src_val && dst_val) {
275  bool corrected = false;
276  MG_diff_strings(src_val, dst_val, exclude, ToUpper, correct, &src_positions, &dst_positions, corrected);
277  if (corrected) {
278  error = GB_write_autoconv_string(gb_dst_field, dst_val);
279  if (!error) {
280  GB_write_flag(gb_dst_species, 1);
281  correctCount++;
282  }
283  }
284  }
285  else {
286  src_positions = GBS_global_string_copy("field missing in %s DB", src_val ? "other" : "this");
287  dst_positions = GBS_global_string_copy("field missing in %s DB", dst_val ? "other" : "this");
288  }
289 
290  if (src_positions && !error) {
291  error = GBT_write_string(gb_dst_species, reportField, dst_positions);
292  if (!error) error = GBT_write_string(gb_src_species, reportField, src_positions);
293  }
294 
295  free(dst_positions);
296  free(src_positions);
297  }
298 
299  free(dst_val);
300  free(src_val);
301  }
302  progress.inc_and_check_user_abort(error);
303  }
304  }
305  }
306 
307  if (!seenQueried && !error) {
308  error = "Empty hitlist in source database (nothing to do)";
309  }
310  }
311 
312  error = GB_end_transaction(GLOBAL_gb_src, error);
313  error = GB_end_transaction(GLOBAL_gb_dst, error);
314 
315  if (error) {
316  aw_message(error);
317  }
318  else if (correctCount) {
319  aw_message(GBS_global_string("Corrected field content of %i species\n"
320  "(species %s been marked in target database).",
321  correctCount, correctCount == 1 ? "has" : "have"));
322  }
323 
324  free(tag);
325  free(exclude);
326  free(compareField);
327 }
328 
329 
333  aw_root->awar_string(AWAR_EXCLUDE, ".-", AW_ROOT_DEFAULT);
334  aw_root->awar_string(AWAR_ETAG, "");
335  aw_root->awar_int(AWAR_TOUPPER);
336  aw_root->awar_int(AWAR_CORRECT);
337 
338  AW_window_simple *aws = new AW_window_simple;
339  aws->init(aw_root, "MERGE_COMPARE_FIELD", "COMPARE DATABASE FIELD");
340  aws->load_xfig("merge/seqcheck.fig");
341 
342  aws->callback(AW_POPDOWN);
343  aws->create_button("CLOSE", "CLOSE", "C");
344 
345  aws->at("help");
346  aws->callback(makeHelpCallback("checkfield.hlp"));
347  aws->create_button("HELP", "HELP", "H");
348 
349 
350  aws->at("exclude");
351  aws->create_input_field(AWAR_EXCLUDE);
352 
353  aws->at("toupper");
354  aws->create_toggle(AWAR_TOUPPER);
355 
356  aws->at("correct");
357  aws->create_toggle(AWAR_CORRECT);
358 
359  aws->at("tag");
360  aws->create_input_field(AWAR_ETAG, 6);
361 
364 
365  aws->at("go");
366  aws->highlight();
367  aws->callback(mg_check_field_cb);
368  aws->create_button("GO", "GO");
369 
370  return aws;
371 }
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
const char * GB_ERROR
Definition: arb_core.h:25
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
Definition: adlang1.cxx:914
long mg_count_queried(GBDATA *gb_main)
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
long read_int() const
Definition: AW_awar.cxx:184
char * GB_read_as_tagged_string(GBDATA *gbd, const char *tagi)
Definition: adstring.cxx:776
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
char * release()
Definition: arb_strbuf.h:129
void cat(const char *from)
Definition: arb_strbuf.h:199
#define AWAR_CORRECT
#define AWAR_ETAG
GBDATA * GB_get_father(GBDATA *gbd)
Definition: arbdb.cxx:1722
static int gbs_cmp_strings(char *str1, char *str2, int *tab)
GBDATA * GLOBAL_gb_dst
Definition: MG_main.cxx:32
#define NO_FIELD_SELECTED
#define AWAR_COMPARE_FIELD
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:121
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
#define AWAR_EXCLUDE
#define mg_assert(bed)
Definition: merge.hxx:24
CONSTEXPR long FIELD_FILTER_STRING_WRITEABLE
Definition: item_sel_list.h:42
static void error(const char *msg)
Definition: mkptypes.cxx:96
const char * prepare_and_get_selected_itemfield(AW_root *awr, const char *awar_name, GBDATA *gb_main, const ItemSelector &itemtype, FailIfField failIf)
char * read_string() const
Definition: AW_awar.cxx:198
#define AWAR_REPORT_FIELD
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
GBDATA * GLOBAL_gb_src
Definition: MG_main.cxx:31
static void mg_check_field_cb(AW_window *aww)
void ncat(const char *from, size_t count)
Definition: arb_strbuf.h:189
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2773
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:139
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
#define IS_QUERIED_SPECIES(gb_species)
Definition: merge.hxx:79
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
AW_window * create_mg_check_fields_window(AW_root *aw_root)
AW_root * get_root()
Definition: aw_window.hxx:359
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:116
GBDATA * gb_main
Definition: adname.cxx:32
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
#define AWAR_TOUPPER
static char * MG_diff_strings(char *str1, char *&str2, char *exclude, bool ToUpper, bool correct, char **res1, char **res2, bool &corrrected)
#define AW_ROOT_DEFAULT
Definition: aw_base.hxx:106
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
CONSTEXPR long FIELD_FILTER_STRING_READABLE
Definition: item_sel_list.h:47
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void put(char c)
Definition: arb_strbuf.h:174
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105
GB_ERROR GB_write_autoconv_string(GBDATA *gbd, const char *val)
Definition: arbdb.cxx:1479