ARB
adname.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adname.cxx //
4 // Purpose : species names //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "gb_local.h"
12 
13 #include <ad_config.h>
14 #include "TreeNode.h"
15 
16 #include <arb_progress.h>
17 #include <arb_strbuf.h>
18 #include <arb_strarray.h>
19 #include <arb_file.h>
20 #include <arb_diff.h>
21 
22 #include <cctype>
23 #include "ad_colorset.h"
24 
 // Value stored in NameSession.renamed_hash for every renamed species (the hash key is the old name).
 // Instances are over-allocated (strlen(newname)+sizeof(gbt_renamed)) so that 'data' can hold the
 // complete new name; do not change 'data[1]' without adjusting that size computation.
25 struct gbt_renamed {
 // index of the tree that last applied this rename (written by gbt_rename_tree_rek);
 // presumably 0 directly after allocation via ARB_calloc - TODO confirm
26  int used_by;
 // first byte of the NUL-terminated new species name (the string extends beyond the struct)
27  char data[1];
28 };
29 
30 static struct {
35  int all_flag;
36 } NameSession;
37 
38 #if defined(WARN_TODO)
39 #warning change all_flag into estimated number of renames ( == 0 shall mean all)
40 #endif
41 
 // NOTE(review): this listing does not show source lines 42, 48, 56 and 60 of this function.
 // Line 42 is the function header; according to the cross reference it reads
 // 'GB_ERROR GBT_begin_rename_session(GBDATA *gb_main, int all_flag)'.
 // Line 48 presumably declares 'error' (it is tested right below); lines 56 and 60 presumably
 // create NameSession.renamed_hash in the respective branch - TODO confirm against the real file.
43  /* Starts a rename session (to rename one or many species)
44  * all_flag == 1 -> rename all species in DB
45  * Call GBT_abort_rename_session() or GBT_commit_rename_session() to close the session.
46  */
47 
49  if (!error) {
50  NameSession.gb_main = gb_main;
51  NameSession.gb_species_data = GBT_get_species_data(gb_main);
52 
53  if (!all_flag) { // this is meant to be used for single or few species
 // NOTE(review): 'hash_size' is not used in any line visible here (probably used in missing line 56)
54  int hash_size = 128;
55 
 // without all_flag no species hash is kept; GBT_rename_species then searches species_data directly
57  NameSession.old_species_hash = NULp;
58  }
59  else {
 // with all_flag all species are hashed once, so GBT_rename_species can look them up by their old name
61  NameSession.old_species_hash = GBT_create_species_hash(gb_main);
62  }
63  NameSession.all_flag = all_flag;
64  }
65  return error;
66 }
67 
68 GB_ERROR GBT_rename_species(const char *oldname, const char *newname, bool ignore_protection) {
69  GBDATA *gb_species;
70  GBDATA *gb_name;
72 
73  if (strcmp(oldname, newname) == 0) return NULp;
74 
75 #if defined(DEBUG) && 1
76  if (isdigit(oldname[0])) {
77  printf("oldname='%s' newname='%s'\n", oldname, newname);
78  }
79 #endif
80 
81  if (NameSession.all_flag) {
82  gb_assert(NameSession.old_species_hash);
83  gb_species = (GBDATA *)GBS_read_hash(NameSession.old_species_hash, oldname);
84  }
85  else {
86  GBDATA *gb_found_species;
87 
88  gb_assert(!NameSession.old_species_hash);
89  gb_found_species = GBT_find_species_rel_species_data(NameSession.gb_species_data, newname);
90  gb_species = GBT_find_species_rel_species_data(NameSession.gb_species_data, oldname);
91 
92  if (gb_found_species && gb_species != gb_found_species) {
93  return GBS_global_string("A species named '%s' already exists.", newname);
94  }
95  }
96 
97  if (!gb_species) {
98  return GBS_global_string("Expected that a species named '%s' exists (maybe there are duplicate species, database might be corrupt)", oldname);
99  }
100 
101  gb_name = GB_entry(gb_species, "name");
102  {
103  SmartPtr<GB_topSecurityLevel> probablyUnsecured;
104  if (ignore_protection) probablyUnsecured = new GB_topSecurityLevel(NameSession.gb_main);
105  error = GB_write_string(gb_name, newname);
106  }
107 
108  if (!error) {
109  if (NameSession.old_species_hash) {
110  GBS_write_hash(NameSession.old_species_hash, oldname, 0);
111  }
112  gbt_renamed *rns = (gbt_renamed*)ARB_calloc<char>(strlen(newname)+sizeof(gbt_renamed));
113  strcpy(&rns->data[0], newname);
114  GBS_write_hash(NameSession.renamed_hash, oldname, (long)rns);
115  }
116  return error;
117 }
118 
120  if (NameSession.renamed_hash) {
121  GBS_free_hash(NameSession.renamed_hash);
122  NameSession.renamed_hash = NULp;
123  }
124  if (NameSession.old_species_hash) {
125  GBS_free_hash(NameSession.old_species_hash);
126  NameSession.old_species_hash = NULp;
127  }
128 }
129 
132  return GB_abort_transaction(NameSession.gb_main);
133 }
134 
135 static const char *currentTreeName = NULp;
136 
137 static GB_ERROR gbt_rename_tree_rek(TreeNode *tree, int tree_index) {
138  if (tree) {
139  if (tree->is_leaf()) {
140  if (tree->name) {
141  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, tree->name);
142  if (rns) {
143  char *newname;
144  if (rns->used_by == tree_index) { // species more than once in the tree
145  static int counter = 0;
146  char buffer[256];
147 
148  sprintf(buffer, "%s_%i", rns->data, counter++);
149  GB_warningf("Species '%s' more than once in '%s', creating zombie '%s'",
150  tree->name, currentTreeName, buffer);
151  newname = buffer;
152  }
153  else {
154  newname = &rns->data[0];
155  }
156  freedup(tree->name, newname);
157  rns->used_by = tree_index;
158  }
159  }
160  }
161  else {
162  gbt_rename_tree_rek(tree->get_leftson(), tree_index);
163  gbt_rename_tree_rek(tree->get_rightson(), tree_index);
164  }
165  }
166  return NULp;
167 }
168 
 // Closes the rename session by correcting all references to the species renamed via
 // GBT_rename_species() (looked up in NameSession.renamed_hash):
 //   1. leaf names in all trees
 //   2. species entries in all configurations
 //   3. species entries in all saved species colorsets
 //   4. (genome databases only) the 'ARB_origin_species' entry of pseudo-species
 // Finally the transaction is ended via GB_end_transaction(), passing the error state collected so far.
 // Returns NULp on success, otherwise an error message.
169 GB_ERROR GBT_commit_rename_session() { // goes to header: __ATTR__USERESULT
170  bool is_genome_db = GEN_is_genome_db(NameSession.gb_main, -1);
 // three steps are always performed, genome databases add a fourth one
171  arb_progress commit_progress("Correcting name references", long(3+is_genome_db));
172  GB_ERROR error = NULp;
173 
174  commit_progress.allow_title_reuse();
175 
176  // rename species in trees
177  {
178  ConstStrArray tree_names;
179  GBT_get_tree_names(tree_names, NameSession.gb_main, false);
180 
181  if (!tree_names.empty()) {
182  size_t tree_count = tree_names.size();
 // "s"[cond]: index 1 is the terminating NUL of "s", i.e. no plural-'s' for counts below 2.
 // Each tree increments the progress three times (read, rename, write).
183  arb_progress progress(GBS_global_string("Correcting names in %zu tree%c", tree_count, "s"[tree_count<2]), tree_count*3);
184 
185  for (size_t count = 0; count<tree_count && !error; ++count) {
186  const char *tname = tree_names[count];
187  TreeNode *tree = GBT_read_tree(NameSession.gb_main, tname, new SimpleRoot);
188  ++progress;
189 
190  if (tree) {
191  currentTreeName = tname; // provide tree name (used for error message)
 // tree index passed is count+1, i.e. it starts at 1
192  gbt_rename_tree_rek(tree, count+1);
193  currentTreeName = NULp;
194 
195  ++progress;
196 
 // NOTE(review): the GB_ERROR returned by GBT_write_tree is not checked here
197  GBT_write_tree(NameSession.gb_main, tname, tree);
198  destroy(tree);
199 
200  progress.inc_and_check_user_abort(error);
201  }
202  else {
 // an unreadable tree only causes a message; the commit continues with the next tree
203  GBT_message(NameSession.gb_main, GBS_global_string("Warning: failed to read '%s'\n"
204  "(Reason: %s)\n"
205  "Please note that this tree is useless now!",
206  tname, GB_await_error()));
 // consume the two progress steps skipped for this tree
207  ++progress;
208  ++progress;
209  }
210  }
211  }
212  commit_progress.inc_and_check_user_abort(error);
213  }
214  // rename configurations
215  if (!error) {
216  ConstStrArray config_names;
217  GBT_get_configuration_names(config_names, NameSession.gb_main);
218 
219  if (!config_names.empty()) {
220  size_t config_count = config_names.size();
221  arb_progress progress(GBS_global_string("Correcting names in %zu config%c", config_count, "s"[config_count<2]), config_count);
222 
223  for (size_t count = 0; !error && count<config_count; ++count) {
224  GBT_config config(NameSession.gb_main, config_names[count], error);
225  if (!error) {
226  int need_save = 0;
 // each configuration has two areas (0 and 1); every area definition is re-assembled item by item
227  for (int area = 0; !error && area<2; ++area) {
228  GBT_config_parser parser(config, area);
229  GBS_strstruct *strstruct = GBS_stropen(1000);
230 
231  while (1) {
232  const GBT_config_item& item = parser.nextItem(error);
233  if (error || item.type == CI_END_OF_CONFIG) break;
234 
235  if (item.type == CI_SPECIES) {
236  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, item.name);
237  if (rns) { // species was renamed
 // NOTE(review): source line 238 is missing from this listing. As shown, a renamed species
 // is skipped by the 'continue' below without being appended; the missing line presumably
 // appends the item under its new name - TODO confirm against the real file.
239  need_save = 1;
240  continue;
241  }
242  }
 // everything else is copied unchanged
243  GBT_append_to_config_string(item, strstruct);
244  }
245 
246  if (!error) {
247  config.set_definition(area, GBS_strclose(strstruct));
248  }
249  else {
250  error = GBS_global_string("Failed to parse configuration '%s' (Reason: %s)", config_names[count], error);
251  GBS_strforget(strstruct);
252  }
253  }
254 
 // only configurations that contained a renamed species are written back
255  if (!error && need_save) {
256  error = config.save(NameSession.gb_main, config_names[count], false);
257  }
258  }
259  progress.inc_and_check_user_abort(error);
260  }
261  }
262  commit_progress.inc_and_check_user_abort(error);
263  }
264 
265  // rename species in saved colorsets
266  if (!error) {
267  GBDATA *gb_species_colorset_root = GBT_colorset_root(NameSession.gb_main, "species");
268  if (gb_species_colorset_root) {
269  ConstStrArray colorset_names;
270  GBT_get_colorset_names(colorset_names, gb_species_colorset_root);
271 
272  size_t colorset_count = colorset_names.size();
273  if (colorset_count>0) {
274  arb_progress progress(GBS_global_string("Correcting names in %zu colorset%c", colorset_count, "s"[colorset_count<2]), colorset_count);
275 
276  for (size_t c = 0; c<colorset_count && !error; ++c) {
277  GBDATA *gb_colorset = GBT_find_colorset(gb_species_colorset_root, colorset_names[c]);
278  if (!gb_colorset) error = GB_await_error();
279  else {
280  ConstStrArray colorDefs;
281  error = GBT_load_colorset(gb_colorset, colorDefs);
282  if (!error) {
283  StrArray modifiedDefs;
284  bool changed = false;
285 
 // iterate backwards, so that remove(d) does not shift entries which still have to be visited
286  for (int d = colorDefs.size()-1; d>=0; --d) {
287  const char *def = colorDefs[d];
 // the part before '=' is used as species name, "=..." is the color part
288  const char *equal = strchr(def, '=');
289 
290  if (equal) { // only handle correct entries (do not touch rest)
291  if (strcmp(equal+1, "0") == 0) { // unneeded "no color"-entry (see [14094])
292  colorDefs.remove(d);
293  changed = true;
294  }
295  else {
296  gbt_renamed *rns;
297  {
 // temporarily overwrite '=' with 0, so 'def' is just the species name during the lookup
 // (presumably LocallyModify restores the character at the end of this scope - TODO confirm)
298  LocallyModify<char> tempSplit(const_cast<char*>(equal)[0], 0);
299  rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, def);
300  }
301  if (rns) { // species was renamed
 // new definition = new name followed by the unchanged "=..." part
302  char *newDef = GBS_global_string_copy("%s%s", rns->data, equal);
303  colorDefs.replace(d, newDef); // replace colorDefs
304  modifiedDefs.put(newDef); // keep heapcopy until colorDefs gets written
305 
306  changed = true;
307  }
308  }
309  }
310  }
311 
312  if (changed && !error) error = GBT_save_colorset(gb_colorset, colorDefs);
313  }
314  }
315  progress.inc_and_check_user_abort(error);
316  }
317  }
318  }
319  commit_progress.inc_and_check_user_abort(error);
320  }
321 
322  // rename links in pseudo-species
323  if (!error && is_genome_db) {
324  {
325  arb_progress progress("Correcting names of organism references");
326 
327  GBDATA *gb_pseudo;
328  for (gb_pseudo = GEN_first_pseudo_species(NameSession.gb_main);
329  gb_pseudo && !error;
330  gb_pseudo = GEN_next_pseudo_species(gb_pseudo))
331  {
332  GBDATA *gb_origin_organism = GB_entry(gb_pseudo, "ARB_origin_species");
333  if (gb_origin_organism) {
 // NOTE(review): 'origin' is passed to GBS_read_hash without a NULL check
334  const char *origin = GB_read_char_pntr(gb_origin_organism);
335  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, origin);
336  if (rns) { // species was renamed
337  const char *newname = &rns->data[0];
338  error = GB_write_string(gb_origin_organism, newname);
339  }
340  }
341  }
342  }
343  commit_progress.inc_and_check_user_abort(error);
344  }
345 
 // NOTE(review): source line 346 is missing from this listing (presumably it releases the
 // session data via gbt_free_rename_session_data()) - TODO confirm against the real file.
347 
 // ends the transaction; the error state collected above is handed to GB_end_transaction
348  error = GB_end_transaction(NameSession.gb_main, error);
349  return error;
350 }
351 
352 // --------------------------------------------------------------------------------
353 
354 #ifdef UNIT_TESTS
355 #ifndef TEST_UNIT_H
356 #include <test_unit.h>
357 #endif
358 
359 // #define TEST_AUTO_UPDATE // uncomment to auto-update test result db
360 
 // Unit test: opens the test database, runs two passes over the loop below (the first renames
 // nothing, the second renames four species), saves the database in ascii format and compares
 // the saved file with the expected result file.
 // NOTE(review): source lines 374, 383 and 394 are missing from this listing; presumably they
 // begin and commit the rename session inside the loop and remove the output file at the end -
 // TODO confirm against the real file.
361 void TEST_SLOW_rename_session() {
362  const char *inputname = "TEST_opti_ascii_in.arb";
363  const char *outputname = "TEST_opti_ascii_renamed.arb";
364  const char *expectedname = "TEST_opti_ascii_renamed_expected.arb";
365 
366  {
367  GB_shell shell;
368  GBDATA *gb_main;
369  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(inputname, "rw"));
370 
371  for (int session = 1; session <= 2; ++session) {
 // tag subsequent test failures with the number of the current pass
372  TEST_ANNOTATE(GBS_global_string("session=%i", session));
373 
375  if (session == 2) { // session 1 tests renaming nothing
 // the new names are the old names spelled backwards
376  // only in config 'some':
377  TEST_EXPECT_NO_ERROR(GBT_rename_species("FrnPhilo", "olihPnrF", true));
378  TEST_EXPECT_NO_ERROR(GBT_rename_species("DsfDesul", "luseDfsD", true));
379  // also in config 'other':
380  TEST_EXPECT_NO_ERROR(GBT_rename_species("CalSacch", "hccaSlaC", true));
381  TEST_EXPECT_NO_ERROR(GBT_rename_species("LacReute", "etueRcaL", true));
382  }
384  }
385 
386  TEST_EXPECT_NO_ERROR(GB_save_as(gb_main, outputname, "a"));
387  GB_close(gb_main);
388  }
389 
 // when TEST_AUTO_UPDATE is defined, the expected file is overwritten with the current output
390 #if defined(TEST_AUTO_UPDATE)
391  TEST_COPY_FILE(outputname, expectedname);
392 #endif
 // the saved database has to be identical to the expected file (0 differing lines allowed)
393  TEST_EXPECT_TEXTFILE_DIFFLINES(outputname, expectedname, 0);
395 }
396 TEST_PUBLISH(TEST_SLOW_rename_session);
397 
398 #endif // UNIT_TESTS
399 
400 // --------------------------------------------------------------------------------
const char * GB_ERROR
Definition: arb_core.h:25
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
size_t size() const
Definition: arb_strarray.h:85
GBDATA * GBT_colorset_root(GBDATA *gb_main, const char *itemsname)
Definition: ad_colorset.cxx:52
GB_ERROR GBT_abort_rename_session()
Definition: adname.cxx:130
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
GB_HASH * old_species_hash
Definition: adname.cxx:32
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1361
size_t hash_size(size_t estimated_elements)
Definition: adhash.cxx:245
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2525
void allow_title_reuse()
Definition: arb_progress.h:261
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:791
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
GB_ERROR GBT_rename_species(const char *oldname, const char *newname, bool ignore_protection)
Definition: adname.cxx:68
GB_ERROR GBT_begin_rename_session(GBDATA *gb_main, int all_flag)
Definition: adname.cxx:42
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
static GB_ERROR gbt_rename_tree_rek(TreeNode *tree, int tree_index)
Definition: adname.cxx:137
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
GB_HASH * GBT_create_species_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:36
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2458
bool empty() const
Definition: arb_strarray.h:86
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1484
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
GBDATA * GEN_first_pseudo_species(GBDATA *gb_main)
Definition: adGene.cxx:694
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:490
GB_ERROR GBT_commit_rename_session()
Definition: adname.cxx:169
GBDATA * gb_species_data
Definition: adname.cxx:34
Generic smart pointer.
Definition: smartptr.h:149
GBT_CONFIG_ITEM_TYPE type
Definition: ad_config.h:80
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, TreeNode *tree)
Definition: adtree.cxx:477
int all_flag
Definition: adname.cxx:35
int used_by
Definition: adname.cxx:26
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2503
void remove(int i)
Definition: arb_strarray.h:103
GB_ERROR save(GBDATA *gb_main, const char *name, bool warnIfSavingDefault) const
Definition: ad_config.h:64
GB_ERROR GBT_save_colorset(GBDATA *gb_colorset, CharPtrArray &colorsetDefs)
GB_ERROR GBT_load_colorset(GBDATA *gb_colorset, ConstStrArray &colorsetDefs)
Definition: ad_colorset.cxx:94
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
Definition: test_unit.h:1079
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
void GBS_strforget(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:76
char data[1]
Definition: adname.cxx:27
GBDATA * GEN_next_pseudo_species(GBDATA *gb_species)
Definition: adGene.cxx:701
bool is_leaf() const
Definition: TreeNode.h:171
#define gb_assert(cond)
Definition: arbdbt.h:11
const GBT_config_item & nextItem(GB_ERROR &error)
Definition: ad_config.cxx:140
static const char * currentTreeName
Definition: adname.cxx:135
void GBT_append_to_config_string(const GBT_config_item &item, struct GBS_strstruct *strstruct)
Definition: ad_config.cxx:190
char * name
Definition: TreeNode.h:134
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
void GBT_get_tree_names(ConstStrArray &names, GBDATA *gb_main, bool sorted)
Definition: adtree.cxx:1136
void GBT_message(GBDATA *gb_main, const char *msg)
Definition: adtools.cxx:238
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
GBDATA * GBT_find_colorset(GBDATA *gb_colorset_root, const char *name)
Definition: ad_colorset.cxx:76
void GBT_get_configuration_names(ConstStrArray &configNames, GBDATA *gb_main)
Definition: ad_config.cxx:21
#define NULp
Definition: cxxforward.h:97
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
Definition: test_unit.h:1387
void GBT_get_colorset_names(ConstStrArray &colorsetNames, GBDATA *gb_colorset_root)
Definition: ad_colorset.cxx:62
long GBT_get_species_count(GBDATA *gb_main)
Definition: aditem.cxx:203
void destroy(TreeNode *that)
Definition: TreeNode.h:559
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:874
GBDATA * gb_main
Definition: adname.cxx:33
static struct @4 NameSession
bool GEN_is_genome_db(GBDATA *gb_main, int default_value)
Definition: adGene.cxx:20
GB_HASH * renamed_hash
Definition: adname.cxx:31
const char * replace(int i, const char *elem)
Definition: arb_strarray.h:217
static void gbt_free_rename_session_data()
Definition: adname.cxx:119
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:625
void set_definition(int area, char *new_def)
Definition: ad_config.h:54
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105