ARB
adname.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adname.cxx //
4 // Purpose : species names //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "gb_local.h"
12 
13 #include <ad_config.h>
14 #include "TreeNode.h"
15 
16 #include <arb_progress.h>
17 #include <arb_strbuf.h>
18 #include <arb_strarray.h>
19 #include <arb_file.h>
20 #include <arb_diff.h>
21 
22 #include <cctype>
23 #include "ad_colorset.h"
24 
// Record stored in NameSession.renamed_hash (keyed by the old species name) for each renamed species.
25 struct gbt_renamed {
26  int used_by; // tree_index of the last tree in which this record was applied (used to detect a species occurring twice in one tree)
27  char data[1]; // new species name; records are over-allocated (strlen(newname)+sizeof(gbt_renamed)) so the whole NUL-terminated name fits
28 };
29 
30 static struct {
35  int all_flag;
36 } NameSession;
37 
39  /* Starts a rename session (to rename one or many species)
40  * all_flag == 1 -> rename all species in DB
41  * Call GBT_abort_rename_session() or GBT_commit_rename_session() to close the session.
42  */
43 
44  // @@@ change all_flag into estimated number of renames ( == 0 shall mean all)
45 
47  if (!error) {
48  NameSession.gb_main = gb_main;
49  NameSession.gb_species_data = GBT_get_species_data(gb_main);
50 
51  if (!all_flag) { // this is meant to be used for single or few species
52  int hash_size = 128;
53 
55  NameSession.old_species_hash = NULp;
56  }
57  else {
59  NameSession.old_species_hash = GBT_create_species_hash(gb_main);
60  }
61  NameSession.all_flag = all_flag;
62  }
63  return error;
64 }
65 
66 GB_ERROR GBT_rename_species(const char *oldname, const char *newname, bool ignore_protection) {
67  GBDATA *gb_species;
68  GBDATA *gb_name;
70 
71  if (strcmp(oldname, newname) == 0) return NULp;
72 
73 #if defined(DEBUG) && 1
74  if (isdigit(oldname[0])) {
75  printf("oldname='%s' newname='%s'\n", oldname, newname);
76  }
77 #endif
78 
79  if (NameSession.all_flag) {
80  gb_assert(NameSession.old_species_hash);
81  gb_species = (GBDATA *)GBS_read_hash(NameSession.old_species_hash, oldname);
82  }
83  else {
84  GBDATA *gb_found_species;
85 
86  gb_assert(!NameSession.old_species_hash);
87  gb_found_species = GBT_find_species_rel_species_data(NameSession.gb_species_data, newname);
88  gb_species = GBT_find_species_rel_species_data(NameSession.gb_species_data, oldname);
89 
90  if (gb_found_species && gb_species != gb_found_species) {
91  return GBS_global_string("A species named '%s' already exists.", newname);
92  }
93  }
94 
95  if (!gb_species) {
96  return GBS_global_string("Expected that a species named '%s' exists (maybe there are duplicate species, database might be corrupt)", oldname);
97  }
98 
99  gb_name = GB_entry(gb_species, "name");
100  {
101  SmartPtr<GB_topSecurityLevel> probablyUnsecured;
102  if (ignore_protection) probablyUnsecured = new GB_topSecurityLevel(NameSession.gb_main);
103  error = GB_write_string(gb_name, newname);
104  }
105 
106  if (!error) {
107  if (NameSession.old_species_hash) {
108  GBS_write_hash(NameSession.old_species_hash, oldname, 0);
109  }
110  gbt_renamed *rns = (gbt_renamed*)ARB_calloc<char>(strlen(newname)+sizeof(gbt_renamed));
111  strcpy(&rns->data[0], newname);
112  GBS_write_hash(NameSession.renamed_hash, oldname, (long)rns);
113  }
114  return error;
115 }
116 
118  if (NameSession.renamed_hash) {
119  GBS_free_hash(NameSession.renamed_hash);
120  NameSession.renamed_hash = NULp;
121  }
122  if (NameSession.old_species_hash) {
123  GBS_free_hash(NameSession.old_species_hash);
124  NameSession.old_species_hash = NULp;
125  }
126 }
127 
130  return GB_abort_transaction(NameSession.gb_main);
131 }
132 
133 static const char *currentTreeName = NULp;
134 
135 static GB_ERROR gbt_rename_tree_rek(TreeNode *tree, int tree_index) {
136  if (tree) {
137  if (tree->is_leaf()) {
138  if (tree->name) {
139  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, tree->name);
140  if (rns) {
141  char *newname;
142  if (rns->used_by == tree_index) { // species more than once in the tree
143  static int counter = 0;
144  char buffer[256];
145 
146  sprintf(buffer, "%s_%i", rns->data, counter++);
147  GB_warningf("Species '%s' more than once in '%s', creating zombie '%s'",
148  tree->name, currentTreeName, buffer);
149  newname = buffer;
150  }
151  else {
152  newname = &rns->data[0];
153  }
154  freedup(tree->name, newname);
155  rns->used_by = tree_index;
156  }
157  }
158  }
159  else {
160  gbt_rename_tree_rek(tree->get_leftson(), tree_index);
161  gbt_rename_tree_rek(tree->get_rightson(), tree_index);
162  }
163  }
164  return NULp;
165 }
166 
// Closes the rename session: corrects references to all species recorded in
// NameSession.renamed_hash and ends the session's transaction.
//
// References are corrected in this order (each step only runs if no error occurred so far):
//  1. leaf names of all trees
//  2. species entries of all configurations
//  3. entries of all saved species colorsets
//  4. "ARB_origin_species" entries of pseudo-species (genome databases only)
//
// Returns the result of GB_end_transaction(), which receives any error raised by the steps above.
167 GB_ERROR GBT_commit_rename_session() { // goes to header: __ATTR__USERESULT
168  bool is_genome_db = GEN_is_genome_db(NameSession.gb_main, -1);
// 3 steps are always performed; genome databases need one additional step
169  arb_progress commit_progress("Correcting name references", long(3+is_genome_db));
170  GB_ERROR error = NULp;
171 
172  commit_progress.allow_title_reuse();
173 
174  // rename species in trees
175  {
176  ConstStrArray tree_names;
177  GBT_get_tree_names(tree_names, NameSession.gb_main, false);
178 
179  if (!tree_names.empty()) {
180  size_t tree_count = tree_names.size();
// "s"[cond] selects 's' for cond==false and the string terminator for cond==true (no plural 's' for counts below 2);
// 3 progress steps are consumed per tree (read, rename, write)
181  arb_progress progress(GBS_global_string("Correcting names in %zu tree%c", tree_count, "s"[tree_count<2]), tree_count*3);
182 
183  for (size_t count = 0; count<tree_count && !error; ++count) {
184  const char *tname = tree_names[count];
185  TreeNode *tree = GBT_read_tree(NameSession.gb_main, tname, new SimpleRoot);
186  ++progress;
187 
188  if (tree) {
189  currentTreeName = tname; // provide tree name (used for error message)
// count+1 serves as tree_index: it is never 0, i.e. differs from 'used_by' of records not applied to any tree yet
// (presumably records start zeroed, as they are allocated via ARB_calloc)
190  gbt_rename_tree_rek(tree, count+1);
191  currentTreeName = NULp;
192 
193  ++progress;
194 
// NOTE(review): the result of GBT_write_tree() is not checked here
195  GBT_write_tree(NameSession.gb_main, tname, tree);
196  destroy(tree);
197 
198  progress.inc_and_check_user_abort(error);
199  }
200  else {
// an unreadable tree does not abort the commit; the user only gets a message
201  GBT_message(NameSession.gb_main, GBS_global_string("Warning: failed to read '%s'\n"
202  "(Reason: %s)\n"
203  "Please note that this tree is useless now!",
204  tname, GB_await_error()));
// consume the two remaining progress steps of this tree
205  ++progress;
206  ++progress;
207  }
208  }
209  }
210  commit_progress.inc_and_check_user_abort(error);
211  }
212  // rename configurations
213  if (!error) {
214  ConstStrArray config_names;
215  GBT_get_configuration_names(config_names, NameSession.gb_main);
216 
217  if (!config_names.empty()) {
218  size_t config_count = config_names.size();
219  arb_progress progress(GBS_global_string("Correcting names in %zu config%c", config_count, "s"[config_count<2]), config_count);
220 
221  for (size_t count = 0; !error && count<config_count; ++count) {
222  GBT_config config(NameSession.gb_main, config_names[count], error);
223  if (!error) {
224  int need_save = 0;
// each area definition is parsed item by item and rebuilt in 'strstruct'
225  for (int area = GBT_config::TOP_AREA; !error && area<=GBT_config::MIDDLE_AREA; ++area) {
226  GBT_config_parser parser(config, area);
227  GBS_strstruct *strstruct = GBS_stropen(1000);
228 
229  while (1) {
230  const GBT_config_item& item = parser.nextItem(error);
231  if (error || item.type == CI_END_OF_CONFIG) break;
232 
233  if (item.type == CI_SPECIES) {
234  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, item.name);
235  if (rns) { // species was renamed
// NOTE(review): original line 236 is missing from this listing. Presumably it appends the species item
// with its new name (rns->data) to 'strstruct'; as shown here a renamed species would be dropped
// from the configuration. Confirm against the repository.
237  need_save = 1;
238  continue;
239  }
240  }
// all other items are copied unchanged
241  GBT_append_to_config_string(item, strstruct);
242  }
243 
244  if (!error) {
245  config.set_definition(area, GBS_strclose(strstruct));
246  }
247  else {
248  error = GBS_global_string("Failed to parse configuration '%s' (Reason: %s)", config_names[count], error);
249  GBS_strforget(strstruct);
250  }
251  }
252 
// only configurations containing a renamed species are written back
253  if (!error && need_save) {
254  error = config.save(NameSession.gb_main, config_names[count], false);
255  }
256  }
257  progress.inc_and_check_user_abort(error);
258  }
259  }
260  commit_progress.inc_and_check_user_abort(error);
261  }
262 
263  // rename species in saved colorsets
264  if (!error) {
265  GBDATA *gb_species_colorset_root = GBT_colorset_root(NameSession.gb_main, "species");
266  if (gb_species_colorset_root) {
267  ConstStrArray colorset_names;
268  GBT_get_colorset_names(colorset_names, gb_species_colorset_root);
269 
270  size_t colorset_count = colorset_names.size();
271  if (colorset_count>0) {
272  arb_progress progress(GBS_global_string("Correcting names in %zu colorset%c", colorset_count, "s"[colorset_count<2]), colorset_count);
273 
274  for (size_t c = 0; c<colorset_count && !error; ++c) {
275  GBDATA *gb_colorset = GBT_find_colorset(gb_species_colorset_root, colorset_names[c]);
276  if (!gb_colorset) error = GB_await_error();
277  else {
278  ConstStrArray colorDefs;
279  error = GBT_load_colorset(gb_colorset, colorDefs);
280  if (!error) {
281  StrArray modifiedDefs;
282  bool changed = false;
283 
// entries have the form "<name>=<color>"; iterate backwards
// (presumably so that remove(d) does not shift the entries still to be visited)
284  for (int d = colorDefs.size()-1; d>=0; --d) {
285  const char *def = colorDefs[d];
286  const char *equal = strchr(def, '=');
287 
288  if (equal) { // only handle correct entries (do not touch rest)
289  if (strcmp(equal+1, "0") == 0) { // unneeded "no color"-entry (see [14094])
290  colorDefs.remove(d);
291  changed = true;
292  }
293  else {
294  gbt_renamed *rns;
295  {
// temporarily overwrite '=' with 0, so 'def' consists of the species name only during the lookup
// (presumably LocallyModify restores the character at the end of this scope)
296  LocallyModify<char> tempSplit(const_cast<char*>(equal)[0], 0);
297  rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, def);
298  }
299  if (rns) { // species was renamed
// new entry = new name + original "=<color>" part
300  char *newDef = GBS_global_string_copy("%s%s", rns->data, equal);
301  colorDefs.replace(d, newDef); // replace colorDefs
302  modifiedDefs.put(newDef); // keep heapcopy until colorDefs gets written
303 
304  changed = true;
305  }
306  }
307  }
308  }
309 
310  if (changed && !error) error = GBT_save_colorset(gb_colorset, colorDefs);
311  }
312  }
313  progress.inc_and_check_user_abort(error);
314  }
315  }
316  }
317  commit_progress.inc_and_check_user_abort(error);
318  }
319 
320  // rename links in pseudo-species
321  if (!error && is_genome_db) {
322  {
323  arb_progress progress("Correcting names of organism references");
324 
325  GBDATA *gb_pseudo;
326  for (gb_pseudo = GEN_first_pseudo_species(NameSession.gb_main);
327  gb_pseudo && !error;
328  gb_pseudo = GEN_next_pseudo_species(gb_pseudo))
329  {
330  GBDATA *gb_origin_organism = GB_entry(gb_pseudo, "ARB_origin_species");
331  if (gb_origin_organism) {
// NOTE(review): the result of GB_read_char_pntr() is passed to GBS_read_hash() without a NULL check
332  const char *origin = GB_read_char_pntr(gb_origin_organism);
333  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, origin);
334  if (rns) { // species was renamed
335  const char *newname = &rns->data[0];
336  error = GB_write_string(gb_origin_organism, newname);
337  }
338  }
339  }
340  }
341  commit_progress.inc_and_check_user_abort(error);
342  }
343 
// NOTE(review): original line 344 is missing from this listing; presumably it releases the
// session data (gbt_free_rename_session_data()) - confirm against the repository.
345 
// ends the session's transaction; any error raised above is handed to GB_end_transaction()
346  error = GB_end_transaction(NameSession.gb_main, error);
347  return error;
348 }
349 
350 // --------------------------------------------------------------------------------
351 
352 #ifdef UNIT_TESTS
353 #ifndef TEST_UNIT_H
354 #include <test_unit.h>
355 #endif
356 
357 // #define TEST_AUTO_UPDATE // uncomment to auto-update test result db
358 
// Unit test: performs two passes ("sessions") on the test database, saves the result as ascii database
// and compares it against the expected result file.
// Pass 1 renames nothing, pass 2 renames four species.
// NOTE(review): original lines 372, 381 and 392 are missing from this listing (see notes below).
359 void TEST_SLOW_rename_session() {
360  const char *inputname = "TEST_opti_ascii_in.arb";
361  const char *outputname = "TEST_opti_ascii_renamed.arb";
362  const char *expectedname = "TEST_opti_ascii_renamed_expected.arb";
363 
364  {
365  GB_shell shell;
366  GBDATA *gb_main;
367  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(inputname, "rw"));
368 
369  for (int session = 1; session <= 2; ++session) {
370  TEST_ANNOTATE(GBS_global_string("session=%i", session));
371 
// NOTE(review): line 372 missing here - presumably starts the rename session (GBT_begin_rename_session)
373  if (session == 2) { // session 1 tests renaming nothing
374  // only in config 'some':
375  TEST_EXPECT_NO_ERROR(GBT_rename_species("FrnPhilo", "olihPnrF", true));
376  TEST_EXPECT_NO_ERROR(GBT_rename_species("DsfDesul", "luseDfsD", true));
377  // also in config 'other':
378  TEST_EXPECT_NO_ERROR(GBT_rename_species("CalSacch", "hccaSlaC", true));
379  TEST_EXPECT_NO_ERROR(GBT_rename_species("LacReute", "etueRcaL", true));
380  }
// NOTE(review): line 381 missing here - presumably commits the rename session (GBT_commit_rename_session)
382  }
383 
// save the modified database in ascii format, so it can be compared as text file
384  TEST_EXPECT_NO_ERROR(GB_save_as(gb_main, outputname, "a"));
385  GB_close(gb_main);
386  }
387 
// with TEST_AUTO_UPDATE defined, the expected result is overwritten by the current result
388 #if defined(TEST_AUTO_UPDATE)
389  TEST_COPY_FILE(outputname, expectedname);
390 #endif
// result has to match the expected file without any differing line
391  TEST_EXPECT_TEXTFILE_DIFFLINES(outputname, expectedname, 0);
// NOTE(review): line 392 missing here - presumably removes the output file
// (the symbol index lists GB_unlink and TEST_EXPECT_ZERO_OR_SHOW_ERRNO, neither is used in the visible code)
393 }
394 TEST_PUBLISH(TEST_SLOW_rename_session);
395 
396 #endif // UNIT_TESTS
397 
398 // --------------------------------------------------------------------------------
const char * GB_ERROR
Definition: arb_core.h:25
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
size_t size() const
Definition: arb_strarray.h:85
GBDATA * GBT_colorset_root(GBDATA *gb_main, const char *itemsname)
Definition: ad_colorset.cxx:52
GB_ERROR GBT_abort_rename_session()
Definition: adname.cxx:128
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
GB_HASH * old_species_hash
Definition: adname.cxx:32
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static const int MIDDLE_AREA
Definition: ad_config.h:46
size_t hash_size(size_t estimated_elements)
Definition: adhash.cxx:245
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void allow_title_reuse()
Definition: arb_progress.h:319
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:791
static const int TOP_AREA
Definition: ad_config.h:45
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:202
GB_ERROR GBT_rename_species(const char *oldname, const char *newname, bool ignore_protection)
Definition: adname.cxx:66
GB_ERROR GBT_begin_rename_session(GBDATA *gb_main, int all_flag)
Definition: adname.cxx:38
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
static GB_ERROR gbt_rename_tree_rek(TreeNode *tree, int tree_index)
Definition: adname.cxx:135
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
GB_HASH * GBT_create_species_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:36
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
bool empty() const
Definition: arb_strarray.h:86
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1502
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:341
GBDATA * GEN_first_pseudo_species(GBDATA *gb_main)
Definition: adGene.cxx:694
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:477
GB_ERROR GBT_commit_rename_session()
Definition: adname.cxx:167
GBDATA * gb_species_data
Definition: adname.cxx:34
Generic smart pointer.
Definition: smartptr.h:149
GBT_CONFIG_ITEM_TYPE type
Definition: ad_config.h:80
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, TreeNode *tree)
Definition: adtree.cxx:477
int all_flag
Definition: adname.cxx:35
int used_by
Definition: adname.cxx:26
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2539
void remove(int i)
Definition: arb_strarray.h:103
GB_ERROR save(GBDATA *gb_main, const char *name, bool warnIfSavingDefault) const
Definition: ad_config.h:64
GB_ERROR GBT_save_colorset(GBDATA *gb_colorset, CharPtrArray &colorsetDefs)
GB_ERROR GBT_load_colorset(GBDATA *gb_colorset, ConstStrArray &colorsetDefs)
Definition: ad_colorset.cxx:94
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
Definition: test_unit.h:1079
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
void GBS_strforget(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:76
char data[1]
Definition: adname.cxx:27
GBDATA * GEN_next_pseudo_species(GBDATA *gb_species)
Definition: adGene.cxx:701
bool is_leaf() const
Definition: TreeNode.h:171
#define gb_assert(cond)
Definition: arbdbt.h:11
const GBT_config_item & nextItem(GB_ERROR &error)
Definition: ad_config.cxx:140
static const char * currentTreeName
Definition: adname.cxx:133
void GBT_append_to_config_string(const GBT_config_item &item, struct GBS_strstruct *strstruct)
Definition: ad_config.cxx:190
char * name
Definition: TreeNode.h:134
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
void GBT_get_tree_names(ConstStrArray &names, GBDATA *gb_main, bool sorted)
Definition: adtree.cxx:1136
void GBT_message(GBDATA *gb_main, const char *msg)
Definition: adtools.cxx:238
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
GBDATA * GBT_find_colorset(GBDATA *gb_colorset_root, const char *name)
Definition: ad_colorset.cxx:76
void GBT_get_configuration_names(ConstStrArray &configNames, GBDATA *gb_main)
Definition: ad_config.cxx:21
#define NULp
Definition: cxxforward.h:114
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
Definition: test_unit.h:1401
void GBT_get_colorset_names(ConstStrArray &colorsetNames, GBDATA *gb_colorset_root)
Definition: ad_colorset.cxx:62
long GBT_get_species_count(GBDATA *gb_main)
Definition: aditem.cxx:207
void destroy(TreeNode *that)
Definition: TreeNode.h:560
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:33
static struct @4 NameSession
bool GEN_is_genome_db(GBDATA *gb_main, int default_value)
Definition: adGene.cxx:20
GB_HASH * renamed_hash
Definition: adname.cxx:31
const char * replace(int i, const char *elem)
Definition: arb_strarray.h:217
static void gbt_free_rename_session_data()
Definition: adname.cxx:117
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:193
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
void set_definition(int area, char *new_def)
Definition: ad_config.h:54
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105