ARB
adname.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adname.cxx //
4 // Purpose : species names //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "gb_local.h"
12 
13 #include <ad_config.h>
14 #include "TreeNode.h"
15 
16 #include <arb_progress.h>
17 #include <arb_strarray.h>
18 #include <arb_file.h>
19 #include <arb_diff.h>
20 
21 #include <cctype>
22 #include "ad_colorset.h"
23 
// Record stored in NameSession.renamed_hash (keyed by the old species name)
// for every species renamed during a session.
//
// Instances are allocated oversized by GBT_rename_species
// (strlen(newname) + sizeof(gbt_renamed) bytes), so 'data' is only the start
// of a NUL-terminated string holding the complete new name.
struct gbt_renamed {
    int  used_by; // index of the tree that most recently applied this rename (see gbt_rename_tree_rek)
    char data[1]; // first character of the new name; the remainder follows in the same allocation
};
28 
// State of the rename session; a single file-static instance shared by all
// functions in this file.
//
// NOTE(review): the embedded listing numbers jump from 29 to 34 here, so member
// declarations appear to be missing from this copy. The cross-reference index at
// the end of the page lists `GB_HASH *renamed_hash` (30), `GB_HASH *old_species_hash` (31),
// `GBDATA *gb_main` (32) and `GBDATA *gb_species_data` (33) - confirm against the real file.
29 static struct {
34  int all_flag; // copy of the all_flag argument, stored when a session is started
35 } NameSession;
36 
// NOTE(review): the function header (listing line 37) is missing from this copy.
// The cross-reference index names it as
//     GB_ERROR GBT_begin_rename_session(GBDATA *gb_main, int all_flag)
// - confirm against the real file.
38  /* Starts a rename session (to rename one or many species)
39  * all_flag == 1 -> rename all species in DB
40  * Call GBT_abort_rename_session() or GBT_commit_rename_session() to close the session.
41  */
42 
43  // @@@ change all_flag into estimated number of renames ( == 0 shall mean all)
44 
// NOTE(review): listing line 45 is missing; it has to declare and initialise 'error',
// which is tested below. Presumably this is where the transaction gets opened - TODO confirm.
46  if (!error) {
    // remember the database and its species container for the whole session
47  NameSession.gb_main = gb_main;
48  NameSession.gb_species_data = GBT_get_species_data(gb_main);
49 
50  if (!all_flag) { // this is meant to be used for single or few species
51  int hash_size = 128;
52 
    // NOTE(review): listing line 53 is missing; presumably it creates
    // NameSession.renamed_hash using 'hash_size' (otherwise unused here) - TODO confirm.
    // No lookup hash of old names in this mode: GBT_rename_species searches the species data directly.
54  NameSession.old_species_hash = NULp;
55  }
56  else {
    // NOTE(review): listing line 57 is missing; presumably it creates
    // NameSession.renamed_hash sized for all species - TODO confirm.
    // GBT_rename_species looks species up by old name in this hash when all_flag is set.
58  NameSession.old_species_hash = GBT_create_species_hash(gb_main);
59  }
60  NameSession.all_flag = all_flag;
61  }
62  return error;
63 }
64 
65 GB_ERROR GBT_rename_species(const char *oldname, const char *newname, bool ignore_protection) {
66  GBDATA *gb_species;
67  GBDATA *gb_name;
69 
70  if (strcmp(oldname, newname) == 0) return NULp;
71 
72 #if defined(DEBUG) && 1
73  if (isdigit(oldname[0])) {
74  printf("oldname='%s' newname='%s'\n", oldname, newname);
75  }
76 #endif
77 
78  if (NameSession.all_flag) {
79  gb_assert(NameSession.old_species_hash);
80  gb_species = (GBDATA *)GBS_read_hash(NameSession.old_species_hash, oldname);
81  }
82  else {
83  GBDATA *gb_found_species;
84 
85  gb_assert(!NameSession.old_species_hash);
86  gb_found_species = GBT_find_species_rel_species_data(NameSession.gb_species_data, newname);
87  gb_species = GBT_find_species_rel_species_data(NameSession.gb_species_data, oldname);
88 
89  if (gb_found_species && gb_species != gb_found_species) {
90  return GBS_global_string("A species named '%s' already exists.", newname);
91  }
92  }
93 
94  if (!gb_species) {
95  return GBS_global_string("Expected that a species named '%s' exists (maybe there are duplicate species, database might be corrupt)", oldname);
96  }
97 
98  gb_name = GB_entry(gb_species, "name");
99  {
100  SmartPtr<GB_topSecurityLevel> probablyUnsecured;
101  if (ignore_protection) probablyUnsecured = new GB_topSecurityLevel(NameSession.gb_main);
102  error = GB_write_string(gb_name, newname);
103  }
104 
105  if (!error) {
106  if (NameSession.old_species_hash) {
107  GBS_write_hash(NameSession.old_species_hash, oldname, 0);
108  }
109  gbt_renamed *rns = (gbt_renamed*)ARB_calloc<char>(strlen(newname)+sizeof(gbt_renamed));
110  strcpy(&rns->data[0], newname);
111  GBS_write_hash(NameSession.renamed_hash, oldname, (long)rns);
112  }
113  return error;
114 }
115 
// NOTE(review): the function header (listing line 116) is missing from this copy.
// The cross-reference index names it as
//     static void gbt_free_rename_session_data()
// - confirm against the real file.
//
// Frees both hashes of the rename session (if they exist) and resets the
// pointers to NULp, so a repeated call does nothing.
117  if (NameSession.renamed_hash) {
118  GBS_free_hash(NameSession.renamed_hash);
119  NameSession.renamed_hash = NULp;
120  }
121  if (NameSession.old_species_hash) {
122  GBS_free_hash(NameSession.old_species_hash);
123  NameSession.old_species_hash = NULp;
124  }
125 }
126 
// NOTE(review): listing lines 127-128 are missing from this copy. The cross-reference
// index names the header as `GB_ERROR GBT_abort_rename_session()` (line 127); the
// statement on line 128 is not visible (presumably the session data gets freed there -
// TODO confirm against the real file).
//
// Aborts the transaction on the session's database and returns the result of that call.
129  return GB_abort_transaction(NameSession.gb_main);
130 }
131 
132 static const char *currentTreeName = NULp;
133 
134 static GB_ERROR gbt_rename_tree_rek(TreeNode *tree, int tree_index) {
135  if (tree) {
136  if (tree->is_leaf()) {
137  if (tree->name) {
138  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, tree->name);
139  if (rns) {
140  char *newname;
141  if (rns->used_by == tree_index) { // species more than once in the tree
142  static int counter = 0;
143  char buffer[256];
144 
145  sprintf(buffer, "%s_%i", rns->data, counter++);
146  GB_warningf("Species '%s' more than once in '%s', creating zombie '%s'",
147  tree->name, currentTreeName, buffer);
148  newname = buffer;
149  }
150  else {
151  newname = &rns->data[0];
152  }
153  freedup(tree->name, newname);
154  rns->used_by = tree_index;
155  }
156  }
157  }
158  else {
159  gbt_rename_tree_rek(tree->get_leftson(), tree_index);
160  gbt_rename_tree_rek(tree->get_rightson(), tree_index);
161  }
162  }
163  return NULp;
164 }
165 
// Finishes the rename session: corrects all references to renamed species and
// ends the transaction.
//
// Using NameSession.renamed_hash (old name -> gbt_renamed record) it updates, in this order:
//   1. leaf names in all trees
//   2. species entries in all configurations
//   3. species entries in all saved species colorsets
//   4. "ARB_origin_species" entries of pseudo-species (genome databases only)
// Each stage is one step of 'commit_progress'; stages 2-4 are skipped once 'error' is set.
//
// Returns the result of GB_end_transaction(), which is handed the error collected so far.
166 GB_ERROR GBT_commit_rename_session() { // goes to header: __ATTR__USERESULT
167  bool is_genome_db = GEN_is_genome_db(NameSession.gb_main, -1);
    // 3 stages are always performed, the 4th only for genome databases
168  arb_progress commit_progress("Correcting name references", long(3+is_genome_db));
169  GB_ERROR error = NULp;
170 
171  commit_progress.allow_title_reuse();
172 
173  // rename species in trees
174  {
175  ConstStrArray tree_names;
176  GBT_get_tree_names(tree_names, NameSession.gb_main, false);
177 
178  if (!tree_names.empty()) {
179  size_t tree_count = tree_names.size();
    // "s"[cond] selects 's' for cond==false and the string's terminating NUL for cond==true (count<2).
    // Three progress steps are counted per tree (read, rename, write).
180  arb_progress progress(GBS_global_string("Correcting names in %zu tree%c", tree_count, "s"[tree_count<2]), tree_count*3);
181 
182  for (size_t count = 0; count<tree_count && !error; ++count) {
183  const char *tname = tree_names[count];
184  TreeNode *tree = GBT_read_tree(NameSession.gb_main, tname, new SimpleRoot);
185  ++progress;
186 
187  if (tree) {
188  currentTreeName = tname; // provide tree name (used for error message)
    // tree index is 1-based (count+1), so it is never 0
189  gbt_rename_tree_rek(tree, count+1);
190  currentTreeName = NULp;
191 
192  ++progress;
193 
    // NOTE(review): the GB_ERROR returned by GBT_write_tree is discarded here.
194  GBT_write_tree(NameSession.gb_main, tname, tree);
195  destroy(tree);
196 
197  progress.inc_and_check_user_abort(error);
198  }
199  else {
    // an unreadable tree is reported but does not abort the commit
200  GBT_message(NameSession.gb_main, GBS_global_string("Warning: failed to read '%s'\n"
201  "(Reason: %s)\n"
202  "Please note that this tree is useless now!",
203  tname, GB_await_error()));
    // account for the two steps skipped for this tree
204  ++progress;
205  ++progress;
206  }
207  }
208  }
209  commit_progress.inc_and_check_user_abort(error);
210  }
211  // rename configurations
212  if (!error) {
213  ConstStrArray config_names;
214  GBT_get_configuration_names(config_names, NameSession.gb_main);
215 
216  if (!config_names.empty()) {
217  size_t config_count = config_names.size();
218  arb_progress progress(GBS_global_string("Correcting names in %zu config%c", config_count, "s"[config_count<2]), config_count);
219 
220  for (size_t count = 0; !error && count<config_count; ++count) {
221  GBT_config config(NameSession.gb_main, config_names[count], error);
222  if (!error) {
223  int need_save = 0;
    // each area's definition is re-built item by item into 'cfg'
224  for (int area = GBT_config::TOP_AREA; !error && area<=GBT_config::MIDDLE_AREA; ++area) {
225  GBT_config_parser parser(config, area);
226  GBS_strstruct cfg(1000);
227 
228  while (1) {
229  const GBT_config_item& item = parser.nextItem(error);
230  if (error || item.type == CI_END_OF_CONFIG) break;
231 
232  if (item.type == CI_SPECIES) {
233  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, item.name);
234  if (rns) { // species was renamed
    // NOTE(review): listing line 235 is missing from this copy. As shown, a renamed species
    // is not appended to 'cfg' at all; presumably the missing statement appends the entry
    // with its new name (rns->data) - TODO confirm against the real file.
236  need_save = 1;
237  continue;
238  }
239  }
    // everything else is copied unchanged
240  item.append_to_config_string(cfg);
241  }
242 
243  if (!error) {
244  config.set_definition(area, cfg.release_memfriendly());
245  }
246  else {
247  error = GBS_global_string("Failed to parse configuration '%s' (Reason: %s)", config_names[count], error);
248  }
249  }
250 
    // only write back configurations that contained a renamed species
251  if (!error && need_save) {
252  error = config.save(NameSession.gb_main, config_names[count], false);
253  }
254  }
255  progress.inc_and_check_user_abort(error);
256  }
257  }
258  commit_progress.inc_and_check_user_abort(error);
259  }
260 
261  // rename species in saved colorsets
262  if (!error) {
263  GBDATA *gb_species_colorset_root = GBT_colorset_root(NameSession.gb_main, "species");
264  if (gb_species_colorset_root) {
265  ConstStrArray colorset_names;
266  GBT_get_colorset_names(colorset_names, gb_species_colorset_root);
267 
268  size_t colorset_count = colorset_names.size();
269  if (colorset_count>0) {
270  arb_progress progress(GBS_global_string("Correcting names in %zu colorset%c", colorset_count, "s"[colorset_count<2]), colorset_count);
271 
272  for (size_t c = 0; c<colorset_count && !error; ++c) {
273  GBDATA *gb_colorset = GBT_find_colorset(gb_species_colorset_root, colorset_names[c]);
274  if (!gb_colorset) error = GB_await_error();
275  else {
276  ConstStrArray colorDefs;
277  error = GBT_load_colorset(gb_colorset, colorDefs);
278  if (!error) {
279  StrArray modifiedDefs;
280  bool changed = false;
281 
    // entries have the form "<name>=<value>"; walking backwards presumably keeps
    // the indices still to be visited valid when remove(d) is called
282  for (int d = colorDefs.size()-1; d>=0; --d) {
283  const char *def = colorDefs[d];
284  const char *equal = strchr(def, '=');
285 
286  if (equal) { // only handle correct entries (do not touch rest)
287  if (strcmp(equal+1, "0") == 0) { // unneeded "no color"-entry (see [14094])
288  colorDefs.remove(d);
289  changed = true;
290  }
291  else {
292  gbt_renamed *rns;
293  {
    // temporarily overwrite '=' with 0 so that 'def' is just the species name for the lookup
    // (presumably LocallyModify restores the character when this scope ends - TODO confirm)
294  LocallyModify<char> tempSplit(const_cast<char*>(equal)[0], 0);
295  rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, def);
296  }
297  if (rns) { // species was renamed
    // new entry = new name followed by the original "=<value>" part
298  char *newDef = GBS_global_string_copy("%s%s", rns->data, equal);
299  colorDefs.replace(d, newDef); // replace colorDefs
300  modifiedDefs.put(newDef); // keep heapcopy until colorDefs gets written
301 
302  changed = true;
303  }
304  }
305  }
306  }
307 
308  if (changed && !error) error = GBT_save_colorset(gb_colorset, colorDefs);
309  }
310  }
311  progress.inc_and_check_user_abort(error);
312  }
313  }
314  }
315  commit_progress.inc_and_check_user_abort(error);
316  }
317 
318  // rename links in pseudo-species
319  if (!error && is_genome_db) {
320  {
321  arb_progress progress("Correcting names of organism references");
322 
323  GBDATA *gb_pseudo;
324  for (gb_pseudo = GEN_first_pseudo_species(NameSession.gb_main);
325  gb_pseudo && !error;
326  gb_pseudo = GEN_next_pseudo_species(gb_pseudo))
327  {
328  GBDATA *gb_origin_organism = GB_entry(gb_pseudo, "ARB_origin_species");
329  if (gb_origin_organism) {
330  const char *origin = GB_read_char_pntr(gb_origin_organism);
331  gbt_renamed *rns = (gbt_renamed *)GBS_read_hash(NameSession.renamed_hash, origin);
332  if (rns) { // species was renamed
333  const char *newname = &rns->data[0];
334  error = GB_write_string(gb_origin_organism, newname);
335  }
336  }
337  }
338  }
339  commit_progress.inc_and_check_user_abort(error);
340  }
341 
    // NOTE(review): listing line 342 is missing from this copy (presumably the session
    // data gets freed here - TODO confirm against the real file).
343 
    // hands the collected error to GB_end_transaction and returns its result
344  error = GB_end_transaction(NameSession.gb_main, error);
345  return error;
346 }
347 
348 // --------------------------------------------------------------------------------
349 
350 #ifdef UNIT_TESTS
351 #ifndef TEST_UNIT_H
352 #include <test_unit.h>
353 #endif
354 
355 // #define TEST_AUTO_UPDATE // uncomment to auto-update test result db
356 
// Unit test: opens the input database, runs two rename sessions, saves the result
// as ascii database and compares it with the expected file.
// Session 1 renames nothing, session 2 renames four species.
357 void TEST_SLOW_rename_session() {
358  const char *inputname = "TEST_opti_ascii_in.arb";
359  const char *outputname = "TEST_opti_ascii_renamed.arb";
360  const char *expectedname = "TEST_opti_ascii_renamed_expected.arb";
361 
362  {
363  GB_shell shell;
364  GBDATA *gb_main;
365  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(inputname, "rw"));
366 
367  for (int session = 1; session <= 2; ++session) {
368  TEST_ANNOTATE(GBS_global_string("session=%i", session));
369 
    // NOTE(review): listing line 370 is missing from this copy (presumably the call
    // that begins the rename session - TODO confirm against the real file).
371  if (session == 2) { // session 1 tests renaming nothing
372  // only in config 'some':
373  TEST_EXPECT_NO_ERROR(GBT_rename_species("FrnPhilo", "olihPnrF", true));
374  TEST_EXPECT_NO_ERROR(GBT_rename_species("DsfDesul", "luseDfsD", true));
375  // also in config 'other':
376  TEST_EXPECT_NO_ERROR(GBT_rename_species("CalSacch", "hccaSlaC", true));
377  TEST_EXPECT_NO_ERROR(GBT_rename_species("LacReute", "etueRcaL", true));
378  }
    // NOTE(review): listing line 379 is missing from this copy (presumably the call
    // that commits the rename session - TODO confirm against the real file).
380  }
381 
382  TEST_EXPECT_NO_ERROR(GB_save_as(gb_main, outputname, "a"));
383  GB_close(gb_main);
384  }
385 
    // with TEST_AUTO_UPDATE defined, the expected file is overwritten by the current result
386 #if defined(TEST_AUTO_UPDATE)
387  TEST_COPY_FILE(outputname, expectedname);
388 #endif
    // result and expected file have to be identical (0 differing lines)
389  TEST_EXPECT_TEXTFILE_DIFFLINES(outputname, expectedname, 0);
    // NOTE(review): listing line 390 is missing from this copy (presumably removal of
    // the output file - TODO confirm against the real file).
391 }
392 TEST_PUBLISH(TEST_SLOW_rename_session);
393 
394 #endif // UNIT_TESTS
395 
396 // --------------------------------------------------------------------------------
const char * GB_ERROR
Definition: arb_core.h:25
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
size_t size() const
Definition: arb_strarray.h:85
GBDATA * GBT_colorset_root(GBDATA *gb_main, const char *itemsname)
Definition: ad_colorset.cxx:52
GB_ERROR GBT_abort_rename_session()
Definition: adname.cxx:127
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
GB_HASH * old_species_hash
Definition: adname.cxx:31
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static const int MIDDLE_AREA
Definition: ad_config.h:49
size_t hash_size(size_t estimated_elements)
Definition: adhash.cxx:245
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void allow_title_reuse()
Definition: arb_progress.h:319
TreeNode * GBT_read_tree(GBDATA *gb_main, const char *tree_name, TreeRoot *troot)
Definition: adtree.cxx:837
static const int TOP_AREA
Definition: ad_config.h:48
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
GB_ERROR GBT_rename_species(const char *oldname, const char *newname, bool ignore_protection)
Definition: adname.cxx:65
GB_ERROR GBT_begin_rename_session(GBDATA *gb_main, int all_flag)
Definition: adname.cxx:37
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
static GB_ERROR gbt_rename_tree_rek(TreeNode *tree, int tree_index)
Definition: adname.cxx:134
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
GB_HASH * GBT_create_species_hash(GBDATA *gb_main)
Definition: adhashtools.cxx:36
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
bool empty() const
Definition: arb_strarray.h:86
void append_to_config_string(GBS_strstruct &out) const
Definition: ad_config.cxx:189
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
GBDATA * GEN_first_pseudo_species(GBDATA *gb_main)
Definition: adGene.cxx:684
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:536
GB_ERROR GBT_commit_rename_session()
Definition: adname.cxx:166
GBDATA * gb_species_data
Definition: adname.cxx:33
Generic smart pointer.
Definition: smartptr.h:149
GBT_CONFIG_ITEM_TYPE type
Definition: ad_config.h:83
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, TreeNode *tree)
Definition: adtree.cxx:523
int all_flag
Definition: adname.cxx:34
int used_by
Definition: adname.cxx:25
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2539
void remove(int i)
GB_ERROR save(GBDATA *gb_main, const char *name, bool warnIfSavingDefault) const
Definition: ad_config.h:67
GB_ERROR GBT_save_colorset(GBDATA *gb_colorset, CharPtrArray &colorsetDefs)
GB_ERROR GBT_load_colorset(GBDATA *gb_colorset, ConstStrArray &colorsetDefs)
Definition: ad_colorset.cxx:94
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
Definition: test_unit.h:1090
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
char data[1]
Definition: adname.cxx:26
GBDATA * GEN_next_pseudo_species(GBDATA *gb_species)
Definition: adGene.cxx:691
bool is_leaf() const
Definition: TreeNode.h:211
#define gb_assert(cond)
Definition: arbdbt.h:11
const GBT_config_item & nextItem(GB_ERROR &error)
Definition: ad_config.cxx:139
static const char * currentTreeName
Definition: adname.cxx:132
char * name
Definition: TreeNode.h:174
void GBT_get_tree_names(ConstStrArray &names, GBDATA *gb_main, bool sorted)
Definition: adtree.cxx:1187
void GBT_message(GBDATA *gb_main, const char *msg)
Definition: adtools.cxx:238
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
GBDATA * GBT_find_colorset(GBDATA *gb_colorset_root, const char *name)
Definition: ad_colorset.cxx:76
void GBT_get_configuration_names(ConstStrArray &configNames, GBDATA *gb_main)
Definition: ad_config.cxx:20
#define NULp
Definition: cxxforward.h:116
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
Definition: test_unit.h:1416
void GBT_get_colorset_names(ConstStrArray &colorsetNames, GBDATA *gb_colorset_root)
Definition: ad_colorset.cxx:62
long GBT_get_species_count(GBDATA *gb_main)
Definition: aditem.cxx:207
void destroy(TreeNode *that)
Definition: TreeNode.h:600
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
static struct @4 NameSession
bool GEN_is_genome_db(GBDATA *gb_main, int default_value)
Definition: adGene.cxx:20
GB_HASH * renamed_hash
Definition: adname.cxx:30
const char * replace(int i, const char *elem)
Definition: arb_strarray.h:206
static void gbt_free_rename_session_data()
Definition: adname.cxx:116
char * release_memfriendly()
Definition: arb_strbuf.h:133
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
void set_definition(int area, char *new_def)
Definition: ad_config.h:57
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105