ARB
NT_concatenate.cxx
Go to the documentation of this file.
1 // =======================================================================================
2 //
3 // File : NT_concatenate.cxx
4 // Purpose : 1.Concatenation of sequences or alignments
5 // 2.Merging the fields of similar species and creating a new species
6 // Author : Yadhu Kumar
7 // web site : http://www.arb-home.de/
8 //
9 // Copyright Department of Microbiology (Technical University Munich)
10 //
11 // =======================================================================================
12 
13 #include "NT_local.h"
14 
15 #include <items.h>
16 #include <item_sel_list.h>
17 #include <awt_sel_boxes.hxx>
18 #include <AW_rename.hxx>
19 #include <aw_question.hxx>
20 #include <aw_awar.hxx>
21 #include <aw_msg.hxx>
22 #include <aw_root.hxx>
23 #include <arb_progress.h>
24 #include <arb_strbuf.h>
25 #include <arb_strarray.h>
26 #include <awt_modules.hxx>
27 #include <arb_global_defs.h>
28 
29 using namespace std;
30 
31 #define AWAR_CON_SEQUENCE_TYPE "tmp/concat/sequence_type"
32 #define AWAR_CON_NEW_ALIGNMENT_NAME "tmp/concat/new_alignment_name"
33 #define AWAR_CON_ALIGNMENT_SEPARATOR "tmp/concat/alignment_separator"
34 #define AWAR_CON_SELECTED_ALI "tmp/concat/database_alignments"
35 #define AWAR_CON_MERGE_FIELD "tmp/concat/merge_field"
36 #define AWAR_CON_STORE_SIM_SP_NO "tmp/concat/store_sim_sp_no"
37 
38 #define AWAR_CON_ALLOW_OVERWRITE_ALI "tmp/concat/overwrite"
39 #define AWAR_CON_INSGAPS_FOR_MISS_ALIS "tmp/concat/insgaps"
40 
41 #define MOVE_DOWN 0
42 #define MOVE_UP 1
43 
46  char *species_name;
47 
49 };
50 
51 // --------------------------creating and initializing AWARS----------------------------------------
53  GB_transaction ta(gb_main);
54 
55  char *ali_type = NULp;
56  {
57  char *ali_default = GBT_get_default_alignment(gb_main);
58  if (ali_default) {
59  ali_type = GBT_get_alignment_type_string(gb_main, ali_default);
60  if (!ali_type) {
61  // Note: this message will appear during startup (i.e. stick to general statement here!)
62  aw_message(GBS_global_string("Failed to detect type of default alignment (%s)\n"
63  "(Reason: %s)", ali_default, GB_await_error()));
64  }
65  free(ali_default);
66  }
67  else {
69  }
70  }
71  if (!ali_type) ali_type = ARB_strdup("rna");
72 
73  aw_root->awar_string(AWAR_CON_SEQUENCE_TYPE, ali_type, aw_def);
75  aw_root->awar_string(AWAR_CON_ALIGNMENT_SEPARATOR, "XXX", aw_def);
76  aw_root->awar_string(AWAR_CON_SELECTED_ALI, "", aw_def);
77  aw_root->awar_string(AWAR_CON_MERGE_FIELD, "full_name", aw_def);
78  aw_root->awar_string(AWAR_CON_STORE_SIM_SP_NO, "merged_species", aw_def);
79 
80  aw_root->awar_int(AWAR_CON_ALLOW_OVERWRITE_ALI, 0, aw_def);
81  aw_root->awar_int(AWAR_CON_INSGAPS_FOR_MISS_ALIS, 1, aw_def);
82 
83  free(ali_type);
84 }
85 
86 // ------------------------Selecting alignments from the database for concatenation----------------------
87 
88 inline char *get_alitype_eval(AW_root *aw_root) {
90 }
91 
92 static void alitype_changed_cb(AW_root *aw_root, AW_DB_selection *db_sel) {
93  char *ali_type = get_alitype_eval(aw_root);
94  awt_reconfigure_ALI_selection_list(db_sel, ali_type);
95  free(ali_type);
96 }
97 
98 static AW_DB_selection* createSelectionList(GBDATA *gb_main, AW_window *aws, const char *awarName) {
99 
100 #ifdef DEBUG
101  static bool ran=false;
102  nt_assert(!ran);
103  ran=true; // prevents calling this function for the second time
104 #endif
105 
106  AW_root *aw_root = aws->get_root();
107  char *ali_type = get_alitype_eval(aw_root);
108  AW_DB_selection *db_sel = awt_create_ALI_selection_list(gb_main, aws, awarName, ali_type);
109 
110  free(ali_type);
111  return db_sel;
112 }
113 
114 // ---------- Create SAI to display alignments that were concatenated --------------
115 
116 static GB_ERROR create_concatInfo_SAI(GBDATA *gb_main, const char *new_ali_name, const char *ali_separator, const StrArray& ali_names) {
117  GB_ERROR error = NULp;
118  GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, "ConcatInfo");
119 
120  if (!gb_extended) error = GB_await_error();
121  else {
122  GBDATA *gb_data = GBT_add_data(gb_extended, new_ali_name, "data", GB_STRING);
123 
124  if (!gb_data) {
125  error = GB_await_error();
126  }
127  else {
128  int new_ali_length = GBT_get_alignment_len(gb_main, new_ali_name);
129  nt_assert(new_ali_length>0);
130 
131  int sep_len = strlen(ali_separator);
132 
133  char *info = ARB_alloc<char>(new_ali_length+1);
134  memset(info, '=', new_ali_length);
135 
136  int offset = 0;
137  int last_ali_idx = ali_names.size()-1;
138 
139  for (int a = 0; a <= last_ali_idx; ++a) {
140  const char *ali = ali_names[a];
141 
142  int ali_len = GBT_get_alignment_len(gb_main, ali);
143  int ali_str_len = strlen(ali);
144  nt_assert(ali_len>0);
145 
146  char *my_info = info+offset;
147 
148  int half_ali_len = ali_len/2;
149  for (int i = 0; i<5; ++i) {
150  if (i<half_ali_len) {
151  my_info[i] = '<';
152  my_info[ali_len-i-1] = '>';
153  }
154  }
155 
156  if (ali_str_len<ali_len) {
157  int namepos = half_ali_len - ali_str_len/2;
158  memcpy(my_info+namepos, ali, ali_str_len);
159  }
160 
161  offset += ali_len;
162  if (a != last_ali_idx) {
163  memcpy(info+offset, ali_separator, sep_len);
164  offset += sep_len;
165  }
166  }
167 
168  nt_assert(offset == new_ali_length); // wrong alignment length!
169  info[new_ali_length] = 0;
170 
171  if (!error) error = GB_write_string(gb_data, info);
172  free(info);
173  }
174  }
175  return error;
176 }
177 
178 // ---------------------------------------- Concatenation function ----------------------------------
179 static void concatenateAlignments(AW_window *aws, AW_selection *selected_alis) {
180  nt_assert(selected_alis);
181 
183 
184  long marked_species = GBT_count_marked_species(GLOBAL.gb_main);
185  AW_root *aw_root = aws->get_root();
186  char *new_ali_name = aw_root->awar(AWAR_CON_NEW_ALIGNMENT_NAME)->read_string();
187  GB_ERROR error = GBT_check_alignment_name(new_ali_name);
188 
189  StrArray ali_names;
190  selected_alis->get_values(ali_names);
191 
192  arb_progress progress("Concatenating alignments", marked_species);
193  size_t ali_count = ali_names.size();
194 
195  if (!error && ali_count<2) {
196  error = "Not enough alignments selected for concatenation (need at least 2)";
197  }
198  if (!error) {
199  int found[ali_count], missing[ali_count], ali_length[ali_count];
200 
201  for (size_t a = 0; a<ali_count; a++) {
202  found[a] = 0;
203  missing[a] = 0;
204  ali_length[a] = GBT_get_alignment_len(GLOBAL.gb_main, ali_names[a]);
205 
206  if (ali_length[a]<=0) {
207  error = GB_await_error();
208  }
209  else if (strcmp(ali_names[a], new_ali_name) == 0) {
210  error = "Target alignment may not be one of the source alignments";
211  }
212  }
213 
214  if (!error) {
215  char *ali_separator = aw_root->awar(AWAR_CON_ALIGNMENT_SEPARATOR)->read_string();
216  const int sep_len = strlen(ali_separator);
217 
218  long new_alignment_len = (ali_count-1)*sep_len;
219  for (size_t a = 0; a<ali_count; ++a) { // LOOP_VECTORIZED
220  new_alignment_len += ali_length[a];
221  }
222 
223  GBDATA *gb_presets = GBT_get_presets(GLOBAL.gb_main);
224  GBDATA *gb_alignment_exists = GB_find_string(gb_presets, "alignment_name", new_ali_name, GB_IGNORE_CASE, SEARCH_GRANDCHILD);
225  GBDATA *gb_new_alignment = NULp;
226  char *seq_type = aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->read_string();
227 
228  if (gb_alignment_exists) {
229  // target alignment exists
230  if (aw_root->awar(AWAR_CON_ALLOW_OVERWRITE_ALI)->read_int()) { // allow overwrite
231  gb_new_alignment = GBT_get_alignment(GLOBAL.gb_main, new_ali_name);
232  if (!gb_new_alignment) error = GB_await_error();
233  }
234  else {
235  error = GBS_global_string("Target alignment '%s' already exists\n(check overwrite-toggle if you really want to overwrite)", new_ali_name);
236  }
237  }
238  else {
239  // create new target alignment
240  char *source_alignments = GBT_join_strings(ali_names, ' ');
241  char *why_created = GBS_global_string_copy("while concatenating %s", source_alignments);
242 
243  gb_new_alignment = GBT_create_alignment(GLOBAL.gb_main, new_ali_name, new_alignment_len, 0, 0, seq_type, why_created);
244  if (!gb_new_alignment) error = GB_await_error();
245 
246  free(why_created);
247  free(source_alignments);
248  }
249 
250  if (!error) {
251  AW_repeated_question ask_about_missing_alignment;
252  bool insertGaps = aw_root->awar(AWAR_CON_INSGAPS_FOR_MISS_ALIS)->read_int();
253 
254  for (GBDATA *gb_species = GBT_first_marked_species(GLOBAL.gb_main);
255  gb_species && !error;
256  gb_species = GBT_next_marked_species(gb_species))
257  {
258  GBS_strstruct concat_seq(new_alignment_len+1);
259  int data_inserted = 0;
260 
261  for (size_t a = 0; a<ali_count; ++a) {
262  if (a) concat_seq.cat(ali_separator);
263 
264  GBDATA *gb_seq_data = GBT_find_sequence(gb_species, ali_names[a]);
265  if (gb_seq_data) { // found data
266  const char *seq_data = GB_read_char_pntr(gb_seq_data);
267  concat_seq.cat(seq_data);
268  ++found[a];
269  ++data_inserted;
270  }
271  else { // missing data
272  if (insertGaps) concat_seq.nput('.', ali_length[a]);
273  ++missing[a];
274  }
275  }
276 
277  if (!data_inserted) {
278  error = GBS_global_string("None of the source alignments had data for species '%s'", GBT_get_name_or_description(gb_species));
279  }
280  else {
281  GBDATA *gb_data = GBT_add_data(gb_species, new_ali_name, "data", GB_STRING);
282  GB_write_string(gb_data, concat_seq.get_data());
283  }
284  progress.inc_and_check_user_abort(error);
285  }
286 
287  if (!error) {
288  // ............. print missing alignments...........
289  aw_message(GBS_global_string("Concatenation of alignments was performed for %ld species.", marked_species));
290  for (size_t a = 0; a<ali_count; ++a) {
291  aw_message(GBS_global_string("%s: was found in %d species and missing in %d species.", ali_names[a], found[a], missing[a]));
292  }
293  }
294 
295  if (!error) error = GBT_check_data(GLOBAL.gb_main, new_ali_name); // update alignment info (otherwise create_concatInfo_SAI fails when overwriting an alignment)
296  if (!error) error = create_concatInfo_SAI(GLOBAL.gb_main, new_ali_name, ali_separator, ali_names);
297  }
298 
299  free(seq_type);
300  free(ali_separator);
301  }
302  }
303 
304  if (!error) {
305  error = GBT_add_alignment_changekeys(GLOBAL.gb_main, new_ali_name);
306  }
307  else {
308  progress.done();
309  }
311  free(new_ali_name);
312 }
313 
314 static void addSpeciesToConcatenateList(SpeciesConcatenateList **sclp, GB_CSTR species_name) {
315 
317  GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, species_name);
318 
319  if (gb_species) {
321 
322  scl->species = gb_species;
323  scl->species_name = ARB_strdup(species_name);
324  scl->next = *sclp;
325  *sclp = scl;
326  }
327 }
328 
330  while (scl) {
331  SpeciesConcatenateList *next = scl->next;
332  free(scl->species_name);
333  delete scl;
334  scl = next;
335  }
336 }
337 
339 
340  char *doneFields = ARB_strdup(";name;"); // all fields which are already merged
341  int doneLen = strlen(doneFields);
342  SpeciesConcatenateList *sl = scl;
343  int sl_length = 0; while (scl) { sl_length++; scl=scl->next; } // counting no. of similar species stored in the list
344  int *fieldStat = new int[sl_length]; // 0 = not used yet ; -1 = doesn't have field ; 1..n = field content (same number means same content)
345 
346  while (sl && !error) { // with all species do..
347  char *newFields = GB_get_subfields(sl->species);
348  char *fieldStart = newFields; // points to ; before next field
349 
350  while (fieldStart[1] && !error) { // with all subfields of the species do..
351  char *fieldEnd = strchr(fieldStart+1, ';');
352  nt_assert(fieldEnd);
353  char behind = fieldEnd[1]; fieldEnd[1] = 0;
354 
355  if (!strstr(doneFields, fieldStart)) { // field is not merged yet
356  char *fieldName = fieldStart+1;
357  int fieldLen = int(fieldEnd-fieldName);
358 
359  nt_assert(fieldEnd[0]==';');
360  fieldEnd[0] = 0;
361 
362  GBDATA *gb_field = GB_search(sl->species, fieldName, GB_FIND); // field does to exist (it was found before)
363  GB_TYPES type = GB_read_type(gb_field);
364 
365  if (type==GB_STRING) { // we only merge string fields
366  int i; int doneSpecies = 0; int nextStat = 1;
367 
368  for (i=0; i<sl_length; i++) { fieldStat[i] = 0; } // clear field status
369 
370  while (doneSpecies<sl_length) { // since all species in list were handled
371  SpeciesConcatenateList *sl2 = sl;
372  i = 0;
373 
374  while (sl2) {
375  if (fieldStat[i]==0) {
376  gb_field = GB_search(sl2->species, fieldName, GB_FIND);
377  if (gb_field) {
378  char *content = GB_read_as_string(gb_field);
379  SpeciesConcatenateList *sl3 = sl2->next;
380  fieldStat[i] = nextStat;
381  int j = i+1; doneSpecies++;
382 
383  while (sl3) {
384  if (fieldStat[j]==0) {
385  gb_field = GB_search(sl3->species, fieldName, GB_FIND);
386  if (gb_field) {
387  char *content2 = GB_read_as_string(gb_field);
388  if (strcmp(content, content2)==0) { // if contents are the same, they get the same status
389  fieldStat[j] = nextStat;
390  doneSpecies++;
391  }
392  free(content2);
393  }
394  else {
395  fieldStat[j] = -1;
396  doneSpecies++;
397  }
398  }
399  sl3 = sl3->next; j++;
400  }
401  free(content); nextStat++;
402  }
403  else {
404  fieldStat[i] = -1; // field does not exist here
405  doneSpecies++;
406  }
407  }
408  sl2 = sl2->next; i++;
409  }
410  if (!sl2) break;
411  }
412  nt_assert(nextStat!=1); // this would mean that none of the species contained the field
413  {
414  char *new_content = NULp;
415  int new_content_len = 0; // @@@ useless (0 where used; unused otherwise)
416 
417  if (nextStat==2) { // all species contain same field content or do not have the field
418  SpeciesConcatenateList *sl2 = sl;
419  while (sl2) {
420  gb_field = GB_search(sl2->species, fieldName, GB_FIND);
421  if (gb_field) {
422  new_content = GB_read_as_string(gb_field);
423  new_content_len = strlen(new_content);
424  break;
425  }
426  sl2 = sl2->next;
427  }
428  }
429  else { // different field contents
430  int actualStat;
431  for (actualStat=1; actualStat<nextStat; actualStat++) {
432  SpeciesConcatenateList *sl2 = sl;
433 
434  int names_len = 1; // open bracket
435  char *content = NULp;
436  i = 0;
437 
438  while (sl2) {
439  if (fieldStat[i]==actualStat) {
440  names_len += strlen(sl2->species_name)+1;
441  if (!content) {
442  gb_field = GB_search(sl2->species, fieldName, GB_FIND);
443  nt_assert(gb_field);
444  content = GB_read_as_string(gb_field);
445  }
446  }
447  sl2 = sl2->next; i++;
448  }
449  nt_assert(content);
450  int add_len = names_len+1+strlen(content);
451  char *whole = ARB_alloc<char>(new_content_len+1+add_len+1);
452  nt_assert(whole);
453  char *add = new_content ? whole+sprintf(whole, "%s ", new_content) : whole;
454  sl2 = sl; i = 0;
455  int first = 1;
456  while (sl2) {
457  if (fieldStat[i]==actualStat) {
458  add += sprintf(add, "%c%s", first ? '{' : ';', sl2->species_name);
459  first = 0;
460  }
461  sl2 = sl2->next; i++;
462  }
463  add += sprintf(add, "} %s", content);
464 
465  free(content);
466  freeset(new_content, whole);
467  new_content_len = strlen(new_content); // cppcheck-suppress deallocuse
468  }
469  }
470 
471  if (new_content) {
472  error = GBT_write_string(gb_new_species, fieldName, new_content);
473  free(new_content);
474  }
475  }
476  }
477 
478  // mark field as done:
479  char *new_doneFields = ARB_alloc<char>(doneLen+fieldLen+1+1);
480  sprintf(new_doneFields, "%s%s;", doneFields, fieldName);
481  doneLen += fieldLen+1;
482  freeset(doneFields, new_doneFields);
483  fieldEnd[0] = ';';
484  }
485  fieldEnd[1] = behind;
486  fieldStart = fieldEnd;
487  }
488  free(newFields);
489  sl = sl->next;
490  }
491  free(doneFields);
492  delete [] fieldStat;
493 
494  return error;
495 }
496 
499 
500  GB_ERROR error = NULp;
502 
503  // data needed for name generation
504  char *full_name = NULp;
505  char *acc = NULp;
506 
507  // --------------------getting the species related data --------------------
508 
509  GBDATA *gb_new_species = NULp;
510 
511  if (!error) {
512  // copy species to create a new species
513  gb_new_species = GB_create_container(gb_species_data, "species");
514  error = gb_new_species ? GB_copy_dropProtectMarksAndTempstate(gb_new_species, gb_species) : GB_await_error();
515 
516  if (!error) { // write dummy-name (real name written below)
517  error = GBT_write_string(gb_new_species, "name", "$currcat$");
518  }
519  }
520 
521  if (!error) { // copy full name
522  full_name = GBT_read_string(gb_species, "full_name");
523  if (!full_name) error = GB_await_error();
524  else error = GBT_write_string(gb_new_species, "full_name", full_name);
525  }
526 
527  if (!error) {
528  ConstStrArray ali_names;
530 
531  long id = 0;
532  for (SpeciesConcatenateList *speciesList = scl; speciesList; speciesList = speciesList->next) {
533  for (int no_of_alignments = 0; ali_names[no_of_alignments]; no_of_alignments++) {
534  GBDATA *gb_seq_data = GBT_find_sequence(speciesList->species, ali_names[no_of_alignments]);
535  if (gb_seq_data) {
536  const char *seq_data = GB_read_char_pntr(gb_seq_data);
537  GBDATA *gb_data = GBT_add_data(gb_new_species, ali_names[no_of_alignments], "data", GB_STRING);
538  error = GB_write_string(gb_data, seq_data);
539  if (!error) id += GBS_checksum(seq_data, 1, ".-"); // creating checksum of the each aligned sequence to generate new accession number
540  }
541  if (error) error = GB_export_errorf("Can't create alignment '%s'", ali_names[no_of_alignments]);
542  }
543  }
544 
545  if (!error) {
546  acc = GBS_global_string_copy("ARB_%lX", id); // create new accession number
547  error = GBT_write_string(gb_new_species, "acc", acc);
548  }
549  }
550 
551  if (!error) error = checkAndMergeFields(gb_new_species, error, scl);
552 
553  // now generate new name
554  if (!error) {
555  char *new_species_name = NULp;
556 
557  const char *add_field = AW_get_nameserver_addid(GLOBAL.gb_main);
558  GBDATA *gb_addid = add_field[0] ? GB_entry(gb_new_species, add_field) : NULp;
559  char *addid = NULp;
560  if (gb_addid) addid = GB_read_as_string(gb_addid);
561 
562  error = AWTC_generate_one_name(GLOBAL.gb_main, full_name, acc, addid, new_species_name);
563  if (!error) { // name was created
564  if (GBT_find_species_rel_species_data(gb_species_data, new_species_name)) {
565  // if the name is not unique -> create unique name
566  UniqueNameDetector und(gb_species_data);
567  freeset(new_species_name, AWTC_makeUniqueShortName(new_species_name, und));
568  if (!new_species_name) error = GB_await_error();
569  }
570  }
571 
572  if (!error) error = GBT_write_string(gb_new_species, "name", new_species_name); // insert new 'name'
573 
574  free(new_species_name);
575  free(addid);
576  }
577 
578  error = GB_end_transaction(GLOBAL.gb_main, error);
579  if (error) {
580  gb_new_species = NULp;
581  aw_message(error);
582  }
583 
584  free(acc);
585  free(full_name);
586 
587  return gb_new_species;
588 }
589 
593 };
594 
595 static void mergeSimilarSpecies(AW_window *aws, MergeSpeciesType mergeType, AW_selection *selected_alis) {
596  nt_assert(correlated(selected_alis, mergeType == MERGE_SPECIES_AND_CONCAT_ALI));
597 
598  GB_ERROR error = NULp;
599  arb_progress wrapper;
600  {
601  AW_root *aw_root = aws->get_root();
602  char *merge_field_name = aw_root->awar(AWAR_CON_MERGE_FIELD)->read_string();
603 
604  SpeciesConcatenateList *scl = NULp; // to build list of similar species
605  SpeciesConcatenateList *newSpeciesList = NULp; // new SpeciesConcatenateList
606 
607  GB_begin_transaction(GLOBAL.gb_main); // open database for transaction
608 
610  error = GB_incur_error_if(!report_field_name);
611 
612  if (!error && strcmp(merge_field_name, NO_FIELD_SELECTED) == 0) {
613  error = "Please select database field for similarity detection";
614  }
615 
616  if (!error) {
618  arb_progress progress("Merging similar species", GBT_count_marked_species(GLOBAL.gb_main));
619  progress.auto_subtitles("Species");
620 
621  for (GBDATA * gb_species = GBT_first_marked_species(GLOBAL.gb_main);
622  gb_species && !error;
623  gb_species = GBT_next_marked_species(gb_species))
624  {
625  GBDATA *gb_species_field = GB_entry(gb_species, merge_field_name);
626 
627  if (!gb_species_field) {
628  // exit if species doesn't have any data in the selected field
629  error = GBS_global_string("Species '%s' does not contain data in selected field '%s'",
630  GBT_get_name_or_description(gb_species),
631  merge_field_name);
632  }
633  else {
634  char *gb_species_field_content = GB_read_as_string(gb_species_field);
635  int similar_species = 0;
636 
637  for (GBDATA * gb_species_next = GBT_next_marked_species(gb_species);
638  gb_species_next && !error;
639  gb_species_next = GBT_next_marked_species(gb_species_next))
640  {
641  GBDATA *gb_next_species_field = GB_entry(gb_species_next, merge_field_name);
642  if (!gb_next_species_field) {
643  // exit if species doesn't have any data in the selected field
644  error = GBS_global_string("Species '%s' does not contain data in selected field '%s'",
645  GBT_get_name_or_description(gb_species_next),
646  merge_field_name);
647  }
648  else {
649  char *gb_next_species_field_content = GB_read_as_string(gb_next_species_field);
650 
651  if (strcmp(gb_species_field_content, gb_next_species_field_content) == 0) {
653  GB_write_flag(gb_species_next, 0);
654  ++similar_species;
655  ++progress;
656  }
657  free(gb_next_species_field_content);
658  }
659  }
660 
661  if (similar_species > 0 && !error) {
662  ++similar_species; // correct merge counter
664  GB_write_flag(gb_species, 0);
665 
666  GBDATA *new_species_created = concatenateFieldsCreateNewSpecies(aws, gb_species, scl);
667 
668  nt_assert(new_species_created);
669  if (new_species_created) { // create a list of newly created species
670  addSpeciesToConcatenateList(&newSpeciesList, GBT_get_name_or_description(new_species_created));
671  }
672 
673  if (report_field_name) {
674  GBDATA *gb_report = GBT_searchOrCreate_itemfield_according_to_changekey(new_species_created, report_field_name, SPECIES_get_selector().change_key_path);
675  if (!gb_report) error = GB_await_error();
676  else error = GB_write_lossless_int(gb_report, similar_species);
677  }
678  }
679 
680  freeSpeciesConcatenateList(scl); scl = NULp;
681  free(gb_species_field_content);
682  }
683 
684  progress.inc_and_check_user_abort(error);
685  }
686  }
687 
688  if (!error) {
689  GBT_mark_all(GLOBAL.gb_main, 0); // unmark all species in the database
690  int newSpeciesCount = 0;
691 
692  for (; newSpeciesList; newSpeciesList = newSpeciesList->next) { // mark only newly created species
693  GB_write_flag(newSpeciesList->species, 1);
694  newSpeciesCount++;
695  }
696  aw_message(GBS_global_string("%i new species were created by taking \"%s\" as a criterion!", newSpeciesCount, merge_field_name));
697  freeSpeciesConcatenateList(newSpeciesList);
698  }
699 
700  free(merge_field_name);
701 
703  }
704 
705  if (mergeType == MERGE_SPECIES_AND_CONCAT_ALI && !error) {
706  // @@@ what happens if merge-process above succeeds and concatenateAlignments below fails?
707  // @@@ i think both steps should be put into ONE transaction!
708  concatenateAlignments(aws, selected_alis);
709  }
710 }
711 
712 static AW_window *createMergeSimilarSpeciesWindow(AW_root *aw_root, MergeSpeciesType mergeType, AW_selection *selected_alis) {
713  AW_window_simple *aws = new AW_window_simple;
714 
715  {
716  char *window_id = GBS_global_string_copy("MERGE_SPECIES_%i", mergeType);
717  const char *window_title = NULp;
718  switch (mergeType) {
719  case MERGE_SPECIES_SIMPLE: window_title = "Merge species"; break;
720  case MERGE_SPECIES_AND_CONCAT_ALI: window_title = "Merge and concatenate"; break;
721  }
722  aws->init(aw_root, window_id, window_title);
723  free(window_id);
724  }
725  aws->load_xfig("merge_species.fig");
726 
727  aws->callback(makeHelpCallback("merge_species.hlp"));
728  aws->at("help");
729  aws->create_button("HELP", "HELP", "H");
730 
733 
734  {
735  const char *buttonText = NULp;
736  switch (mergeType) {
737  case MERGE_SPECIES_SIMPLE: buttonText = "Merge similar species"; break;
738  case MERGE_SPECIES_AND_CONCAT_ALI: buttonText = "Merge similar species and concat alignments"; break;
739  }
740 
741  aws->at("merge");
742  aws->callback(makeWindowCallback(mergeSimilarSpecies, mergeType, selected_alis));
743  aws->create_autosize_button("MERGE_SIMILAR_SPECIES", buttonText, "M");
744  }
745 
746  aws->at("close");
747  aws->callback(AW_POPDOWN);
748  aws->create_button("CLOSE", "CLOSE", "C");
749 
750  return aws;
751 }
752 
754  static AW_window *aw = NULp;
756  return aw;
757 }
758 
760  static AW_window *aw = NULp;
761 #if defined(ASSERTION_USED)
762  static AW_selection *prev_selected_alis = NULp;
763 #endif
764 
765  if (!aw) {
767 #if defined(ASSERTION_USED)
768  prev_selected_alis = selected_alis;
769 #endif
770  }
771 #if defined(ASSERTION_USED)
772  nt_assert(selected_alis == prev_selected_alis); // would need multiple windows in that case
773 #endif
774  return aw;
775 }
776 
777 static void useSelectedAlignment(AW_window *aww) {
778  AW_root *root = aww->get_root();
779  const char *selali = root->awar(AWAR_CON_SELECTED_ALI)->read_char_pntr();
780  if (selali && strcmp(selali, NO_ALI_SELECTED) != 0) {
782  }
783  else {
784  aw_message("Select alignment to use in the left alignment list");
785  }
786 }
787 
788 // ----------------------------Creating concatenation window-----------------------------------------
790  AW_window_simple *aws = new AW_window_simple;
791 
792  aws->init(aw_root, "CONCAT_ALIGNMENTS", "Concatenate Alignments");
793  aws->load_xfig("concatenate.fig");
794 
795  aws->auto_space(5, 5);
796  aws->button_length(8);
797 
798  aws->callback(makeHelpCallback("concatenate.hlp"));
799  aws->at("help");
800  aws->create_button("HELP", "HELP", "H");
801 
802  aws->at("close");
803  aws->callback(AW_POPDOWN);
804  aws->create_button("CLOSE", "CLOSE", "C");
805 
806  aws->at("dbAligns");
808  AW_selection *sel_alis = awt_create_subset_selection_list(aws, all_alis->get_sellist(), "concatAligns", "collect", "sort");
809 
810  aws->at("type");
811  aws->create_option_menu(AWAR_CON_SEQUENCE_TYPE);
812  aws->insert_option("DNA", "d", "dna");
813  aws->insert_option("RNA", "r", "rna");
814  aws->insert_default_option("PROTEIN", "p", "ami");
815  aws->update_option_menu();
816  aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->add_callback(makeRootCallback(alitype_changed_cb, all_alis));
817 
818  aws->at("aliSeparator");
819  aws->create_input_field(AWAR_CON_ALIGNMENT_SEPARATOR, 10);
820 
821  aws->at("aliName");
822  aws->create_input_field(AWAR_CON_NEW_ALIGNMENT_NAME, 25);
823  aws->button_length(5);
824  aws->callback(useSelectedAlignment);
825  aws->create_button("USE", "Use");
826 
827  aws->at("overwrite");
828  aws->label("Allow to overwrite an existing alignment?");
829  aws->create_toggle(AWAR_CON_ALLOW_OVERWRITE_ALI);
830 
831  aws->at("insgaps");
832  aws->label("Insert gaps for missing alignment data?");
833  aws->create_toggle(AWAR_CON_INSGAPS_FOR_MISS_ALIS);
834 
835  aws->button_length(22);
836  aws->at("go");
837 
838  aws->callback(makeWindowCallback(concatenateAlignments, sel_alis));
839  aws->create_button("CONCATENATE", "CONCATENATE", "A");
840 
841  aws->callback(NT_createMergeSimilarSpeciesWindow);
842  aws->create_button("MERGE_SPECIES", "MERGE SIMILAR SPECIES", "M");
843 
844  aws->callback(makeCreateWindowCallback(NT_createMergeSimilarSpeciesAndConcatenateWindow, sel_alis));
845  aws->create_button("MERGE_CONCATENATE", "MERGE & CONCATENATE", "S");
846 
847  return aws;
848 }
849 // -------------------------------------------------------------------------------------------------------
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
Definition: arbdb.cxx:2152
const char * GB_ERROR
Definition: arb_core.h:25
GB_TYPES type
AW_awar * set_srt(const char *srt)
Definition: AW_awar.cxx:567
#define AWAR_CON_NEW_ALIGNMENT_NAME
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
#define AWAR_CON_ALLOW_OVERWRITE_ALI
void NT_createConcatenationAwars(AW_root *aw_root, AW_default aw_def, GBDATA *gb_main)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:720
static void useSelectedAlignment(AW_window *aww)
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
GB_ERROR GBT_check_alignment_name(const char *alignment_name)
Definition: adali.cxx:343
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
AW_selection * awt_create_subset_selection_list(AW_window *aww, AW_selection_list *parent_selection, const char *at_box, const char *at_add, const char *at_sort, bool autocorrect_subselection, SubsetChangedCb subChanged_cb, AW_CL cl_user)
GBDATA * GBT_get_alignment(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:808
void add(int v)
Definition: ClustalV.cxx:461
GB_ERROR AWTC_generate_one_name(GBDATA *gb_main, const char *full_name, const char *acc, const char *addid, char *&new_name)
Definition: AW_rename.cxx:317
void GBT_get_alignment_names(ConstStrArray &names, GBDATA *gbd)
Definition: adali.cxx:317
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1060
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
Definition: arbdb.cxx:2584
long read_int() const
Definition: AW_awar.cxx:184
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
#define AWAR_CON_MERGE_FIELD
STL namespace.
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void nput(char c, size_t count)
Definition: arb_strbuf.h:180
void auto_subtitles(const char *prefix)
Definition: arb_progress.h:344
void cat(const char *from)
Definition: arb_strbuf.h:199
void get_values(StrArray &intoArray)
Definition: aw_select.hxx:198
char * GBT_get_alignment_type_string(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:863
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
#define NO_FIELD_SELECTED
AW_window * NT_createMergeSimilarSpeciesWindow(AW_root *aw_root)
char * AWTC_makeUniqueShortName(const char *prefix, UniqueNameDetector &existingNames)
Definition: AW_rename.cxx:648
GB_ERROR GB_incur_error_if(bool error_may_occur)
Definition: arb_msg.h:56
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:231
GB_ERROR GBT_add_alignment_changekeys(GBDATA *gb_main, const char *ali)
static void addSpeciesToConcatenateList(SpeciesConcatenateList **sclp, GB_CSTR species_name)
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
const char * AW_get_nameserver_addid(GBDATA *gb_main)
Definition: AW_rename.cxx:39
static GBDATA * concatenateFieldsCreateNewSpecies(AW_window *, GBDATA *gb_species, SpeciesConcatenateList *scl)
const char * read_char_pntr() const
Definition: AW_awar.cxx:168
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:597
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
GBDATA * GB_create_container(GBDATA *father, const char *key)
Definition: arbdb.cxx:1829
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
static GB_ERROR create_concatInfo_SAI(GBDATA *gb_main, const char *new_ali_name, const char *ali_separator, const StrArray &ali_names)
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
#define AWAR_CON_ALIGNMENT_SEPARATOR
GBDATA * gb_species_data
Definition: adname.cxx:33
void GB_clear_error()
Definition: arb_msg.cxx:354
static GB_ERROR checkAndMergeFields(GBDATA *gb_new_species, GB_ERROR error, SpeciesConcatenateList *scl)
char * GB_get_subfields(GBDATA *gbd)
Definition: arbdb.cxx:2225
static void error(const char *msg)
Definition: mkptypes.cxx:96
#define AWAR_CON_INSGAPS_FOR_MISS_ALIS
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
static void concatenateAlignments(AW_window *aws, AW_selection *selected_alis)
GB_ERROR GB_write_lossless_int(GBDATA *gbd, int32_t i)
Definition: arbdb.cxx:1523
#define SRT_AUTOCORRECT_ALINAME
Definition: arbdbt.h:80
#define AWAR_CON_STORE_SIM_SP_NO
const char * prepare_and_get_selected_itemfield(AW_root *awr, const char *awar_name, GBDATA *gb_main, const ItemSelector &itemtype, FailIfField failIf)
#define AWAR_CON_SELECTED_ALI
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
char * read_string() const
Definition: AW_awar.cxx:198
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:372
#define nt_assert(cond)
Definition: NT_local.h:27
AW_DB_selection * awt_create_ALI_selection_list(GBDATA *gb_main, AW_window *aws, const char *varname, const char *ali_type_match)
GBDATA * GBT_searchOrCreate_itemfield_according_to_changekey(GBDATA *gb_item, const char *field_name, const char *change_key_path)
Definition: adChangeKey.cxx:61
static void alitype_changed_cb(AW_root *aw_root, AW_DB_selection *db_sel)
char * GBT_join_strings(const CharPtrArray &strings, char separator)
GB_ERROR GB_export_errorf(const char *templat,...)
Definition: arb_msg.cxx:262
#define AWAR_CON_SEQUENCE_TYPE
MergeSpeciesType
static void mergeSimilarSpecies(AW_window *aws, MergeSpeciesType mergeType, AW_selection *selected_alis)
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
SpeciesConcatenateList * next
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2773
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
char * get_alitype_eval(AW_root *aw_root)
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:139
void awt_reconfigure_ALI_selection_list(AW_DB_selection *dbsel, const char *ali_type_match)
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:352
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
AW_root * get_root()
Definition: aw_window.hxx:359
GBDATA * GBT_create_alignment(GBDATA *gb_main, const char *name, long len, long aligned, long security, const char *type, const char *why_created)
Definition: adali.cxx:388
AW_window * NT_createConcatenationWindow(AW_root *aw_root)
#define NULp
Definition: cxxforward.h:116
GBDATA * gb_main
Definition: NT_local.h:37
static AW_window * createMergeSimilarSpeciesWindow(AW_root *aw_root, MergeSpeciesType mergeType, AW_selection *selected_alis)
#define NO_ALI_SELECTED
GB_ERROR write_string(const char *aw_string)
static AW_window * NT_createMergeSimilarSpeciesAndConcatenateWindow(AW_root *aw_root, AW_selection *selected_alis)
#define offset(field)
Definition: GLwDrawA.c:73
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
const char * get_data() const
Definition: arb_strbuf.h:120
GB_TYPES
Definition: arbdb.h:62
CONSTEXPR long FIELD_FILTER_INT_WRITEABLE
Definition: item_sel_list.h:43
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:65
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
GB_ERROR GBT_check_data(GBDATA *Main, const char *alignment_name)
Definition: adali.cxx:218
GBDATA * GBT_get_presets(GBDATA *gb_main)
Definition: adali.cxx:30
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
static int info[maxsites+1]
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
AW_selection_list * get_sellist()
Definition: aw_select.hxx:196
CONSTEXPR long FIELD_FILTER_STRING_READABLE
Definition: item_sel_list.h:47
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
NT_global GLOBAL
Definition: NT_main.cxx:46
static AW_DB_selection * createSelectionList(GBDATA *gb_main, AW_window *aws, const char *awarName)
static void freeSpeciesConcatenateList(SpeciesConcatenateList *scl)
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105