ARB
NT_concatenate.cxx
Go to the documentation of this file.
1 // =======================================================================================
2 //
3 // File : NT_concatenate.cxx
4 // Purpose : 1.Concatenation of sequences or alignments
5 // 2.Merging the fields of similar species and creating a new species
6 // Author : Yadhu Kumar
7 // web site : http://www.arb-home.de/
8 //
9 // Copyright Department of Microbiology (Technical University Munich)
10 //
11 // =======================================================================================
12 
13 #include "NT_local.h"
14 
15 #include <items.h>
16 #include <item_sel_list.h>
17 #include <awt_sel_boxes.hxx>
18 #include <AW_rename.hxx>
19 #include <aw_question.hxx>
20 #include <aw_awar.hxx>
21 #include <aw_msg.hxx>
22 #include <aw_root.hxx>
23 #include <arb_progress.h>
24 #include <arb_strbuf.h>
25 #include <arb_strarray.h>
26 #include <awt_modules.hxx>
27 #include <arb_global_defs.h>
28 
29 using namespace std;
30 
31 #define AWAR_CON_SEQUENCE_TYPE "tmp/concat/sequence_type"
32 #define AWAR_CON_NEW_ALIGNMENT_NAME "tmp/concat/new_alignment_name"
33 #define AWAR_CON_ALIGNMENT_SEPARATOR "tmp/concat/alignment_separator"
34 #define AWAR_CON_SELECTED_ALI "tmp/concat/database_alignments"
35 #define AWAR_CON_MERGE_FIELD "tmp/concat/merge_field"
36 #define AWAR_CON_STORE_SIM_SP_NO "tmp/concat/store_sim_sp_no"
37 
38 #define AWAR_CON_ALLOW_OVERWRITE_ALI "tmp/concat/overwrite"
39 #define AWAR_CON_INSGAPS_FOR_MISS_ALIS "tmp/concat/insgaps"
40 
41 #define MOVE_DOWN 0
42 #define MOVE_UP 1
43 
46  char *species_name;
47 
49 };
50 
51 // --------------------------creating and initializing AWARS----------------------------------------
53  GB_transaction ta(gb_main);
54 
55  char *ali_default = GBT_get_default_alignment(gb_main);
56  char *ali_type = NULp;
57 
58  if (ali_default) {
59  ali_type = GBT_get_alignment_type_string(gb_main, ali_default);
60  if (!ali_type) {
61  // Note: this message will appear during startup (i.e. stick to general statement here!)
62  aw_message(GBS_global_string("Failed to detect type of default alignment (%s)\n"
63  "(Reason: %s)", ali_default, GB_await_error()));
64  }
65  }
66  if (!ali_type) ali_type = ARB_strdup("rna");
67 
68  aw_root->awar_string(AWAR_CON_SEQUENCE_TYPE, ali_type, aw_def);
70  aw_root->awar_string(AWAR_CON_ALIGNMENT_SEPARATOR, "XXX", aw_def);
71  aw_root->awar_string(AWAR_CON_SELECTED_ALI, "", aw_def);
72  aw_root->awar_string(AWAR_CON_MERGE_FIELD, "full_name", aw_def);
73  aw_root->awar_string(AWAR_CON_STORE_SIM_SP_NO, "merged_species", aw_def);
74 
75  aw_root->awar_int(AWAR_CON_ALLOW_OVERWRITE_ALI, 0, aw_def);
76  aw_root->awar_int(AWAR_CON_INSGAPS_FOR_MISS_ALIS, 1, aw_def);
77 
78  free(ali_type);
79  free(ali_default);
80 }
81 
82 // ------------------------Selecting alignments from the database for concatenation----------------------
83 
// Delivers the alignment-type match expression which both callers in this file
// hand to the ALI selection list; the result is heap-allocated (both callers free() it).
// NOTE(review): the body (listing line 85) is not part of this excerpt - presumably it is
// derived from AWAR_CON_SEQUENCE_TYPE; confirm against the real source.
84 inline char *get_alitype_eval(AW_root *aw_root) {
86 }
87 
88 static void alitype_changed_cb(AW_root *aw_root, AW_DB_selection *db_sel) {
89  char *ali_type = get_alitype_eval(aw_root);
90  awt_reconfigure_ALI_selection_list(db_sel, ali_type);
91  free(ali_type);
92 }
93 
94 static AW_DB_selection* createSelectionList(GBDATA *gb_main, AW_window *aws, const char *awarName) {
95 
96 #ifdef DEBUG
97  static bool ran=false;
98  nt_assert(!ran);
99  ran=true; // prevents calling this function for the second time
100 #endif
101 
102  AW_root *aw_root = aws->get_root();
103  char *ali_type = get_alitype_eval(aw_root);
104  AW_DB_selection *db_sel = awt_create_ALI_selection_list(gb_main, aws, awarName, ali_type);
105 
106  free(ali_type);
107  return db_sel;
108 }
109 
110 // ---------- Create SAI to display alignments that were concatenated --------------
111 
112 static GB_ERROR create_concatInfo_SAI(GBDATA *gb_main, const char *new_ali_name, const char *ali_separator, const StrArray& ali_names) {
113  GB_ERROR error = NULp;
114  GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, "ConcatInfo");
115 
116  if (!gb_extended) error = GB_await_error();
117  else {
118  GBDATA *gb_data = GBT_add_data(gb_extended, new_ali_name, "data", GB_STRING);
119 
120  if (!gb_data) {
121  error = GB_await_error();
122  }
123  else {
124  int new_ali_length = GBT_get_alignment_len(gb_main, new_ali_name);
125  int sep_len = strlen(ali_separator);
126 
127  char *info = ARB_alloc<char>(new_ali_length+1);
128  memset(info, '=', new_ali_length);
129 
130  int offset = 0;
131  int last_ali_idx = ali_names.size()-1;
132 
133  for (int a = 0; a <= last_ali_idx; ++a) {
134  const char *ali = ali_names[a];
135  int ali_len = GBT_get_alignment_len(gb_main, ali);
136  int ali_str_len = strlen(ali);
137 
138  char *my_info = info+offset;
139 
140  int half_ali_len = ali_len/2;
141  for (int i = 0; i<5; ++i) {
142  if (i<half_ali_len) {
143  my_info[i] = '<';
144  my_info[ali_len-i-1] = '>';
145  }
146  }
147 
148  if (ali_str_len<ali_len) {
149  int namepos = half_ali_len - ali_str_len/2;
150  memcpy(my_info+namepos, ali, ali_str_len);
151  }
152 
153  offset += ali_len;
154  if (a != last_ali_idx) {
155  memcpy(info+offset, ali_separator, sep_len);
156  offset += sep_len;
157  }
158  }
159 
160  nt_assert(offset == new_ali_length); // wrong alignment length!
161  info[new_ali_length] = 0;
162 
163  if (!error) error = GB_write_string(gb_data, info);
164  free(info);
165  }
166  }
167  return error;
168 }
169 
170 // ---------------------------------------- Concatenation function ----------------------------------
// Window callback (bound to the "CONCATENATE" button): concatenates the alignments listed in
// 'selected_alis' for all marked species into the alignment named by AWAR_CON_NEW_ALIGNMENT_NAME.
// Consecutive source alignments are separated by the content of AWAR_CON_ALIGNMENT_SEPARATOR.
// Afterwards the "ConcatInfo" SAI is written and a changekey for the new data field is added.
// NOTE(review): listing lines 174 and 295 are missing from this excerpt.
171 static void concatenateAlignments(AW_window *aws, AW_selection *selected_alis) {
172  nt_assert(selected_alis);
173 
175 
176  long marked_species = GBT_count_marked_species(GLOBAL.gb_main);
177  AW_root *aw_root = aws->get_root();
178  char *new_ali_name = aw_root->awar(AWAR_CON_NEW_ALIGNMENT_NAME)->read_string();
179  GB_ERROR error = GBT_check_alignment_name(new_ali_name);
180 
181  StrArray ali_names;
182  selected_alis->get_values(ali_names);
183 
184  arb_progress progress("Concatenating alignments", marked_species);
185  size_t ali_count = ali_names.size();
186 
187  if (!error && ali_count<2) {
188  error = "Not enough alignments selected for concatenation (need at least 2)";
189  }
190  if (!error) {
191  int found[ali_count], missing[ali_count], ali_length[ali_count]; // NOTE(review): runtime-sized arrays are a compiler extension in C++
192 
193  for (size_t a = 0; a<ali_count; a++) { // reset per-alignment counters, fetch source lengths, reject target==source
194  found[a] = 0;
195  missing[a] = 0;
196  ali_length[a] = GBT_get_alignment_len(GLOBAL.gb_main, ali_names[a]); // NOTE(review): result is not validated here
197 
198  if (strcmp(ali_names[a], new_ali_name) == 0) {
199  error = "Target alignment may not be one of the source alignments";
200  }
201  }
202 
203  if (!error) {
204  char *ali_separator = aw_root->awar(AWAR_CON_ALIGNMENT_SEPARATOR)->read_string();
205  const int sep_len = strlen(ali_separator);
206 
207  long new_alignment_len = (ali_count-1)*sep_len; // separators first, source lengths are added below
208  for (size_t a = 0; a<ali_count; ++a) { // LOOP_VECTORIZED
209  new_alignment_len += ali_length[a];
210  }
211 
212  GBDATA *gb_presets = GBT_get_presets(GLOBAL.gb_main);
213  GBDATA *gb_alignment_exists = GB_find_string(gb_presets, "alignment_name", new_ali_name, GB_IGNORE_CASE, SEARCH_GRANDCHILD);
214  GBDATA *gb_new_alignment = NULp;
215  char *seq_type = aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->read_string();
216 
217  if (gb_alignment_exists) {
218  // target alignment exists
219  if (aw_root->awar(AWAR_CON_ALLOW_OVERWRITE_ALI)->read_int()) { // allow overwrite
220  gb_new_alignment = GBT_get_alignment(GLOBAL.gb_main, new_ali_name);
221  if (!gb_new_alignment) error = GB_await_error();
222  }
223  else {
224  error = GBS_global_string("Target alignment '%s' already exists\n(check overwrite-toggle if you really want to overwrite)", new_ali_name);
225  }
226  }
227  else {
228  // create new target alignment
229  gb_new_alignment = GBT_create_alignment(GLOBAL.gb_main, new_ali_name, new_alignment_len, 0, 0, seq_type);
230  if (!gb_new_alignment) error = GB_await_error();
231  }
232 
233  if (!error) {
234  AW_repeated_question ask_about_missing_alignment; // NOTE(review): not referenced anywhere in the visible code
235  bool insertGaps = aw_root->awar(AWAR_CON_INSGAPS_FOR_MISS_ALIS)->read_int();
236 
237  for (GBDATA *gb_species = GBT_first_marked_species(GLOBAL.gb_main);
238  gb_species && !error;
239  gb_species = GBT_next_marked_species(gb_species))
240  {
241  GBS_strstruct concat_seq(new_alignment_len+1);
242  int data_inserted = 0; // number of source alignments that delivered data for this species
243 
244  for (size_t a = 0; a<ali_count; ++a) {
245  if (a) concat_seq.cat(ali_separator); // separator goes in front of every alignment but the first
246 
247  GBDATA *gb_seq_data = GBT_find_sequence(gb_species, ali_names[a]);
248  if (gb_seq_data) { // found data
249  const char *seq_data = GB_read_char_pntr(gb_seq_data);
250  concat_seq.cat(seq_data);
251  ++found[a];
252  ++data_inserted;
253  }
254  else { // missing data
255  if (insertGaps) concat_seq.nput('.', ali_length[a]); // without insertGaps nothing is appended, i.e. following data is not shifted to its region
256  ++missing[a];
257  }
258  }
259 
260  if (!data_inserted) {
261  error = GBS_global_string("None of the source alignments had data for species '%s'", GBT_get_name_or_description(gb_species));
262  }
263  else {
264  GBDATA *gb_data = GBT_add_data(gb_species, new_ali_name, "data", GB_STRING); // NOTE(review): result is not checked
265  GB_write_string(gb_data, concat_seq.get_data()); // NOTE(review): returned error is dropped
266  }
267  progress.inc_and_check_user_abort(error);
268  }
269 
270  if (!error) {
271  // ............. print missing alignments...........
272  aw_message(GBS_global_string("Concatenation of alignments was performed for %ld species.", marked_species));
273  for (size_t a = 0; a<ali_count; ++a) {
274  aw_message(GBS_global_string("%s: was found in %d species and missing in %d species.", ali_names[a], found[a], missing[a]));
275  }
276  }
277 
278  if (!error) error = GBT_check_data(GLOBAL.gb_main, new_ali_name); // update alignment info (otherwise create_concatInfo_SAI fails when overwriting an alignment)
279  if (!error) error = create_concatInfo_SAI(GLOBAL.gb_main, new_ali_name, ali_separator, ali_names);
280  }
281 
282  free(seq_type);
283  free(ali_separator);
284  }
285  }
286 
287  if (!error) {
288  char *nfield = GBS_global_string_copy("%s/data", new_ali_name); // field "<new_ali_name>/data" gets registered as changekey
289  error = GBT_add_new_changekey(GLOBAL.gb_main, nfield, GB_STRING);
290  free(nfield);
291  }
292  else {
293  progress.done(); // error case: finish the progress explicitly
294  }
296  free(new_ali_name);
297 }
298 
// Prepends the species named 'species_name' to the list '*sclp'.
// Nothing happens if no species with that name is found.
// The list element stores its own copy of the name.
// NOTE(review): listing lines 301 and 305 are missing from this excerpt (they have to define
// 'gb_species_data' and 'scl' used below).
299 static void addSpeciesToConcatenateList(SpeciesConcatenateList **sclp, GB_CSTR species_name) {
300 
302  GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, species_name);
303 
304  if (gb_species) {
306 
307  scl->species = gb_species;
308  scl->species_name = ARB_strdup(species_name); // private copy of the name
309  scl->next = *sclp; // new element becomes the head of the list
310  *sclp = scl;
311  }
312 }
313 
315  while (scl) {
316  SpeciesConcatenateList *next = scl->next;
317  free(scl->species_name);
318  delete scl;
319  scl = next;
320  }
321 }
322 
324 
325  char *doneFields = ARB_strdup(";name;"); // all fields which are already merged
326  int doneLen = strlen(doneFields);
327  SpeciesConcatenateList *sl = scl;
328  int sl_length = 0; while (scl) { sl_length++; scl=scl->next; } // counting no. of similar species stored in the list
329  int *fieldStat = new int[sl_length]; // 0 = not used yet ; -1 = doesn't have field ; 1..n = field content (same number means same content)
330 
331  while (sl && !error) { // with all species do..
332  char *newFields = GB_get_subfields(sl->species);
333  char *fieldStart = newFields; // points to ; before next field
334 
335  while (fieldStart[1] && !error) { // with all subfields of the species do..
336  char *fieldEnd = strchr(fieldStart+1, ';');
337  nt_assert(fieldEnd);
338  char behind = fieldEnd[1]; fieldEnd[1] = 0;
339 
340  if (!strstr(doneFields, fieldStart)) { // field is not merged yet
341  char *fieldName = fieldStart+1;
342  int fieldLen = int(fieldEnd-fieldName);
343 
344  nt_assert(fieldEnd[0]==';');
345  fieldEnd[0] = 0;
346 
347  GBDATA *gb_field = GB_search(sl->species, fieldName, GB_FIND); // field does to exist (it was found before)
348  GB_TYPES type = GB_read_type(gb_field);
349 
350  if (type==GB_STRING) { // we only merge string fields
351  int i; int doneSpecies = 0; int nextStat = 1;
352 
353  for (i=0; i<sl_length; i++) { fieldStat[i] = 0; } // clear field status
354 
355  while (doneSpecies<sl_length) { // since all species in list were handled
356  SpeciesConcatenateList *sl2 = sl;
357  i = 0;
358 
359  while (sl2) {
360  if (fieldStat[i]==0) {
361  gb_field = GB_search(sl2->species, fieldName, GB_FIND);
362  if (gb_field) {
363  char *content = GB_read_as_string(gb_field);
364  SpeciesConcatenateList *sl3 = sl2->next;
365  fieldStat[i] = nextStat;
366  int j = i+1; doneSpecies++;
367 
368  while (sl3) {
369  if (fieldStat[j]==0) {
370  gb_field = GB_search(sl3->species, fieldName, GB_FIND);
371  if (gb_field) {
372  char *content2 = GB_read_as_string(gb_field);
373  if (strcmp(content, content2)==0) { // if contents are the same, they get the same status
374  fieldStat[j] = nextStat;
375  doneSpecies++;
376  }
377  free(content2);
378  }
379  else {
380  fieldStat[j] = -1;
381  doneSpecies++;
382  }
383  }
384  sl3 = sl3->next; j++;
385  }
386  free(content); nextStat++;
387  }
388  else {
389  fieldStat[i] = -1; // field does not exist here
390  doneSpecies++;
391  }
392  }
393  sl2 = sl2->next; i++;
394  }
395  if (!sl2) break;
396  }
397  nt_assert(nextStat!=1); // this would mean that none of the species contained the field
398  {
399  char *new_content = NULp;
400  int new_content_len = 0; // @@@ useless (0 where used; unused otherwise)
401 
402  if (nextStat==2) { // all species contain same field content or do not have the field
403  SpeciesConcatenateList *sl2 = sl;
404  while (sl2) {
405  gb_field = GB_search(sl2->species, fieldName, GB_FIND);
406  if (gb_field) {
407  new_content = GB_read_as_string(gb_field);
408  new_content_len = strlen(new_content);
409  break;
410  }
411  sl2 = sl2->next;
412  }
413  }
414  else { // different field contents
415  int actualStat;
416  for (actualStat=1; actualStat<nextStat; actualStat++) {
417  SpeciesConcatenateList *sl2 = sl;
418 
419  int names_len = 1; // open bracket
420  char *content = NULp;
421  i = 0;
422 
423  while (sl2) {
424  if (fieldStat[i]==actualStat) {
425  names_len += strlen(sl2->species_name)+1;
426  if (!content) {
427  gb_field = GB_search(sl2->species, fieldName, GB_FIND);
428  nt_assert(gb_field);
429  content = GB_read_as_string(gb_field);
430  }
431  }
432  sl2 = sl2->next; i++;
433  }
434  nt_assert(content);
435  int add_len = names_len+1+strlen(content);
436  char *whole = ARB_alloc<char>(new_content_len+1+add_len+1);
437  nt_assert(whole);
438  char *add = new_content ? whole+sprintf(whole, "%s ", new_content) : whole;
439  sl2 = sl; i = 0;
440  int first = 1;
441  while (sl2) {
442  if (fieldStat[i]==actualStat) {
443  add += sprintf(add, "%c%s", first ? '{' : ';', sl2->species_name);
444  first = 0;
445  }
446  sl2 = sl2->next; i++;
447  }
448  add += sprintf(add, "} %s", content);
449 
450  free(content);
451  freeset(new_content, whole);
452  new_content_len = strlen(new_content); // cppcheck-suppress deallocuse
453  }
454  }
455 
456  if (new_content) {
457  error = GBT_write_string(gb_new_species, fieldName, new_content);
458  free(new_content);
459  }
460  }
461  }
462 
463  // mark field as done:
464  char *new_doneFields = ARB_alloc<char>(doneLen+fieldLen+1+1);
465  sprintf(new_doneFields, "%s%s;", doneFields, fieldName);
466  doneLen += fieldLen+1;
467  freeset(doneFields, new_doneFields);
468  fieldEnd[0] = ';';
469  }
470  fieldEnd[1] = behind;
471  fieldStart = fieldEnd;
472  }
473  free(newFields);
474  sl = sl->next;
475  }
476  free(doneFields);
477  delete [] fieldStat;
478 
479  return error;
480 }
481 
484 
485  GB_ERROR error = NULp;
487 
488  // data needed for name generation
489  char *full_name = NULp;
490  char *acc = NULp;
491 
492  // --------------------getting the species related data --------------------
493 
494  GBDATA *gb_new_species = NULp;
495 
496  if (!error) {
497  // copy species to create a new species
498  gb_new_species = GB_create_container(gb_species_data, "species");
499  error = gb_new_species ? GB_copy_dropProtectMarksAndTempstate(gb_new_species, gb_species) : GB_await_error();
500 
501  if (!error) { // write dummy-name (real name written below)
502  error = GBT_write_string(gb_new_species, "name", "$currcat$");
503  }
504  }
505 
506  if (!error) { // copy full name
507  full_name = GBT_read_string(gb_species, "full_name");
508  if (!full_name) error = GB_await_error();
509  else error = GBT_write_string(gb_new_species, "full_name", full_name);
510  }
511 
512  if (!error) {
513  ConstStrArray ali_names;
515 
516  long id = 0;
517  for (SpeciesConcatenateList *speciesList = scl; speciesList; speciesList = speciesList->next) {
518  for (int no_of_alignments = 0; ali_names[no_of_alignments]; no_of_alignments++) {
519  GBDATA *gb_seq_data = GBT_find_sequence(speciesList->species, ali_names[no_of_alignments]);
520  if (gb_seq_data) {
521  const char *seq_data = GB_read_char_pntr(gb_seq_data);
522  GBDATA *gb_data = GBT_add_data(gb_new_species, ali_names[no_of_alignments], "data", GB_STRING);
523  error = GB_write_string(gb_data, seq_data);
524  if (!error) id += GBS_checksum(seq_data, 1, ".-"); // creating checksum of the each aligned sequence to generate new accession number
525  }
526  if (error) error = GB_export_errorf("Can't create alignment '%s'", ali_names[no_of_alignments]);
527  }
528  }
529 
530  if (!error) {
531  acc = GBS_global_string_copy("ARB_%lX", id); // create new accession number
532  error = GBT_write_string(gb_new_species, "acc", acc);
533  }
534  }
535 
536  if (!error) error = checkAndMergeFields(gb_new_species, error, scl);
537 
538  // now generate new name
539  if (!error) {
540  char *new_species_name = NULp;
541 
542  const char *add_field = AW_get_nameserver_addid(GLOBAL.gb_main);
543  GBDATA *gb_addid = add_field[0] ? GB_entry(gb_new_species, add_field) : NULp;
544  char *addid = NULp;
545  if (gb_addid) addid = GB_read_as_string(gb_addid);
546 
547  error = AWTC_generate_one_name(GLOBAL.gb_main, full_name, acc, addid, new_species_name);
548  if (!error) { // name was created
549  if (GBT_find_species_rel_species_data(gb_species_data, new_species_name)) {
550  // if the name is not unique -> create unique name
551  UniqueNameDetector und(gb_species_data);
552  freeset(new_species_name, AWTC_makeUniqueShortName(new_species_name, und));
553  if (!new_species_name) error = GB_await_error();
554  }
555  }
556 
557  if (!error) error = GBT_write_string(gb_new_species, "name", new_species_name); // insert new 'name'
558 
559  free(new_species_name);
560  free(addid);
561  }
562 
563  error = GB_end_transaction(GLOBAL.gb_main, error);
564  if (error) {
565  gb_new_species = NULp;
566  aw_message(error);
567  }
568 
569  free(acc);
570  free(full_name);
571 
572  return gb_new_species;
573 }
574 
578 };
579 
// Window callback: groups the marked species by the content of the field selected in
// AWAR_CON_MERGE_FIELD and creates one new species for every group of at least two species
// (via concatenateFieldsCreateNewSpecies). On success all species get unmarked and only the
// newly created species are marked. If 'mergeType' is MERGE_SPECIES_AND_CONCAT_ALI,
// concatenateAlignments() is run afterwards.
// NOTE(review): listing lines 594, 602, 637, 648 and 687 are missing from this excerpt
// ('report_field_name' has to be defined in one of them).
580 static void mergeSimilarSpecies(AW_window *aws, MergeSpeciesType mergeType, AW_selection *selected_alis) {
581  nt_assert(correlated(selected_alis, mergeType == MERGE_SPECIES_AND_CONCAT_ALI));
582 
583  GB_ERROR error = NULp;
584  arb_progress wrapper;
585  {
586  AW_root *aw_root = aws->get_root();
587  char *merge_field_name = aw_root->awar(AWAR_CON_MERGE_FIELD)->read_string();
588 
589  SpeciesConcatenateList *scl = NULp; // to build list of similar species
590  SpeciesConcatenateList *newSpeciesList = NULp; // new SpeciesConcatenateList
591 
592  GB_begin_transaction(GLOBAL.gb_main); // open database for transaction
593 
595  error = GB_incur_error_if(!report_field_name);
596 
597  if (!error && strcmp(merge_field_name, NO_FIELD_SELECTED) == 0) {
598  error = "Please select database field for similarity detection";
599  }
600 
601  if (!error) {
603  arb_progress progress("Merging similar species", GBT_count_marked_species(GLOBAL.gb_main));
604  progress.auto_subtitles("Species");
605 
606  for (GBDATA * gb_species = GBT_first_marked_species(GLOBAL.gb_main);
607  gb_species && !error;
608  gb_species = GBT_next_marked_species(gb_species))
609  {
610  GBDATA *gb_species_field = GB_entry(gb_species, merge_field_name);
611 
612  if (!gb_species_field) {
613  // exit if species doesn't have any data in the selected field
614  error = GBS_global_string("Species '%s' does not contain data in selected field '%s'",
615  GBT_get_name_or_description(gb_species),
616  merge_field_name);
617  }
618  else {
619  char *gb_species_field_content = GB_read_as_string(gb_species_field);
620  int similar_species = 0; // counts the OTHER marked species with identical field content
621 
622  for (GBDATA * gb_species_next = GBT_next_marked_species(gb_species);
623  gb_species_next && !error;
624  gb_species_next = GBT_next_marked_species(gb_species_next))
625  {
626  GBDATA *gb_next_species_field = GB_entry(gb_species_next, merge_field_name);
627  if (!gb_next_species_field) {
628  // exit if species doesn't have any data in the selected field
629  error = GBS_global_string("Species '%s' does not contain data in selected field '%s'",
630  GBT_get_name_or_description(gb_species_next),
631  merge_field_name);
632  }
633  else {
634  char *gb_next_species_field_content = GB_read_as_string(gb_next_species_field);
635 
636  if (strcmp(gb_species_field_content, gb_next_species_field_content) == 0) {
638  GB_write_flag(gb_species_next, 0); // clear the flag of the similar species (presumably so the marked-species loops skip it from now on - confirm)
639  ++similar_species;
640  ++progress;
641  }
642  free(gb_next_species_field_content);
643  }
644  }
645 
646  if (similar_species > 0 && !error) {
647  ++similar_species; // correct merge counter
649  GB_write_flag(gb_species, 0);
650 
651  GBDATA *new_species_created = concatenateFieldsCreateNewSpecies(aws, gb_species, scl);
652 
653  nt_assert(new_species_created); // NOTE(review): only asserted - without assertions a NULp result is still passed on below if report_field_name is set
654  if (new_species_created) { // create a list of newly created species
655  addSpeciesToConcatenateList(&newSpeciesList, GBT_get_name_or_description(new_species_created));
656  }
657 
658  if (report_field_name) { // store the number of merged species in the report field
659  GBDATA *gb_report = GBT_searchOrCreate_itemfield_according_to_changekey(new_species_created, report_field_name, SPECIES_get_selector().change_key_path);
660  if (!gb_report) error = GB_await_error();
661  else error = GB_write_lossless_int(gb_report, similar_species);
662  }
663  }
664 
665  freeSpeciesConcatenateList(scl); scl = NULp;
666  free(gb_species_field_content);
667  }
668 
669  progress.inc_and_check_user_abort(error);
670  }
671  }
672 
673  if (!error) {
674  GBT_mark_all(GLOBAL.gb_main, 0); // unmark all species in the database
675  int newSpeciesCount = 0;
676 
677  for (; newSpeciesList; newSpeciesList = newSpeciesList->next) { // mark only newly created species
678  GB_write_flag(newSpeciesList->species, 1);
679  newSpeciesCount++;
680  }
681  aw_message(GBS_global_string("%i new species were created by taking \"%s\" as a criterion!", newSpeciesCount, merge_field_name));
682  freeSpeciesConcatenateList(newSpeciesList); // NOTE(review): the loop above has advanced newSpeciesList to NULp, so the list elements are not freed here (and the list is not freed at all in the error case)
683  }
684 
685  free(merge_field_name);
686 
688  }
689 
690  if (mergeType == MERGE_SPECIES_AND_CONCAT_ALI && !error) {
691  // @@@ what happens if merge-process above succeeds and concatenateAlignments below fails?
692  // @@@ i think both steps should be put into ONE transaction!
693  concatenateAlignments(aws, selected_alis);
694  }
695 }
696 
// Builds the "merge similar species" window (layout "merge_species.fig").
// 'mergeType' selects window id, window title and the text of the merge button;
// the merge button calls mergeSimilarSpecies() with 'mergeType' and 'selected_alis'.
// NOTE(review): listing lines 716-717 are missing from this excerpt.
697 static AW_window *createMergeSimilarSpeciesWindow(AW_root *aw_root, MergeSpeciesType mergeType, AW_selection *selected_alis) {
698  AW_window_simple *aws = new AW_window_simple;
699 
700  {
701  char *window_id = GBS_global_string_copy("MERGE_SPECIES_%i", mergeType); // the window id contains the numeric merge type
702  const char *window_title = NULp;
703  switch (mergeType) {
704  case MERGE_SPECIES_SIMPLE: window_title = "Merge species"; break;
705  case MERGE_SPECIES_AND_CONCAT_ALI: window_title = "Merge and concatenate"; break;
706  }
707  aws->init(aw_root, window_id, window_title);
708  free(window_id);
709  }
710  aws->load_xfig("merge_species.fig");
711 
712  aws->callback(makeHelpCallback("merge_species.hlp"));
713  aws->at("help");
714  aws->create_button("HELP", "HELP", "H");
715 
718 
719  {
720  const char *buttonText = NULp;
721  switch (mergeType) {
722  case MERGE_SPECIES_SIMPLE: buttonText = "Merge similar species"; break;
723  case MERGE_SPECIES_AND_CONCAT_ALI: buttonText = "Merge similar species and concat alignments"; break;
724  }
725 
726  aws->at("merge");
727  aws->callback(makeWindowCallback(mergeSimilarSpecies, mergeType, selected_alis));
728  aws->create_autosize_button("MERGE_SIMILAR_SPECIES", buttonText, "M");
729  }
730 
731  aws->at("close");
732  aws->callback(AW_POPDOWN);
733  aws->create_button("CLOSE", "CLOSE", "C");
734 
735  return aws;
736 }
737 
739  static AW_window *aw = NULp;
741  return aw;
742 }
743 
745  static AW_window *aw = NULp;
746 #if defined(ASSERTION_USED)
747  static AW_selection *prev_selected_alis = NULp;
748 #endif
749 
750  if (!aw) {
752 #if defined(ASSERTION_USED)
753  prev_selected_alis = selected_alis;
754 #endif
755  }
756 #if defined(ASSERTION_USED)
757  nt_assert(selected_alis == prev_selected_alis); // would need multiple windows in that case
758 #endif
759  return aw;
760 }
761 
// Window callback: reads the alignment selected in AWAR_CON_SELECTED_ALI.
// If nothing (or NO_ALI_SELECTED) is selected, the user is asked to select an alignment.
// NOTE(review): the action performed for a valid selection (listing line 766) is missing from
// this excerpt - presumably the selection is stored as new alignment name; confirm.
762 static void useSelectedAlignment(AW_window *aww) {
763  AW_root *root = aww->get_root();
764  const char *selali = root->awar(AWAR_CON_SELECTED_ALI)->read_char_pntr();
765  if (selali && strcmp(selali, NO_ALI_SELECTED) != 0) {
767  }
768  else {
769  aw_message("Select alignment to use in the left alignment list");
770  }
771 }
772 
773 // ----------------------------Creating concatenation window-----------------------------------------
775  AW_window_simple *aws = new AW_window_simple;
776 
777  aws->init(aw_root, "CONCAT_ALIGNMENTS", "Concatenate Alignments");
778  aws->load_xfig("concatenate.fig");
779 
780  aws->auto_space(5, 5);
781  aws->button_length(8);
782 
783  aws->callback(makeHelpCallback("concatenate.hlp"));
784  aws->at("help");
785  aws->create_button("HELP", "HELP", "H");
786 
787  aws->at("close");
788  aws->callback(AW_POPDOWN);
789  aws->create_button("CLOSE", "CLOSE", "C");
790 
791  aws->at("dbAligns");
793  AW_selection *sel_alis = awt_create_subset_selection_list(aws, all_alis->get_sellist(), "concatAligns", "collect", "sort");
794 
795  aws->at("type");
796  aws->create_option_menu(AWAR_CON_SEQUENCE_TYPE, true);
797  aws->insert_option("DNA", "d", "dna");
798  aws->insert_option("RNA", "r", "rna");
799  aws->insert_default_option("PROTEIN", "p", "ami");
800  aws->update_option_menu();
801  aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->add_callback(makeRootCallback(alitype_changed_cb, all_alis));
802 
803  aws->at("aliSeparator");
804  aws->create_input_field(AWAR_CON_ALIGNMENT_SEPARATOR, 10);
805 
806  aws->at("aliName");
807  aws->create_input_field(AWAR_CON_NEW_ALIGNMENT_NAME, 25);
808  aws->button_length(5);
809  aws->callback(useSelectedAlignment);
810  aws->create_button("USE", "Use");
811 
812  aws->at("overwrite");
813  aws->label("Allow to overwrite an existing alignment?");
814  aws->create_toggle(AWAR_CON_ALLOW_OVERWRITE_ALI);
815 
816  aws->at("insgaps");
817  aws->label("Insert gaps for missing alignment data?");
818  aws->create_toggle(AWAR_CON_INSGAPS_FOR_MISS_ALIS);
819 
820  aws->button_length(22);
821  aws->at("go");
822 
823  aws->callback(makeWindowCallback(concatenateAlignments, sel_alis));
824  aws->create_button("CONCATENATE", "CONCATENATE", "A");
825 
826  aws->callback(NT_createMergeSimilarSpeciesWindow);
827  aws->create_button("MERGE_SPECIES", "MERGE SIMILAR SPECIES", "M");
828 
829  aws->callback(makeCreateWindowCallback(NT_createMergeSimilarSpeciesAndConcatenateWindow, sel_alis));
830  aws->create_button("MERGE_CONCATENATE", "MERGE & CONCATENATE", "S");
831 
832  return aws;
833 }
834 // -------------------------------------------------------------------------------------------------------
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2516
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
Definition: arbdb.cxx:2144
const char * GB_ERROR
Definition: arb_core.h:25
GB_TYPES type
AW_awar * set_srt(const char *srt)
Definition: AW_awar.cxx:569
#define AWAR_CON_NEW_ALIGNMENT_NAME
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
#define AWAR_CON_ALLOW_OVERWRITE_ALI
void NT_createConcatenationAwars(AW_root *aw_root, AW_default aw_def, GBDATA *gb_main)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1385
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:717
static void useSelectedAlignment(AW_window *aww)
GB_ERROR GBT_check_alignment_name(const char *alignment_name)
Definition: adali.cxx:342
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2549
AW_selection * awt_create_subset_selection_list(AW_window *aww, AW_selection_list *parent_selection, const char *at_box, const char *at_add, const char *at_sort, bool autocorrect_subselection, SubsetChangedCb subChanged_cb, AW_CL cl_user)
GBDATA * GBT_get_alignment(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:684
void add(int v)
Definition: ClustalV.cxx:461
GB_ERROR AWTC_generate_one_name(GBDATA *gb_main, const char *full_name, const char *acc, const char *addid, char *&new_name)
Definition: AW_rename.cxx:317
void GBT_get_alignment_names(ConstStrArray &names, GBDATA *gbd)
Definition: adali.cxx:316
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1054
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
Definition: arbdb.cxx:2572
long read_int() const
Definition: AW_awar.cxx:187
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:706
#define AWAR_CON_MERGE_FIELD
STL namespace.
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
void nput(char c, size_t count)
Definition: arb_strbuf.h:143
void auto_subtitles(const char *prefix)
Definition: arb_progress.h:286
void cat(const char *from)
Definition: arb_strbuf.h:158
void get_values(StrArray &intoArray)
Definition: aw_select.hxx:198
GB_ERROR GBT_add_new_changekey(GBDATA *gb_main, const char *name, int type)
char * GBT_get_alignment_type_string(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:727
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2482
#define NO_FIELD_SELECTED
AW_window * NT_createMergeSimilarSpeciesWindow(AW_root *aw_root)
char * AWTC_makeUniqueShortName(const char *prefix, UniqueNameDetector &existingNames)
Definition: AW_rename.cxx:633
GB_ERROR GB_incur_error_if(bool error_may_occur)
Definition: arb_msg.h:56
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:234
static void addSpeciesToConcatenateList(SpeciesConcatenateList **sclp, GB_CSTR species_name)
void create_itemfield_selection_button(AW_window *aws, const FieldSelDef &selDef, const char *at)
const char * AW_get_nameserver_addid(GBDATA *gb_main)
Definition: AW_rename.cxx:39
static GBDATA * concatenateFieldsCreateNewSpecies(AW_window *, GBDATA *gb_species, SpeciesConcatenateList *scl)
const char * read_char_pntr() const
Definition: AW_awar.cxx:171
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:559
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
GBDATA * GB_create_container(GBDATA *father, const char *key)
Definition: arbdb.cxx:1827
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
GBDATA * GBT_create_alignment(GBDATA *gbd, const char *name, long len, long aligned, long security, const char *type)
Definition: adali.cxx:387
static GB_ERROR create_concatInfo_SAI(GBDATA *gb_main, const char *new_ali_name, const char *ali_separator, const StrArray &ali_names)
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1641
#define AWAR_CON_ALIGNMENT_SEPARATOR
GBDATA * gb_species_data
Definition: adname.cxx:34
static GB_ERROR checkAndMergeFields(GBDATA *gb_new_species, GB_ERROR error, SpeciesConcatenateList *scl)
char * GB_get_subfields(GBDATA *gbd)
Definition: arbdb.cxx:2217
static void error(const char *msg)
Definition: mkptypes.cxx:96
#define AWAR_CON_INSGAPS_FOR_MISS_ALIS
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
static void concatenateAlignments(AW_window *aws, AW_selection *selected_alis)
GB_ERROR GB_write_lossless_int(GBDATA *gbd, int32_t i)
Definition: arbdb.cxx:1521
#define SRT_AUTOCORRECT_ALINAME
Definition: arbdbt.h:80
#define AWAR_CON_STORE_SIM_SP_NO
const char * prepare_and_get_selected_itemfield(AW_root *awr, const char *awar_name, GBDATA *gb_main, const ItemSelector &itemtype, FailIfField failIf)
#define AWAR_CON_SELECTED_ALI
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
char * read_string() const
Definition: AW_awar.cxx:201
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:670
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:353
#define nt_assert(cond)
Definition: NT_local.h:27
AW_DB_selection * awt_create_ALI_selection_list(GBDATA *gb_main, AW_window *aws, const char *varname, const char *ali_type_match)
GBDATA * GBT_searchOrCreate_itemfield_according_to_changekey(GBDATA *gb_item, const char *field_name, const char *change_key_path)
Definition: adChangeKey.cxx:62
static void alitype_changed_cb(AW_root *aw_root, AW_DB_selection *db_sel)
GB_ERROR GB_export_errorf(const char *templat,...)
Definition: arb_msg.cxx:264
#define AWAR_CON_SEQUENCE_TYPE
MergeSpeciesType
static void mergeSimilarSpecies(AW_window *aws, MergeSpeciesType mergeType, AW_selection *selected_alis)
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
SpeciesConcatenateList * next
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2761
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
char * get_alitype_eval(AW_root *aw_root)
ItemSelector & SPECIES_get_selector()
Definition: species.cxx:139
void awt_reconfigure_ALI_selection_list(AW_DB_selection *dbsel, const char *ali_type_match)
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:353
void aw_message(const char *msg)
Definition: AW_status.cxx:932
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
AW_root * get_root()
Definition: aw_window.hxx:348
AW_window * NT_createConcatenationWindow(AW_root *aw_root)
#define NULp
Definition: cxxforward.h:97
GBDATA * gb_main
Definition: NT_local.h:37
static AW_window * createMergeSimilarSpeciesWindow(AW_root *aw_root, MergeSpeciesType mergeType, AW_selection *selected_alis)
#define NO_ALI_SELECTED
GB_ERROR write_string(const char *aw_string)
static AW_window * NT_createMergeSimilarSpeciesAndConcatenateWindow(AW_root *aw_root, AW_selection *selected_alis)
#define offset(field)
Definition: GLwDrawA.c:73
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:675
const char * get_data() const
Definition: arb_strbuf.h:70
GB_TYPES
Definition: arbdb.h:62
void GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
CONSTEXPR long FIELD_FILTER_INT_WRITEABLE
Definition: item_sel_list.h:43
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:65
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
GBDATA * gb_main
Definition: adname.cxx:33
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
GB_ERROR GBT_check_data(GBDATA *Main, const char *alignment_name)
Definition: adali.cxx:217
GBDATA * GBT_get_presets(GBDATA *gb_main)
Definition: adali.cxx:29
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:441
static int info[maxsites+1]
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
AW_selection_list * get_sellist()
Definition: aw_select.hxx:196
CONSTEXPR long FIELD_FILTER_STRING_READABLE
Definition: item_sel_list.h:47
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
NT_global GLOBAL
Definition: NT_main.cxx:44
static AW_DB_selection * createSelectionList(GBDATA *gb_main, AW_window *aws, const char *awarName)
static void freeSpeciesConcatenateList(SpeciesConcatenateList *scl)
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105