ARB
ad_spec.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ad_spec.cxx //
4 // Purpose : Create and modify species and SAI. //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "NT_local.h"
12 #include "map_viewer.h"
13 
14 #include <dbui.h>
15 
16 #include <awt_www.hxx>
17 #include <awt_canvas.hxx>
18 
19 #include <aw_awars.hxx>
20 #include <aw_msg.hxx>
21 #include <aw_root.hxx>
22 
23 #include <arb_progress.h>
24 #include <arb_defs.h>
25 
26 #include <cctype>
27 
28 static const char * const SAI_COUNTED_CHARS = "COUNTED_CHARS";
29 
31  // @@@ extract algorithm and call extracted from testcode
32  ARB_ERROR error = GB_begin_transaction(gb_main); // open transaction
33 
34  if (!error) {
36  if (!alignment_name) {
37  error = GB_await_error();
38  }
39  else {
40  int alignment_len = GBT_get_alignment_len(gb_main, alignment_name);
41  if (alignment_len<=0) {
42  error = GB_await_error();
43  }
44 
45  if (!error) {
46  int is_amino = GBT_is_alignment_protein(gb_main, alignment_name);
47 
48  const int MAXLETTER = 256;
49  const int FIRSTLETTER = 0; // cppcheck-suppress variableScope
50 
51  typedef bool letterOccurs[MAXLETTER];
52 
53  letterOccurs *occurs = ARB_alloc<letterOccurs>(alignment_len);
54  for (int i = 0; i<MAXLETTER; ++i) { // LOOP_VECTORIZED[!<6.0,!>8.0]
55  for (int p = 0; p<alignment_len; ++p) { // LOOP_VECTORIZED[!<8.0]
56  occurs[p][i] = false;
57  }
58  }
59 
60  // loop over all marked species
61  {
62  long all_marked = GBT_count_marked_species(gb_main);
63  arb_progress progress("Counting different characters", all_marked);
64 
65  for (GBDATA *gb_species = GBT_first_marked_species(gb_main);
66  gb_species && !error;
67  gb_species = GBT_next_marked_species(gb_species))
68  {
69  GBDATA *gb_ali = GB_entry(gb_species, alignment_name); // search the sequence database entry ( ali_xxx/data )
70  if (gb_ali) {
71  GBDATA *gb_data = GB_entry(gb_ali, "data");
72  if (gb_data) {
73  const char * const seq = GB_read_char_pntr(gb_data);
74  if (seq) {
75  for (int i=0; i< alignment_len; ++i) {
76  unsigned char c = seq[i];
77  if (!c) break;
78 
79  occurs[i][c-FIRSTLETTER] = true;
80  }
81  }
82  }
83  }
84  progress.inc_and_check_user_abort(error);
85  }
86  }
87 
88  if (!error) {
89 
90  char filter[256];
91  if (is_amino) for (int c = 0; c<256; ++c) filter[c] = isupper(c) && !strchr("BJOUZ", c);
92  else for (int c = 0; c<256; ++c) filter[c] = bool(strchr("ACGTU", c));
93 
94  char result[alignment_len+1];
95  for (int i=0; i<alignment_len; i++) {
96  int sum = 0;
97  for (int c = 'A'; c < 'Z'; ++c) {
98  if (filter[c]) {
99  sum += (occurs[i][c] || occurs[i][tolower(c)]);
100  }
101  }
102  result[i] = sum<10 ? '0'+sum : 'A'-10+sum;
103  }
104  result[alignment_len] = 0;
105 
106  {
107  GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main, SAI_COUNTED_CHARS);
108  if (!gb_sai) error = GB_await_error();
109  else {
110  GBDATA *gb_data = GBT_add_data(gb_sai, alignment_name, "data", GB_STRING);
111  if (!gb_data) error = GB_await_error();
112  else error = GB_write_string(gb_data, result);
113  }
114  }
115  }
116 
117  free(occurs);
118  }
119 
120  free(alignment_name);
121  }
122  }
123 
124  GB_end_transaction_show_error(gb_main, error, aw_message);
125 }
126 
149 
150  GB_ERROR error = NULp;
151  char *sai_name = NULp;
152  char *sec_struct = NULp;
153  bool canceled = false;
154 
155  // get the selected species
156  char *species_name = aww->get_root()->awar(AWAR_SPECIES_NAME)->read_string();
157  GBDATA *gb_species = NULp;
158  if (!strcmp(species_name, "") || !(gb_species = GBT_find_species(GLOBAL.gb_main, species_name))) {
159  error = "Please select a species first.";
160  }
161  else {
162  // search for the field "sec_struct"
163  GBDATA *gb_species_sec_struct = GB_entry(gb_species, "sec_struct");
164  if (!gb_species_sec_struct) {
165  error = "Field \"sec_struct\" not found or empty. Please select another species.";
166  }
167  else if (!(sec_struct = GB_read_string(gb_species_sec_struct))) {
168  error = "Couldn't read field \"sec_struct\". Is it empty?";
169  }
170  else {
171  // generate default name and name input field for the new SAI
172  {
173  char *sai_default_name = GBS_global_string_copy("%s%s", species_name, strstr(species_name, "_pfold") ? "" : "_pfold");
174  sai_name = aw_input("Name of SAI to create:", sai_default_name);
175  free(sai_default_name);
176  }
177 
178  if (!sai_name) {
179  canceled = true;
180  }
181  else if (strspn(sai_name, " ") == strlen(sai_name)) {
182  error = "Name of SAI is empty. Please enter a valid name.";
183  }
184  else {
185  GBDATA *gb_sai_data = GBT_get_SAI_data(GLOBAL.gb_main);
186  GBDATA *gb_sai = GBT_find_SAI_rel_SAI_data(gb_sai_data, sai_name);
187 
188  if (gb_sai) {
189  error = "SAI with the same name already exists. Please enter another name.";
190  }
191  else {
192  char *ali_name = GBT_get_default_alignment(GLOBAL.gb_main);
193  if (!ali_name) {
194  error = GB_await_error();
195  }
196  else {
197  // create SAI container and copy fields from the species to the SAI
198  gb_sai = GB_create_container(gb_sai_data, "extended");
199  GBDATA *gb_species_field = GB_child(gb_species);
200 
201  while (gb_species_field && !error) {
202  char *key = GB_read_key(gb_species_field);
203  GBDATA *gb_sai_field = GB_search(gb_sai, GB_read_key(gb_species_field), GB_read_type(gb_species_field));
204 
205  if (strcmp(key, "name") == 0) { // write the new name
206  error = GB_write_string(gb_sai_field, sai_name);
207  }
208  else if (strcmp(key, "sec_struct") == 0) { // write contents from the field "sec_struct" to the alignment data
209  GBDATA *gb_sai_ali = GB_search(gb_sai, ali_name, GB_CREATE_CONTAINER);
210  if (!gb_sai_ali) error = GB_await_error();
211  else error = GBT_write_string(gb_sai_ali, "data", sec_struct);
212  }
213  else if (strcmp(key, "acc") != 0 && strcmp(key, ali_name) != 0) { // don't copy "acc" and the old alignment data
214  error = GB_copy_dropProtectMarksAndTempstate(gb_sai_field, gb_species_field);
215  }
216  gb_species_field = GB_nextChild(gb_species_field);
217  free(key);
218  }
219 
220  // generate accession number and delete field "sec_struct" from the SAI
221  if (!error) {
222  // TODO: is it necessary that a new acc is generated here?
223  GBDATA *gb_sai_acc = GB_search(gb_sai, "acc", GB_FIND);
224  if (gb_sai_acc) {
225  GB_delete(gb_sai_acc);
226  GBT_gen_accession_number(gb_sai, ali_name);
227  }
228  GBDATA *gb_sai_sec_struct = GB_search(gb_sai, "sec_struct", GB_FIND);
229  if (gb_sai_sec_struct) GB_delete(gb_sai_sec_struct);
230  aww->get_root()->awar(AWAR_SAI_NAME)->write_string(sai_name);
231  }
232  free(ali_name);
233  }
234  }
235  }
236  }
237  }
238 
239  if (canceled) error = "Aborted by user";
240 
242 
243  if (!error) {
244  AW_window *sai_info = NT_create_extendeds_window(aww->get_root());
245  // TODO: why doesn't info box show anything on first startup? proper refresh needed?
246  sai_info->activate();
247  }
248 
249  free(species_name);
250  free(sai_name);
251  free(sec_struct);
252 }
253 
255  // Note: sync with ../PARSIMONY/PARS_main.cxx@PARS_map_viewer
256 
258 
259  if (!error) {
260  const char *species_name = "";
262 
263  if (gbd && GB_get_father(gbd) == gb_species_data) {
264  species_name = GBT_get_name_or_description(gbd);
265  }
266 
268  }
269 
270  if (!error && gbd && type == ADMVT_WWW) {
272  }
273 
274  error = GB_end_transaction(GLOBAL.gb_main, error);
275  if (error) aw_message(error);
276 }
277 
278 
279 // --------------------------------------------------------------------------------
280 
281 #ifdef UNIT_TESTS
282 #include <test_unit.h>
283 #include <arb_unit_test.h>
284 
285 static uint32_t counted_chars_checksum(GBDATA *gb_main) {
286  GB_transaction ta(gb_main);
287 
288  GBDATA *gb_sai;
289  GBDATA *gb_ali;
290  GBDATA *gb_counted_chars;
291 
292  char *ali_name = GBT_get_default_alignment(gb_main); // potential error will be detected by TEST_EXPECT_RESULT__NOERROREXPORTED below
293 
294  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_sai = GBT_expect_SAI(gb_main, SAI_COUNTED_CHARS));
295  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_ali = GB_entry(gb_sai, ali_name));
296  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_counted_chars = GB_entry(gb_ali, "data"));
297 
298  const char *data = GB_read_char_pntr(gb_counted_chars);
299 
300  free(ali_name);
301 
302  return GBS_checksum(data, 0, NULp);
303 }
304 
305 void TEST_count_chars() {
306  // calculate SAI for test DBs
307 
308  arb_suppress_progress silence;
309  GB_shell shell;
310 
311  for (int prot = 0; prot<2; ++prot) {
312  GBDATA *gb_main;
313  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(prot ? "TEST_prot.arb" : "TEST_nuc.arb", "rw"));
314 
315  GBT_mark_all(gb_main, 1);
316  NT_count_different_chars(NULp, gb_main);
317 
318  uint32_t expected = prot ? 0x9cad14cc : 0xefb05e4e;
319  TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), expected);
320 
321  GB_close(gb_main);
322  }
323 }
324 void TEST_SLOW_count_chars() {
325  // calculate a real big test alignment
326  //
327  // the difference to TEST_count_chars() is just in size of alignment.
328  // NT_count_different_chars() crashes for big alignments when running in gdb
329  arb_suppress_progress silence;
330  GB_shell shell;
331  {
332  arb_unit_test::test_alignment_data data_source[] = {
333  { 1, "s1", "ACGT" },
334  { 1, "s2", "ACGTN" },
335  { 1, "s3", "NANNAN" },
336  { 1, "s4", "GATTACA" },
337  };
338 
339  const int alilen = 50000;
340  const int count = ARRAY_ELEMS(data_source);
341 
342  char *longSeq[count];
343  for (int c = 0; c<count; ++c) {
344  char *dest = longSeq[c] = ARB_alloc<char>(alilen+1);
345 
346  const char *source = data_source[c].data;
347  int len = strlen(source);
348 
349  for (int p = 0; p<alilen; ++p) {
350  dest[p] = source[p%len];
351  }
352  dest[alilen] = 0;
353 
354  data_source[c].data = dest;
355  }
356 
358  GBDATA *gb_main = TEST_CREATE_DB(error, "ali_test", data_source, false);
359 
360  TEST_EXPECT_NO_ERROR(error.deliver());
361 
362  NT_count_different_chars(NULp, gb_main);
363 
364  // TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0x1d34a14f);
365  // TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0x609d788b);
366  TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0xccdfa527);
367 
368  for (int c = 0; c<count; ++c) {
369  free(longSeq[c]);
370  }
371 
372  GB_close(gb_main);
373  }
374 }
375 
376 #endif // UNIT_TESTS
377 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
Definition: arbdb.cxx:2152
const char * GB_ERROR
Definition: arb_core.h:25
string result
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
GB_TYPES type
void launch_MapViewer_cb(GBDATA *gbd, AD_MAP_VIEWER_TYPE type)
Definition: ad_spec.cxx:254
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
char * GB_read_key(GBDATA *gbd)
Definition: arbdb.cxx:1652
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
AW_window * NT_create_extendeds_window(AW_root *aw_root)
Definition: ad_ext.cxx:318
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
Definition: arbdb.cxx:2584
AD_MAP_VIEWER_TYPE
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
AW_root * aw_root
Definition: NT_local.h:36
static char * alignment_name
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:184
bool GBT_is_alignment_protein(GBDATA *gb_main, const char *alignment_name)
Definition: adali.cxx:898
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
GBDATA * GB_get_father(GBDATA *gbd)
Definition: arbdb.cxx:1722
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
void activate()
Definition: aw_window.hxx:365
FILE * seq
Definition: rns.c:46
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:597
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
static AW_root * SINGLETON
Definition: aw_root.hxx:102
GBDATA * GB_create_container(GBDATA *father, const char *key)
Definition: arbdb.cxx:1829
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
GB_ERROR deliver() const
Definition: arb_error.h:116
void NT_create_sai_from_pfold(AW_window *aww)
Definition: ad_spec.cxx:127
GBDATA * gb_species_data
Definition: adname.cxx:33
static void error(const char *msg)
Definition: mkptypes.cxx:96
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
void NT_count_different_chars(AW_window *, GBDATA *gb_main)
Definition: ad_spec.cxx:30
#define AWAR_SPECIES_NAME
#define AWAR_SAI_NAME
GB_ERROR awt_openDefaultURL_with_item(AW_root *aw_root, GBDATA *gb_main, GBDATA *gb_item)
Definition: AWT_www.cxx:148
char * read_string() const
Definition: AW_awar.cxx:198
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:372
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:909
GBDATA * GBT_gen_accession_number(GBDATA *gb_species, const char *ali_name)
Definition: adali.cxx:655
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:352
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
AW_root * get_root()
Definition: aw_window.hxx:359
#define NULp
Definition: cxxforward.h:116
GBDATA * gb_main
Definition: NT_local.h:37
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
GB_ERROR write_string(const char *aw_string)
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
GBDATA * GBT_find_SAI_rel_SAI_data(GBDATA *gb_sai_data, const char *name)
Definition: aditem.cxx:171
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:65
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
GBDATA * GBT_get_SAI_data(GBDATA *gb_main)
Definition: aditem.cxx:154
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
char * aw_input(const char *title, const char *prompt, const char *default_input)
Definition: AW_modal.cxx:251
static const char *const SAI_COUNTED_CHARS
Definition: ad_spec.cxx:28
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
NT_global GLOBAL
Definition: NT_main.cxx:46
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105