ARB
ad_spec.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ad_spec.cxx //
4 // Purpose : Create and modify species and SAI. //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "NT_local.h"
12 #include "map_viewer.h"
13 
14 #include <dbui.h>
15 
16 #include <www.hxx>
17 
18 #include <aw_awars.hxx>
19 #include <aw_msg.hxx>
20 #include <aw_root.hxx>
21 
22 #include <arb_progress.h>
23 #include <arb_defs.h>
24 
25 #include <cctype>
26 
27 static const char * const SAI_COUNTED_CHARS = "COUNTED_CHARS";
28 
30  // @@@ extract algorithm and call extracted from testcode
31  ARB_ERROR error = GB_begin_transaction(gb_main); // open transaction
32 
33  if (!error) {
35  if (!alignment_name) {
36  error = GB_await_error();
37  }
38  else {
39  int alignment_len = GBT_get_alignment_len(gb_main, alignment_name);
40  if (alignment_len<=0) {
41  error = GB_await_error();
42  }
43 
44  if (!error) {
45  int is_amino = GBT_is_alignment_protein(gb_main, alignment_name);
46 
47  const int MAXLETTER = 256;
48  const int FIRSTLETTER = 0; // cppcheck-suppress variableScope
49 
50  typedef bool letterOccurs[MAXLETTER];
51 
52  letterOccurs *occurs = ARB_alloc<letterOccurs>(alignment_len);
53  for (int i = 0; i<MAXLETTER; ++i) { // LOOP_VECTORIZED[!<6.0,!>8.0]
54  for (int p = 0; p<alignment_len; ++p) { // LOOP_VECTORIZED[!<8.0]
55  occurs[p][i] = false;
56  }
57  }
58 
59  // loop over all marked species
60  {
61  long all_marked = GBT_count_marked_species(gb_main);
62  arb_progress progress("Counting different characters", all_marked);
63 
64  for (GBDATA *gb_species = GBT_first_marked_species(gb_main);
65  gb_species && !error;
66  gb_species = GBT_next_marked_species(gb_species))
67  {
68  GBDATA *gb_ali = GB_entry(gb_species, alignment_name); // search the sequence database entry ( ali_xxx/data )
69  if (gb_ali) {
70  GBDATA *gb_data = GB_entry(gb_ali, "data");
71  if (gb_data) {
72  const char * const seq = GB_read_char_pntr(gb_data);
73  if (seq) {
74  for (int i=0; i< alignment_len; ++i) {
75  unsigned char c = seq[i];
76  if (!c) break;
77 
78  occurs[i][c-FIRSTLETTER] = true;
79  }
80  }
81  }
82  }
83  progress.inc_and_check_user_abort(error);
84  }
85  }
86 
87  if (!error) {
88 
89  char filter[256];
90  if (is_amino) for (int c = 0; c<256; ++c) filter[c] = isupper(c) && !strchr("BJOUZ", c);
91  else for (int c = 0; c<256; ++c) filter[c] = bool(strchr("ACGTU", c));
92 
93  char result[alignment_len+1];
94  for (int i=0; i<alignment_len; i++) {
95  int sum = 0;
96  for (int c = 'A'; c < 'Z'; ++c) {
97  if (filter[c]) {
98  sum += (occurs[i][c] || occurs[i][tolower(c)]);
99  }
100  }
101  result[i] = sum<10 ? '0'+sum : 'A'-10+sum;
102  }
103  result[alignment_len] = 0;
104 
105  {
106  GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main, SAI_COUNTED_CHARS);
107  if (!gb_sai) error = GB_await_error();
108  else {
109  GBDATA *gb_data = GBT_add_data(gb_sai, alignment_name, "data", GB_STRING);
110  if (!gb_data) error = GB_await_error();
111  else error = GB_write_string(gb_data, result);
112  }
113  }
114  }
115 
116  free(occurs);
117  }
118 
119  free(alignment_name);
120  }
121  }
122 
123  GB_end_transaction_show_error(gb_main, error, aw_message);
124 }
125 
148 
149  GB_ERROR error = NULp;
150  char *sai_name = NULp;
151  char *sec_struct = NULp;
152  bool canceled = false;
153 
154  // get the selected species
155  char *species_name = aww->get_root()->awar(AWAR_SPECIES_NAME)->read_string();
156  GBDATA *gb_species = NULp;
157  if (!strcmp(species_name, "") || !(gb_species = GBT_find_species(GLOBAL.gb_main, species_name))) {
158  error = "Please select a species first.";
159  }
160  else {
161  // search for the field "sec_struct"
162  GBDATA *gb_species_sec_struct = GB_entry(gb_species, "sec_struct");
163  if (!gb_species_sec_struct) {
164  error = "Field \"sec_struct\" not found or empty. Please select another species.";
165  }
166  else if (!(sec_struct = GB_read_string(gb_species_sec_struct))) {
167  error = "Couldn't read field \"sec_struct\". Is it empty?";
168  }
169  else {
170  // generate default name and name input field for the new SAI
171  {
172  char *sai_default_name = GBS_global_string_copy("%s%s", species_name, strstr(species_name, "_pfold") ? "" : "_pfold");
173  sai_name = aw_input("Name of SAI to create:", sai_default_name);
174  free(sai_default_name);
175  }
176 
177  if (!sai_name) {
178  canceled = true;
179  }
180  else if (strspn(sai_name, " ") == strlen(sai_name)) {
181  error = "Name of SAI is empty. Please enter a valid name.";
182  }
183  else {
184  GBDATA *gb_sai_data = GBT_get_SAI_data(GLOBAL.gb_main);
185  GBDATA *gb_sai = GBT_find_SAI_rel_SAI_data(gb_sai_data, sai_name);
186 
187  if (gb_sai) {
188  error = "SAI with the same name already exists. Please enter another name.";
189  }
190  else {
191  char *ali_name = GBT_get_default_alignment(GLOBAL.gb_main);
192  if (!ali_name) {
193  error = GB_await_error();
194  }
195  else {
196  // create SAI container and copy fields from the species to the SAI
197  gb_sai = GB_create_container(gb_sai_data, "extended");
198  GBDATA *gb_species_field = GB_child(gb_species);
199 
200  while (gb_species_field && !error) {
201  char *key = GB_read_key(gb_species_field);
202  GBDATA *gb_sai_field = GB_search(gb_sai, GB_read_key(gb_species_field), GB_read_type(gb_species_field));
203 
204  if (strcmp(key, "name") == 0) { // write the new name
205  error = GB_write_string(gb_sai_field, sai_name);
206  }
207  else if (strcmp(key, "sec_struct") == 0) { // write contents from the field "sec_struct" to the alignment data
208  GBDATA *gb_sai_ali = GB_search(gb_sai, ali_name, GB_CREATE_CONTAINER);
209  if (!gb_sai_ali) error = GB_await_error();
210  else error = GBT_write_string(gb_sai_ali, "data", sec_struct);
211  }
212  else if (strcmp(key, "acc") != 0 && strcmp(key, ali_name) != 0) { // don't copy "acc" and the old alignment data
213  error = GB_copy_dropProtectMarksAndTempstate(gb_sai_field, gb_species_field);
214  }
215  gb_species_field = GB_nextChild(gb_species_field);
216  free(key);
217  }
218 
219  // generate accession number and delete field "sec_struct" from the SAI
220  if (!error) {
221  // TODO: is it necessary that a new acc is generated here?
222  GBDATA *gb_sai_acc = GB_search(gb_sai, "acc", GB_FIND);
223  if (gb_sai_acc) {
224  GB_delete(gb_sai_acc);
225  GBT_gen_accession_number(gb_sai, ali_name);
226  }
227  GBDATA *gb_sai_sec_struct = GB_search(gb_sai, "sec_struct", GB_FIND);
228  if (gb_sai_sec_struct) GB_delete(gb_sai_sec_struct);
229  aww->get_root()->awar(AWAR_SAI_NAME)->write_string(sai_name);
230  }
231  free(ali_name);
232  }
233  }
234  }
235  }
236  }
237 
238  if (canceled) error = "Aborted by user";
239 
241 
242  if (!error) {
243  AW_window *sai_info = NT_create_extendeds_window(aww->get_root());
244  // TODO: why doesn't info box show anything on first startup? proper refresh needed?
245  sai_info->activate();
246  }
247 
248  free(species_name);
249  free(sai_name);
250  free(sec_struct);
251 }
252 
254  // Note: sync with ../PARSIMONY/PARS_main.cxx@PARS_map_viewer
255 
257 
258  if (!error) {
259  const char *species_name = "";
261 
262  if (gbd && GB_get_father(gbd) == gb_species_data) {
263  species_name = GBT_get_name_or_description(gbd);
264  }
265 
267  }
268 
269  if (!error && gbd && type == ADMVT_WWW) {
271  }
272 
273  error = GB_end_transaction(GLOBAL.gb_main, error);
274  if (error) aw_message(error);
275 }
276 
277 
278 // --------------------------------------------------------------------------------
279 
280 #ifdef UNIT_TESTS
281 #include <test_unit.h>
282 #include <arb_unit_test.h>
283 
284 static uint32_t counted_chars_checksum(GBDATA *gb_main) {
285  GB_transaction ta(gb_main);
286 
287  GBDATA *gb_sai;
288  GBDATA *gb_ali;
289  GBDATA *gb_counted_chars;
290 
291  char *ali_name = GBT_get_default_alignment(gb_main); // potential error will be detected by TEST_EXPECT_RESULT__NOERROREXPORTED below
292 
293  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_sai = GBT_expect_SAI(gb_main, SAI_COUNTED_CHARS));
294  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_ali = GB_entry(gb_sai, ali_name));
295  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_counted_chars = GB_entry(gb_ali, "data"));
296 
297  const char *data = GB_read_char_pntr(gb_counted_chars);
298 
299  free(ali_name);
300 
301  return GBS_checksum(data, 0, NULp);
302 }
303 
304 void TEST_count_chars() {
305  // calculate SAI for test DBs
306 
307  arb_suppress_progress silence;
308  GB_shell shell;
309 
310  for (int prot = 0; prot<2; ++prot) {
311  GBDATA *gb_main;
312  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(prot ? "TEST_prot.arb" : "TEST_nuc.arb", "rw"));
313 
314  GBT_mark_all(gb_main, 1);
315  NT_count_different_chars(NULp, gb_main);
316 
317  uint32_t expected = prot ? 0x9cad14cc : 0xefb05e4e;
318  TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), expected);
319 
320  GB_close(gb_main);
321  }
322 }
323 void TEST_SLOW_count_chars() {
324  // calculate a real big test alignment
325  //
326  // the difference to TEST_count_chars() is just in size of alignment.
327  // NT_count_different_chars() crashes for big alignments when running in gdb
328  arb_suppress_progress silence;
329  GB_shell shell;
330  {
331  arb_unit_test::test_alignment_data data_source[] = {
332  { 1, "s1", "ACGT" },
333  { 1, "s2", "ACGTN" },
334  { 1, "s3", "NANNAN" },
335  { 1, "s4", "GATTACA" },
336  };
337 
338  const int alilen = 50000;
339  const int count = ARRAY_ELEMS(data_source);
340 
341  char *longSeq[count];
342  for (int c = 0; c<count; ++c) {
343  char *dest = longSeq[c] = ARB_alloc<char>(alilen+1);
344 
345  const char *source = data_source[c].data;
346  int len = strlen(source);
347 
348  for (int p = 0; p<alilen; ++p) {
349  dest[p] = source[p%len];
350  }
351  dest[alilen] = 0;
352 
353  data_source[c].data = dest;
354  }
355 
357  GBDATA *gb_main = TEST_CREATE_DB(error, "ali_test", data_source, false);
358 
359  TEST_EXPECT_NO_ERROR(error.deliver());
360 
361  NT_count_different_chars(NULp, gb_main);
362 
363  // TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0x1d34a14f);
364  // TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0x609d788b);
365  TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0xccdfa527);
366 
367  for (int c = 0; c<count; ++c) {
368  free(longSeq[c]);
369  }
370 
371  GB_close(gb_main);
372  }
373 }
374 
375 #endif // UNIT_TESTS
376 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
Definition: arbdb.cxx:2152
const char * GB_ERROR
Definition: arb_core.h:25
string result
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
GB_TYPES type
void launch_MapViewer_cb(GBDATA *gbd, AD_MAP_VIEWER_TYPE type)
Definition: ad_spec.cxx:253
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
char * GB_read_key(GBDATA *gbd)
Definition: arbdb.cxx:1652
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
AW_window * NT_create_extendeds_window(AW_root *aw_root)
Definition: ad_ext.cxx:318
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
Definition: arbdb.cxx:2584
AD_MAP_VIEWER_TYPE
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:843
AW_root * aw_root
Definition: NT_local.h:36
static char * alignment_name
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:184
bool GBT_is_alignment_protein(GBDATA *gb_main, const char *alignment_name)
Definition: adali.cxx:908
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
GBDATA * GB_get_father(GBDATA *gbd)
Definition: arbdb.cxx:1722
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
void activate()
Definition: aw_window.hxx:365
FILE * seq
Definition: rns.c:46
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:607
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
static AW_root * SINGLETON
Definition: aw_root.hxx:102
GBDATA * GB_create_container(GBDATA *father, const char *key)
Definition: arbdb.cxx:1829
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
GB_ERROR deliver() const
Definition: arb_error.h:116
void NT_create_sai_from_pfold(AW_window *aww)
Definition: ad_spec.cxx:126
GBDATA * gb_species_data
Definition: adname.cxx:33
static void error(const char *msg)
Definition: mkptypes.cxx:96
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
void NT_count_different_chars(AW_window *, GBDATA *gb_main)
Definition: ad_spec.cxx:29
#define AWAR_SPECIES_NAME
#define AWAR_SAI_NAME
char * read_string() const
Definition: AW_awar.cxx:198
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
GB_ERROR awt_openDefaultURL_with_item(AW_root *aw_root, GBDATA *gb_main, GBDATA *gb_item)
Definition: www.cxx:147
Definition: arbdb.h:86
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:372
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:909
GBDATA * GBT_gen_accession_number(GBDATA *gb_species, const char *ali_name)
Definition: adali.cxx:665
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:352
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
AW_root * get_root()
Definition: aw_window.hxx:359
#define NULp
Definition: cxxforward.h:116
GBDATA * gb_main
Definition: NT_local.h:37
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
GB_ERROR write_string(const char *aw_string)
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:757
GBDATA * GBT_find_SAI_rel_SAI_data(GBDATA *gb_sai_data, const char *name)
Definition: aditem.cxx:171
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:65
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
GBDATA * GBT_get_SAI_data(GBDATA *gb_main)
Definition: aditem.cxx:154
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
char * aw_input(const char *title, const char *prompt, const char *default_input)
Definition: AW_modal.cxx:251
static const char *const SAI_COUNTED_CHARS
Definition: ad_spec.cxx:27
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
NT_global GLOBAL
Definition: NT_main.cxx:46
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105