ARB
ad_spec.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ad_spec.cxx //
4 // Purpose : Create and modify species and SAI. //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "NT_local.h"
12 #include "map_viewer.h"
13 
14 #include <dbui.h>
15 
16 #include <awt_www.hxx>
17 #include <awt_canvas.hxx>
18 
19 #include <aw_awars.hxx>
20 #include <aw_msg.hxx>
21 #include <aw_root.hxx>
22 
23 #include <arb_progress.h>
24 #include <arb_defs.h>
25 
26 #include <cctype>
27 
28 static const char * const SAI_COUNTED_CHARS = "COUNTED_CHARS";
29 
31  // @@@ extract algorithm and call extracted from testcode
32  ARB_ERROR error = GB_begin_transaction(gb_main); // open transaction
33 
34  char *alignment_name = GBT_get_default_alignment(gb_main); // info about sequences
35  int alignment_len = GBT_get_alignment_len(gb_main, alignment_name);
36  int is_amino = GBT_is_alignment_protein(gb_main, alignment_name);
37 
38  if (!error) {
39  const int MAXLETTER = 256;
40  const int FIRSTLETTER = 0; // cppcheck-suppress variableScope
41 
42  typedef bool letterOccurs[MAXLETTER];
43 
44  letterOccurs *occurs = ARB_alloc<letterOccurs>(alignment_len);
45  for (int i = 0; i<MAXLETTER; ++i) { // LOOP_VECTORIZED[!<6.0,!>8.0]
46  for (int p = 0; p<alignment_len; ++p) { // LOOP_VECTORIZED[!<8.0]
47  occurs[p][i] = false;
48  }
49  }
50 
51  // loop over all marked species
52  {
53  long all_marked = GBT_count_marked_species(gb_main);
54  arb_progress progress("Counting different characters", all_marked);
55 
56  for (GBDATA *gb_species = GBT_first_marked_species(gb_main);
57  gb_species && !error;
58  gb_species = GBT_next_marked_species(gb_species))
59  {
60  GBDATA *gb_ali = GB_entry(gb_species, alignment_name); // search the sequence database entry ( ali_xxx/data )
61  if (gb_ali) {
62  GBDATA *gb_data = GB_entry(gb_ali, "data");
63  if (gb_data) {
64  const char * const seq = GB_read_char_pntr(gb_data);
65  if (seq) {
66  for (int i=0; i< alignment_len; ++i) {
67  unsigned char c = seq[i];
68  if (!c) break;
69 
70  occurs[i][c-FIRSTLETTER] = true;
71  }
72  }
73  }
74  }
75  progress.inc_and_check_user_abort(error);
76  }
77  }
78 
79  if (!error) {
80 
81  char filter[256];
82  if (is_amino) for (int c = 0; c<256; ++c) filter[c] = isupper(c) && !strchr("BJOUZ", c);
83  else for (int c = 0; c<256; ++c) filter[c] = bool(strchr("ACGTU", c));
84 
85  char result[alignment_len+1];
86  for (int i=0; i<alignment_len; i++) {
87  int sum = 0;
88  for (int c = 'A'; c < 'Z'; ++c) {
89  if (filter[c]) {
90  sum += (occurs[i][c] || occurs[i][tolower(c)]);
91  }
92  }
93  result[i] = sum<10 ? '0'+sum : 'A'-10+sum;
94  }
95  result[alignment_len] = 0;
96 
97  {
98  GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main, SAI_COUNTED_CHARS);
99  if (!gb_sai) error = GB_await_error();
100  else {
101  GBDATA *gb_data = GBT_add_data(gb_sai, alignment_name, "data", GB_STRING);
102  if (!gb_data) error = GB_await_error();
103  else error = GB_write_string(gb_data, result);
104  }
105  }
106  }
107 
108  free(occurs);
109  }
110 
111  free(alignment_name);
112 
113  GB_end_transaction_show_error(gb_main, error, aw_message);
114 }
115 
138 
139  GB_ERROR error = NULp;
140  char *sai_name = NULp;
141  char *sec_struct = NULp;
142  bool canceled = false;
143 
144  // get the selected species
145  char *species_name = aww->get_root()->awar(AWAR_SPECIES_NAME)->read_string();
146  GBDATA *gb_species = NULp;
147  if (!strcmp(species_name, "") || !(gb_species = GBT_find_species(GLOBAL.gb_main, species_name))) {
148  error = "Please select a species first.";
149  }
150  else {
151  // search for the field "sec_struct"
152  GBDATA *gb_species_sec_struct = GB_entry(gb_species, "sec_struct");
153  if (!gb_species_sec_struct) {
154  error = "Field \"sec_struct\" not found or empty. Please select another species.";
155  }
156  else if (!(sec_struct = GB_read_string(gb_species_sec_struct))) {
157  error = "Couldn't read field \"sec_struct\". Is it empty?";
158  }
159  else {
160  // generate default name and name input field for the new SAI
161  {
162  char *sai_default_name = GBS_global_string_copy("%s%s", species_name, strstr(species_name, "_pfold") ? "" : "_pfold");
163  sai_name = aw_input("Name of SAI to create:", sai_default_name);
164  free(sai_default_name);
165  }
166 
167  if (!sai_name) {
168  canceled = true;
169  }
170  else if (strspn(sai_name, " ") == strlen(sai_name)) {
171  error = "Name of SAI is empty. Please enter a valid name.";
172  }
173  else {
174  GBDATA *gb_sai_data = GBT_get_SAI_data(GLOBAL.gb_main);
175  GBDATA *gb_sai = GBT_find_SAI_rel_SAI_data(gb_sai_data, sai_name);
176  char *ali_name = GBT_get_default_alignment(GLOBAL.gb_main);
177 
178  if (gb_sai) {
179  error = "SAI with the same name already exists. Please enter another name.";
180  }
181  else {
182  // create SAI container and copy fields from the species to the SAI
183  gb_sai = GB_create_container(gb_sai_data, "extended");
184  GBDATA *gb_species_field = GB_child(gb_species);
185 
186  while (gb_species_field && !error) {
187  char *key = GB_read_key(gb_species_field);
188  GBDATA *gb_sai_field = GB_search(gb_sai, GB_read_key(gb_species_field), GB_read_type(gb_species_field));
189 
190  if (strcmp(key, "name") == 0) { // write the new name
191  error = GB_write_string(gb_sai_field, sai_name);
192  }
193  else if (strcmp(key, "sec_struct") == 0) { // write contents from the field "sec_struct" to the alignment data
194  GBDATA *gb_sai_ali = GB_search(gb_sai, ali_name, GB_CREATE_CONTAINER);
195  if (!gb_sai_ali) error = GB_await_error();
196  else error = GBT_write_string(gb_sai_ali, "data", sec_struct);
197  }
198  else if (strcmp(key, "acc") != 0 && strcmp(key, ali_name) != 0) { // don't copy "acc" and the old alignment data
199  error = GB_copy_dropProtectMarksAndTempstate(gb_sai_field, gb_species_field);
200  }
201  gb_species_field = GB_nextChild(gb_species_field);
202  free(key);
203  }
204 
205  // generate accession number and delete field "sec_struct" from the SAI
206  if (!error) {
207  // TODO: is it necessary that a new acc is generated here?
208  GBDATA *gb_sai_acc = GB_search(gb_sai, "acc", GB_FIND);
209  if (gb_sai_acc) {
210  GB_delete(gb_sai_acc);
211  GBT_gen_accession_number(gb_sai, ali_name);
212  }
213  GBDATA *gb_sai_sec_struct = GB_search(gb_sai, "sec_struct", GB_FIND);
214  if (gb_sai_sec_struct) GB_delete(gb_sai_sec_struct);
215  aww->get_root()->awar(AWAR_SAI_NAME)->write_string(sai_name);
216  }
217  }
218  }
219  }
220  }
221 
222  if (canceled) error = "Aborted by user";
223 
225 
226  if (!error) {
227  AW_window *sai_info = NT_create_extendeds_window(aww->get_root());
228  // TODO: why doesn't info box show anything on first startup? proper refresh needed?
229  sai_info->activate();
230  }
231 
232  free(species_name);
233  free(sai_name);
234  free(sec_struct);
235 }
236 
238  // Note: sync with ../PARSIMONY/PARS_main.cxx@PARS_map_viewer
239 
241 
242  if (!error) {
243  const char *species_name = "";
245 
246  if (gbd && GB_get_father(gbd) == gb_species_data) {
247  species_name = GBT_get_name_or_description(gbd);
248  }
249 
251  }
252 
253  if (!error && gbd && type == ADMVT_WWW) {
255  }
256 
257  error = GB_end_transaction(GLOBAL.gb_main, error);
258  if (error) aw_message(error);
259 }
260 
261 
262 // --------------------------------------------------------------------------------
263 
264 #ifdef UNIT_TESTS
265 #include <test_unit.h>
266 #include <arb_unit_test.h>
267 
268 static uint32_t counted_chars_checksum(GBDATA *gb_main) {
269  GB_transaction ta(gb_main);
270 
271  GBDATA *gb_sai;
272  GBDATA *gb_ali;
273  GBDATA *gb_counted_chars;
274 
275  char *ali_name = GBT_get_default_alignment(gb_main);
276 
277  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_sai = GBT_expect_SAI(gb_main, SAI_COUNTED_CHARS));
278  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_ali = GB_entry(gb_sai, ali_name));
279  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_counted_chars = GB_entry(gb_ali, "data"));
280 
281  const char *data = GB_read_char_pntr(gb_counted_chars);
282 
283  free(ali_name);
284 
285  return GBS_checksum(data, 0, NULp);
286 }
287 
288 void TEST_count_chars() {
289  // calculate SAI for test DBs
290 
291  arb_suppress_progress silence;
292  GB_shell shell;
293 
294  for (int prot = 0; prot<2; ++prot) {
295  GBDATA *gb_main;
296  TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main = GB_open(prot ? "TEST_prot.arb" : "TEST_nuc.arb", "rw"));
297 
298  GBT_mark_all(gb_main, 1);
299  NT_count_different_chars(NULp, gb_main);
300 
301  uint32_t expected = prot ? 0x9cad14cc : 0xefb05e4e;
302  TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), expected);
303 
304  GB_close(gb_main);
305  }
306 }
307 void TEST_SLOW_count_chars() {
308  // calculate a real big test alignment
309  //
310  // the difference to TEST_count_chars() is just in size of alignment.
311  // NT_count_different_chars() crashes for big alignments when running in gdb
312  arb_suppress_progress silence;
313  GB_shell shell;
314  {
315  arb_unit_test::test_alignment_data data_source[] = {
316  { 1, "s1", "ACGT" },
317  { 1, "s2", "ACGTN" },
318  { 1, "s3", "NANNAN" },
319  { 1, "s4", "GATTACA" },
320  };
321 
322  const int alilen = 50000;
323  const int count = ARRAY_ELEMS(data_source);
324 
325  char *longSeq[count];
326  for (int c = 0; c<count; ++c) {
327  char *dest = longSeq[c] = ARB_alloc<char>(alilen+1);
328 
329  const char *source = data_source[c].data;
330  int len = strlen(source);
331 
332  for (int p = 0; p<alilen; ++p) {
333  dest[p] = source[p%len];
334  }
335  dest[alilen] = 0;
336 
337  data_source[c].data = dest;
338  }
339 
341  GBDATA *gb_main = TEST_CREATE_DB(error, "ali_test", data_source, false);
342 
343  TEST_EXPECT_NO_ERROR(error.deliver());
344 
345  NT_count_different_chars(NULp, gb_main);
346 
347  // TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0x1d34a14f);
348  // TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0x609d788b);
349  TEST_EXPECT_EQUAL(counted_chars_checksum(gb_main), 0xccdfa527);
350 
351  for (int c = 0; c<count; ++c) {
352  free(longSeq[c]);
353  }
354 
355  GB_close(gb_main);
356  }
357 }
358 
359 #endif // UNIT_TESTS
360 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2516
GB_ERROR GB_copy_dropProtectMarksAndTempstate(GBDATA *dest, GBDATA *source)
Definition: arbdb.cxx:2144
const char * GB_ERROR
Definition: arb_core.h:25
string result
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
GB_TYPES type
void launch_MapViewer_cb(GBDATA *gbd, AD_MAP_VIEWER_TYPE type)
Definition: ad_spec.cxx:237
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
char * GB_read_key(GBDATA *gbd)
Definition: arbdb.cxx:1650
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1385
AW_window * NT_create_extendeds_window(AW_root *aw_root)
Definition: ad_ext.cxx:318
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2549
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
Definition: arbdb.cxx:2572
AD_MAP_VIEWER_TYPE
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:706
AW_root * aw_root
Definition: NT_local.h:35
static char * alignment_name
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:184
bool GBT_is_alignment_protein(GBDATA *gb_main, const char *alignment_name)
Definition: adali.cxx:757
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
GBDATA * GB_get_father(GBDATA *gbd)
Definition: arbdb.cxx:1720
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2482
void activate()
Definition: aw_window.hxx:354
FILE * seq
Definition: rns.c:46
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1904
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:559
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
static AW_root * SINGLETON
Definition: aw_root.hxx:102
GBDATA * GB_create_container(GBDATA *father, const char *key)
Definition: arbdb.cxx:1827
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1641
GB_ERROR deliver() const
Definition: arb_error.h:114
void NT_create_sai_from_pfold(AW_window *aww)
Definition: ad_spec.cxx:116
GBDATA * gb_species_data
Definition: adname.cxx:34
static void error(const char *msg)
Definition: mkptypes.cxx:96
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
void NT_count_different_chars(AW_window *, GBDATA *gb_main)
Definition: ad_spec.cxx:30
#define AWAR_SPECIES_NAME
#define AWAR_SAI_NAME
GB_ERROR awt_openDefaultURL_with_item(AW_root *aw_root, GBDATA *gb_main, GBDATA *gb_item)
Definition: AWT_www.cxx:148
char * read_string() const
Definition: AW_awar.cxx:201
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:353
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:903
GBDATA * GBT_gen_accession_number(GBDATA *gb_species, const char *ali_name)
Definition: adali.cxx:617
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:353
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
void aw_message(const char *msg)
Definition: AW_status.cxx:932
AW_root * get_root()
Definition: aw_window.hxx:348
#define NULp
Definition: cxxforward.h:97
GBDATA * gb_main
Definition: NT_local.h:36
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
GB_ERROR write_string(const char *aw_string)
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:675
GBDATA * GBT_find_SAI_rel_SAI_data(GBDATA *gb_sai_data, const char *name)
Definition: aditem.cxx:171
void GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:65
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
GBDATA * gb_main
Definition: adname.cxx:33
GBDATA * GBT_get_SAI_data(GBDATA *gb_main)
Definition: aditem.cxx:154
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:441
char * aw_input(const char *title, const char *prompt, const char *default_input)
Definition: AW_modal.cxx:251
static const char *const SAI_COUNTED_CHARS
Definition: ad_spec.cxx:28
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:649
NT_global GLOBAL
Definition: NT_main.cxx:44
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105