ARB
SQ_functions.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : SQ_functions.cxx //
4 // Purpose : Implementation of SQ_functions.h //
5 // //
6 // //
7 // Coded by Juergen Huber in July 2003 - February 2004 //
8 // Coded by Kai Bader (baderk@in.tum.de) in 2007 - 2008 //
9 // Copyright Department of Microbiology (Technical University Munich) //
10 // //
11 // Visit our web site at: http://www.arb-home.de/ //
12 // //
13 // ==================================================================== //
14 
15 #include "SQ_ambiguities.h"
16 #include "SQ_helix.h"
17 #include "SQ_physical_layout.h"
18 #include "SQ_functions.h"
19 
20 #include <aw_preset.hxx>
21 #include <arb_progress.h>
22 #include <TreeNode.h>
23 
24 using namespace std;
25 
26 typedef GBDATA *(*species_iterator)(GBDATA *);
27 
29 
30 enum {
32 };
33 
36  swap(tmp, group_dict);
37 }
38 
39 static GB_ERROR no_data_error(GBDATA * gb_species, const char *ali_name) {
40  GBDATA *gb_name = GB_entry(gb_species, "name");
41  const char *name = gb_name ? GB_read_char_pntr(gb_name) : "<unknown>";
42  return GBS_global_string("Species '%s' has no data in alignment '%s'", name, ali_name);
43 }
44 
45 static int sq_round(double value) {
46  int x;
47 
48  value += 0.5;
49  x = (int) floor(value);
50  return x;
51 }
52 
54  GB_push_transaction(gb_main);
56 
57  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species
58  = GBT_next_species(gb_species)) {
59  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_FIND);
60  if (gb_quality)
61  error = GB_delete(gb_quality);
62  }
63  if (error)
64  GB_abort_transaction(gb_main);
65  else
66  GB_pop_transaction(gb_main);
67  return error;
68 }
69 
70 GB_ERROR SQ_evaluate(GBDATA * gb_main, const SQ_weights & weights, bool marked_only) {
71  GB_push_transaction(gb_main);
72  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
73 
76 
78  for (GBDATA *gb_species = getFirst(gb_main); gb_species && !error; gb_species = getNext(gb_species)) {
79  GBDATA *gb_name = GB_entry(gb_species, "name");
80  if (!gb_name) {
81  error = GB_get_error();
82  }
83  else {
84  GBDATA *gb_quality = GB_entry(gb_species, "quality");
85  if (gb_quality) {
86  GBDATA *gb_quality_ali = GB_entry(gb_quality, alignment_name);
87 
88  if (!gb_quality_ali) {
89  error = GBS_global_string("No alignment entry '%s' in quality data", alignment_name);
90  }
91  else {
92 
93  // evaluate the percentage of bases the actual sequence consists of
94  GBDATA *gb_result1 = GB_search(gb_quality_ali, "percent_of_bases", GB_INT);
95  int bases = GB_read_int(gb_result1);
96 
97  double result = bases<4 ? 0 : (bases<6 ? 1 : 2);
98  if (result != 0) result = (result * weights.bases) / 2;
99 
100  double value = 0;
101  value += result;
102 
103  // evaluate the difference in number of bases from sequence to group
104  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
105  int dfa = GB_read_int(gb_result2);
106  if (abs(dfa) < 2) {
107  result = 5;
108  }
109  else {
110  if (abs(dfa) < 4) {
111  result = 4;
112  }
113  else {
114  if (abs(dfa) < 6) {
115  result = 3;
116  }
117  else {
118  if (abs(dfa) < 8) {
119  result = 2;
120  }
121  else {
122  if (abs(dfa) < 10) {
123  result = 1;
124  }
125  else {
126  result = 0;
127  }
128  }
129  }
130  }
131  }
132  if (result != 0) result = (result * weights.diff_from_average) / 5;
133  value += result;
134 
135  // evaluate the number of positions where no helix can be built
136  GBDATA *gb_result3 = GB_search(gb_quality_ali, "number_of_no_helix", GB_INT);
137  int noh = GB_read_int(gb_result3);
138  if (noh < 20) {
139  result = 5;
140  }
141  else {
142  if (noh < 50) {
143  result = 4;
144  }
145  else {
146  if (noh < 125) {
147  result = 3;
148  }
149  else {
150  if (noh < 250) {
151  result = 2;
152  }
153  else {
154  if (noh < 500) {
155  result = 1;
156  }
157  else {
158  result = 0;
159  }
160  }
161  }
162  }
163  }
164  if (result != 0) result = (result * weights.helix) / 5;
165  value += result;
166 
167  // evaluate the consensus
168  GBDATA *gb_result4 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
169  int cos = GB_read_int(gb_result4);
170 
171  result = cos;
172  if (result != 0) result = (result * weights.consensus) / 12;
173  value += result;
174 
175  // evaluate the number of iupacs in a sequence
176  GBDATA *gb_result5 = GB_search(gb_quality_ali, "iupac_value", GB_INT);
177  int iupv = GB_read_int(gb_result5);
178 
179  if (iupv < 1) {
180  result = 3;
181  }
182  else {
183  if (iupv < 5) {
184  result = 2;
185  }
186  else {
187  if (iupv < 10) {
188  result = 1;
189  }
190  else {
191  result = 0;
192  }
193  }
194  }
195  if (result != 0) result = (result * weights.iupac) / 3;
196  value += result;
197 
198  // evaluate the difference in the GC proportion from sequence to group
199  GBDATA *gb_result6 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
200  int gcprop = GB_read_int(gb_result6);
201 
202  if (abs(gcprop) < 1) {
203  result = 5;
204  }
205  else {
206  if (abs(gcprop) < 2) {
207  result = 4;
208  }
209  else {
210  if (abs(gcprop) < 4) {
211  result = 3;
212  }
213  else {
214  if (abs(gcprop) < 8) {
215  result = 2;
216  }
217  else {
218  if (abs(gcprop) < 16)
219  result = 1;
220  else {
221  result = 0;
222  }
223  }
224  }
225  }
226  }
227  if (result != 0) result = (result * weights.gc) / 5;
228  value += result;
229 
230  // write the final value of the evaluation
231  int value2 = sq_round(value);
232  GBDATA *gb_result7 = GB_search(gb_quality_ali, "evaluation", GB_INT);
233  seq_assert(gb_result7);
234  GB_write_int(gb_result7, value2);
235  }
236  }
237  }
238  }
239  free(alignment_name);
240 
241  error = GB_end_transaction(gb_main, error);
242  return error;
243 }
244 
245 static char *SQ_fetch_filtered_sequence(GBDATA * read_sequence, AP_filter * filter) {
246  char *filteredSequence = NULp;
247  if (read_sequence) {
248  const char *rawSequence = GB_read_char_pntr(read_sequence);
249 
250  UNCOVERED(); // @@@ use AP_filter::filter_string here! (need tests first)
251 
252  int filteredLength = filter->get_filtered_length();
253  const size_t *filterpos_2_seqpos = filter->get_filterpos_2_seqpos();
254 
255  ARB_alloc(filteredSequence, filteredLength);
256  if (filteredSequence) {
257  for (int i = 0; i < filteredLength; ++i) {
258  filteredSequence[i] = rawSequence[filterpos_2_seqpos[i]];
259  }
260  }
261  }
262  return filteredSequence;
263 }
264 
265 static GB_ERROR SQ_pass1(SQ_GroupData * globalData, GBDATA * gb_main, TreeNode * node, AP_filter * filter) {
266  GB_push_transaction(gb_main);
267 
268  GB_ERROR error = NULp;
269  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
270  GBDATA *gb_species = node->gb_node;
271  GBDATA *gb_name = GB_entry(gb_species, "name");
272 
273  if (!gb_name) error = GB_get_error();
274  else {
275  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
276 
277  if (!gb_ali) {
278  error = no_data_error(gb_species, alignment_name);
279  }
280  else {
281  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
282 
283  if (!gb_quality) {
284  error = GB_get_error();
285  }
286 
287  GBDATA *read_sequence = GB_entry(gb_ali, "data");
288  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
289 
290  if (!gb_quality_ali) error = GB_get_error();
291 
292  // real calculations start here
293  if (read_sequence) {
294  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
295  int sequenceLength = filter->get_filtered_length();
296 
297  // calculate physical layout of sequence
298  {
299  SQ_physical_layout ps_chan;
300  ps_chan.SQ_calc_physical_layout(rawSequence, sequenceLength, gb_quality_ali);
301 
302  // calculate the average number of bases in group
303  globalData->SQ_count_sequences();
304  globalData->SQ_set_avg_bases(ps_chan.
305  SQ_get_number_of_bases());
306  globalData->SQ_set_avg_gc(ps_chan.
307  SQ_get_gc_proportion());
308  }
309 
310  // get values for ambiguities
311  {
312  SQ_ambiguities ambi_chan;
313  ambi_chan.SQ_count_ambiguities(rawSequence, sequenceLength, gb_quality_ali);
314  }
315 
316  // calculate the number of strong, weak and no helixes
317  {
318  SQ_helix heli_chan(sequenceLength);
319  heli_chan.SQ_calc_helix_layout(rawSequence, gb_main, alignment_name, gb_quality_ali, filter);
320  }
321 
322  // calculate consensus sequence
323  {
324  if (!globalData->SQ_is_initialized()) {
325  globalData->SQ_init_consensus(sequenceLength);
326  }
327  globalData->SQ_add_sequence(rawSequence);
328  }
329 
330  free(rawSequence);
331  }
332  }
333  }
334 
335  free(alignment_name);
336 
337  if (error)
338  GB_abort_transaction(gb_main);
339  else
340  GB_pop_transaction(gb_main);
341 
342  return error;
343 }
344 
346  GBDATA *read_sequence = NULp;
347 
348  GB_push_transaction(gb_main);
349 
350  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
351  GB_ERROR error = NULp;
352 
353  // first pass operations
354  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species = GBT_next_species(gb_species)) {
355  GBDATA *gb_name = GB_entry(gb_species, "name");
356 
357  if (!gb_name) {
358  error = GB_get_error();
359  }
360  else {
361  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
362 
363  if (!gb_ali) {
364  error = no_data_error(gb_species, alignment_name);
365  }
366  else {
367  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
368  if (!gb_quality) {
369  error = GB_get_error();
370  }
371 
372  read_sequence = GB_entry(gb_ali, "data");
373 
374  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
375  if (!gb_quality_ali)
376  error = GB_get_error();
377 
378  // real calculations start here
379  if (read_sequence) {
380  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
381  int sequenceLength = filter->get_filtered_length();
382 
383  // calculate physical layout of sequence
384  SQ_physical_layout *ps_chan = new SQ_physical_layout;
385  ps_chan->SQ_calc_physical_layout(rawSequence, sequenceLength, gb_quality_ali);
386 
387  // calculate the average number of bases in group
388  globalData->SQ_count_sequences();
389  globalData->SQ_set_avg_bases(ps_chan->SQ_get_number_of_bases());
390  globalData->SQ_set_avg_gc(ps_chan->SQ_get_gc_proportion());
391  delete ps_chan;
392 
393  // get values for ambiguities
394  SQ_ambiguities *ambi_chan = new SQ_ambiguities;
395  ambi_chan->SQ_count_ambiguities(rawSequence, sequenceLength, gb_quality_ali);
396  delete ambi_chan;
397 
398  // calculate the number of strong, weak and no helixes
399  SQ_helix *heli_chan = new SQ_helix(sequenceLength);
400  heli_chan->SQ_calc_helix_layout(rawSequence, gb_main, alignment_name, gb_quality_ali, filter);
401  delete heli_chan;
402 
403  // calculate consensus sequence
404  {
405  if (!globalData->SQ_is_initialized()) {
406  globalData->SQ_init_consensus(sequenceLength);
407  }
408  globalData->SQ_add_sequence(rawSequence);
409  }
410  delete(rawSequence);
411  }
412  }
413  }
414  progress.inc_and_check_user_abort(error);
415  }
416 
417  free(alignment_name);
418 
419  if (error)
420  GB_abort_transaction(gb_main);
421  else
422  GB_pop_transaction(gb_main);
423 
424  return error;
425 }
426 
427 static GB_ERROR SQ_pass2(const SQ_GroupData * globalData, GBDATA * gb_main, TreeNode * node, AP_filter * filter) {
428  GB_push_transaction(gb_main);
429 
430  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
431 
432  GBDATA *gb_species = node->gb_node;
433  GBDATA *gb_name = GB_entry(gb_species, "name");
434  GB_ERROR error = NULp;
435 
436  if (!gb_name) error = GB_get_error();
437  else {
438  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
439 
440  if (!gb_ali) {
441  error = no_data_error(gb_species, alignment_name);
442  }
443  else {
444  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
445  if (!gb_quality) error = GB_get_error();
446 
447  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
448  if (!gb_quality_ali) error = GB_get_error();
449 
450  GBDATA *read_sequence = GB_entry(gb_ali, "data");
451 
452  // real calculations start here
453  if (read_sequence) {
454  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
455 
456  /*
457  calculate the average number of bases in group, and the difference of
458  a single sequence in group from it
459  */
460  {
461  GBDATA *gb_result1 = GB_search(gb_quality_ali, "number_of_bases", GB_INT);
462  int bases = GB_read_int(gb_result1);
463  int avg_bases = globalData->SQ_get_avg_bases();
464  int diff_percent = 0;
465 
466  if (avg_bases != 0) {
467  double diff = bases - avg_bases;
468  diff = (100 * diff) / avg_bases;
469  diff_percent = sq_round(diff);
470  }
471 
472  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
473  seq_assert(gb_result2);
474  GB_write_int(gb_result2, diff_percent);
475  }
476 
477  /*
478  calculate the average gc proportion in group, and the difference of
479  a single sequence in group from it
480  */
481  {
482  GBDATA *gb_result6 = GB_search(gb_quality_ali, "GC_proportion", GB_FLOAT);
483  double gcp = GB_read_float(gb_result6);
484  double avg_gc = globalData->SQ_get_avg_gc();
485  int diff_percent = 0;
486 
487  if (avg_gc != 0) {
488  double diff = gcp - avg_gc;
489  diff = (100 * diff) / avg_gc;
490  diff_percent = sq_round(diff);
491  }
492 
493  GBDATA *gb_result7 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
494  seq_assert(gb_result7);
495  GB_write_int(gb_result7, diff_percent);
496  }
497 
498  /*
499  get groupnames of visited groups
500  search for name in group dictionary
501  evaluate sequence with group consensus
502  */
503  GBDATA *gb_con = GB_search(gb_quality_ali, "consensus_conformity", GB_CREATE_CONTAINER);
504  if (!gb_con) error = GB_get_error();
505 
506  GBDATA *gb_dev = GB_search(gb_quality_ali, "consensus_deviation", GB_CREATE_CONTAINER);
507  if (!gb_dev) error = GB_get_error();
508 
509  TreeNode *backup = node; // needed?
510  int whilecounter = 0;
511  double eval = 0;
512 
513  while (backup->father) {
514  if (backup->name) {
515  SQ_GroupDataDictionary::iterator GDI = group_dict.find(backup->name);
516  if (GDI != group_dict.end()) {
517  SQ_GroupDataPtr GD_ptr = GDI->second;
518 
519  consensus_result cr = GD_ptr->SQ_calc_consensus(rawSequence);
520 
521  double value1 = cr.conformity;
522  double value2 = cr.deviation;
523  int value3 = GD_ptr->SQ_get_nr_sequences();
524 
525  GBDATA *gb_node_entry = GB_search(gb_con, "name", GB_STRING);
526  seq_assert(gb_node_entry);
527  GB_write_string(gb_node_entry, backup->name);
528 
529  gb_node_entry = GB_search(gb_con, "value", GB_FLOAT); seq_assert(gb_node_entry);
530  GB_write_float(gb_node_entry, value1);
531 
532  gb_node_entry = GB_search(gb_con, "num_species", GB_INT); seq_assert(gb_node_entry);
533  GB_write_int(gb_node_entry, value3);
534 
535  gb_node_entry = GB_search(gb_dev, "name", GB_STRING); seq_assert(gb_node_entry);
536  GB_write_string(gb_node_entry, backup->name);
537 
538  gb_node_entry = GB_search(gb_dev, "value", GB_FLOAT); seq_assert(gb_node_entry);
539  GB_write_float(gb_node_entry, value2);
540 
541  gb_node_entry = GB_search(gb_dev, "num_species", GB_INT); seq_assert(gb_node_entry);
542  GB_write_int(gb_node_entry, value3);
543 
544  // if you parse the upper two values in the evaluate() function cut the following out
545  // for time reasons i do the evaluation here, as i still have the upper two values
546  // -------------cut this-----------------
547  if (value1 > 0.95) {
548  eval += 5;
549  }
550  else {
551  if (value1 > 0.8) {
552  eval += 4;
553  }
554  else {
555  if (value1 > 0.6) {
556  eval += 3;
557  }
558  else {
559  if (value1 > 0.4) {
560  eval += 2;
561  }
562  else {
563  if (value1 > 0.25) {
564  eval += 1;
565  }
566  else {
567  eval += 0;
568  }
569  }
570  }
571  }
572  }
573  if (value2 > 0.6) {
574  eval += 0;
575  }
576  else {
577  if (value2 > 0.4) {
578  eval += 1;
579  }
580  else {
581  if (value2 > 0.2) {
582  eval += 2;
583  }
584  else {
585  if (value2 > 0.1) {
586  eval += 3;
587  }
588  else {
589  if (value2 > 0.05) {
590  eval += 4;
591  }
592  else {
593  if (value2 > 0.025) {
594  eval += 5;
595  }
596  else {
597  if (value2 > 0.01) {
598  eval += 6;
599  }
600  else {
601  eval += 7;
602  }
603  }
604  }
605  }
606  }
607  }
608  }
609  whilecounter++;
610  // ---------to this and scroll down--------
611  }
612  }
613  backup = backup->get_father();
614  }
615 
616  // --------also cut this------
617  int evaluation = 0;
618  if (eval != 0) {
619  eval = eval / whilecounter;
620  evaluation = sq_round(eval);
621  }
622  GBDATA *gb_result5 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
623  seq_assert(gb_result5);
624  GB_write_int(gb_result5, evaluation);
625  // --------end cut this-------
626 
627  free(rawSequence);
628  }
629  }
630  }
631 
632  free(alignment_name);
633 
634  if (error)
635  GB_abort_transaction(gb_main);
636  else
637  GB_pop_transaction(gb_main);
638 
639  return error;
640 }
641 
642 GB_ERROR SQ_pass2_no_tree(const SQ_GroupData * globalData, GBDATA * gb_main, AP_filter * filter, arb_progress& progress) {
643  GBDATA *read_sequence = NULp;
644 
645  GB_push_transaction(gb_main);
646 
647  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
648 
649  // second pass operations
650  GB_ERROR error = NULp;
651  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species = GBT_next_species(gb_species)) {
652  GBDATA *gb_name = GB_entry(gb_species, "name");
653 
654  if (!gb_name) {
655  error = GB_get_error();
656  }
657  else {
658  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
659  if (!gb_ali) {
660  error = no_data_error(gb_species, alignment_name);
661  }
662  else {
663  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
664  if (!gb_quality) error = GB_get_error();
665 
666  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
667  if (!gb_quality_ali) error = GB_get_error();
668 
669  read_sequence = GB_entry(gb_ali, "data");
670 
671  // real calculations start here
672  if (read_sequence) {
673  const char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
674 
675  /*
676  calculate the average number of bases in group, and the difference of
677  a single sequence in group from it
678  */
679  {
680  GBDATA *gb_result1 = GB_search(gb_quality_ali, "number_of_bases", GB_INT);
681  int bases = GB_read_int(gb_result1);
682  int avg_bases = globalData->SQ_get_avg_bases();
683  int diff_percent = 0;
684 
685  if (avg_bases != 0) {
686  double diff = bases - avg_bases;
687  diff = (100 * diff) / avg_bases;
688  diff_percent = sq_round(diff);
689  }
690 
691  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
692  seq_assert(gb_result2);
693  GB_write_int(gb_result2, diff_percent);
694  }
695 
696  /*
697  calculate the average gc proportion in group, and the difference of
698  a single sequence in group from it
699  */
700  {
701  GBDATA *gb_result6 = GB_search(gb_quality_ali, "GC_proportion", GB_FLOAT);
702  double gcp = GB_read_float(gb_result6);
703  double avg_gc = globalData->SQ_get_avg_gc();
704  int diff_percent = 0;
705 
706  if (avg_gc != 0) {
707  double diff = gcp - avg_gc;
708  diff = (100 * diff) / avg_gc;
709  diff_percent = sq_round(diff);
710  }
711 
712  GBDATA *gb_result7 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
713  seq_assert(gb_result7);
714  GB_write_int(gb_result7, diff_percent);
715  }
716  /*
717  get groupnames of visited groups
718  search for name in group dictionary
719  evaluate sequence with group consensus
720  */
721  GBDATA *gb_con = GB_search(gb_quality_ali, "consensus_conformity", GB_CREATE_CONTAINER);
722  if (!gb_con) error = GB_get_error();
723 
724  GBDATA *gb_dev = GB_search(gb_quality_ali, "consensus_deviation", GB_CREATE_CONTAINER);
725  if (!gb_dev) error = GB_get_error();
726 
727  consensus_result cr = globalData->SQ_calc_consensus(rawSequence);
728 
729  double value1 = cr.conformity;
730  double value2 = cr.deviation;
731  int value3 = globalData->SQ_get_nr_sequences();
732 
733  GBDATA *gb_node_entry = GB_search(gb_con, "name", GB_STRING);
734  seq_assert(gb_node_entry);
735  GB_write_string(gb_node_entry, "one global consensus");
736 
737  gb_node_entry = GB_search(gb_con, "value", GB_FLOAT); seq_assert(gb_node_entry);
738  GB_write_float(gb_node_entry, value1);
739 
740  gb_node_entry = GB_search(gb_con, "num_species", GB_INT); seq_assert(gb_node_entry);
741  GB_write_int(gb_node_entry, value3);
742 
743  gb_node_entry = GB_search(gb_dev, "name", GB_STRING); seq_assert(gb_node_entry);
744  GB_write_string(gb_node_entry, "one global consensus");
745 
746  gb_node_entry = GB_search(gb_dev, "value", GB_FLOAT); seq_assert(gb_node_entry);
747  GB_write_float(gb_node_entry, value2);
748 
749  gb_node_entry = GB_search(gb_dev, "num_species", GB_INT); seq_assert(gb_node_entry);
750  GB_write_int(gb_node_entry, value3);
751 
752  double eval = 0;
753 
754  // if you parse the upper two values in the evaluate() function cut the following out
755  // for time reasons i do the evaluation here, as i still have the upper two values
756  // -------------cut this-----------------
757  if (value1 > 0.95) {
758  eval += 5;
759  }
760  else {
761  if (value1 > 0.8) {
762  eval += 4;
763  }
764  else {
765  if (value1 > 0.6) {
766  eval += 3;
767  }
768  else {
769  if (value1 > 0.4) {
770  eval += 2;
771  }
772  else {
773  if (value1 > 0.25) {
774  eval += 1;
775  }
776  else {
777  eval += 0;
778  }
779  }
780  }
781  }
782  }
783  if (value2 > 0.6) {
784  eval += 0;
785  }
786  else {
787  if (value2 > 0.4) {
788  eval += 1;
789  }
790  else {
791  if (value2 > 0.2) {
792  eval += 2;
793  }
794  else {
795  if (value2 > 0.1) {
796  eval += 3;
797  }
798  else {
799  if (value2 > 0.05) {
800  eval += 4;
801  }
802  else {
803  if (value2 > 0.025) {
804  eval += 5;
805  }
806  else {
807  if (value2 > 0.01) {
808  eval += 6;
809  }
810  else {
811  eval += 7;
812  }
813  }
814  }
815  }
816  }
817  }
818  }
819 
820  {
821  int evaluation = 0;
822  if (eval != 0) evaluation = sq_round(eval);
823 
824  GBDATA *gb_result5 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
825  seq_assert(gb_result5);
826  GB_write_int(gb_result5, evaluation);
827  }
828  // --------end cut this-------
829  delete(rawSequence);
830  }
831  }
832  }
833  progress.inc_and_check_user_abort(error);
834  }
835  free(alignment_name);
836 
837  if (error)
838  GB_abort_transaction(gb_main);
839  else
840  GB_pop_transaction(gb_main);
841 
842  return error;
843 }
844 
846  SQ_GroupData *newData = data->clone(); // save actual consensus
847  *newData = *data;
848  group_dict[node->name] = newData; // and link it with an name
849 }
850 
852  if (node->is_leaf()) {
853  if (node->gb_node) {
854  SQ_pass1(data, gb_main, node, filter);
855  seq_assert(data->getSize()> 0);
856  }
857  }
858  else {
859  TreeNode *node1 = node->get_leftson();
860  TreeNode *node2 = node->get_rightson();
861 
862  if (node->name) {
863  SQ_GroupData *leftData = NULp;
864  bool parentIsEmpty = false;
865 
866  if (data->getSize() == 0) {
867  parentIsEmpty = true;
868  SQ_calc_and_apply_group_data(node1, gb_main, data, filter, progress); // process left branch with empty data
869  seq_assert(data->getSize()> 0);
870  }
871  else {
872  leftData = data->clone(); // create new empty SQ_GroupData
873  SQ_calc_and_apply_group_data(node1, gb_main, leftData, filter, progress); // process left branch
874  seq_assert(leftData->getSize()> 0);
875  }
876 
877  SQ_GroupData *rightData = data->clone(); // create new empty SQ_GroupData
878  SQ_calc_and_apply_group_data(node2, gb_main, rightData, filter, progress); // process right branch
879  seq_assert(rightData->getSize()> 0);
880 
881  if (!parentIsEmpty) {
882  data->SQ_add(*leftData);
883  delete leftData;
884  }
885 
886  data->SQ_add(*rightData);
887  delete rightData;
888 
889  create_multi_level_consensus(node, data);
890  }
891  else {
892  SQ_calc_and_apply_group_data(node1, gb_main, data, filter, progress); // enter left branch
893  seq_assert(data->getSize()> 0);
894 
895  SQ_calc_and_apply_group_data(node2, gb_main, data, filter, progress); // enter right branch
896  seq_assert(data->getSize()> 0);
897  }
898  }
899  progress.inc();
900 }
901 
903  if (node->is_leaf()) {
904  if (node->gb_node) {
905  SQ_pass2(data, gb_main, node, filter);
906  }
907  }
908  else {
909  TreeNode *node1 = node->get_leftson();
910  TreeNode *node2 = node->get_rightson();
911 
912  if (node1) SQ_calc_and_apply_group_data2(node1, gb_main, data, filter, progress);
913  if (node2) SQ_calc_and_apply_group_data2(node2, gb_main, data, filter, progress);
914  }
915  progress.inc();
916 }
917 
918 // marks species that are below threshold "evaluation"
919 GB_ERROR SQ_mark_species(GBDATA * gb_main, int condition, bool marked_only) {
920  char *alignment_name;
921  int result = 0;
922 
923  GBDATA *read_sequence = NULp;
924  GBDATA *gb_species;
925  GB_ERROR error = NULp;
926 
927  GB_push_transaction(gb_main);
928  alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
929 
930  species_iterator getFirst = NULp;
931  species_iterator getNext = NULp;
932 
933  if (marked_only) {
934  getFirst = GBT_first_marked_species;
935  getNext = GBT_next_marked_species;
936  }
937  else {
938  getFirst = GBT_first_species;
939  getNext = GBT_next_species;
940  }
941 
942  for (gb_species = getFirst(gb_main); gb_species; gb_species = getNext(gb_species)) {
943  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
944  bool marked = false;
945  if (gb_ali) {
946  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
947  if (gb_quality) {
948  read_sequence = GB_entry(gb_ali, "data");
949  if (read_sequence) {
950  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
951  if (gb_quality_ali) {
952  GBDATA *gb_result1 = GB_search(gb_quality_ali, "evaluation", GB_INT);
953  result = GB_read_int(gb_result1);
954 
955  if (result < condition) marked = true;
956  }
957  }
958  }
959  }
960 
961  if (GB_read_flag(gb_species) != marked) {
962  GB_write_flag(gb_species, marked);
963  }
964  }
965  free(alignment_name);
966 
967  if (error)
968  GB_abort_transaction(gb_main);
969  else
970  GB_pop_transaction(gb_main);
971 
972  return error;
973 }
974 
976  SQ_TREE_ERROR retval = NONE;
977 
978  if (!node)
979  return MISSING_NODE;
980 
981  if (node->is_leaf()) {
982  if (!node->gb_node)
983  retval = ZOMBIE;
984  }
985  else {
986  retval = SQ_check_tree_structure(node->get_leftson());
987  if (retval == NONE)
988  retval = SQ_check_tree_structure(node->get_rightson());
989  }
990 
991  return retval;
992 }
GB_ERROR GB_get_error()
Definition: arb_msg.cxx:333
static GB_ERROR no_data_error(GBDATA *gb_species, const char *ali_name)
const char * GB_ERROR
Definition: arb_core.h:25
static char * SQ_fetch_filtered_sequence(GBDATA *read_sequence, AP_filter *filter)
#define NONE
Definition: GDE_def.h:34
static GB_ERROR SQ_pass2(const SQ_GroupData *globalData, GBDATA *gb_main, TreeNode *node, AP_filter *filter)
string result
const size_t * get_filterpos_2_seqpos() const
Definition: AP_filter.hxx:91
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:729
int getSize() const
Definition: SQ_GroupData.h:81
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
bool SQ_is_initialized() const
Definition: SQ_GroupData.h:71
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
void SQ_calc_and_apply_group_data(TreeNode *node, GBDATA *gb_main, SQ_GroupData *data, AP_filter *filter, arb_progress &progress)
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
virtual void SQ_add_sequence(const char *sequence)=0
static char * alignment_name
STL namespace.
void SQ_calc_physical_layout(const char *sequence, int size, GBDATA *gb_quality_ali)
void SQ_count_ambiguities(const char *iupac, int length, GBDATA *gb_quality)
static int sq_round(double value)
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
void SQ_count_sequences()
Definition: SQ_GroupData.h:65
std::map< std::string, SQ_GroupDataPtr > SQ_GroupDataDictionary
Definition: SQ_functions.h:42
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
virtual consensus_result SQ_calc_consensus(const char *sequence) const =0
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
Definition: arbdb.h:67
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
void SQ_calc_helix_layout(const char *sequence, GBDATA *gb_main, char *alignment_name, GBDATA *gb_quality, AP_filter *filter)
Definition: SQ_helix.h:97
virtual SQ_GroupData * clone() const =0
int SQ_get_avg_bases() const
Definition: SQ_GroupData.h:56
static void create_multi_level_consensus(TreeNode *node, SQ_GroupData *data)
GB_ERROR SQ_mark_species(GBDATA *gb_main, int condition, bool marked_only)
Generic smart pointer.
Definition: smartptr.h:149
GB_ERROR SQ_pass2_no_tree(const SQ_GroupData *globalData, GBDATA *gb_main, AP_filter *filter, arb_progress &progress)
void SQ_clear_group_dictionary()
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
void SQ_set_avg_bases(int bases)
Definition: SQ_GroupData.h:53
TreeNode * father
Definition: TreeNode.h:171
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2539
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
Definition: ad_io_inline.h:19
float GB_read_float(GBDATA *gbd)
Definition: arbdb.cxx:744
SQ_TREE_ERROR
Definition: SQ_functions.h:80
int GB_read_flag(GBDATA *gbd)
Definition: arbdb.cxx:2796
virtual void SQ_add(const SQ_GroupData &other)=0
#define seq_assert(bed)
GB_ERROR GB_pop_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2524
Definition: arbdb.h:86
GB_ERROR GB_write_float(GBDATA *gbd, float f)
Definition: arbdb.cxx:1281
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1250
size_t get_filtered_length() const
Definition: AP_filter.hxx:82
GB_ERROR SQ_pass1_no_tree(SQ_GroupData *globalData, GBDATA *gb_main, AP_filter *filter, arb_progress &progress)
double SQ_get_gc_proportion() const
int SQ_get_number_of_bases() const
static GB_ERROR SQ_pass1(SQ_GroupData *globalData, GBDATA *gb_main, TreeNode *node, AP_filter *filter)
bool is_leaf() const
Definition: TreeNode.h:211
int SQ_get_nr_sequences() const
Definition: SQ_GroupData.h:68
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2773
GB_ERROR SQ_evaluate(GBDATA *gb_main, const SQ_weights &weights, bool marked_only)
char * name
Definition: TreeNode.h:174
virtual void SQ_init_consensus(int size)=0
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
#define abs(x)
Definition: f2c.h:151
void SQ_set_avg_gc(double gc)
Definition: SQ_GroupData.h:59
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:114
double SQ_get_avg_gc() const
Definition: SQ_GroupData.h:62
static SQ_GroupDataDictionary group_dict
SQ_TREE_ERROR SQ_check_tree_structure(TreeNode *node)
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_node
Definition: TreeNode.h:173
GBDATA * gb_main
Definition: adname.cxx:32
GBDATA *(* species_iterator)(GBDATA *)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
void SQ_calc_and_apply_group_data2(TreeNode *node, GBDATA *gb_main, const SQ_GroupData *data, AP_filter *filter, arb_progress &progress)
int diff_from_average
Definition: SQ_functions.h:59
GB_ERROR SQ_remove_quality_entries(GBDATA *gb_main)
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:332
#define UNCOVERED()
Definition: arb_assert.h:380
Definition: arbdb.h:66