ARB
SQ_functions.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : SQ_functions.cxx //
4 // Purpose : Implementation of SQ_functions.h //
5 // //
6 // //
7 // Coded by Juergen Huber in July 2003 - February 2004 //
8 // Coded by Kai Bader (baderk@in.tum.de) in 2007 - 2008 //
9 // Copyright Department of Microbiology (Technical University Munich) //
10 // //
11 // Visit our web site at: http://www.arb-home.de/ //
12 // //
13 // ==================================================================== //
14 
15 #include "SQ_ambiguities.h"
16 #include "SQ_helix.h"
17 #include "SQ_physical_layout.h"
18 #include "SQ_functions.h"
19 
20 #include <aw_preset.hxx>
21 #include <arb_progress.h>
22 #include <TreeNode.h>
23 
24 using namespace std;
25 
26 typedef GBDATA *(*species_iterator)(GBDATA *);
27 
29 
30 enum {
32 };
33 
36  swap(tmp, group_dict);
37 }
38 
39 static GB_ERROR no_data_error(GBDATA * gb_species, const char *ali_name) {
40  GBDATA *gb_name = GB_entry(gb_species, "name");
41  const char *name = "<unknown>";
42  if (gb_name)
43  name = GB_read_char_pntr(gb_name);
44  return GBS_global_string("Species '%s' has no data in alignment '%s'",
45  name, ali_name);
46 }
47 
48 static int sq_round(double value) {
49  int x;
50 
51  value += 0.5;
52  x = (int) floor(value);
53  return x;
54 }
55 
57  GB_push_transaction(gb_main);
59 
60  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species
61  = GBT_next_species(gb_species)) {
62  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_FIND);
63  if (gb_quality)
64  error = GB_delete(gb_quality);
65  }
66  if (error)
67  GB_abort_transaction(gb_main);
68  else
69  GB_pop_transaction(gb_main);
70  return error;
71 }
72 
73 GB_ERROR SQ_evaluate(GBDATA * gb_main, const SQ_weights & weights, bool marked_only) {
74  GB_push_transaction(gb_main);
76  seq_assert(alignment_name);
77 
80 
82  for (GBDATA *gb_species = getFirst(gb_main); gb_species && !error; gb_species = getNext(gb_species)) {
83  GBDATA *gb_name = GB_entry(gb_species, "name");
84  if (!gb_name) {
85  error = GB_get_error();
86  }
87  else {
88  GBDATA *gb_quality = GB_entry(gb_species, "quality");
89  if (gb_quality) {
90  GBDATA *gb_quality_ali = GB_entry(gb_quality, alignment_name);
91 
92  if (!gb_quality_ali) {
93  error = GBS_global_string("No alignment entry '%s' in quality data", alignment_name);
94  }
95  else {
96 
97  // evaluate the percentage of bases the actual sequence consists of
98  GBDATA *gb_result1 = GB_search(gb_quality_ali, "percent_of_bases", GB_INT);
99  int bases = GB_read_int(gb_result1);
100 
101  double result = bases<4 ? 0 : (bases<6 ? 1 : 2);
102 
103  if (result != 0) result = (result * weights.bases) / 2;
104 
105  double value = 0;
106  value += result;
107 
108  // evaluate the difference in number of bases from sequence to group
109  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
110  int dfa = GB_read_int(gb_result2);
111  if (abs(dfa) < 2) {
112  result = 5;
113  }
114  else {
115  if (abs(dfa) < 4) {
116  result = 4;
117  }
118  else {
119  if (abs(dfa) < 6) {
120  result = 3;
121  }
122  else {
123  if (abs(dfa) < 8) {
124  result = 2;
125  }
126  else {
127  if (abs(dfa) < 10) {
128  result = 1;
129  }
130  else {
131  result = 0;
132  }
133  }
134  }
135  }
136  }
137  if (result != 0) result = (result * weights.diff_from_average) / 5;
138  value += result;
139 
140  // evaluate the number of positions where no helix can be built
141  GBDATA *gb_result3 = GB_search(gb_quality_ali, "number_of_no_helix", GB_INT);
142  int noh = GB_read_int(gb_result3);
143  if (noh < 20) {
144  result = 5;
145  }
146  else {
147  if (noh < 50) {
148  result = 4;
149  }
150  else {
151  if (noh < 125) {
152  result = 3;
153  }
154  else {
155  if (noh < 250) {
156  result = 2;
157  }
158  else {
159  if (noh < 500) {
160  result = 1;
161  }
162  else {
163  result = 0;
164  }
165  }
166  }
167  }
168  }
169  if (result != 0) result = (result * weights.helix) / 5;
170  value += result;
171 
172  // evaluate the consensus
173  GBDATA *gb_result4 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
174  int cos = GB_read_int(gb_result4);
175 
176  result = cos;
177  if (result != 0) result = (result * weights.consensus) / 12;
178  value += result;
179 
180  // evaluate the number of iupacs in a sequence
181  GBDATA *gb_result5 = GB_search(gb_quality_ali, "iupac_value", GB_INT);
182  int iupv = GB_read_int(gb_result5);
183 
184  if (iupv < 1) {
185  result = 3;
186  }
187  else {
188  if (iupv < 5) {
189  result = 2;
190  }
191  else {
192  if (iupv < 10) {
193  result = 1;
194  }
195  else {
196  result = 0;
197  }
198  }
199  }
200  if (result != 0) result = (result * weights.iupac) / 3;
201  value += result;
202 
203  // evaluate the difference in the GC proportion from sequence to group
204  GBDATA *gb_result6 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
205  int gcprop = GB_read_int(gb_result6);
206 
207  if (abs(gcprop) < 1) {
208  result = 5;
209  }
210  else {
211  if (abs(gcprop) < 2) {
212  result = 4;
213  }
214  else {
215  if (abs(gcprop) < 4) {
216  result = 3;
217  }
218  else {
219  if (abs(gcprop) < 8) {
220  result = 2;
221  }
222  else {
223  if (abs(gcprop) < 16)
224  result = 1;
225  else {
226  result = 0;
227  }
228  }
229  }
230  }
231  }
232  if (result != 0) result = (result * weights.gc) / 5;
233  value += result;
234 
235  // write the final value of the evaluation
236  int value2 = sq_round(value);
237  GBDATA *gb_result7 = GB_search(gb_quality_ali, "evaluation", GB_INT);
238  seq_assert(gb_result7);
239  GB_write_int(gb_result7, value2);
240  }
241  }
242  }
243  }
244  free(alignment_name);
245 
246  error = GB_end_transaction(gb_main, error);
247  return error;
248 }
249 
250 static char *SQ_fetch_filtered_sequence(GBDATA * read_sequence, AP_filter * filter) {
251  char *filteredSequence = NULp;
252  if (read_sequence) {
253  const char *rawSequence = GB_read_char_pntr(read_sequence);
254 
255  UNCOVERED(); // @@@ use AP_filter::filter_string here! (need tests first)
256 
257  int filteredLength = filter->get_filtered_length();
258  const size_t *filterpos_2_seqpos = filter->get_filterpos_2_seqpos();
259 
260  ARB_alloc(filteredSequence, filteredLength);
261  if (filteredSequence) {
262  for (int i = 0; i < filteredLength; ++i) {
263  filteredSequence[i] = rawSequence[filterpos_2_seqpos[i]];
264  }
265  }
266  }
267  return filteredSequence;
268 }
269 
270 static GB_ERROR SQ_pass1(SQ_GroupData * globalData, GBDATA * gb_main, TreeNode * node, AP_filter * filter) {
271  GB_push_transaction(gb_main);
272 
273  GB_ERROR error = NULp;
274  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
275  GBDATA *gb_species = node->gb_node;
276  GBDATA *gb_name = GB_entry(gb_species, "name");
277 
278  if (!gb_name) error = GB_get_error();
279  else {
280  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
281 
282  if (!gb_ali) {
283  error = no_data_error(gb_species, alignment_name);
284  }
285  else {
286  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
287 
288  if (!gb_quality) {
289  error = GB_get_error();
290  }
291 
292  GBDATA *read_sequence = GB_entry(gb_ali, "data");
293  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
294 
295  if (!gb_quality_ali) error = GB_get_error();
296 
297  // real calculations start here
298  if (read_sequence) {
299  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
300  int sequenceLength = filter->get_filtered_length();
301 
302  // calculate physical layout of sequence
303  {
304  SQ_physical_layout ps_chan;
305  ps_chan.SQ_calc_physical_layout(rawSequence, sequenceLength, gb_quality_ali);
306 
307  // calculate the average number of bases in group
308  globalData->SQ_count_sequences();
309  globalData->SQ_set_avg_bases(ps_chan.
310  SQ_get_number_of_bases());
311  globalData->SQ_set_avg_gc(ps_chan.
312  SQ_get_gc_proportion());
313  }
314 
315  // get values for ambiguities
316  {
317  SQ_ambiguities ambi_chan;
318  ambi_chan.SQ_count_ambiguities(rawSequence, sequenceLength, gb_quality_ali);
319  }
320 
321  // calculate the number of strong, weak and no helixes
322  {
323  SQ_helix heli_chan(sequenceLength);
324  heli_chan.SQ_calc_helix_layout(rawSequence, gb_main, alignment_name, gb_quality_ali, filter);
325  }
326 
327  // calculate consensus sequence
328  {
329  if (!globalData->SQ_is_initialized()) {
330  globalData->SQ_init_consensus(sequenceLength);
331  }
332  globalData->SQ_add_sequence(rawSequence);
333  }
334 
335  free(rawSequence);
336  }
337  }
338  }
339 
340  free(alignment_name);
341 
342  if (error)
343  GB_abort_transaction(gb_main);
344  else
345  GB_pop_transaction(gb_main);
346 
347  return error;
348 }
349 
351  GBDATA *read_sequence = NULp;
352 
353  GB_push_transaction(gb_main);
354 
355  char *alignment_name = GBT_get_default_alignment(gb_main);
356  GB_ERROR error = NULp;
357 
358  seq_assert(alignment_name);
359 
360  // first pass operations
361  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species = GBT_next_species(gb_species)) {
362  GBDATA *gb_name = GB_entry(gb_species, "name");
363 
364  if (!gb_name) {
365  error = GB_get_error();
366  }
367  else {
368  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
369 
370  if (!gb_ali) {
371  error = no_data_error(gb_species, alignment_name);
372  }
373  else {
374  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
375  if (!gb_quality) {
376  error = GB_get_error();
377  }
378 
379  read_sequence = GB_entry(gb_ali, "data");
380 
381  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
382  if (!gb_quality_ali)
383  error = GB_get_error();
384 
385  // real calculations start here
386  if (read_sequence) {
387  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
388  int sequenceLength = filter->get_filtered_length();
389 
390  // calculate physical layout of sequence
391  SQ_physical_layout *ps_chan = new SQ_physical_layout;
392  ps_chan->SQ_calc_physical_layout(rawSequence, sequenceLength, gb_quality_ali);
393 
394  // calculate the average number of bases in group
395  globalData->SQ_count_sequences();
396  globalData->SQ_set_avg_bases(ps_chan->SQ_get_number_of_bases());
397  globalData->SQ_set_avg_gc(ps_chan->SQ_get_gc_proportion());
398  delete ps_chan;
399 
400  // get values for ambiguities
401  SQ_ambiguities *ambi_chan = new SQ_ambiguities;
402  ambi_chan->SQ_count_ambiguities(rawSequence, sequenceLength, gb_quality_ali);
403  delete ambi_chan;
404 
405  // calculate the number of strong, weak and no helixes
406  SQ_helix *heli_chan = new SQ_helix(sequenceLength);
407  heli_chan->SQ_calc_helix_layout(rawSequence, gb_main, alignment_name, gb_quality_ali, filter);
408  delete heli_chan;
409 
410  // calculate consensus sequence
411  {
412  if (!globalData->SQ_is_initialized()) {
413  globalData->SQ_init_consensus(sequenceLength);
414  }
415  globalData->SQ_add_sequence(rawSequence);
416  }
417  delete(rawSequence);
418  }
419  }
420  }
421  progress.inc_and_check_user_abort(error);
422  }
423 
424  free(alignment_name);
425 
426  if (error)
427  GB_abort_transaction(gb_main);
428  else
429  GB_pop_transaction(gb_main);
430 
431  return error;
432 }
433 
434 static GB_ERROR SQ_pass2(const SQ_GroupData * globalData, GBDATA * gb_main, TreeNode * node, AP_filter * filter) {
435  GB_push_transaction(gb_main);
436 
437  char *alignment_name = GBT_get_default_alignment(gb_main);
438  seq_assert(alignment_name);
439 
440  GBDATA *gb_species = node->gb_node;
441  GBDATA *gb_name = GB_entry(gb_species, "name");
442  GB_ERROR error = NULp;
443 
444  if (!gb_name) error = GB_get_error();
445  else {
446  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
447 
448  if (!gb_ali) {
449  error = no_data_error(gb_species, alignment_name);
450  }
451  else {
452  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
453  if (!gb_quality) error = GB_get_error();
454 
455  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
456  if (!gb_quality_ali) error = GB_get_error();
457 
458  GBDATA *read_sequence = GB_entry(gb_ali, "data");
459 
460  // real calculations start here
461  if (read_sequence) {
462  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
463 
464  /*
465  calculate the average number of bases in group, and the difference of
466  a single sequence in group from it
467  */
468  {
469  GBDATA *gb_result1 = GB_search(gb_quality_ali, "number_of_bases", GB_INT);
470  int bases = GB_read_int(gb_result1);
471  int avg_bases = globalData->SQ_get_avg_bases();
472  int diff_percent = 0;
473 
474  if (avg_bases != 0) {
475  double diff = bases - avg_bases;
476  diff = (100 * diff) / avg_bases;
477  diff_percent = sq_round(diff);
478  }
479 
480  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
481  seq_assert(gb_result2);
482  GB_write_int(gb_result2, diff_percent);
483  }
484 
485  /*
486  calculate the average gc proportion in group, and the difference of
487  a single sequence in group from it
488  */
489  {
490  GBDATA *gb_result6 = GB_search(gb_quality_ali, "GC_proportion", GB_FLOAT);
491  double gcp = GB_read_float(gb_result6);
492  double avg_gc = globalData->SQ_get_avg_gc();
493  int diff_percent = 0;
494 
495  if (avg_gc != 0) {
496  double diff = gcp - avg_gc;
497  diff = (100 * diff) / avg_gc;
498  diff_percent = sq_round(diff);
499  }
500 
501  GBDATA *gb_result7 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
502  seq_assert(gb_result7);
503  GB_write_int(gb_result7, diff_percent);
504  }
505 
506  /*
507  get groupnames of visited groups
508  search for name in group dictionary
509  evaluate sequence with group consensus
510  */
511  GBDATA *gb_con = GB_search(gb_quality_ali, "consensus_conformity", GB_CREATE_CONTAINER);
512  if (!gb_con) error = GB_get_error();
513 
514  GBDATA *gb_dev = GB_search(gb_quality_ali, "consensus_deviation", GB_CREATE_CONTAINER);
515  if (!gb_dev) error = GB_get_error();
516 
517  TreeNode *backup = node; // needed?
518  int whilecounter = 0;
519  double eval = 0;
520 
521  while (backup->father) {
522  if (backup->name) {
523  SQ_GroupDataDictionary::iterator GDI = group_dict.find(backup->name);
524  if (GDI != group_dict.end()) {
525  SQ_GroupDataPtr GD_ptr = GDI->second;
526 
527  consensus_result cr = GD_ptr->SQ_calc_consensus(rawSequence);
528 
529  double value1 = cr.conformity;
530  double value2 = cr.deviation;
531  int value3 = GD_ptr->SQ_get_nr_sequences();
532 
533  GBDATA *gb_node_entry = GB_search(gb_con, "name", GB_STRING);
534  seq_assert(gb_node_entry);
535  GB_write_string(gb_node_entry, backup->name);
536 
537  gb_node_entry = GB_search(gb_con, "value", GB_FLOAT); seq_assert(gb_node_entry);
538  GB_write_float(gb_node_entry, value1);
539 
540  gb_node_entry = GB_search(gb_con, "num_species", GB_INT); seq_assert(gb_node_entry);
541  GB_write_int(gb_node_entry, value3);
542 
543  gb_node_entry = GB_search(gb_dev, "name", GB_STRING); seq_assert(gb_node_entry);
544  GB_write_string(gb_node_entry, backup->name);
545 
546  gb_node_entry = GB_search(gb_dev, "value", GB_FLOAT); seq_assert(gb_node_entry);
547  GB_write_float(gb_node_entry, value2);
548 
549  gb_node_entry = GB_search(gb_dev, "num_species", GB_INT); seq_assert(gb_node_entry);
550  GB_write_int(gb_node_entry, value3);
551 
552  // if you parse the upper two values in the evaluate() function cut the following out
553  // for time reasons i do the evaluation here, as i still have the upper two values
554  // -------------cut this-----------------
555  if (value1 > 0.95) {
556  eval += 5;
557  }
558  else {
559  if (value1 > 0.8) {
560  eval += 4;
561  }
562  else {
563  if (value1 > 0.6) {
564  eval += 3;
565  }
566  else {
567  if (value1 > 0.4) {
568  eval += 2;
569  }
570  else {
571  if (value1 > 0.25) {
572  eval += 1;
573  }
574  else {
575  eval += 0;
576  }
577  }
578  }
579  }
580  }
581  if (value2 > 0.6) {
582  eval += 0;
583  }
584  else {
585  if (value2 > 0.4) {
586  eval += 1;
587  }
588  else {
589  if (value2 > 0.2) {
590  eval += 2;
591  }
592  else {
593  if (value2 > 0.1) {
594  eval += 3;
595  }
596  else {
597  if (value2 > 0.05) {
598  eval += 4;
599  }
600  else {
601  if (value2 > 0.025) {
602  eval += 5;
603  }
604  else {
605  if (value2 > 0.01) {
606  eval += 6;
607  }
608  else {
609  eval += 7;
610  }
611  }
612  }
613  }
614  }
615  }
616  }
617  whilecounter++;
618  // ---------to this and scroll down--------
619  }
620  }
621  backup = backup->get_father();
622  }
623 
624  // --------also cut this------
625  int evaluation = 0;
626  if (eval != 0) {
627  eval = eval / whilecounter;
628  evaluation = sq_round(eval);
629  }
630  GBDATA *gb_result5 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
631  seq_assert(gb_result5);
632  GB_write_int(gb_result5, evaluation);
633  // --------end cut this-------
634 
635  free(rawSequence);
636  }
637  }
638  }
639 
640  free(alignment_name);
641 
642  if (error)
643  GB_abort_transaction(gb_main);
644  else
645  GB_pop_transaction(gb_main);
646 
647  return error;
648 }
649 
650 GB_ERROR SQ_pass2_no_tree(const SQ_GroupData * globalData, GBDATA * gb_main, AP_filter * filter, arb_progress& progress) {
651  GBDATA *read_sequence = NULp;
652 
653  GB_push_transaction(gb_main);
654 
655  char *alignment_name = GBT_get_default_alignment(gb_main);
656  seq_assert(alignment_name);
657 
658  // second pass operations
659  GB_ERROR error = NULp;
660  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species = GBT_next_species(gb_species)) {
661  GBDATA *gb_name = GB_entry(gb_species, "name");
662 
663  if (!gb_name) {
664  error = GB_get_error();
665  }
666  else {
667  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
668  if (!gb_ali) {
669  error = no_data_error(gb_species, alignment_name);
670  }
671  else {
672  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
673  if (!gb_quality) error = GB_get_error();
674 
675  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
676  if (!gb_quality_ali) error = GB_get_error();
677 
678  read_sequence = GB_entry(gb_ali, "data");
679 
680  // real calculations start here
681  if (read_sequence) {
682  const char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
683 
684  /*
685  calculate the average number of bases in group, and the difference of
686  a single sequence in group from it
687  */
688  {
689  GBDATA *gb_result1 = GB_search(gb_quality_ali, "number_of_bases", GB_INT);
690  int bases = GB_read_int(gb_result1);
691  int avg_bases = globalData->SQ_get_avg_bases();
692  int diff_percent = 0;
693 
694  if (avg_bases != 0) {
695  double diff = bases - avg_bases;
696  diff = (100 * diff) / avg_bases;
697  diff_percent = sq_round(diff);
698  }
699 
700  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
701  seq_assert(gb_result2);
702  GB_write_int(gb_result2, diff_percent);
703  }
704 
705  /*
706  calculate the average gc proportion in group, and the difference of
707  a single sequence in group from it
708  */
709  {
710  GBDATA *gb_result6 = GB_search(gb_quality_ali, "GC_proportion", GB_FLOAT);
711  double gcp = GB_read_float(gb_result6);
712  double avg_gc = globalData->SQ_get_avg_gc();
713  int diff_percent = 0;
714 
715  if (avg_gc != 0) {
716  double diff = gcp - avg_gc;
717  diff = (100 * diff) / avg_gc;
718  diff_percent = sq_round(diff);
719  }
720 
721  GBDATA *gb_result7 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
722  seq_assert(gb_result7);
723  GB_write_int(gb_result7, diff_percent);
724  }
725  /*
726  get groupnames of visited groups
727  search for name in group dictionary
728  evaluate sequence with group consensus
729  */
730  GBDATA *gb_con = GB_search(gb_quality_ali, "consensus_conformity", GB_CREATE_CONTAINER);
731  if (!gb_con) error = GB_get_error();
732 
733  GBDATA *gb_dev = GB_search(gb_quality_ali, "consensus_deviation", GB_CREATE_CONTAINER);
734  if (!gb_dev) error = GB_get_error();
735 
736  consensus_result cr = globalData->SQ_calc_consensus(rawSequence);
737 
738  double value1 = cr.conformity;
739  double value2 = cr.deviation;
740  int value3 = globalData->SQ_get_nr_sequences();
741 
742  GBDATA *gb_node_entry = GB_search(gb_con, "name", GB_STRING);
743  seq_assert(gb_node_entry);
744  GB_write_string(gb_node_entry, "one global consensus");
745 
746  gb_node_entry = GB_search(gb_con, "value", GB_FLOAT); seq_assert(gb_node_entry);
747  GB_write_float(gb_node_entry, value1);
748 
749  gb_node_entry = GB_search(gb_con, "num_species", GB_INT); seq_assert(gb_node_entry);
750  GB_write_int(gb_node_entry, value3);
751 
752  gb_node_entry = GB_search(gb_dev, "name", GB_STRING); seq_assert(gb_node_entry);
753  GB_write_string(gb_node_entry, "one global consensus");
754 
755  gb_node_entry = GB_search(gb_dev, "value", GB_FLOAT); seq_assert(gb_node_entry);
756  GB_write_float(gb_node_entry, value2);
757 
758  gb_node_entry = GB_search(gb_dev, "num_species", GB_INT); seq_assert(gb_node_entry);
759  GB_write_int(gb_node_entry, value3);
760 
761  double eval = 0;
762 
763  // if you parse the upper two values in the evaluate() function cut the following out
764  // for time reasons i do the evaluation here, as i still have the upper two values
765  // -------------cut this-----------------
766  if (value1 > 0.95) {
767  eval += 5;
768  }
769  else {
770  if (value1 > 0.8) {
771  eval += 4;
772  }
773  else {
774  if (value1 > 0.6) {
775  eval += 3;
776  }
777  else {
778  if (value1 > 0.4) {
779  eval += 2;
780  }
781  else {
782  if (value1 > 0.25) {
783  eval += 1;
784  }
785  else {
786  eval += 0;
787  }
788  }
789  }
790  }
791  }
792  if (value2 > 0.6) {
793  eval += 0;
794  }
795  else {
796  if (value2 > 0.4) {
797  eval += 1;
798  }
799  else {
800  if (value2 > 0.2) {
801  eval += 2;
802  }
803  else {
804  if (value2 > 0.1) {
805  eval += 3;
806  }
807  else {
808  if (value2 > 0.05) {
809  eval += 4;
810  }
811  else {
812  if (value2 > 0.025) {
813  eval += 5;
814  }
815  else {
816  if (value2 > 0.01) {
817  eval += 6;
818  }
819  else {
820  eval += 7;
821  }
822  }
823  }
824  }
825  }
826  }
827  }
828 
829  {
830  int evaluation = 0;
831  if (eval != 0) evaluation = sq_round(eval);
832 
833  GBDATA *gb_result5 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
834  seq_assert(gb_result5);
835  GB_write_int(gb_result5, evaluation);
836  }
837  // --------end cut this-------
838  delete(rawSequence);
839  }
840  }
841  }
842  progress.inc_and_check_user_abort(error);
843  }
844  free(alignment_name);
845 
846  if (error)
847  GB_abort_transaction(gb_main);
848  else
849  GB_pop_transaction(gb_main);
850 
851  return error;
852 }
853 
855  SQ_GroupData *newData = data->clone(); // save actual consensus
856  *newData = *data;
857  group_dict[node->name] = newData; // and link it with an name
858 }
859 
861  if (node->is_leaf()) {
862  if (node->gb_node) {
863  SQ_pass1(data, gb_main, node, filter);
864  seq_assert(data->getSize()> 0);
865  }
866  }
867  else {
868  TreeNode *node1 = node->get_leftson();
869  TreeNode *node2 = node->get_rightson();
870 
871  if (node->name) {
872  SQ_GroupData *leftData = NULp;
873  bool parentIsEmpty = false;
874 
875  if (data->getSize() == 0) {
876  parentIsEmpty = true;
877  SQ_calc_and_apply_group_data(node1, gb_main, data, filter, progress); // process left branch with empty data
878  seq_assert(data->getSize()> 0);
879  }
880  else {
881  leftData = data->clone(); // create new empty SQ_GroupData
882  SQ_calc_and_apply_group_data(node1, gb_main, leftData, filter, progress); // process left branch
883  seq_assert(leftData->getSize()> 0);
884  }
885 
886  SQ_GroupData *rightData = data->clone(); // create new empty SQ_GroupData
887  SQ_calc_and_apply_group_data(node2, gb_main, rightData, filter, progress); // process right branch
888  seq_assert(rightData->getSize()> 0);
889 
890  if (!parentIsEmpty) {
891  data->SQ_add(*leftData);
892  delete leftData;
893  }
894 
895  data->SQ_add(*rightData);
896  delete rightData;
897 
898  create_multi_level_consensus(node, data);
899  }
900  else {
901  SQ_calc_and_apply_group_data(node1, gb_main, data, filter, progress); // enter left branch
902  seq_assert(data->getSize()> 0);
903 
904  SQ_calc_and_apply_group_data(node2, gb_main, data, filter, progress); // enter right branch
905  seq_assert(data->getSize()> 0);
906  }
907  }
908  progress.inc();
909 }
910 
912  if (node->is_leaf()) {
913  if (node->gb_node) {
914  SQ_pass2(data, gb_main, node, filter);
915  }
916  }
917  else {
918  TreeNode *node1 = node->get_leftson();
919  TreeNode *node2 = node->get_rightson();
920 
921  if (node1) SQ_calc_and_apply_group_data2(node1, gb_main, data, filter, progress);
922  if (node2) SQ_calc_and_apply_group_data2(node2, gb_main, data, filter, progress);
923  }
924  progress.inc();
925 }
926 
927 // marks species that are below threshold "evaluation"
928 GB_ERROR SQ_mark_species(GBDATA * gb_main, int condition, bool marked_only) {
929  char *alignment_name;
930  int result = 0;
931 
932  GBDATA *read_sequence = NULp;
933  GBDATA *gb_species;
934  GB_ERROR error = NULp;
935 
936  GB_push_transaction(gb_main);
937  alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
938 
939  species_iterator getFirst = NULp;
940  species_iterator getNext = NULp;
941 
942  if (marked_only) {
943  getFirst = GBT_first_marked_species;
944  getNext = GBT_next_marked_species;
945  }
946  else {
947  getFirst = GBT_first_species;
948  getNext = GBT_next_species;
949  }
950 
951  for (gb_species = getFirst(gb_main); gb_species; gb_species = getNext(gb_species)) {
952  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
953  bool marked = false;
954  if (gb_ali) {
955  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
956  if (gb_quality) {
957  read_sequence = GB_entry(gb_ali, "data");
958  if (read_sequence) {
959  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
960  if (gb_quality_ali) {
961  GBDATA *gb_result1 = GB_search(gb_quality_ali, "evaluation", GB_INT);
962  result = GB_read_int(gb_result1);
963 
964  if (result < condition) marked = true;
965  }
966  }
967  }
968  }
969 
970  if (GB_read_flag(gb_species) != marked) {
971  GB_write_flag(gb_species, marked);
972  }
973  }
974  free(alignment_name);
975 
976  if (error)
977  GB_abort_transaction(gb_main);
978  else
979  GB_pop_transaction(gb_main);
980 
981  return error;
982 }
983 
985  SQ_TREE_ERROR retval = NONE;
986 
987  if (!node)
988  return MISSING_NODE;
989 
990  if (node->is_leaf()) {
991  if (!node->gb_node)
992  retval = ZOMBIE;
993  }
994  else {
995  retval = SQ_check_tree_structure(node->get_leftson());
996  if (retval == NONE)
997  retval = SQ_check_tree_structure(node->get_rightson());
998  }
999 
1000  return retval;
1001 }
GB_ERROR GB_get_error()
Definition: arb_msg.cxx:344
static GB_ERROR no_data_error(GBDATA *gb_species, const char *ali_name)
const char * GB_ERROR
Definition: arb_core.h:25
static char * SQ_fetch_filtered_sequence(GBDATA *read_sequence, AP_filter *filter)
#define NONE
Definition: GDE_def.h:34
static GB_ERROR SQ_pass2(const SQ_GroupData *globalData, GBDATA *gb_main, TreeNode *node, AP_filter *filter)
string result
const size_t * get_filterpos_2_seqpos() const
Definition: AP_filter.hxx:91
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:723
int getSize() const
Definition: SQ_GroupData.h:81
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1385
bool SQ_is_initialized() const
Definition: SQ_GroupData.h:71
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2549
void SQ_calc_and_apply_group_data(TreeNode *node, GBDATA *gb_main, SQ_GroupData *data, AP_filter *filter, arb_progress &progress)
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
virtual void SQ_add_sequence(const char *sequence)=0
static char * alignment_name
STL namespace.
void SQ_calc_physical_layout(const char *sequence, int size, GBDATA *gb_quality_ali)
void SQ_count_ambiguities(const char *iupac, int length, GBDATA *gb_quality)
static int sq_round(double value)
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2482
void SQ_count_sequences()
Definition: SQ_GroupData.h:65
std::map< std::string, SQ_GroupDataPtr > SQ_GroupDataDictionary
Definition: SQ_functions.h:42
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1904
virtual consensus_result SQ_calc_consensus(const char *sequence) const =0
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
Definition: arbdb.h:67
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
void SQ_calc_helix_layout(const char *sequence, GBDATA *gb_main, char *alignment_name, GBDATA *gb_quality, AP_filter *filter)
Definition: SQ_helix.h:97
virtual SQ_GroupData * clone() const =0
int SQ_get_avg_bases() const
Definition: SQ_GroupData.h:56
static void create_multi_level_consensus(TreeNode *node, SQ_GroupData *data)
GB_ERROR SQ_mark_species(GBDATA *gb_main, int condition, bool marked_only)
Generic smart pointer.
Definition: smartptr.h:149
GB_ERROR SQ_pass2_no_tree(const SQ_GroupData *globalData, GBDATA *gb_main, AP_filter *filter, arb_progress &progress)
void SQ_clear_group_dictionary()
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
void SQ_set_avg_bases(int bases)
Definition: SQ_GroupData.h:53
TreeNode * father
Definition: TreeNode.h:131
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2527
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
Definition: ad_io_inline.h:19
float GB_read_float(GBDATA *gbd)
Definition: arbdb.cxx:738
SQ_TREE_ERROR
Definition: SQ_functions.h:80
int GB_read_flag(GBDATA *gbd)
Definition: arbdb.cxx:2784
virtual void SQ_add(const SQ_GroupData &other)=0
#define seq_assert(bed)
GB_ERROR GB_pop_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2512
Definition: arbdb.h:86
GB_ERROR GB_write_float(GBDATA *gbd, float f)
Definition: arbdb.cxx:1279
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1244
size_t get_filtered_length() const
Definition: AP_filter.hxx:82
GB_ERROR SQ_pass1_no_tree(SQ_GroupData *globalData, GBDATA *gb_main, AP_filter *filter, arb_progress &progress)
double SQ_get_gc_proportion() const
int SQ_get_number_of_bases() const
static GB_ERROR SQ_pass1(SQ_GroupData *globalData, GBDATA *gb_main, TreeNode *node, AP_filter *filter)
bool is_leaf() const
Definition: TreeNode.h:171
int SQ_get_nr_sequences() const
Definition: SQ_GroupData.h:68
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2761
GB_ERROR SQ_evaluate(GBDATA *gb_main, const SQ_weights &weights, bool marked_only)
char * name
Definition: TreeNode.h:134
virtual void SQ_init_consensus(int size)=0
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
#define abs(x)
Definition: f2c.h:151
void SQ_set_avg_gc(double gc)
Definition: SQ_GroupData.h:59
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:97
double SQ_get_avg_gc() const
Definition: SQ_GroupData.h:62
static SQ_GroupDataDictionary group_dict
SQ_TREE_ERROR SQ_check_tree_structure(TreeNode *node)
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:675
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
GBDATA * gb_node
Definition: TreeNode.h:133
GBDATA * gb_main
Definition: adname.cxx:33
GBDATA *(* species_iterator)(GBDATA *)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
void SQ_calc_and_apply_group_data2(TreeNode *node, GBDATA *gb_main, const SQ_GroupData *data, AP_filter *filter, arb_progress &progress)
int diff_from_average
Definition: SQ_functions.h:59
GB_ERROR SQ_remove_quality_entries(GBDATA *gb_main)
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
#define UNCOVERED()
Definition: arb_assert.h:380
Definition: arbdb.h:66