ARB
SQ_functions.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : SQ_functions.cxx //
4 // Purpose : Implementation of SQ_functions.h //
5 // //
6 // //
7 // Coded by Juergen Huber in July 2003 - February 2004 //
8 // Coded by Kai Bader (baderk@in.tum.de) in 2007 - 2008 //
9 // Copyright Department of Microbiology (Technical University Munich) //
10 // //
11 // Visit our web site at: http://www.arb-home.de/ //
12 // //
13 // ==================================================================== //
14 
15 #include "SQ_ambiguities.h"
16 #include "SQ_helix.h"
17 #include "SQ_physical_layout.h"
18 #include "SQ_functions.h"
19 
20 #include <aw_preset.hxx>
21 #include <arb_progress.h>
22 #include <TreeNode.h>
23 
24 using namespace std;
25 
26 typedef GBDATA *(*species_iterator)(GBDATA *);
27 
29 
30 enum {
32 };
33 
36  swap(tmp, group_dict);
37 }
38 
39 static GB_ERROR no_data_error(GBDATA * gb_species, const char *ali_name) {
40  GBDATA *gb_name = GB_entry(gb_species, "name");
41  const char *name = "<unknown>";
42  if (gb_name)
43  name = GB_read_char_pntr(gb_name);
44  return GBS_global_string("Species '%s' has no data in alignment '%s'",
45  name, ali_name);
46 }
47 
48 static int sq_round(double value) {
49  int x;
50 
51  value += 0.5;
52  x = (int) floor(value);
53  return x;
54 }
55 
57  GB_push_transaction(gb_main);
59 
60  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species
61  = GBT_next_species(gb_species)) {
62  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_FIND);
63  if (gb_quality)
64  error = GB_delete(gb_quality);
65  }
66  if (error)
67  GB_abort_transaction(gb_main);
68  else
69  GB_pop_transaction(gb_main);
70  return error;
71 }
72 
73 GB_ERROR SQ_evaluate(GBDATA * gb_main, const SQ_weights & weights, bool marked_only) {
74  GB_push_transaction(gb_main);
76  seq_assert(alignment_name);
77 
80 
82  for (GBDATA *gb_species = getFirst(gb_main); gb_species && !error; gb_species = getNext(gb_species)) {
83  GBDATA *gb_name = GB_entry(gb_species, "name");
84  if (!gb_name) {
85  error = GB_get_error();
86  }
87  else {
88  GBDATA *gb_quality = GB_entry(gb_species, "quality");
89  if (gb_quality) {
90  GBDATA *gb_quality_ali = GB_entry(gb_quality, alignment_name);
91 
92  if (!gb_quality_ali) {
93  error = GBS_global_string("No alignment entry '%s' in quality data", alignment_name);
94  }
95  else {
96 
97  // evaluate the percentage of bases the actual sequence consists of
98  GBDATA *gb_result1 = GB_search(gb_quality_ali, "percent_of_bases", GB_INT);
99  int bases = GB_read_int(gb_result1);
100 
101  double result = bases<4 ? 0 : (bases<6 ? 1 : 2);
102  if (result != 0) result = (result * weights.bases) / 2;
103 
104  double value = 0;
105  value += result;
106 
107  // evaluate the difference in number of bases from sequence to group
108  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
109  int dfa = GB_read_int(gb_result2);
110  if (abs(dfa) < 2) {
111  result = 5;
112  }
113  else {
114  if (abs(dfa) < 4) {
115  result = 4;
116  }
117  else {
118  if (abs(dfa) < 6) {
119  result = 3;
120  }
121  else {
122  if (abs(dfa) < 8) {
123  result = 2;
124  }
125  else {
126  if (abs(dfa) < 10) {
127  result = 1;
128  }
129  else {
130  result = 0;
131  }
132  }
133  }
134  }
135  }
136  if (result != 0) result = (result * weights.diff_from_average) / 5;
137  value += result;
138 
139  // evaluate the number of positions where no helix can be built
140  GBDATA *gb_result3 = GB_search(gb_quality_ali, "number_of_no_helix", GB_INT);
141  int noh = GB_read_int(gb_result3);
142  if (noh < 20) {
143  result = 5;
144  }
145  else {
146  if (noh < 50) {
147  result = 4;
148  }
149  else {
150  if (noh < 125) {
151  result = 3;
152  }
153  else {
154  if (noh < 250) {
155  result = 2;
156  }
157  else {
158  if (noh < 500) {
159  result = 1;
160  }
161  else {
162  result = 0;
163  }
164  }
165  }
166  }
167  }
168  if (result != 0) result = (result * weights.helix) / 5;
169  value += result;
170 
171  // evaluate the consensus
172  GBDATA *gb_result4 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
173  int cos = GB_read_int(gb_result4);
174 
175  result = cos;
176  if (result != 0) result = (result * weights.consensus) / 12;
177  value += result;
178 
179  // evaluate the number of iupacs in a sequence
180  GBDATA *gb_result5 = GB_search(gb_quality_ali, "iupac_value", GB_INT);
181  int iupv = GB_read_int(gb_result5);
182 
183  if (iupv < 1) {
184  result = 3;
185  }
186  else {
187  if (iupv < 5) {
188  result = 2;
189  }
190  else {
191  if (iupv < 10) {
192  result = 1;
193  }
194  else {
195  result = 0;
196  }
197  }
198  }
199  if (result != 0) result = (result * weights.iupac) / 3;
200  value += result;
201 
202  // evaluate the difference in the GC proportion from sequence to group
203  GBDATA *gb_result6 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
204  int gcprop = GB_read_int(gb_result6);
205 
206  if (abs(gcprop) < 1) {
207  result = 5;
208  }
209  else {
210  if (abs(gcprop) < 2) {
211  result = 4;
212  }
213  else {
214  if (abs(gcprop) < 4) {
215  result = 3;
216  }
217  else {
218  if (abs(gcprop) < 8) {
219  result = 2;
220  }
221  else {
222  if (abs(gcprop) < 16)
223  result = 1;
224  else {
225  result = 0;
226  }
227  }
228  }
229  }
230  }
231  if (result != 0) result = (result * weights.gc) / 5;
232  value += result;
233 
234  // write the final value of the evaluation
235  int value2 = sq_round(value);
236  GBDATA *gb_result7 = GB_search(gb_quality_ali, "evaluation", GB_INT);
237  seq_assert(gb_result7);
238  GB_write_int(gb_result7, value2);
239  }
240  }
241  }
242  }
243  free(alignment_name);
244 
245  error = GB_end_transaction(gb_main, error);
246  return error;
247 }
248 
249 static char *SQ_fetch_filtered_sequence(GBDATA * read_sequence, AP_filter * filter) {
250  char *filteredSequence = NULp;
251  if (read_sequence) {
252  const char *rawSequence = GB_read_char_pntr(read_sequence);
253 
254  UNCOVERED(); // @@@ use AP_filter::filter_string here! (need tests first)
255 
256  int filteredLength = filter->get_filtered_length();
257  const size_t *filterpos_2_seqpos = filter->get_filterpos_2_seqpos();
258 
259  ARB_alloc(filteredSequence, filteredLength);
260  if (filteredSequence) {
261  for (int i = 0; i < filteredLength; ++i) {
262  filteredSequence[i] = rawSequence[filterpos_2_seqpos[i]];
263  }
264  }
265  }
266  return filteredSequence;
267 }
268 
269 static GB_ERROR SQ_pass1(SQ_GroupData * globalData, GBDATA * gb_main, TreeNode * node, AP_filter * filter) {
270  GB_push_transaction(gb_main);
271 
272  GB_ERROR error = NULp;
273  char *alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
274  GBDATA *gb_species = node->gb_node;
275  GBDATA *gb_name = GB_entry(gb_species, "name");
276 
277  if (!gb_name) error = GB_get_error();
278  else {
279  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
280 
281  if (!gb_ali) {
282  error = no_data_error(gb_species, alignment_name);
283  }
284  else {
285  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
286 
287  if (!gb_quality) {
288  error = GB_get_error();
289  }
290 
291  GBDATA *read_sequence = GB_entry(gb_ali, "data");
292  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
293 
294  if (!gb_quality_ali) error = GB_get_error();
295 
296  // real calculations start here
297  if (read_sequence) {
298  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
299  int sequenceLength = filter->get_filtered_length();
300 
301  // calculate physical layout of sequence
302  {
303  SQ_physical_layout ps_chan;
304  ps_chan.SQ_calc_physical_layout(rawSequence, sequenceLength, gb_quality_ali);
305 
306  // calculate the average number of bases in group
307  globalData->SQ_count_sequences();
308  globalData->SQ_set_avg_bases(ps_chan.
309  SQ_get_number_of_bases());
310  globalData->SQ_set_avg_gc(ps_chan.
311  SQ_get_gc_proportion());
312  }
313 
314  // get values for ambiguities
315  {
316  SQ_ambiguities ambi_chan;
317  ambi_chan.SQ_count_ambiguities(rawSequence, sequenceLength, gb_quality_ali);
318  }
319 
320  // calculate the number of strong, weak and no helixes
321  {
322  SQ_helix heli_chan(sequenceLength);
323  heli_chan.SQ_calc_helix_layout(rawSequence, gb_main, alignment_name, gb_quality_ali, filter);
324  }
325 
326  // calculate consensus sequence
327  {
328  if (!globalData->SQ_is_initialized()) {
329  globalData->SQ_init_consensus(sequenceLength);
330  }
331  globalData->SQ_add_sequence(rawSequence);
332  }
333 
334  free(rawSequence);
335  }
336  }
337  }
338 
339  free(alignment_name);
340 
341  if (error)
342  GB_abort_transaction(gb_main);
343  else
344  GB_pop_transaction(gb_main);
345 
346  return error;
347 }
348 
350  GBDATA *read_sequence = NULp;
351 
352  GB_push_transaction(gb_main);
353 
354  char *alignment_name = GBT_get_default_alignment(gb_main);
355  GB_ERROR error = NULp;
356 
357  seq_assert(alignment_name);
358 
359  // first pass operations
360  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species = GBT_next_species(gb_species)) {
361  GBDATA *gb_name = GB_entry(gb_species, "name");
362 
363  if (!gb_name) {
364  error = GB_get_error();
365  }
366  else {
367  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
368 
369  if (!gb_ali) {
370  error = no_data_error(gb_species, alignment_name);
371  }
372  else {
373  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
374  if (!gb_quality) {
375  error = GB_get_error();
376  }
377 
378  read_sequence = GB_entry(gb_ali, "data");
379 
380  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
381  if (!gb_quality_ali)
382  error = GB_get_error();
383 
384  // real calculations start here
385  if (read_sequence) {
386  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
387  int sequenceLength = filter->get_filtered_length();
388 
389  // calculate physical layout of sequence
390  SQ_physical_layout *ps_chan = new SQ_physical_layout;
391  ps_chan->SQ_calc_physical_layout(rawSequence, sequenceLength, gb_quality_ali);
392 
393  // calculate the average number of bases in group
394  globalData->SQ_count_sequences();
395  globalData->SQ_set_avg_bases(ps_chan->SQ_get_number_of_bases());
396  globalData->SQ_set_avg_gc(ps_chan->SQ_get_gc_proportion());
397  delete ps_chan;
398 
399  // get values for ambiguities
400  SQ_ambiguities *ambi_chan = new SQ_ambiguities;
401  ambi_chan->SQ_count_ambiguities(rawSequence, sequenceLength, gb_quality_ali);
402  delete ambi_chan;
403 
404  // calculate the number of strong, weak and no helixes
405  SQ_helix *heli_chan = new SQ_helix(sequenceLength);
406  heli_chan->SQ_calc_helix_layout(rawSequence, gb_main, alignment_name, gb_quality_ali, filter);
407  delete heli_chan;
408 
409  // calculate consensus sequence
410  {
411  if (!globalData->SQ_is_initialized()) {
412  globalData->SQ_init_consensus(sequenceLength);
413  }
414  globalData->SQ_add_sequence(rawSequence);
415  }
416  delete(rawSequence);
417  }
418  }
419  }
420  progress.inc_and_check_user_abort(error);
421  }
422 
423  free(alignment_name);
424 
425  if (error)
426  GB_abort_transaction(gb_main);
427  else
428  GB_pop_transaction(gb_main);
429 
430  return error;
431 }
432 
433 static GB_ERROR SQ_pass2(const SQ_GroupData * globalData, GBDATA * gb_main, TreeNode * node, AP_filter * filter) {
434  GB_push_transaction(gb_main);
435 
436  char *alignment_name = GBT_get_default_alignment(gb_main);
437  seq_assert(alignment_name);
438 
439  GBDATA *gb_species = node->gb_node;
440  GBDATA *gb_name = GB_entry(gb_species, "name");
441  GB_ERROR error = NULp;
442 
443  if (!gb_name) error = GB_get_error();
444  else {
445  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
446 
447  if (!gb_ali) {
448  error = no_data_error(gb_species, alignment_name);
449  }
450  else {
451  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
452  if (!gb_quality) error = GB_get_error();
453 
454  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
455  if (!gb_quality_ali) error = GB_get_error();
456 
457  GBDATA *read_sequence = GB_entry(gb_ali, "data");
458 
459  // real calculations start here
460  if (read_sequence) {
461  char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
462 
463  /*
464  calculate the average number of bases in group, and the difference of
465  a single sequence in group from it
466  */
467  {
468  GBDATA *gb_result1 = GB_search(gb_quality_ali, "number_of_bases", GB_INT);
469  int bases = GB_read_int(gb_result1);
470  int avg_bases = globalData->SQ_get_avg_bases();
471  int diff_percent = 0;
472 
473  if (avg_bases != 0) {
474  double diff = bases - avg_bases;
475  diff = (100 * diff) / avg_bases;
476  diff_percent = sq_round(diff);
477  }
478 
479  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
480  seq_assert(gb_result2);
481  GB_write_int(gb_result2, diff_percent);
482  }
483 
484  /*
485  calculate the average gc proportion in group, and the difference of
486  a single sequence in group from it
487  */
488  {
489  GBDATA *gb_result6 = GB_search(gb_quality_ali, "GC_proportion", GB_FLOAT);
490  double gcp = GB_read_float(gb_result6);
491  double avg_gc = globalData->SQ_get_avg_gc();
492  int diff_percent = 0;
493 
494  if (avg_gc != 0) {
495  double diff = gcp - avg_gc;
496  diff = (100 * diff) / avg_gc;
497  diff_percent = sq_round(diff);
498  }
499 
500  GBDATA *gb_result7 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
501  seq_assert(gb_result7);
502  GB_write_int(gb_result7, diff_percent);
503  }
504 
505  /*
506  get groupnames of visited groups
507  search for name in group dictionary
508  evaluate sequence with group consensus
509  */
510  GBDATA *gb_con = GB_search(gb_quality_ali, "consensus_conformity", GB_CREATE_CONTAINER);
511  if (!gb_con) error = GB_get_error();
512 
513  GBDATA *gb_dev = GB_search(gb_quality_ali, "consensus_deviation", GB_CREATE_CONTAINER);
514  if (!gb_dev) error = GB_get_error();
515 
516  TreeNode *backup = node; // needed?
517  int whilecounter = 0;
518  double eval = 0;
519 
520  while (backup->father) {
521  if (backup->name) {
522  SQ_GroupDataDictionary::iterator GDI = group_dict.find(backup->name);
523  if (GDI != group_dict.end()) {
524  SQ_GroupDataPtr GD_ptr = GDI->second;
525 
526  consensus_result cr = GD_ptr->SQ_calc_consensus(rawSequence);
527 
528  double value1 = cr.conformity;
529  double value2 = cr.deviation;
530  int value3 = GD_ptr->SQ_get_nr_sequences();
531 
532  GBDATA *gb_node_entry = GB_search(gb_con, "name", GB_STRING);
533  seq_assert(gb_node_entry);
534  GB_write_string(gb_node_entry, backup->name);
535 
536  gb_node_entry = GB_search(gb_con, "value", GB_FLOAT); seq_assert(gb_node_entry);
537  GB_write_float(gb_node_entry, value1);
538 
539  gb_node_entry = GB_search(gb_con, "num_species", GB_INT); seq_assert(gb_node_entry);
540  GB_write_int(gb_node_entry, value3);
541 
542  gb_node_entry = GB_search(gb_dev, "name", GB_STRING); seq_assert(gb_node_entry);
543  GB_write_string(gb_node_entry, backup->name);
544 
545  gb_node_entry = GB_search(gb_dev, "value", GB_FLOAT); seq_assert(gb_node_entry);
546  GB_write_float(gb_node_entry, value2);
547 
548  gb_node_entry = GB_search(gb_dev, "num_species", GB_INT); seq_assert(gb_node_entry);
549  GB_write_int(gb_node_entry, value3);
550 
551  // if you parse the upper two values in the evaluate() function cut the following out
552  // for time reasons i do the evaluation here, as i still have the upper two values
553  // -------------cut this-----------------
554  if (value1 > 0.95) {
555  eval += 5;
556  }
557  else {
558  if (value1 > 0.8) {
559  eval += 4;
560  }
561  else {
562  if (value1 > 0.6) {
563  eval += 3;
564  }
565  else {
566  if (value1 > 0.4) {
567  eval += 2;
568  }
569  else {
570  if (value1 > 0.25) {
571  eval += 1;
572  }
573  else {
574  eval += 0;
575  }
576  }
577  }
578  }
579  }
580  if (value2 > 0.6) {
581  eval += 0;
582  }
583  else {
584  if (value2 > 0.4) {
585  eval += 1;
586  }
587  else {
588  if (value2 > 0.2) {
589  eval += 2;
590  }
591  else {
592  if (value2 > 0.1) {
593  eval += 3;
594  }
595  else {
596  if (value2 > 0.05) {
597  eval += 4;
598  }
599  else {
600  if (value2 > 0.025) {
601  eval += 5;
602  }
603  else {
604  if (value2 > 0.01) {
605  eval += 6;
606  }
607  else {
608  eval += 7;
609  }
610  }
611  }
612  }
613  }
614  }
615  }
616  whilecounter++;
617  // ---------to this and scroll down--------
618  }
619  }
620  backup = backup->get_father();
621  }
622 
623  // --------also cut this------
624  int evaluation = 0;
625  if (eval != 0) {
626  eval = eval / whilecounter;
627  evaluation = sq_round(eval);
628  }
629  GBDATA *gb_result5 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
630  seq_assert(gb_result5);
631  GB_write_int(gb_result5, evaluation);
632  // --------end cut this-------
633 
634  free(rawSequence);
635  }
636  }
637  }
638 
639  free(alignment_name);
640 
641  if (error)
642  GB_abort_transaction(gb_main);
643  else
644  GB_pop_transaction(gb_main);
645 
646  return error;
647 }
648 
649 GB_ERROR SQ_pass2_no_tree(const SQ_GroupData * globalData, GBDATA * gb_main, AP_filter * filter, arb_progress& progress) {
650  GBDATA *read_sequence = NULp;
651 
652  GB_push_transaction(gb_main);
653 
654  char *alignment_name = GBT_get_default_alignment(gb_main);
655  seq_assert(alignment_name);
656 
657  // second pass operations
658  GB_ERROR error = NULp;
659  for (GBDATA *gb_species = GBT_first_species(gb_main); gb_species && !error; gb_species = GBT_next_species(gb_species)) {
660  GBDATA *gb_name = GB_entry(gb_species, "name");
661 
662  if (!gb_name) {
663  error = GB_get_error();
664  }
665  else {
666  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
667  if (!gb_ali) {
668  error = no_data_error(gb_species, alignment_name);
669  }
670  else {
671  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
672  if (!gb_quality) error = GB_get_error();
673 
674  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
675  if (!gb_quality_ali) error = GB_get_error();
676 
677  read_sequence = GB_entry(gb_ali, "data");
678 
679  // real calculations start here
680  if (read_sequence) {
681  const char *rawSequence = SQ_fetch_filtered_sequence(read_sequence, filter);
682 
683  /*
684  calculate the average number of bases in group, and the difference of
685  a single sequence in group from it
686  */
687  {
688  GBDATA *gb_result1 = GB_search(gb_quality_ali, "number_of_bases", GB_INT);
689  int bases = GB_read_int(gb_result1);
690  int avg_bases = globalData->SQ_get_avg_bases();
691  int diff_percent = 0;
692 
693  if (avg_bases != 0) {
694  double diff = bases - avg_bases;
695  diff = (100 * diff) / avg_bases;
696  diff_percent = sq_round(diff);
697  }
698 
699  GBDATA *gb_result2 = GB_search(gb_quality_ali, "percent_base_deviation", GB_INT);
700  seq_assert(gb_result2);
701  GB_write_int(gb_result2, diff_percent);
702  }
703 
704  /*
705  calculate the average gc proportion in group, and the difference of
706  a single sequence in group from it
707  */
708  {
709  GBDATA *gb_result6 = GB_search(gb_quality_ali, "GC_proportion", GB_FLOAT);
710  double gcp = GB_read_float(gb_result6);
711  double avg_gc = globalData->SQ_get_avg_gc();
712  int diff_percent = 0;
713 
714  if (avg_gc != 0) {
715  double diff = gcp - avg_gc;
716  diff = (100 * diff) / avg_gc;
717  diff_percent = sq_round(diff);
718  }
719 
720  GBDATA *gb_result7 = GB_search(gb_quality_ali, "percent_GC_difference", GB_INT);
721  seq_assert(gb_result7);
722  GB_write_int(gb_result7, diff_percent);
723  }
724  /*
725  get groupnames of visited groups
726  search for name in group dictionary
727  evaluate sequence with group consensus
728  */
729  GBDATA *gb_con = GB_search(gb_quality_ali, "consensus_conformity", GB_CREATE_CONTAINER);
730  if (!gb_con) error = GB_get_error();
731 
732  GBDATA *gb_dev = GB_search(gb_quality_ali, "consensus_deviation", GB_CREATE_CONTAINER);
733  if (!gb_dev) error = GB_get_error();
734 
735  consensus_result cr = globalData->SQ_calc_consensus(rawSequence);
736 
737  double value1 = cr.conformity;
738  double value2 = cr.deviation;
739  int value3 = globalData->SQ_get_nr_sequences();
740 
741  GBDATA *gb_node_entry = GB_search(gb_con, "name", GB_STRING);
742  seq_assert(gb_node_entry);
743  GB_write_string(gb_node_entry, "one global consensus");
744 
745  gb_node_entry = GB_search(gb_con, "value", GB_FLOAT); seq_assert(gb_node_entry);
746  GB_write_float(gb_node_entry, value1);
747 
748  gb_node_entry = GB_search(gb_con, "num_species", GB_INT); seq_assert(gb_node_entry);
749  GB_write_int(gb_node_entry, value3);
750 
751  gb_node_entry = GB_search(gb_dev, "name", GB_STRING); seq_assert(gb_node_entry);
752  GB_write_string(gb_node_entry, "one global consensus");
753 
754  gb_node_entry = GB_search(gb_dev, "value", GB_FLOAT); seq_assert(gb_node_entry);
755  GB_write_float(gb_node_entry, value2);
756 
757  gb_node_entry = GB_search(gb_dev, "num_species", GB_INT); seq_assert(gb_node_entry);
758  GB_write_int(gb_node_entry, value3);
759 
760  double eval = 0;
761 
762  // if you parse the upper two values in the evaluate() function cut the following out
763  // for time reasons i do the evaluation here, as i still have the upper two values
764  // -------------cut this-----------------
765  if (value1 > 0.95) {
766  eval += 5;
767  }
768  else {
769  if (value1 > 0.8) {
770  eval += 4;
771  }
772  else {
773  if (value1 > 0.6) {
774  eval += 3;
775  }
776  else {
777  if (value1 > 0.4) {
778  eval += 2;
779  }
780  else {
781  if (value1 > 0.25) {
782  eval += 1;
783  }
784  else {
785  eval += 0;
786  }
787  }
788  }
789  }
790  }
791  if (value2 > 0.6) {
792  eval += 0;
793  }
794  else {
795  if (value2 > 0.4) {
796  eval += 1;
797  }
798  else {
799  if (value2 > 0.2) {
800  eval += 2;
801  }
802  else {
803  if (value2 > 0.1) {
804  eval += 3;
805  }
806  else {
807  if (value2 > 0.05) {
808  eval += 4;
809  }
810  else {
811  if (value2 > 0.025) {
812  eval += 5;
813  }
814  else {
815  if (value2 > 0.01) {
816  eval += 6;
817  }
818  else {
819  eval += 7;
820  }
821  }
822  }
823  }
824  }
825  }
826  }
827 
828  {
829  int evaluation = 0;
830  if (eval != 0) evaluation = sq_round(eval);
831 
832  GBDATA *gb_result5 = GB_search(gb_quality_ali, "consensus_evaluated", GB_INT);
833  seq_assert(gb_result5);
834  GB_write_int(gb_result5, evaluation);
835  }
836  // --------end cut this-------
837  delete(rawSequence);
838  }
839  }
840  }
841  progress.inc_and_check_user_abort(error);
842  }
843  free(alignment_name);
844 
845  if (error)
846  GB_abort_transaction(gb_main);
847  else
848  GB_pop_transaction(gb_main);
849 
850  return error;
851 }
852 
854  SQ_GroupData *newData = data->clone(); // save actual consensus
855  *newData = *data;
856  group_dict[node->name] = newData; // and link it with an name
857 }
858 
860  if (node->is_leaf()) {
861  if (node->gb_node) {
862  SQ_pass1(data, gb_main, node, filter);
863  seq_assert(data->getSize()> 0);
864  }
865  }
866  else {
867  TreeNode *node1 = node->get_leftson();
868  TreeNode *node2 = node->get_rightson();
869 
870  if (node->name) {
871  SQ_GroupData *leftData = NULp;
872  bool parentIsEmpty = false;
873 
874  if (data->getSize() == 0) {
875  parentIsEmpty = true;
876  SQ_calc_and_apply_group_data(node1, gb_main, data, filter, progress); // process left branch with empty data
877  seq_assert(data->getSize()> 0);
878  }
879  else {
880  leftData = data->clone(); // create new empty SQ_GroupData
881  SQ_calc_and_apply_group_data(node1, gb_main, leftData, filter, progress); // process left branch
882  seq_assert(leftData->getSize()> 0);
883  }
884 
885  SQ_GroupData *rightData = data->clone(); // create new empty SQ_GroupData
886  SQ_calc_and_apply_group_data(node2, gb_main, rightData, filter, progress); // process right branch
887  seq_assert(rightData->getSize()> 0);
888 
889  if (!parentIsEmpty) {
890  data->SQ_add(*leftData);
891  delete leftData;
892  }
893 
894  data->SQ_add(*rightData);
895  delete rightData;
896 
897  create_multi_level_consensus(node, data);
898  }
899  else {
900  SQ_calc_and_apply_group_data(node1, gb_main, data, filter, progress); // enter left branch
901  seq_assert(data->getSize()> 0);
902 
903  SQ_calc_and_apply_group_data(node2, gb_main, data, filter, progress); // enter right branch
904  seq_assert(data->getSize()> 0);
905  }
906  }
907  progress.inc();
908 }
909 
911  if (node->is_leaf()) {
912  if (node->gb_node) {
913  SQ_pass2(data, gb_main, node, filter);
914  }
915  }
916  else {
917  TreeNode *node1 = node->get_leftson();
918  TreeNode *node2 = node->get_rightson();
919 
920  if (node1) SQ_calc_and_apply_group_data2(node1, gb_main, data, filter, progress);
921  if (node2) SQ_calc_and_apply_group_data2(node2, gb_main, data, filter, progress);
922  }
923  progress.inc();
924 }
925 
926 // marks species that are below threshold "evaluation"
927 GB_ERROR SQ_mark_species(GBDATA * gb_main, int condition, bool marked_only) {
928  char *alignment_name;
929  int result = 0;
930 
931  GBDATA *read_sequence = NULp;
932  GBDATA *gb_species;
933  GB_ERROR error = NULp;
934 
935  GB_push_transaction(gb_main);
936  alignment_name = GBT_get_default_alignment(gb_main); seq_assert(alignment_name);
937 
938  species_iterator getFirst = NULp;
939  species_iterator getNext = NULp;
940 
941  if (marked_only) {
942  getFirst = GBT_first_marked_species;
943  getNext = GBT_next_marked_species;
944  }
945  else {
946  getFirst = GBT_first_species;
947  getNext = GBT_next_species;
948  }
949 
950  for (gb_species = getFirst(gb_main); gb_species; gb_species = getNext(gb_species)) {
951  GBDATA *gb_ali = GB_entry(gb_species, alignment_name);
952  bool marked = false;
953  if (gb_ali) {
954  GBDATA *gb_quality = GB_search(gb_species, "quality", GB_CREATE_CONTAINER);
955  if (gb_quality) {
956  read_sequence = GB_entry(gb_ali, "data");
957  if (read_sequence) {
958  GBDATA *gb_quality_ali = GB_search(gb_quality, alignment_name, GB_CREATE_CONTAINER);
959  if (gb_quality_ali) {
960  GBDATA *gb_result1 = GB_search(gb_quality_ali, "evaluation", GB_INT);
961  result = GB_read_int(gb_result1);
962 
963  if (result < condition) marked = true;
964  }
965  }
966  }
967  }
968 
969  if (GB_read_flag(gb_species) != marked) {
970  GB_write_flag(gb_species, marked);
971  }
972  }
973  free(alignment_name);
974 
975  if (error)
976  GB_abort_transaction(gb_main);
977  else
978  GB_pop_transaction(gb_main);
979 
980  return error;
981 }
982 
984  SQ_TREE_ERROR retval = NONE;
985 
986  if (!node)
987  return MISSING_NODE;
988 
989  if (node->is_leaf()) {
990  if (!node->gb_node)
991  retval = ZOMBIE;
992  }
993  else {
994  retval = SQ_check_tree_structure(node->get_leftson());
995  if (retval == NONE)
996  retval = SQ_check_tree_structure(node->get_rightson());
997  }
998 
999  return retval;
1000 }
GB_ERROR GB_get_error()
Definition: arb_msg.cxx:332
static GB_ERROR no_data_error(GBDATA *gb_species, const char *ali_name)
const char * GB_ERROR
Definition: arb_core.h:25
static char * SQ_fetch_filtered_sequence(GBDATA *read_sequence, AP_filter *filter)
#define NONE
Definition: GDE_def.h:34
static GB_ERROR SQ_pass2(const SQ_GroupData *globalData, GBDATA *gb_main, TreeNode *node, AP_filter *filter)
string result
const size_t * get_filterpos_2_seqpos() const
Definition: AP_filter.hxx:91
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:723
int getSize() const
Definition: SQ_GroupData.h:81
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1381
bool SQ_is_initialized() const
Definition: SQ_GroupData.h:71
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2544
void SQ_calc_and_apply_group_data(TreeNode *node, GBDATA *gb_main, SQ_GroupData *data, AP_filter *filter, arb_progress &progress)
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:202
virtual void SQ_add_sequence(const char *sequence)=0
static char * alignment_name
STL namespace.
void SQ_calc_physical_layout(const char *sequence, int size, GBDATA *gb_quality_ali)
void SQ_count_ambiguities(const char *iupac, int length, GBDATA *gb_quality)
static int sq_round(double value)
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2477
void SQ_count_sequences()
Definition: SQ_GroupData.h:65
std::map< std::string, SQ_GroupDataPtr > SQ_GroupDataDictionary
Definition: SQ_functions.h:42
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1899
virtual consensus_result SQ_calc_consensus(const char *sequence) const =0
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
Definition: arbdb.h:67
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
void SQ_calc_helix_layout(const char *sequence, GBDATA *gb_main, char *alignment_name, GBDATA *gb_quality, AP_filter *filter)
Definition: SQ_helix.h:97
virtual SQ_GroupData * clone() const =0
int SQ_get_avg_bases() const
Definition: SQ_GroupData.h:56
static void create_multi_level_consensus(TreeNode *node, SQ_GroupData *data)
GB_ERROR SQ_mark_species(GBDATA *gb_main, int condition, bool marked_only)
Generic smart pointer.
Definition: smartptr.h:149
GB_ERROR SQ_pass2_no_tree(const SQ_GroupData *globalData, GBDATA *gb_main, AP_filter *filter, arb_progress &progress)
void SQ_clear_group_dictionary()
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
void SQ_set_avg_bases(int bases)
Definition: SQ_GroupData.h:53
TreeNode * father
Definition: TreeNode.h:131
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR GB_abort_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2522
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
CONSTEXPR_INLINE_Cxx14 void swap(unsigned char &c1, unsigned char &c2)
Definition: ad_io_inline.h:19
float GB_read_float(GBDATA *gbd)
Definition: arbdb.cxx:738
SQ_TREE_ERROR
Definition: SQ_functions.h:80
int GB_read_flag(GBDATA *gbd)
Definition: arbdb.cxx:2779
virtual void SQ_add(const SQ_GroupData &other)=0
#define seq_assert(bed)
GB_ERROR GB_pop_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2507
Definition: arbdb.h:86
GB_ERROR GB_write_float(GBDATA *gbd, float f)
Definition: arbdb.cxx:1275
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1244
size_t get_filtered_length() const
Definition: AP_filter.hxx:82
GB_ERROR SQ_pass1_no_tree(SQ_GroupData *globalData, GBDATA *gb_main, AP_filter *filter, arb_progress &progress)
double SQ_get_gc_proportion() const
int SQ_get_number_of_bases() const
static GB_ERROR SQ_pass1(SQ_GroupData *globalData, GBDATA *gb_main, TreeNode *node, AP_filter *filter)
bool is_leaf() const
Definition: TreeNode.h:171
int SQ_get_nr_sequences() const
Definition: SQ_GroupData.h:68
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2756
GB_ERROR SQ_evaluate(GBDATA *gb_main, const SQ_weights &weights, bool marked_only)
char * name
Definition: TreeNode.h:134
virtual void SQ_init_consensus(int size)=0
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
#define abs(x)
Definition: f2c.h:151
void SQ_set_avg_gc(double gc)
Definition: SQ_GroupData.h:59
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:97
double SQ_get_avg_gc() const
Definition: SQ_GroupData.h:62
static SQ_GroupDataDictionary group_dict
SQ_TREE_ERROR SQ_check_tree_structure(TreeNode *node)
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:687
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
GBDATA * gb_node
Definition: TreeNode.h:133
GBDATA * gb_main
Definition: adname.cxx:33
GBDATA *(* species_iterator)(GBDATA *)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
void SQ_calc_and_apply_group_data2(TreeNode *node, GBDATA *gb_main, const SQ_GroupData *data, AP_filter *filter, arb_progress &progress)
int diff_from_average
Definition: SQ_functions.h:59
GB_ERROR SQ_remove_quality_entries(GBDATA *gb_main)
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void inc_and_check_user_abort(GB_ERROR &error)
Definition: arb_progress.h:274
#define UNCOVERED()
Definition: arb_assert.h:380
Definition: arbdb.h:66