ARB
PRD_Design.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : PRD_Design.cxx //
4 // Purpose : //
5 // //
6 // Coded by Wolfram Foerster in February 2001 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 #include "PRD_Design.hxx"
13 #include "PRD_SequenceIterator.hxx"
14 #include "PRD_SearchFIFO.hxx"
15 #include <arb_progress.h>
16 
17 #include <iostream>
18 
19 #include <cmath>
20 
21 using std::printf;
22 using std::deque;
23 using std::cout;
24 using std::endl;
25 using std::pair;
26 
27 //
28 // Constructor
29 //
30 void PrimerDesign::init (const char *sequence_, long int seqLength_,
31  Range pos1_, Range pos2_, Range length_, Range distance_,
32  Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_,
33  int max_count_primerpairs_, double GC_factor_, double temp_factor_)
34 {
35  error = NULp;
36  sequence = sequence_;
37  seqLength = seqLength_;
38  root1 = NULp;
39  root2 = NULp;
40  list1 = NULp;
41  list2 = NULp;
42  pairs = NULp;
43 
44  reset_node_counters();
45 
46  setPositionalParameters (pos1_, pos2_, length_, distance_);
47  setConditionalParameters(ratio_, temperature_, min_dist_to_next_, expand_IUPAC_Codes_, max_count_primerpairs_, GC_factor_, temp_factor_);
48 }
49 
50 PrimerDesign::PrimerDesign(const char *sequence_, long int seqLength_,
51  Range pos1_, Range pos2_, Range length_, Range distance_,
52  Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_,
53  int max_count_primerpairs_, double GC_factor_, double temp_factor_)
54 {
55  init (sequence_, seqLength_, pos1_, pos2_, length_, distance_, ratio_, temperature_, min_dist_to_next_, expand_IUPAC_Codes_, max_count_primerpairs_, GC_factor_, temp_factor_);
56 }
57 
58 
59 PrimerDesign::PrimerDesign(const char *sequence_, long int seqLength_,
60  Range pos1_, Range pos2_, Range length_, Range distance_, int max_count_primerpairs_, double GC_factor_, double temp_factor_)
61 {
62  init (sequence_, seqLength_, pos1_, pos2_, length_, distance_, Range(0, 0), Range(0, 0), -1, false, max_count_primerpairs_, GC_factor_, temp_factor_);
63 }
64 
65 
66 PrimerDesign::PrimerDesign(const char *sequence_, long int seqLength_) {
67  init (sequence_, seqLength_, Range(0, 0), Range(0, 0), Range(0, 0), Range(0, 0), Range(0, 0), Range(0, 0), -1, false, 10, 0.5, 0.5);
68 }
69 
70 
71 //
72 // Destructor
73 //
75  if (root1) delete root1;
76  if (root2) delete root2;
77  if (list1) delete list1;
78  if (list2) delete list2;
79  if (pairs) delete [] pairs;
80 }
81 
82 
83 void PrimerDesign::setPositionalParameters(Range pos1_, Range pos2_, Range length_, Range distance_) {
84  // first take all parameters, then test em
85  primer1 = (pos1_.min() < pos2_.min()) ? pos1_ : pos2_;
86  primer2 = (pos1_.min() < pos2_.min()) ? pos2_ : pos1_;
87  primer_length = length_;
88  primer_distance = distance_;
89 
90  // length > 0
91  if ((length_.min() <= 0) || (length_.max() <= 0)) {
92  error = "invalid primer length (length <= 0)";
93  return;
94  }
95 
96  // determine end of ranges
98  while (i->nextBase() != SequenceIterator::EOS) ;
99  primer1.max(i->pos);
101  while (i->nextBase() != SequenceIterator::EOS) ;
102  primer2.max(i->pos);
103 
104  // primer1.max > primer2_.min => distance must be given
105  if ((primer2.min() <= primer1.max()) && (distance_.min() <= 0)) {
106  error = "using overlapping primer positions you MUST give a primer distance";
107  return;
108  }
109 
110  // use distance-parameter ? (-1 = no)
111  if (distance_.min() > 0) {
112  // pos2_.max - pos1_.min must be greater than distance_.min
113  if (distance_.min() > primer2.max() - primer1.min()) {
114  error = "invalid minimal primer distance (more than right.max - left.min)";
115  return;
116  }
117  // pos2_.min - pos1_.max must be less than distance_.max
118  if (distance_.max() < primer2.min() - primer1.max()) {
119  error = "invalid maximal primer distance (less than right.min - left.max)";
120  return;
121  }
122  }
123 }
124 
125 
126 void PrimerDesign::setConditionalParameters(Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_, int max_count_primerpairs_, double GC_factor_, double temp_factor_) {
127  if ((ratio_.min() < 0) || (ratio_.max() <= 0))
128  GC_ratio = Range(-1, -1);
129  else
130  GC_ratio = ratio_;
131 
132  if ((temperature_.min() <= 0) || (temperature_.max() <= 0))
133  temperature = Range (-1, -1);
134  else
135  temperature = temperature_;
136 
137  GC_factor = GC_factor_;
138  temperature_factor = temp_factor_;
139  min_distance_to_next_match = min_dist_to_next_;
140  expand_IUPAC_Codes = expand_IUPAC_Codes_;
141  max_count_primerpairs = max_count_primerpairs_;
142 
143  if (pairs) delete [] pairs;
144  pairs = new Pair[max_count_primerpairs];
145 }
146 
147 
148 //
149 // run the necessary steps .. abort on error
150 //
152 #if defined(DUMP_PRIMER)
153  printf("PrimerDesign : parameters are\n");
154  primer1.print("left : ", "\n");
155  primer2.print("right : ", "\n");
156  primer_length.print("length : ", "\n");
157  primer_distance.print("distance : ", "\n");
158  GC_ratio.print("GC ratio : ", "\n");
159  temperature.print("temperature : ", "\n");
160  printf("GC factor : %f\n", GC_factor);
161  printf("temp. factor : %f\n", temperature_factor);
162  printf("min. dist to next match : %i\n", min_distance_to_next_match);
163  printf("expand IUPAC : %s\n", expand_IUPAC_Codes ? "true" : "false");
164  printf("error : %s\n", error);
165 #endif
166 
167  if (error) {
168  printf("not run cause : %s\n", error);
169  return;
170  }
171 
172 #if defined(DUMP_PRIMER)
173  printf("sizeof(Node) = %zu\n", sizeof(Node));
174 #endif // DEBUG
175 
177  if (error) return;
178 #if defined(DUMP_PRIMER)
179  printPrimerTrees();
180 #endif
181 
183  if (error) return;
184 #if defined(DUMP_PRIMER)
185  printPrimerTrees();
186 #endif
187 
188  arb_progress::show_comment("evaluating primer pairs");
190  if (error) return;
191 #if defined(DUMP_PRIMER)
192  printPrimerLists();
193 #endif
194 
196  if (error) return;
197 #if defined(DUMP_PRIMER)
198  printPrimerPairs();
199 #endif
200 }
201 
202 //
203 // (re)construct raw primer trees
204 //
205 
206 // add new child to parent or update existing child
207 // returns child_index
208 int PrimerDesign::insertNode (Node *current_, unsigned char base_, PRD_Sequence_Pos pos_, int delivered_, int offset_, int left_, int right_) {
209  int index = CHAR2CHILD.INDEX[base_];
210  bool is_primer = primer_length.includes(delivered_); // new child is primer if true
211 
212  //
213  // create new node if necessary or update existing node if its a primer
214  //
215  if (!current_->child[index]) {
216  total_node_counter_left += left_;
217  total_node_counter_right += right_;
218 
219  if (is_primer) {
220  current_->child[index] = new Node (current_, base_, pos_, offset_); // primer => new child with positive last_base_index
221  primer_node_counter_left += left_;
222  primer_node_counter_right += right_;
223  }
224  else {
225  current_->child[index] = new Node (current_, base_, 0); // no primer => new child with zero position
226 
227  }
228  current_->child_bits |= CHAR2BIT.FIELD[base_]; // update child_bits of current node
229  }
230  else {
231  if (is_primer) {
232  current_->child[index]->last_base_index = -pos_; // primer, but already exists => set pos to negative
233  primer_node_counter_left -= left_;
234  primer_node_counter_right -= right_;
235  }
236  }
237 
238  return index;
239 }
240 
241 // check if (sub)tree contains a valid primer
242 bool PrimerDesign::treeContainsPrimer (Node *start) {
243  if (start->isValidPrimer()) return true;
244 
245  for (int i = 0; i < 4; i++) {
246  if (start->child[i]) {
247  if (treeContainsPrimer(start->child[i])) return true;
248  }
249  }
250 
251  return false;
252 }
253 
254 // remove non-primer-leafs and branches from the tree
255 void PrimerDesign::clearTree (Node *start, int left_, int right_) {
256  // check children
257  for (int i = 0; i < 4; i++)
258  if (start->child[i]) clearTree(start->child[i], left_, right_);
259 
260  // check self
261  if (start->isLeaf() && !start->isValidPrimer() && start->parent) {
262  total_node_counter_left -= left_;
263  total_node_counter_right -= right_;
264 
265  // remove self from parent
266  start->parent->child[CHAR2CHILD.INDEX[start->base]] = NULp;
267  start->parent->child_bits &= ~CHAR2BIT.FIELD[start->base];
268 
269  // erase node
270  delete start;
271  }
272 }
273 
275  arb_progress progress("searching possible primers",
276  primer1.max()+primer2.max()-2*primer_length.min());
277 
278 #if defined(DUMP_PRIMER)
279  primer1.print("buildPrimerTrees : pos1\t\t", "\n");
280  primer2.print("buildPrimerTrees : pos2\t\t", "\n");
281  primer_length.print("buildPrimerTrees : length\t", "\n");
282  printf("buildPrimerTrees : (pos,base,delivered,last_base_index)\n");
283 #endif
284 
285  //
286  // destroy old trees if exist
287  //
288  if (root1) delete root1;
289  if (root2) delete root2;
290  root1 = new Node;
291  root2 = new Node;
292 
293  reset_node_counters();
294 
295  // init iterator with sequence
296  SequenceIterator *sequence_iterator = new SequenceIterator(sequence);
297  char base;
298  int child_index;
299  Node *current_node;
300  int offset = 0;
301 
302  //
303  // init. offset-counter
304  //
305  sequence_iterator->restart(0, primer1.min(), SequenceIterator::IGNORE, SequenceIterator::FORWARD); // start at 1st absolute pos in aligned seq.
306  while (sequence_iterator->nextBase() != SequenceIterator::EOS) // count bases till left range
307  offset++;
308 
309  //
310  // build first tree
311  //
312  for (PRD_Sequence_Pos start_pos = primer1.min();
313  (start_pos < primer1.max()-primer_length.min()) && (sequence[start_pos] != '\x00');
314  start_pos++)
315  {
316  // start iterator at new position
317  sequence_iterator->restart(start_pos, primer1.max(), primer_length.max(), SequenceIterator::FORWARD);
318  sequence_iterator->nextBase();
319  if (sequence_iterator->pos != start_pos) { // sequence_iterator has skipped spaces = > restart at first valid base
320  start_pos = sequence_iterator->pos;
321  }
322  sequence_iterator->restart(start_pos, primer1.max(), primer_length.max(), SequenceIterator::FORWARD);
323 
324  // start at top of tree
325  current_node = root1;
326 
327  // iterate through sequence till end-of-sequence or primer_length.max bases read
328  base = sequence_iterator->nextBase();
329  while (base != SequenceIterator::EOS) {
330  if (! ((base == 'A') || (base == 'T') || (base == 'U') || (base == 'C') || (base == 'G'))) break; // stop at IUPAC-Codes
331  child_index = insertNode(current_node, base, sequence_iterator->pos, sequence_iterator->delivered, offset, 1, 0);
332  current_node = current_node->child[child_index];
333 
334  // get next base
335  base = sequence_iterator->nextBase();
336  }
337 
338  offset++;
339  progress.inc();
340  }
341 
342  //
343  // clear left tree
344  //
345 #if defined(DUMP_PRIMER)
346  printf (" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
347  printf (" clearing left tree\n");
348 #endif
349  clearTree(root1, 1, 0);
350 #if defined(DUMP_PRIMER)
351  printf (" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
352 #endif
353 
354  //
355  // count bases till right range
356  //
357  sequence_iterator->restart(sequence_iterator->pos, primer2.min(), SequenceIterator::IGNORE, SequenceIterator::FORWARD); // run from current pos
358  while (sequence_iterator->nextBase() != SequenceIterator::EOS) // till begin of right range
359  offset++;
360 
361  if (!treeContainsPrimer(root1)) {
362  error = "no primer in left range found .. maybe only spaces in that range ?";
363  delete sequence_iterator;
364  return;
365  }
366 
367  //
368  // build second tree
369  //
370  for (PRD_Sequence_Pos start_pos = primer2.min();
371  (start_pos < primer2.max()-primer_length.min()) && (sequence[start_pos] != '\x00');
372  start_pos++)
373  {
374  // start iterator at new position
375  sequence_iterator->restart(start_pos, primer2.max(), primer_length.max(), SequenceIterator::FORWARD);
376  sequence_iterator->nextBase();
377  if (sequence_iterator->pos != start_pos) { // sequence_iterator has skipped spaces = > restart at first valid base
378  start_pos = sequence_iterator->pos;
379  }
380  sequence_iterator->restart(start_pos, primer2.max(), primer_length.max(), SequenceIterator::FORWARD);
381 
382  // start at top of tree
383  current_node = root2;
384 
385  // iterate through sequence till end-of-sequence or primer_length.max bases read
386  base = sequence_iterator->nextBase();
387  while (base != SequenceIterator::EOS) {
388  if (! ((base == 'A') || (base == 'T') || (base == 'U') || (base == 'C') || (base == 'G'))) break; // stop at unsure bases
389  child_index = insertNode(current_node, base, sequence_iterator->pos, sequence_iterator->delivered, offset+sequence_iterator->delivered, 0, 1);
390  current_node = current_node->child[child_index];
391 
392  // get next base
393  base = sequence_iterator->nextBase();
394  }
395 
396  offset++;
397  progress.inc();
398  }
399  progress.done();
400 
401  if (!treeContainsPrimer(root2)) {
402  error = "no primer in right range found .. maybe only spaces in that range ?";
403  }
404 
405  delete sequence_iterator;
406 
407  //
408  // clear left tree
409  //
410 #if defined(DUMP_PRIMER)
411  printf (" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
412  printf (" clearing right tree\n");
413 #endif
414  clearTree(root2, 0, 1);
415 #if defined(DUMP_PRIMER)
416  printf (" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
417 #endif
418 }
419 
420 
421 //
422 // print all primers in trees
423 //
424 PRD_Sequence_Pos PrimerDesign::followUp(Node *node_, deque<char> *primer_, int direction_) {
425  if (direction_ == FORWARD) primer_->push_front(node_->base);
426  else primer_->push_back(node_->base);
427 
428  if (!node_->parent) return node_->last_base_index;
429  else followUp(node_->parent, primer_, direction_);
430  return 0;
431 }
432 
433 void PrimerDesign::findNextPrimer(Node *start_at_, int depth_, int *counter_, int direction_) {
434  // current node
435  printf ("findNextPrimer : start_at_ [ %c,%4li,%2i (%c %c %c %c) ]\n",
436  start_at_->base,
437  start_at_->last_base_index,
438  start_at_->child_bits,
439  start_at_->child[1] ? 'G' : ' ',
440  start_at_->child[0] ? 'C' : ' ',
441  start_at_->child[3] ? 'T' : ' ',
442  start_at_->child[3] ? 'X' : ' ');
443 
444  if (primer_length.includes(depth_) && start_at_->isValidPrimer())
445  depth_++;
446  for (int i=0; i < 4; i++)
447  if (start_at_->child[i]) findNextPrimer(start_at_->child[i], depth_, counter_, direction_);
448 }
449 
450 #if defined(DUMP_PRIMER)
451 void PrimerDesign::printPrimerTrees () {
452  // start at root1 with zero depth
453  int primer_counter;
454  primer_counter = 0;
455  cout << "findNextPrimer : depth last_base:index first_base:index base" << endl;
456  findNextPrimer(root1, 0, &primer_counter, FORWARD);
457  cout << "printPrimerTrees : " << primer_counter << " primer found" << endl << endl;
458 
459  // start at root2 with zero depth
460  primer_counter = 0;
461  cout << "findNextPrimer : depth last_base:index first_base:index base" << endl;
462  findNextPrimer(root2, 0, &primer_counter, BACKWARD);
463  cout << "printPrimerTrees : " << primer_counter << " primer found" << endl;
464 }
465 #endif
466 
467 
468 //
469 // match the sequence against the primer-trees
470 //
472  SearchFIFO *fifo1 = new SearchFIFO(root1, min_distance_to_next_match, expand_IUPAC_Codes);
473  SearchFIFO *fifo2 = new SearchFIFO(root2, min_distance_to_next_match, expand_IUPAC_Codes);
475  char base;
476  PRD_Sequence_Pos pos = sequence_iterator->pos;
477 
478 #if defined(DUMP_PRIMER)
479  printf("root1 : [C %p, G %p, A %p, TU %p]\n", root1->child[0], root1->child[1], root1->child[2], root1->child[3]);
480  printf("root2 : [C %p, G %p, A %p, TU %p]\n", root2->child[0], root2->child[1], root2->child[2], root2->child[3]);
481 #endif
482 
483  arb_progress progress(seqLength*2);
484 
485  progress.subtitle("match possible primers vs. seq. (forward)");
486 
487  base = sequence_iterator->nextBase();
488  while (base != SequenceIterator::EOS) {
489  pos = sequence_iterator->pos;
490 
491  // tree/fifo 1
492  if (primer1.includes(pos)) { // flush fifo1 if in range of Primer 1
493  fifo1->flush();
494  }
495  else {
496  // iterate through fifo1
497  fifo1->iterateWith(pos, base);
498 
499  // append base to fifo1
500  fifo1->push(pos);
501  }
502 
503  // tree/fifo2
504  if (primer2.includes(pos)) { // flush fifo2 if in range of Primer 2
505  fifo2->flush();
506  }
507  else {
508  // iterate through fifo2
509  fifo2->iterateWith(pos, base);
510 
511  // append base to fifo2
512  fifo2->push(pos);
513  }
514 
515  // get next base in sequence
516  base = sequence_iterator->nextBase();
517  progress.inc();
518  }
519 
520  sequence_iterator->restart(pos, 0, SequenceIterator::IGNORE, SequenceIterator::BACKWARD);
521  fifo1->flush();
522  fifo2->flush();
523 
524  progress.subtitle("match possible primers vs. seq. (backward)");
525  base = INVERT.BASE[sequence_iterator->nextBase()];
526  while (base != SequenceIterator::EOS) {
527  pos = sequence_iterator->pos;
528 
529  // tree/fifo 1
530  if (primer1.includes(pos)) { // flush fifo1 if in range of Primer 1
531  fifo1->flush();
532  }
533  else {
534  // iterate through fifo1
535  fifo1->iterateWith(pos, base);
536 
537  // append base to fifo1
538  fifo1->push(pos);
539  }
540 
541  // tree/fifo2
542  if (primer2.includes(pos)) { // flush fifo2 if in range of Primer 2
543  fifo2->flush();
544  }
545  else {
546  // iterate through fifo2
547  fifo2->iterateWith(pos, base);
548 
549  // append base to fifo2
550  fifo2->push(base);
551  }
552 
553  // get next base in sequence
554  base = INVERT.BASE[sequence_iterator->nextBase()];
555  progress.inc();
556  }
557  progress.done();
558 
559  delete fifo1;
560  delete fifo2;
561  delete sequence_iterator;
562 
563  if (!treeContainsPrimer(root1)) {
564  error = "no primer in left range after match vs. sequence .. maybe some clustered IUPAC-Codes in sequence ?";
565  return;
566  }
567 
568  if (!treeContainsPrimer(root2)) {
569  error = "no primer in right range after match vs. sequence .. maybe some clustered IUPAC-Codes in sequence ?";
570  }
571 }
572 
573 
574 //
575 // convertTreesToLists
576 //
577 // this also checks GC-ratio, temperature and distance
578 //
579 void PrimerDesign::calcGCandAT (int &GC_, int &AT_, Node *start_at_) {
580  if (!start_at_) return;
581 
582  if ((start_at_->base == 'C') || (start_at_->base == 'G'))
583  GC_++;
584  else
585  AT_++;
586 
587  calcGCandAT(GC_, AT_, start_at_->parent);
588 }
589 
591 #if defined(DUMP_PRIMER)
592  GC_ratio.print("convertTreesToLists : GC_ratio ", " ");
593  temperature.print("temperature ", " ");
594  primer_distance.print("primer_distance ", "\n");
595 #endif
596 
597  Node *cur_node;
598  Item *cur_item;
599  Item *new_item;
600  int depth;
601  int AT;
602  int GC;
603  bool GC_and_temperature_matched;
604  PRD_Sequence_Pos min_offset_1 = PRD_MAX_SEQUENCE_POS;
605  PRD_Sequence_Pos max_offset_1 = -1;
606  PRD_Sequence_Pos min_offset_2 = PRD_MAX_SEQUENCE_POS;
607  PRD_Sequence_Pos max_offset_2 = -1;
608  pair< Node*, int > new_pair; // < cur_node->child[i], depth >
609  deque< pair<Node*, int> > *stack;
610 
611  //
612  // list 1
613  //
614  cur_item = NULp;
615  stack = new deque< pair<Node*, int> >;
616  // push children of root on stack
617  for (int index = 0; index < 4; index++)
618  if (root1->child[index]) {
619  new_pair = pair<Node*, int>(root1->child[index], 1);
620  stack->push_back(new_pair);
621  }
622 
623  if (!stack->empty()) {
624  GC_and_temperature_matched = false;
625 
626  while (!stack->empty()) {
627  // next node
628  cur_node = stack->back().first;
629  depth = stack->back().second;
630  stack->pop_back();
631 
632  // handle node
633  if (primer_length.includes(depth) && cur_node->isValidPrimer()) {
634  // calculate conditional parameters
635  GC = AT = 0;
636  calcGCandAT(GC, AT, cur_node);
637  AT = 4*GC + 2*AT;
638  GC = (GC * 100) / depth;
639 
640  // create new item if conditional parameters are in range
641  if (GC_ratio.includes(GC) && temperature.includes(AT)) {
642  GC_and_temperature_matched = true;
643  new_item = new Item(cur_node->last_base_index, cur_node->offset, depth, GC, AT, NULp);
644 
645  // begin list with new item or append to list
646  if (!cur_item) list1 = new_item;
647  else cur_item->next = new_item;
648 
649  cur_item = new_item;
650 
651  // store position
652  if (cur_node->offset < min_offset_1) min_offset_1 = cur_node->offset;
653  if (cur_node->offset > max_offset_1) max_offset_1 = cur_node->offset;
654  }
655  }
656 
657  // push children on stack
658  for (int index = 0; index < 4; index++)
659  if (cur_node->child[index]) {
660  new_pair = pair<Node*, int>(cur_node->child[index], depth+1);
661  stack->push_back(new_pair);
662  }
663  }
664  if (!GC_and_temperature_matched) {
665  error = "no primer over left range matched the given GC-Ratio/Temperature";
666  delete stack;
667  return;
668  }
669  }
670  else {
671  error = "convertTreesToLists : primertree over left range was empty :( ?";
672  delete stack;
673  return;
674  }
675 
676 #if defined(DUMP_PRIMER)
677  printf("convertTreesToLists : list1 : min_offset %7li max_offset %7li\n", min_offset_1, max_offset_1);
678 #endif
679 
680  //
681  // list 2
682  //
683  cur_item = NULp;
684  stack->clear();
685  bool distance_matched = false;
686  // push children of root on stack
687  for (int index = 0; index < 4; index++)
688  if (root2->child[index]) {
689  new_pair = pair<Node*, int>(root2->child[index], 1);
690  stack->push_back(new_pair);
691  }
692 
693  if (!stack->empty()) {
694  GC_and_temperature_matched = false;
695 
696  while (!stack->empty()) {
697  // next node
698  cur_node = stack->back().first;
699  depth = stack->back().second;
700  stack->pop_back();
701 
702  // push children on stack
703  for (int index = 0; index < 4; index++)
704  if (cur_node->child[index]) {
705  new_pair = pair<Node*, int>(cur_node->child[index], depth+1);
706  stack->push_back(new_pair);
707  }
708 
709  // handle node
710  if (primer_length.includes(depth) && cur_node->isValidPrimer()) {
711  // check position
712  if (primer_distance.min() > 0) {
713  distance_matched = primer_distance.includes(cur_node->offset-max_offset_1, cur_node->offset-min_offset_1);
714  if (!distance_matched) continue;
715  }
716 
717  // calculate conditional parameters
718  GC = AT = 0;
719  calcGCandAT(GC, AT, cur_node);
720  AT = 4*GC + 2*AT;
721  GC = (GC * 100) / depth;
722 
723  // create new item if conditional parameters are in range
724  if (GC_ratio.includes(GC) && temperature.includes(AT)) {
725  GC_and_temperature_matched = true;
726  new_item = new Item(cur_node->last_base_index, cur_node->offset, depth, GC, AT, NULp);
727 
728  // begin list with new item or append to list
729  if (!cur_item) list2 = new_item;
730  else cur_item->next = new_item;
731 
732  cur_item = new_item;
733 
734  // store position
735  if (cur_node->offset < min_offset_2) min_offset_2 = cur_node->offset;
736  if (cur_node->offset > max_offset_2) max_offset_2 = cur_node->offset;
737  }
738  }
739  }
740 
741  if (primer_distance.min() > 0) {
742  if (!distance_matched) {
743  error = "no primer over right range was in reach of primers in left range .. check distance parameter";
744  delete stack;
745  return;
746  }
747  }
748 
749  if (!GC_and_temperature_matched) {
750  error = "no primer over right range matched the given GC-Ratio and Temperature";
751  delete stack;
752  return;
753  }
754  }
755  else {
756  error = "convertTreesToLists : primertree over right range was empty :( ?";
757  delete stack;
758  return;
759  }
760 
761  delete stack;
762 
763 #if defined(DUMP_PRIMER)
764  printf("convertTreesToLists : list2 : min_offset %7li max_offset %7li\n", min_offset_2, max_offset_2);
765 #endif
766 
767  //
768  // check positions of list1
769  //
770  if (primer_distance.min() > 0) { // skip check if primer_distance < 0
771  Item *to_remove = NULp;
772  cur_item = list1;
773 
774  while (cur_item) {
775  if (!primer_distance.includes(max_offset_2-cur_item->offset, min_offset_2-cur_item->offset)) {
776  // primer in list 1 out of range of primers in list 2 => remove from list 1
777 
778  if (cur_item == list1) {
779  // 'to delete'-item is first in list
780  list1 = list1->next; // list 1 starts with next item
781  delete cur_item; // delete former first item
782  cur_item = list1; // new first item is next to be examined
783  }
784  else {
785  // run through list till 'next' is to delete
786  to_remove = cur_item; // remember 'to delete'-item
787  cur_item = list1; // start at first
788  while (cur_item ->next != to_remove) // run till 'next' is 'to delete'
789  cur_item = cur_item->next;
790  cur_item->next = cur_item->next->next; // unlink 'next' from list
791  cur_item = cur_item->next; // 'next' of 'to delete' is next to be examined
792  delete to_remove; // ...
793  }
794  }
795  else
796  cur_item = cur_item->next;
797  }
798 
799  int counter = 0;
800  for (cur_item = list1; cur_item; cur_item = cur_item->next)
801  counter++;
802  if (counter == 0) {
803  error = "no primers over left range are in reach of primers over right range .. check distance parameter";
804  return;
805  }
806  }
807 
808  int counter = 0;
809  for (cur_item = list1; cur_item; cur_item = cur_item->next)
810  counter++;
811  if (counter == 0) {
812  error = "no primers left in left range after GC-ratio/Temperature/Distance - Check";
813  return;
814  }
815 
816  counter = 0;
817  for (cur_item = list2; cur_item; cur_item = cur_item->next)
818  counter++;
819  if (counter == 0) {
820  error = "no primers left in right range after GC-ratio/Temperature/Distance - Check";
821  return;
822  }
823 }
824 
825 
826 //
827 // printPrimerLists
828 //
829 #if defined(DUMP_PRIMER)
830 void PrimerDesign::printPrimerLists () {
831  int count = 0;
832 
833  printf("printPrimerLists : list 1 : [(start_pos,offset,length), GC_ratio, temperature]\n");
834  Item *current = list1;
835  while (current) {
836  current->print("", "\n");
837  current = current->next;
838  count++;
839  }
840  printf(" : %i valid primers\nprintPrimerLists : list 2 : [(start_pos,offset,length), GC_ratio, temperature]\n", count);
841  count = 0;
842  current = list2;
843  while (current) {
844  current->print("", "\n");
845  current = current->next;
846  count++;
847  }
848  printf(" : %i valid primers\n", count);
849 }
850 #endif
851 
852 
853 
854 //
855 // evaluatePrimerPairs
856 //
857 inline double PrimerDesign::evaluatePair (Item *one_, Item *two_) {
858  if ((!one_) || (!two_))
859  return -1.0;
860 
861  return abs(one_->GC_ratio - two_->GC_ratio)*GC_factor + abs(one_->temperature - two_->temperature)*temperature_factor;
862 }
863 
864 void PrimerDesign::insertPair(double rating_, Item *one_, Item *two_) {
865  int index = -1;
866 
867  // search position
868  for (int i = max_count_primerpairs-1; (i >= 0) && (index == -1); i--) {
869  // maybe insert new pair ?
870  if (pairs[i].rating >= rating_) {
871  index = i+1;
872  }
873  }
874 
875  if (index == -1) index = 0;
876 
877  // swap till position
878  for (int i = max_count_primerpairs-2; i >= index; i--) {
879  pairs[i+1].rating = pairs[i].rating;
880  pairs[i+1].one = pairs[i].one;
881  pairs[i+1].two = pairs[i].two;
882  }
883 
884  // insert new values
885  pairs[index].rating = rating_;
886  pairs[index].one = one_;
887  pairs[index].two = two_;
888 
889 #if defined(DUMP_PRIMER)
890  printf("insertPair : [%3i] %6.2f\n", index, rating_);
891 #endif
892 }
893 
895  Item *one = list1;
896  Item *two;
897  double rating;
898  long int counter = 0;
899 
900 #if defined(DUMP_PRIMER)
901  printf ("evaluatePrimerPairs : ...\ninsertPair : [index], rating\n");
902 #endif
903 
904  long list1_elems = 0;
905  while (one) {
906  list1_elems++;
907  one = one->next;
908  }
909  one = list1;
910 
911  arb_progress progress("evaluating primer pairs", list1_elems);
912  // outer loop <= > run through list1
913  while (one) {
914  // inner loop <= > run through list2
915  two = list2;
916  while (two) {
917  // evaluate pair
918  rating = evaluatePair(one, two);
919 
920  // test if rating is besster that the worst of the best
921  if (rating > pairs[max_count_primerpairs-1].rating) {
922  // insert in the pairs
923  insertPair(rating, one, two);
924  counter++;
925  }
926 
927  // next in inner loop
928  two = two->next;
929  }
930 
931  // next in outer loop
932  one = one->next;
933  progress.inc();
934  }
935 
936  if (counter == 0) error = "no primer pair could be found, sorry :(";
937 
938 #if defined(DUMP_PRIMER)
939  printf ("evaluatePrimerPairs : ratings [ %.3f .. %.3f ]\n\n", pairs[max_count_primerpairs-1].rating, pairs[0].rating);
940 #endif
941 }
942 
943 #if defined(DUMP_PRIMER)
944 void PrimerDesign::printPrimerPairs () {
945  printf ("printPairs [index] [rating ( primer1[(start_pos,offset,length),(GC,temp)] , primer2[(pos,offs,len),(GC,temp)])] \n");
946  for (int i = 0; i < max_count_primerpairs; i++) {
947  printf("printPairs : [%3i]", i);
948  pairs[i].print("\t", "\n", sequence);
949  }
950 }
951 #endif
952 
953 const char *PrimerDesign::get_result(int num, const char *&primers, int max_primer_length, int max_position_length, int max_length_length) const {
954  if ((num < 0) || (num >= max_count_primerpairs)) return NULp; // check for valid index
955  if (!pairs[num].one || !pairs[num].two) return NULp; // check for valid items at given index
956 
957  primers = pairs[num].get_primers(sequence);
958  return pairs[num].get_result(sequence, max_primer_length, max_position_length, max_length_length);
959 }
void buildPrimerTrees()
Definition: PRD_Design.cxx:274
const char * get_result(int num, const char *&primers, int max_primer_length, int max_position_length, int max_length_length) const
Definition: PRD_Design.cxx:953
unsigned char base
Definition: PRD_Node.hxx:15
Item * two
Definition: PRD_Pair.hxx:21
void print(const char *prefix_, const char *suffix_)
Definition: PRD_Item.cxx:37
unsigned int FIELD[128]
Definition: PRD_Globals.hxx:82
Definition: PRD_Node.hxx:9
Item * one
Definition: PRD_Pair.hxx:20
unsigned char nextBase()
#define PRD_MAX_SEQUENCE_POS
Definition: PRD_Globals.hxx:20
Node * child[4]
Definition: PRD_Node.hxx:14
void push(unsigned char base_)
const char * get_result(const char *sequence_, int max_primer_length, int max_position_length, int max_length_length)
Definition: PRD_Pair.cxx:62
static BitField CHAR2BIT
void matchSequenceAgainstPrimerTrees()
Definition: PRD_Design.cxx:471
bool isLeaf()
Definition: PRD_Node.hxx:29
void print(const char *prefix, const char *suffix, const char *sequence_)
Definition: PRD_Pair.cxx:107
static ChildLookupTable CHAR2CHILD
Definition: PRD_Globals.hxx:72
unsigned int child_bits
Definition: PRD_Node.hxx:16
double rating
Definition: PRD_Pair.hxx:22
PRD_Sequence_Pos pos
Definition: PRD_Item.hxx:9
static HelixNrInfo * start
void print(const char *prefix_, const char *suffix_)
Definition: PRD_Range.cxx:41
void restart(PRD_Sequence_Pos start_pos_, PRD_Sequence_Pos stop_pos_, int max_length_, int direction_)
static void show_comment(const char *comment)
Definition: arb_progress.h:349
static const int FORWARD
void setPositionalParameters(Range pos1_, Range pos2_, Range length_, Range distance_)
Definition: PRD_Design.cxx:83
int temperature
Definition: PRD_Item.hxx:16
bool includes(PRD_Sequence_Pos value_)
Definition: PRD_Range.cxx:23
PRD_Sequence_Pos last_base_index
Definition: PRD_Node.hxx:18
PrimerDesign(const char *sequence_, long int seqLength_, Range pos1_, Range pos2_, Range length_, Range distance_, Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_, int max_count_primerpairs_, double GC_factor_, double temp_factor_)
Definition: PRD_Design.cxx:50
void convertTreesToLists()
Definition: PRD_Design.cxx:590
bool isValidPrimer()
Definition: PRD_Node.hxx:27
Item * next
Definition: PRD_Item.hxx:18
static const int BACKWARD
PRD_Sequence_Pos max() const
Definition: PRD_Range.hxx:19
const char * get_primers(const char *sequence_)
Definition: PRD_Pair.cxx:33
void subtitle(const char *stitle)
Definition: arb_progress.h:321
void iterateWith(PRD_Sequence_Pos pos_, unsigned char base_)
PRD_Sequence_Pos min() const
Definition: PRD_Range.hxx:18
void setConditionalParameters(Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_, int max_count_primerpairs_, double GC_factor_, double temp_factor_)
Definition: PRD_Design.cxx:126
#define abs(x)
Definition: f2c.h:151
PRD_Sequence_Pos offset
Definition: PRD_Node.hxx:17
#define NULp
Definition: cxxforward.h:116
PRD_Sequence_Pos offset
Definition: PRD_Item.hxx:12
void evaluatePrimerPairs()
Definition: PRD_Design.cxx:894
#define offset(field)
Definition: GLwDrawA.c:73
int GC_ratio
Definition: PRD_Item.hxx:15
long int PRD_Sequence_Pos
Definition: PRD_Globals.hxx:21
static const char EOS
Node * parent
Definition: PRD_Node.hxx:13
static const int IGNORE