30 void PrimerDesign::init (
const char *sequence_,
long int seqLength_,
32 Range ratio_,
Range temperature_,
int min_dist_to_next_,
bool expand_IUPAC_Codes_,
33 int max_count_primerpairs_,
double GC_factor_,
double temp_factor_)
37 seqLength = seqLength_;
44 reset_node_counters();
47 setConditionalParameters(ratio_, temperature_, min_dist_to_next_, expand_IUPAC_Codes_, max_count_primerpairs_, GC_factor_, temp_factor_);
52 Range ratio_,
Range temperature_,
int min_dist_to_next_,
bool expand_IUPAC_Codes_,
53 int max_count_primerpairs_,
double GC_factor_,
double temp_factor_)
55 init (sequence_, seqLength_, pos1_, pos2_, length_, distance_, ratio_, temperature_, min_dist_to_next_, expand_IUPAC_Codes_, max_count_primerpairs_, GC_factor_, temp_factor_);
60 Range pos1_,
Range pos2_,
Range length_,
Range distance_,
int max_count_primerpairs_,
double GC_factor_,
double temp_factor_)
62 init (sequence_, seqLength_, pos1_, pos2_, length_, distance_,
Range(0, 0),
Range(0, 0), -1,
false, max_count_primerpairs_, GC_factor_, temp_factor_);
67 init (sequence_, seqLength_,
Range(0, 0),
Range(0, 0),
Range(0, 0),
Range(0, 0),
Range(0, 0),
Range(0, 0), -1,
false, 10, 0.5, 0.5);
75 if (root1)
delete root1;
76 if (root2)
delete root2;
77 if (list1)
delete list1;
78 if (list2)
delete list2;
79 if (pairs)
delete [] pairs;
85 primer1 = (pos1_.
min() < pos2_.
min()) ? pos1_ : pos2_;
86 primer2 = (pos1_.
min() < pos2_.
min()) ? pos2_ : pos1_;
87 primer_length = length_;
88 primer_distance = distance_;
91 if ((length_.
min() <= 0) || (length_.
max() <= 0)) {
92 error =
"invalid primer length (length <= 0)";
105 if ((primer2.
min() <= primer1.
max()) && (distance_.
min() <= 0)) {
106 error =
"using overlapping primer positions you MUST give a primer distance";
111 if (distance_.
min() > 0) {
113 if (distance_.
min() > primer2.
max() - primer1.
min()) {
114 error =
"invalid minimal primer distance (more than right.max - left.min)";
118 if (distance_.
max() < primer2.
min() - primer1.
max()) {
119 error =
"invalid maximal primer distance (less than right.min - left.max)";
127 if ((ratio_.
min() < 0) || (ratio_.
max() <= 0))
128 GC_ratio =
Range(-1, -1);
132 if ((temperature_.
min() <= 0) || (temperature_.
max() <= 0))
133 temperature =
Range (-1, -1);
135 temperature = temperature_;
137 GC_factor = GC_factor_;
138 temperature_factor = temp_factor_;
139 min_distance_to_next_match = min_dist_to_next_;
140 expand_IUPAC_Codes = expand_IUPAC_Codes_;
141 max_count_primerpairs = max_count_primerpairs_;
143 if (pairs)
delete [] pairs;
144 pairs =
new Pair[max_count_primerpairs];
152 #if defined(DUMP_PRIMER)
153 printf(
"PrimerDesign : parameters are\n");
154 primer1.
print(
"left : ",
"\n");
155 primer2.
print(
"right : ",
"\n");
156 primer_length.
print(
"length : ",
"\n");
157 primer_distance.
print(
"distance : ",
"\n");
158 GC_ratio.
print(
"GC ratio : ",
"\n");
159 temperature.
print(
"temperature : ",
"\n");
160 printf(
"GC factor : %f\n", GC_factor);
161 printf(
"temp. factor : %f\n", temperature_factor);
162 printf(
"min. dist to next match : %i\n", min_distance_to_next_match);
163 printf(
"expand IUPAC : %s\n", expand_IUPAC_Codes ?
"true" :
"false");
164 printf(
"error : %s\n", error);
168 printf(
"not run cause : %s\n", error);
172 #if defined(DUMP_PRIMER)
173 printf(
"sizeof(Node) = %zu\n",
sizeof(
Node));
178 #if defined(DUMP_PRIMER)
184 #if defined(DUMP_PRIMER)
191 #if defined(DUMP_PRIMER)
197 #if defined(DUMP_PRIMER)
208 int PrimerDesign::insertNode (
Node *current_,
unsigned char base_,
PRD_Sequence_Pos pos_,
int delivered_,
int offset_,
int left_,
int right_) {
210 bool is_primer = primer_length.
includes(delivered_);
215 if (!current_->
child[index]) {
216 total_node_counter_left += left_;
217 total_node_counter_right += right_;
220 current_->
child[index] =
new Node (current_, base_, pos_, offset_);
221 primer_node_counter_left += left_;
222 primer_node_counter_right += right_;
225 current_->
child[index] =
new Node (current_, base_, 0);
233 primer_node_counter_left -= left_;
234 primer_node_counter_right -= right_;
242 bool PrimerDesign::treeContainsPrimer (
Node *
start) {
245 for (
int i = 0; i < 4; i++) {
246 if (start->
child[i]) {
247 if (treeContainsPrimer(start->
child[i]))
return true;
255 void PrimerDesign::clearTree (
Node *start,
int left_,
int right_) {
257 for (
int i = 0; i < 4; i++)
258 if (start->
child[i]) clearTree(start->
child[i], left_, right_);
262 total_node_counter_left -= left_;
263 total_node_counter_right -= right_;
276 primer1.
max()+primer2.
max()-2*primer_length.
min());
278 #if defined(DUMP_PRIMER)
279 primer1.
print(
"buildPrimerTrees : pos1\t\t",
"\n");
280 primer2.
print(
"buildPrimerTrees : pos2\t\t",
"\n");
281 primer_length.
print(
"buildPrimerTrees : length\t",
"\n");
282 printf(
"buildPrimerTrees : (pos,base,delivered,last_base_index)\n");
288 if (root1)
delete root1;
289 if (root2)
delete root2;
293 reset_node_counters();
313 (start_pos < primer1.
max()-primer_length.
min()) && (sequence[start_pos] !=
'\x00');
319 if (sequence_iterator->
pos != start_pos) {
320 start_pos = sequence_iterator->
pos;
325 current_node = root1;
328 base = sequence_iterator->
nextBase();
330 if (! ((base ==
'A') || (base ==
'T') || (base ==
'U') || (base ==
'C') || (base ==
'G')))
break;
331 child_index = insertNode(current_node, base, sequence_iterator->
pos, sequence_iterator->
delivered, offset, 1, 0);
332 current_node = current_node->
child[child_index];
335 base = sequence_iterator->
nextBase();
345 #if defined(DUMP_PRIMER)
346 printf (
" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
347 printf (
" clearing left tree\n");
349 clearTree(root1, 1, 0);
350 #if defined(DUMP_PRIMER)
351 printf (
" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
361 if (!treeContainsPrimer(root1)) {
362 error =
"no primer in left range found .. maybe only spaces in that range ?";
363 delete sequence_iterator;
371 (start_pos < primer2.
max()-primer_length.
min()) && (sequence[start_pos] !=
'\x00');
377 if (sequence_iterator->
pos != start_pos) {
378 start_pos = sequence_iterator->
pos;
383 current_node = root2;
386 base = sequence_iterator->
nextBase();
388 if (! ((base ==
'A') || (base ==
'T') || (base ==
'U') || (base ==
'C') || (base ==
'G')))
break;
389 child_index = insertNode(current_node, base, sequence_iterator->
pos, sequence_iterator->
delivered, offset+sequence_iterator->
delivered, 0, 1);
390 current_node = current_node->
child[child_index];
393 base = sequence_iterator->
nextBase();
401 if (!treeContainsPrimer(root2)) {
402 error =
"no primer in right range found .. maybe only spaces in that range ?";
405 delete sequence_iterator;
410 #if defined(DUMP_PRIMER)
411 printf (
" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
412 printf (
" clearing right tree\n");
414 clearTree(root2, 0, 1);
415 #if defined(DUMP_PRIMER)
416 printf (
" %li nodes left (%li primers) %li nodes right (%li primers)\n", total_node_counter_left, primer_node_counter_left, total_node_counter_right, primer_node_counter_right);
425 if (direction_ == FORWARD) primer_->push_front(node_->
base);
426 else primer_->push_back(node_->
base);
429 else followUp(node_->
parent, primer_, direction_);
433 void PrimerDesign::findNextPrimer(
Node *start_at_,
int depth_,
int *counter_,
int direction_) {
435 printf (
"findNextPrimer : start_at_ [ %c,%4li,%2i (%c %c %c %c) ]\n",
439 start_at_->
child[1] ?
'G' :
' ',
440 start_at_->
child[0] ?
'C' :
' ',
441 start_at_->
child[3] ?
'T' :
' ',
442 start_at_->
child[3] ?
'X' :
' ');
446 for (
int i=0; i < 4; i++)
447 if (start_at_->
child[i]) findNextPrimer(start_at_->
child[i], depth_, counter_, direction_);
450 #if defined(DUMP_PRIMER)
451 void PrimerDesign::printPrimerTrees () {
455 cout <<
"findNextPrimer : depth last_base:index first_base:index base" << endl;
456 findNextPrimer(root1, 0, &primer_counter, FORWARD);
457 cout <<
"printPrimerTrees : " << primer_counter <<
" primer found" << endl << endl;
461 cout <<
"findNextPrimer : depth last_base:index first_base:index base" << endl;
462 findNextPrimer(root2, 0, &primer_counter, BACKWARD);
463 cout <<
"printPrimerTrees : " << primer_counter <<
" primer found" << endl;
478 #if defined(DUMP_PRIMER)
479 printf(
"root1 : [C %p, G %p, A %p, TU %p]\n", root1->
child[0], root1->
child[1], root1->
child[2], root1->
child[3]);
480 printf(
"root2 : [C %p, G %p, A %p, TU %p]\n", root2->
child[0], root2->
child[1], root2->
child[2], root2->
child[3]);
485 progress.
subtitle(
"match possible primers vs. seq. (forward)");
487 base = sequence_iterator->
nextBase();
489 pos = sequence_iterator->
pos;
516 base = sequence_iterator->
nextBase();
524 progress.
subtitle(
"match possible primers vs. seq. (backward)");
527 pos = sequence_iterator->
pos;
561 delete sequence_iterator;
563 if (!treeContainsPrimer(root1)) {
564 error =
"no primer in left range after match vs. sequence .. maybe some clustered IUPAC-Codes in sequence ?";
568 if (!treeContainsPrimer(root2)) {
569 error =
"no primer in right range after match vs. sequence .. maybe some clustered IUPAC-Codes in sequence ?";
579 void PrimerDesign::calcGCandAT (
int &GC_,
int &AT_,
Node *start_at_) {
580 if (!start_at_)
return;
582 if ((start_at_->
base ==
'C') || (start_at_->
base ==
'G'))
587 calcGCandAT(GC_, AT_, start_at_->
parent);
591 #if defined(DUMP_PRIMER)
592 GC_ratio.
print(
"convertTreesToLists : GC_ratio ",
" ");
593 temperature.
print(
"temperature ",
" ");
594 primer_distance.
print(
"primer_distance ",
"\n");
603 bool GC_and_temperature_matched;
608 pair< Node*, int > new_pair;
609 deque< pair<Node*, int> > *stack;
615 stack =
new deque< pair<Node*, int> >;
617 for (
int index = 0; index < 4; index++)
618 if (root1->
child[index]) {
619 new_pair = pair<Node*, int>(root1->
child[index], 1);
620 stack->push_back(new_pair);
623 if (!stack->empty()) {
624 GC_and_temperature_matched =
false;
626 while (!stack->empty()) {
628 cur_node = stack->back().first;
629 depth = stack->back().second;
636 calcGCandAT(GC, AT, cur_node);
638 GC = (GC * 100) / depth;
642 GC_and_temperature_matched =
true;
646 if (!cur_item) list1 = new_item;
647 else cur_item->
next = new_item;
652 if (cur_node->
offset < min_offset_1) min_offset_1 = cur_node->
offset;
653 if (cur_node->
offset > max_offset_1) max_offset_1 = cur_node->
offset;
658 for (
int index = 0; index < 4; index++)
659 if (cur_node->
child[index]) {
660 new_pair = pair<Node*, int>(cur_node->
child[index], depth+1);
661 stack->push_back(new_pair);
664 if (!GC_and_temperature_matched) {
665 error =
"no primer over left range matched the given GC-Ratio/Temperature";
671 error =
"convertTreesToLists : primertree over left range was empty :( ?";
676 #if defined(DUMP_PRIMER)
677 printf(
"convertTreesToLists : list1 : min_offset %7li max_offset %7li\n", min_offset_1, max_offset_1);
685 bool distance_matched =
false;
687 for (
int index = 0; index < 4; index++)
688 if (root2->
child[index]) {
689 new_pair = pair<Node*, int>(root2->
child[index], 1);
690 stack->push_back(new_pair);
693 if (!stack->empty()) {
694 GC_and_temperature_matched =
false;
696 while (!stack->empty()) {
698 cur_node = stack->back().first;
699 depth = stack->back().second;
703 for (
int index = 0; index < 4; index++)
704 if (cur_node->
child[index]) {
705 new_pair = pair<Node*, int>(cur_node->
child[index], depth+1);
706 stack->push_back(new_pair);
712 if (primer_distance.
min() > 0) {
713 distance_matched = primer_distance.
includes(cur_node->
offset-max_offset_1, cur_node->
offset-min_offset_1);
714 if (!distance_matched)
continue;
719 calcGCandAT(GC, AT, cur_node);
721 GC = (GC * 100) / depth;
725 GC_and_temperature_matched =
true;
729 if (!cur_item) list2 = new_item;
730 else cur_item->
next = new_item;
735 if (cur_node->
offset < min_offset_2) min_offset_2 = cur_node->
offset;
736 if (cur_node->
offset > max_offset_2) max_offset_2 = cur_node->
offset;
741 if (primer_distance.
min() > 0) {
742 if (!distance_matched) {
743 error =
"no primer over right range was in reach of primers in left range .. check distance parameter";
749 if (!GC_and_temperature_matched) {
750 error =
"no primer over right range matched the given GC-Ratio and Temperature";
756 error =
"convertTreesToLists : primertree over right range was empty :( ?";
763 #if defined(DUMP_PRIMER)
764 printf(
"convertTreesToLists : list2 : min_offset %7li max_offset %7li\n", min_offset_2, max_offset_2);
770 if (primer_distance.
min() > 0) {
778 if (cur_item == list1) {
786 to_remove = cur_item;
788 while (cur_item ->next != to_remove)
789 cur_item = cur_item->
next;
791 cur_item = cur_item->
next;
796 cur_item = cur_item->
next;
800 for (cur_item = list1; cur_item; cur_item = cur_item->
next)
803 error =
"no primers over left range are in reach of primers over right range .. check distance parameter";
809 for (cur_item = list1; cur_item; cur_item = cur_item->
next)
812 error =
"no primers left in left range after GC-ratio/Temperature/Distance - Check";
817 for (cur_item = list2; cur_item; cur_item = cur_item->
next)
820 error =
"no primers left in right range after GC-ratio/Temperature/Distance - Check";
829 #if defined(DUMP_PRIMER)
830 void PrimerDesign::printPrimerLists () {
833 printf(
"printPrimerLists : list 1 : [(start_pos,offset,length), GC_ratio, temperature]\n");
834 Item *current = list1;
836 current->
print(
"",
"\n");
837 current = current->
next;
840 printf(
" : %i valid primers\nprintPrimerLists : list 2 : [(start_pos,offset,length), GC_ratio, temperature]\n", count);
844 current->
print(
"",
"\n");
845 current = current->
next;
848 printf(
" : %i valid primers\n", count);
857 inline double PrimerDesign::evaluatePair (
Item *one_,
Item *two_) {
858 if ((!one_) || (!two_))
864 void PrimerDesign::insertPair(
double rating_,
Item *one_,
Item *two_) {
868 for (
int i = max_count_primerpairs-1; (i >= 0) && (index == -1); i--) {
870 if (pairs[i].rating >= rating_) {
875 if (index == -1) index = 0;
878 for (
int i = max_count_primerpairs-2; i >= index; i--) {
880 pairs[i+1].
one = pairs[i].
one;
881 pairs[i+1].
two = pairs[i].
two;
885 pairs[index].
rating = rating_;
886 pairs[index].
one = one_;
887 pairs[index].
two = two_;
889 #if defined(DUMP_PRIMER)
890 printf(
"insertPair : [%3i] %6.2f\n", index, rating_);
900 #if defined(DUMP_PRIMER)
901 printf (
"evaluatePrimerPairs : ...\ninsertPair : [index], rating\n");
904 long list1_elems = 0;
911 arb_progress progress(
"evaluating primer pairs", list1_elems);
918 rating = evaluatePair(one, two);
921 if (rating > pairs[max_count_primerpairs-1].rating) {
923 insertPair(rating, one, two);
936 if (counter == 0) error =
"no primer pair could be found, sorry :(";
938 #if defined(DUMP_PRIMER)
939 printf (
"evaluatePrimerPairs : ratings [ %.3f .. %.3f ]\n\n", pairs[max_count_primerpairs-1].rating, pairs[0].rating);
943 #if defined(DUMP_PRIMER)
944 void PrimerDesign::printPrimerPairs () {
945 printf (
"printPairs [index] [rating ( primer1[(start_pos,offset,length),(GC,temp)] , primer2[(pos,offs,len),(GC,temp)])] \n");
946 for (
int i = 0; i < max_count_primerpairs; i++) {
947 printf(
"printPairs : [%3i]", i);
948 pairs[i].
print(
"\t",
"\n", sequence);
953 const char *
PrimerDesign::get_result(
int num,
const char *&primers,
int max_primer_length,
int max_position_length,
int max_length_length)
const {
954 if ((num < 0) || (num >= max_count_primerpairs))
return NULp;
955 if (!pairs[num].one || !pairs[num].two)
return NULp;
958 return pairs[num].
get_result(sequence, max_primer_length, max_position_length, max_length_length);
const char * get_result(int num, const char *&primers, int max_primer_length, int max_position_length, int max_length_length) const
void print(const char *prefix_, const char *suffix_)
#define PRD_MAX_SEQUENCE_POS
void push(unsigned char base_)
const char * get_result(const char *sequence_, int max_primer_length, int max_position_length, int max_length_length)
void matchSequenceAgainstPrimerTrees()
void print(const char *prefix, const char *suffix, const char *sequence_)
static ChildLookupTable CHAR2CHILD
static HelixNrInfo * start
void print(const char *prefix_, const char *suffix_)
void restart(PRD_Sequence_Pos start_pos_, PRD_Sequence_Pos stop_pos_, int max_length_, int direction_)
static void show_comment(const char *comment)
void setPositionalParameters(Range pos1_, Range pos2_, Range length_, Range distance_)
bool includes(PRD_Sequence_Pos value_)
PRD_Sequence_Pos last_base_index
PrimerDesign(const char *sequence_, long int seqLength_, Range pos1_, Range pos2_, Range length_, Range distance_, Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_, int max_count_primerpairs_, double GC_factor_, double temp_factor_)
void convertTreesToLists()
static const int BACKWARD
PRD_Sequence_Pos max() const
const char * get_primers(const char *sequence_)
void subtitle(const char *stitle)
void iterateWith(PRD_Sequence_Pos pos_, unsigned char base_)
PRD_Sequence_Pos min() const
void setConditionalParameters(Range ratio_, Range temperature_, int min_dist_to_next_, bool expand_IUPAC_Codes_, int max_count_primerpairs_, double GC_factor_, double temp_factor_)
void evaluatePrimerPairs()
long int PRD_Sequence_Pos