102 pt_server_alignment(strdup(pt_server_alignment_)),
103 maxRelatives(maxRelatives_)
107 free(pt_server_alignment);
// Fixed string identifiers used by the aligner.
// NOTE(review): presumably the names under which alignment score and insert
// information are stored -- their users are not visible here, confirm.
117 #define QUALITY_NAME "ASC_ALIGNER_CLIENT_SCORE"
118 #define INSERTS_NAME "AMI_ALIGNER_MASTER_INSERTS"
// Setting names of the fast aligner. Each name is built at compile time by
// adjacent-string-literal concatenation of FA_AWAR_ROOT and a suffix,
// e.g. FA_AWAR_TO_ALIGN expands to "faligner/" "what" == "faligner/what".
120 #define FA_AWAR_ROOT "faligner/"
121 #define FA_AWAR_TO_ALIGN FA_AWAR_ROOT "what"
122 #define FA_AWAR_REFERENCE FA_AWAR_ROOT "against"
123 #define FA_AWAR_REFERENCE_NAME FA_AWAR_ROOT "sagainst"
124 #define FA_AWAR_RANGE FA_AWAR_ROOT "range"
125 #define FA_AWAR_PROTECTION FA_AWAR_ROOT "protection"
126 #define FA_AWAR_AROUND FA_AWAR_ROOT "around"
127 #define FA_AWAR_MIRROR FA_AWAR_ROOT "mirror"
128 #define FA_AWAR_REPORT FA_AWAR_ROOT "report"
129 #define FA_AWAR_SHOW_GAPS_MESSAGES FA_AWAR_ROOT "show_gaps"
130 #define FA_AWAR_CONTINUE_ON_ERROR FA_AWAR_ROOT "continue_on_error"
131 #define FA_AWAR_ACTION_ON_ERROR FA_AWAR_ROOT "action_on_error"
132 #define FA_AWAR_USE_SECONDARY FA_AWAR_ROOT "use_secondary"
133 #define FA_AWAR_NEXT_RELATIVES FA_AWAR_ROOT "next_relatives"
134 #define FA_AWAR_RELATIVE_RANGE FA_AWAR_ROOT "relrange"
// Unlike its neighbours this name carries an additional leading "tmp/",
// i.e. it expands to "tmp/faligner/relative_ali".
135 #define FA_AWAR_PT_SERVER_ALIGNMENT "tmp/" FA_AWAR_ROOT "relative_ali"
136 #define FA_AWAR_SAI_RANGE_NAME FA_AWAR_ROOT "sai/sainame"
137 #define FA_AWAR_SAI_RANGE_CHARS FA_AWAR_ROOT "sai/chars"
// Island hopping settings. The root below is NOT prefixed with FA_AWAR_ROOT,
// so these names start with "island_hopping/" rather than "faligner/".
139 #define FA_AWAR_ISLAND_HOPPING_ROOT "island_hopping/"
140 #define FA_AWAR_USE_ISLAND_HOPPING FA_AWAR_ISLAND_HOPPING_ROOT "use"
141 #define FA_AWAR_ESTIMATE_BASE_FREQ FA_AWAR_ISLAND_HOPPING_ROOT "estimate_base_freq"
// One base-frequency name per base (a, c, g, t).
142 #define FA_AWAR_BASE_FREQ_A FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_a"
143 #define FA_AWAR_BASE_FREQ_C FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_c"
144 #define FA_AWAR_BASE_FREQ_G FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_g"
145 #define FA_AWAR_BASE_FREQ_T FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_t"
// One substitution-parameter name per unordered base pair (ac, ag, at, cg, ct, gt).
146 #define FA_AWAR_SUBST_PARA_AC FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ac"
147 #define FA_AWAR_SUBST_PARA_AG FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ag"
148 #define FA_AWAR_SUBST_PARA_AT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_at"
149 #define FA_AWAR_SUBST_PARA_CG FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_cg"
150 #define FA_AWAR_SUBST_PARA_CT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ct"
151 #define FA_AWAR_SUBST_PARA_GT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_gt"
152 #define FA_AWAR_EXPECTED_DISTANCE FA_AWAR_ISLAND_HOPPING_ROOT "expected_dist"
153 #define FA_AWAR_STRUCTURE_SUPPLEMENT FA_AWAR_ISLAND_HOPPING_ROOT "struct_suppl"
154 #define FA_AWAR_THRESHOLD FA_AWAR_ISLAND_HOPPING_ROOT "threshold"
155 #define FA_AWAR_GAP_A FA_AWAR_ISLAND_HOPPING_ROOT "gapa"
156 #define FA_AWAR_GAP_B FA_AWAR_ISLAND_HOPPING_ROOT "gapb"
157 #define FA_AWAR_GAP_C FA_AWAR_ISLAND_HOPPING_ROOT "gapc"
184 if (my_protection<=max_protection) {
219 switch (revComplWhat) {
231 error =
"There is no marked species";
249 error =
"There is no selected species!";
275 : Old(old_), New(new_)
293 range =
ExplicitRange(exp_start-gaps_before, exp_end+gaps_after);
295 return compressed.
size();
300 typedef std::list<ExplicitRange> Ranges;
310 ranges.push_front(range);
322 for (Ranges::iterator r = ranges.begin(); r != ranges.end(); ++r) {
323 basecount += change.
follow(*r);
341 if (!gbd)
return "GROUP-CONSENSUS";
343 return name ? name :
"<unnamed-species>";
355 if (slave==master) result =
'-';
356 else if (slave==
GAP_CHAR) result =
'+';
357 else if (master==
GAP_CHAR) result =
'+';
367 static char *lstr(
const char *
s,
int len) {
372 if (alloc) free(buffer);
376 memcpy(buffer, s, len);
384 inline char compareChar(
char base1,
char base2) {
385 return base1==base2 ?
'=' : (
relatedBases(base1, base2) ?
'x' :
'X');
388 #if defined(TRACE_COMPRESSED_ALIGNMENT)
390 static void dump_n_compare_one(
const char *seq1,
const char *seq2,
long len,
long offset) {
392 char compare[BUFLEN+1];
394 for (
long l=0; l<len; l++) {
395 compare[l] = (
is_ali_gap(seq1[l]) &&
is_ali_gap(seq2[l])) ?
' ' : compareChar(seq1[l], seq2[l]);
400 printf(
" %li '%s'\n", offset, lstr(seq1, len));
401 printf(
" %li '%s'\n", offset, lstr(seq2, len));
402 printf(
" %li '%s'\n", offset, compare);
405 inline void dump_rest(
const char *
seq,
long len,
int idx,
long offset) {
406 printf(
" Rest von Sequenz %i:\n", idx);
408 printf(
" %li '%s'\n", offset, lstr(seq, BUFLEN));
415 printf(
" '%s'\n", lstr(seq, len));
418 static void dump_n_compare(
const char *text,
const char *seq1,
long len1,
const char *seq2,
long len2) {
421 printf(
" Comparing %s:\n", text);
423 while (len1>0 && len2>0) {
426 if (len1>=BUFLEN && len2>=BUFLEN) {
427 dump_n_compare_one(seq1, seq2, done=BUFLEN, offset);
430 long min = len1<len2 ? len1 : len2;
431 dump_n_compare_one(seq1, seq2, done=min, offset);
441 if (len1>0) dump_rest(seq1, len1, 1, offset);
442 if (len2>0) dump_rest(seq2, len2, 2, offset);
443 printf(
" -------------------\n");
449 #endif // TRACE_COMPRESSED_ALIGNMENT
453 inline void dumpSeq(
const char *seq,
long len,
long pos) {
454 printf(
"'%s' ", lstr(seq, len));
455 printf(
"(Pos=%li,Len=%li)", pos, len);
460 double sig = partSignificance(sequence().length(), slaveSequence.length(), bestLength); \
462 printf(" Score = %li (Significance=%f)\n" \
463 " Master = ", bestScore, sig); \
464 dumpSeq(bestMasterLeft.text(), bestLength, bestMasterLeft.leftOf()); \
467 dumpSeq(bestSlaveLeft.text(), bestLength, bestSlaveLeft.leftOf()); \
478 return log(d)/log(3.0);
483 return log3((seq1len-partlen)*(seq2len-partlen)) - partlen;
487 return "Cannot align - reserved buffer is to small";
500 inserts = nextBase-alignBuffer->
offset();
509 char slaveBase = *slave.
text();
510 char masterBase = *master.
text();
532 char masterBase = *master.
text();
542 const char *masterAlignment,
const char *slaveAlignment,
long alignmentLength,
546 #define ACID '*' // contents of 'masterAlignment' and 'slaveAlignment'
552 for (pos=0; pos<alignmentLength; pos++) {
555 if (masterAlignment[pos]==
ACID) {
564 if (slaveAlignment[pos]==
ACID) {
565 insertBase(alignBuffer, master, slave, report);
577 for (slave_bases=1; pos+slave_bases<alignmentLength && masterAlignment[pos+slave_bases]==
GAP; slave_bases++) {
580 if (!baseAtLeft && insert>slave_bases) {
581 int ins_gaps = insert-slave_bases;
588 if (insert<slave_bases) {
591 else if (insert>slave_bases) {
600 pos += slave_bases-1;
622 long min_insert = insert;
626 while (insert<0 && partLength) {
627 if (insert<min_insert) min_insert = insert;
628 if (!alignBuffer->
free()) {
631 insertBase(alignBuffer, master, slave, report);
652 while (partLength--) {
661 if (!alignBuffer->
free()) {
666 insertBase(alignBuffer, master, slave, report);
679 const char *mtext = master.
text(moffset);
680 const char *stext = slaveSequence.
text(soffset);
685 #ifdef TRACE_CLUSTAL_DATA
686 printf(
"ClustalV-Align:\n");
687 printf(
" mseq = '%s'\n", lstr(mtext, mlength));
688 printf(
" sseq = '%s'\n", lstr(stext, slength));
689 #endif // TRACE_CLUSTAL_DATA
697 default: error =
"Unknown alignment type - aligner aborted";
break;
700 const char *maligned, *saligned;
706 mtext, mlength, stext, slength,
709 maligned, saligned, len, score);
713 #ifdef TRACE_CLUSTAL_DATA
714 printf(
"ClustalV returns:\n");
715 printf(
" maligned = '%s'\n", lstr(maligned, len));
716 printf(
" saligned = '%s'\n", lstr(saligned, len));
717 #endif // TRACE_CLUSTAL_DATA
724 #if (defined(DEBUG) && 0)
728 char *
cmp =
new char[len];
730 for (
int l=0; l<len; l++) {
733 if (maligned[l]==
'*') {
734 maligned[l] = *master2.text();
741 if (saligned[l]==
'*') {
742 saligned[l] = *slave2.text();
749 cmp[l] = gaps || maligned[l]==saligned[l] ?
'=' :
'X';
752 printf(
" master = '%s'\n", lstr(maligned, len));
753 printf(
" slave = '%s'\n", lstr(saligned, len));
754 printf(
" '%s'\n", lstr(cmp, len));
761 if (slength<=alignBuffer->free()) {
765 alignBuffer->
copy(slaveSequence.
text(soffset),
'?', slength);
781 #define TEST_BETTER_SCORE() \
783 if (score>bestScore) { \
785 bestLength = masterRight.text() - masterLeft.text(); \
786 bestMasterLeft = masterLeft; \
787 bestSlaveLeft = slaveLeft; \
791 #define CAN_SCORE_LEFT() (masterLeft.leftOf() && slaveLeft.leftOf()) // non-zero only while BOTH leftOf() results are non-zero; reads 'masterLeft' and 'slaveLeft' from the scope where the macro is expanded
792 #define CAN_SCORE_RIGHT() (masterRight.rightOf() && slaveRight.rightOf()) // counterpart of CAN_SCORE_LEFT() using rightOf() on 'masterRight' and 'slaveRight'
// NOTE(review): leftOf()/rightOf() presumably report how many positions remain
// on that side of the position -- their definitions are not visible here, confirm.
// Both macros are function-local helpers: they are removed again with #undef
// further down in this file.
794 #define SCORE_LEFT() \
796 score += *(--masterLeft).text()==*(--slaveLeft).text() ? match : mismatch; \
797 TEST_BETTER_SCORE(); \
800 #define SCORE_RIGHT() \
802 score += *(++masterRight).text()==*(++slaveRight).text() ? match : mismatch; \
803 TEST_BETTER_SCORE(); \
810 int match,
int mismatch,
825 static double lowSignificance;
826 static int lowSignificanceInitialized;
828 if (!lowSignificanceInitialized) {
829 lowSignificance =
log3(0.01);
830 lowSignificanceInitialized = 1;
843 while (occurrence.found()) {
844 long score = match*3;
872 occurrence.gotoNext();
874 if (rightmostSlave<slaveRight) {
875 rightmostSlave = slaveRight;
880 if (rightmostSlave>slave) slave = rightmostSlave;
887 if (sig<lowSignificance) {
888 long masterLeftOf = bestMasterLeft.
leftOf();
889 long masterRightStart = masterLeftOf+bestLength;
890 long masterRightOf = bestMasterLeft.
rightOf()-bestLength;
891 long slaveLeftOf = bestSlaveLeft.
leftOf();
892 long slaveRightStart = slaveLeftOf+bestLength;
893 long slaveRightOf = bestSlaveLeft.
rightOf()-bestLength;
895 #define MIN_ALIGNMENT_RANGE 4
903 alignBuffer, max_seq_length, match, mismatch, report);
905 else if (slaveLeftOf>0) {
907 slaveSequence, 0, slaveLeftOf,
908 max_seq_length, alignBuffer, report);
917 #if (defined(DEBUG) && 0)
918 long offset = alignBuffer->
offset();
921 error =
insertAligned(alignBuffer, bestMasterLeft, bestSlaveLeft, bestLength, report);
922 #if (defined(DEBUG) && 0)
924 printf(
"aligned '%s' (len=%li, address=%li)\n", lstr(alignBuffer->
text()+
offset, used), used,
long(alignBuffer));
936 max_seq_length, match, mismatch, report);
938 else if (slaveRightOf>0) {
940 slaveSequence, slaveRightStart, slaveRightOf,
941 max_seq_length, alignBuffer, report);
952 if (!aligned && !error) {
954 slaveSequence, 0, slaveSequence.
length(),
955 max_seq_length, alignBuffer, report);
962 #undef TEST_BETTER_SCORE
963 #undef CAN_SCORE_LEFT
964 #undef CAN_SCORE_RIGHT
995 int firstColumn = range.
start();
997 int lastColumn = range.
end();
1003 while (firstColumn>0 &&
is_ali_gap(data[firstColumn-1])) {
1006 if (lastColumn!=-1) {
1007 while (lastColumn<(length-1) &&
is_ali_gap(data[lastColumn+1])) lastColumn++;
1011 partData = data+firstColumn;
1012 int slen = length-firstColumn;
1015 fa_assert((
size_t)slen==strlen(partData));
1017 if (lastColumn==-1) {
1021 partLength = lastColumn-firstColumn+1;
1022 if (partLength>slen) partLength = slen;
1044 if (dataPtr) *dataPtr =
NULp;
1088 const char *master_name =
read_name(gb_alignTo);
1094 if (gb_toAlign!=last_gb_toAlign) {
1095 last_gb_toAlign = gb_toAlign;
1096 currentSequenceNumber++;
1100 #ifdef TRACE_COMPRESSED_ALIGNMENT
1101 printf(
"alignCompactedTo(): master='%s' ", master_name);
1102 printf(
"slave='%s'\n", toAlignSequence->
name());
1103 #endif // TRACE_COMPRESSED_ALIGNMENT
1112 #ifdef TRACE_ISLANDHOPPER_DATA
1113 printf(
"Island-Hopper returns:\n");
1116 #endif // TRACE_ISLANDHOPPER_DATA
1125 error = alignTo->
fast_align(*toAlignSequence, &alignBuffer, max_seq_length, 2, -10, &report);
1131 if (alignBuffer.
free()) {
1137 #ifdef TRACE_COMPRESSED_ALIGNMENT
1140 dump_n_compare(
"reference vs. aligned:", alignTo->
sequence(), alignedSlave);
1142 #endif // TRACE_COMPRESSED_ALIGNMENT
1146 if (!error) error = err;
1151 error =
"Internal aligner error (sequence checksum changed) -- aborted";
1153 #ifdef TRACE_COMPRESSED_ALIGNMENT
1155 dump_n_compare(
"Old Slave vs. new Slave", *toAlignSequence, alignedSlave);
1156 #endif // TRACE_COMPRESSED_ALIGNMENT
1164 error =
"Can't find/create sequence data";
1172 int lenToCopy = ali_params.
range.
size();
1179 error =
"Internal aligner error (sequence checksum changed) -- aborted";
1180 # ifdef TRACE_COMPRESSED_ALIGNMENT
1182 dump_n_compare(
"Old seq vs. new seq (slave)", buffer_org, len, buffer, len);
1184 # endif // TRACE_COMPRESSED_ALIGNMENT
1208 int buflen = max_seq_length*2;
1209 char *buffer = ARB_alloc<char>(buflen+1);
1210 char *afterLast =
buffer;
1213 error =
"out of memory";
1216 memset(buffer,
'-', buflen);
1221 memset(buffer+inserts->
offset(),
'>', inserts->
gaps());
1222 afterLast = buffer+inserts->
offset()+inserts->
gaps();
1223 inserts = inserts->
next();
1272 const char *name_toAlign =
read_name(gb_toAlign);
1273 const char *name_alignTo =
read_name(gb_alignTo);
1278 name_toAlign, name_alignTo, olderr.
deliver());
1307 error =
alignCompactedTo(toAlignSequence, alignTo, max_seq_length, alignment, chksum, gb_toAlign, gb_alignTo, ali_params);
1308 if (error) error =
align_error(error, gb_toAlign, gb_alignTo);
1309 delete toAlignSequence;
1322 char *consensus = get_consensus(
read_name(gb_toAlign), ali_params.
range);
1323 size_t cons_len = strlen(consensus);
1327 for (
size_t i = 0; i<cons_len; ++i) {
1328 switch (consensus[i]) {
1329 case '=': consensus[i] =
'-';
break;
1338 error =
alignTo(gb_toAlign, alignment, &fast,
NULp, max_seq_length, ali_params);
1350 if (usedBasePositions<0) {
1358 char *newString =
NULp;
1363 newString = currInfo;
1367 freeset(*toString, newString);
// Returns the smaller of the two integers 'i' and 'j'.
// When both are equal, that common value is returned.
inline int min(int i, int j) {
    if (j <= i) {
        return j;
    }
    return i;
}
1381 bool use_different_pt_server_alignment = 0 != strcmp(relSearch.
pt_server_alignment, alignment);
1386 char **nearestRelative =
new char*[relativesToTest+1];
1391 if (use_different_pt_server_alignment) {
1395 for (next_relatives=0; next_relatives<relativesToTest; next_relatives++) {
1396 nearestRelative[next_relatives] =
NULp;
1404 char *findRelsBySeq =
NULp;
1405 if (use_different_pt_server_alignment) {
1421 delete toAlignSequence;
1425 while (next_relatives) {
1427 freenull(nearestRelative[next_relatives]);
1444 double bestScore = 0;
1447 double lastScore = -1;
1448 #if defined(TRACE_RELATIVES)
1450 #endif // TRACE_RELATIVES
1453 if (strcmp(toAlignSequence->
name(), fl->name)!=0) {
1455 double thisScore = familyFinder->
uses_rel_matches() ? fl->rel_matches : fl->matches;
1458 fa_assert(lastScore < 0 || lastScore >= thisScore);
1459 lastScore = thisScore;
1460 #if defined(TRACE_RELATIVES)
1461 fprintf(stderr,
"- %s (%5.2f)\n", fl->name, thisScore);
1462 #endif // TRACE_RELATIVES
1464 if (thisScore>=bestScore) bestScore = thisScore;
1465 if (next_relatives<(relativesToTest+1)) {
1466 nearestRelative[next_relatives] = strdup(fl->name);
1475 char *mirroredSequence = strdup(findRelsBySeq);
1476 long length = strlen(mirroredSequence);
1477 double bestMirroredScore = 0;
1487 double lastScore = -1;
1488 #if defined(TRACE_RELATIVES)
1490 #endif // TRACE_RELATIVES
1493 double thisScore = familyFinder->
uses_rel_matches() ? fl->rel_matches : fl->matches;
1496 fa_assert(lastScore < 0 || lastScore >= thisScore);
1497 lastScore = thisScore;
1498 #if defined(TRACE_RELATIVES)
1499 fprintf(stderr,
"- %s (%5.2f)\n", fl->name, thisScore);
1500 #endif // TRACE_RELATIVES
1502 if (thisScore >= bestMirroredScore) {
1503 if (strcmp(toAlignSequence->
name(), fl->name)!=0) {
1511 if (bestMirroredScore>bestScore) {
1515 message =
GBS_global_string(
"'%s' seems to be the other way round (score: %.1f%%, score if turned: %.1f%%)",
1516 toAlignSequence->
name(), bestScore*100, bestMirroredScore*100);
1519 message =
GBS_global_string(
"'%s' seems to be the other way round (score: %li, score if turned: %li)",
1520 toAlignSequence->
name(),
long(bestScore+.5),
long(bestMirroredScore+.5));
1522 turnIt =
aw_question(
"fastali_turn_sequence", message,
"Turn sequence,Leave sequence alone")==0;
1527 #if defined(TRACE_RELATIVES)
1528 fprintf(stderr,
"Using turned sequence!\n");
1529 #endif // TRACE_RELATIVES
1538 delete toAlignSequence;
1543 free(mirroredSequence);
1546 free(findRelsBySeq);
1550 if (!next_relatives) {
1552 sprintf(warning,
"No relative found for '%s'", toAlignSequence->
name());
1560 GBDATAP *gb_reference =
new GBDATAP[relSearch.
maxRelatives];
1563 for (i=0; i<relSearch.
maxRelatives && i<next_relatives; i++) {
1572 gb_reference[i] = gb_species;
1575 free(nearestRelative[i]);
1576 for (
int j = i+1; j<next_relatives; ++j) {
1577 nearestRelative[j-1] = nearestRelative[j];
1580 nearestRelative[next_relatives] =
NULp;
1586 for (; i<next_relatives; ++i) freenull(nearestRelative[i]);
1603 if (gb_ref && gb_align) {
1621 max_seq_length, alignment, chksum,
1622 gb_toAlign, gb_reference[0], ali_params);
1626 error =
align_error(error, gb_toAlign, gb_reference[0]);
1629 char *used_relatives =
NULp;
1637 if (next_relatives>1) error =
"Island hopping uses only one relative";
1643 int unaligned_positions;
1649 delete alignedSequence;
1653 int toalign_positions = toAlignSequence->
length();
1654 if (unaligned_positions<toalign_positions) {
1659 for (i=1; i<next_relatives && !
error; i++) {
1660 loose.
append(loose_for_next_relative);
1661 int unaligned_positions_for_next = 0;
1671 fa_assert(contradicted(error, toAlignPart));
1678 max_seq_length, alignment, part_chksum,
1679 gb_toAlign, gb_reference[i], loose_ali_params);
1694 fa_assert(unaligned_positions_for_next <= unaligned_positions);
1695 if (unaligned_positions_for_next<unaligned_positions) {
1697 unaligned_positions = unaligned_positions_for_next;
1707 free(used_relatives);
1710 delete alignToSequence;
1713 delete [] gb_reference;
1717 delete toAlignSequence;
1719 for (i=0; i<next_relatives; i++) freenull(nearestRelative[i]);
1720 delete [] nearestRelative;
1735 int max_seq_length_,
1737 : alignment(alignment_),
1738 max_seq_length(max_seq_length_),
1739 ali_params(ali_params_)
1763 int max_seq_length_,
1766 targetSequence(targetSequence_),
1767 gb_alignTo(gb_alignTo_)
1783 int max_seq_length_,
1786 get_consensus(get_consensus_)
1802 int max_seq_length_,
1807 relSearch(relSearch_),
1808 turnAllowed(turnAllowed_)
1841 int wasNotAllowedToAlign;
1843 bool continue_on_error;
1850 typedef std::list<GBDATA*> GBDATAlist;
1851 GBDATAlist species_to_mark;
1856 ARB_ERROR alignToExplicitReference(
GBDATA *gb_species_data,
int max_seq_length);
1857 ARB_ERROR alignToConsensus(
GBDATA *gb_species_data,
int max_seq_length);
1858 ARB_ERROR alignToRelatives(
GBDATA *gb_species_data,
int max_seq_length);
1860 void triggerAction(
GBDATA *gb_species,
bool has_been_aligned) {
1862 switch (error_action) {
1867 if (mark) species_to_mark.push_back(gb_species);
1892 bool continue_on_error_,
1894 : gb_main(gb_main_),
1895 alignWhat(alignWhat_),
1896 alignment(alignment_),
1898 get_first_selected_species(get_first_selected_species_),
1899 get_next_selected_species(get_next_selected_species_),
1900 reference(reference_),
1901 get_consensus(get_consensus_),
1902 relSearch(relSearch_),
1903 turnAllowed(turnAllowed_),
1904 ali_params(ali_params_),
1905 maxProtection(maxProtection_),
1906 wasNotAllowedToAlign(0),
1908 continue_on_error(continue_on_error_),
1909 error_action(continue_on_error ? error_action_ :
FA_NO_ACTION)
1926 if (myProtection<=maxProtection) {
1932 if (continue_on_error) {
1939 if (!error) error = ref.
align_to(gb_toalign);
1942 if (error) err_count++;
1943 triggerAction(gb_toalign, !error);
1946 if (continue_on_error) {
1955 wasNotAllowedToAlign++;
1956 triggerAction(gb_toalign,
false);
1967 switch (alignWhat) {
1976 currentSequenceNumber = overallSequenceNumber = 1;
1977 error = alignToReference(gb_toalign, ref);
1985 arb_progress progress(
"Aligning marked species",
long(count));
1986 progress.auto_subtitles(
"Species");
1988 currentSequenceNumber = 1;
1989 overallSequenceNumber = count;
1991 while (gb_species && !error) {
1992 error = alignToReference(gb_species, ref);
1993 progress.inc_and_check_user_abort(error);
2000 GBDATA *gb_species = get_first_selected_species(&count);
2003 currentSequenceNumber = 1;
2004 overallSequenceNumber = count;
2010 arb_progress progress(
"Aligning selected species",
long(count));
2011 progress.auto_subtitles(
"Species");
2013 while (gb_species && !error) {
2014 error = alignToReference(gb_species, ref);
2015 progress.inc_and_check_user_abort(error);
2016 gb_species = get_next_selected_species();
2027 ARB_ERROR Aligner::alignToExplicitReference(
GBDATA *gb_species_data,
int max_seq_length) {
2031 if (!gb_reference) {
2035 long referenceChksum;
2056 ExplicitReference target(alignment, &referenceFastSeq, gb_reference, max_seq_length, ali_params);
2058 error = alignTargetsToReference(target, gb_species_data);
2060 delete referenceSeq;
2065 ARB_ERROR Aligner::alignToConsensus(
GBDATA *gb_species_data,
int max_seq_length) {
2066 return alignTargetsToReference(
ConsensusReference(alignment, get_consensus, max_seq_length, ali_params),
2070 ARB_ERROR Aligner::alignToRelatives(
GBDATA *gb_species_data,
int max_seq_length) {
2071 return alignTargetsToReference(
SearchRelativesReference(relSearch, max_seq_length, turnAllowed, alignment, ali_params),
2080 bool search_by_pt_server = !reference && !get_consensus;
2083 wasNotAllowedToAlign = 0;
2084 species_to_mark.clear();
2086 fa_assert(!reference || !get_consensus);
2089 if ((ali_params.
range.
is_part()) || !search_by_pt_server) {
2099 if (search_by_pt_server) {
2104 else if (pt_server_alignmentType !=
GB_AT_RNA && pt_server_alignmentType !=
GB_AT_DNA) {
2105 error =
"pt_servers only support RNA/DNA sequences.\n"
2106 "In the aligner window you may specify a RNA/DNA alignment \n"
2107 "and use a pt_server build on that alignment.";
2117 if (reference) error = alignToExplicitReference(gb_species_data, max_seq_length);
2118 else if (get_consensus) error = alignToConsensus(gb_species_data, max_seq_length);
2119 else error = alignToRelatives(gb_species_data, max_seq_length);
2123 unaligned_bases.
clear();
2127 if (wasNotAllowedToAlign>0) {
2128 const char *mess =
GBS_global_string(
"%i species were not aligned (because of protection level)", wasNotAllowedToAlign);
2134 error =
GBS_global_string(
"Aligner produced %i error%c", err_count, err_count==1 ?
'\0' :
's');
2142 for (GBDATAlist::iterator sp = species_to_mark.begin(); sp != species_to_mark.end(); ++sp) {
2146 const char *whatsMarked = (error_action ==
FA_MARK_ALIGNED) ?
"aligned" :
"failed";
2147 size_t markCount = species_to_mark.size();
2152 (markCount == 1) ?
"has" :
"have");
2162 char *reference =
NULp;
2163 char *toalign =
NULp;
2165 int get_consensus = 0;
2166 int pt_server_id = -1;
2181 error =
"Warning: No HELIX found. Can't use secondary structure";
2209 switch (alignWhat) {
2238 error =
"Can't get group consensus here.";
2244 if (pt_server_id<0) {
2245 error =
"No pt_server selected";
2255 bool autoRestrictRange4nextRelSearch =
true;
2260 autoRestrictRange4nextRelSearch =
false;
2274 error =
"There is no selected species!";
2296 :
GBS_global_string(
"SAI '%s' has no data in alignment '%s'", sai_name, aliuse);
2321 long alignment_length;
2331 if (autoRestrictRange4nextRelSearch) {
2335 int region_plus = atoi(relrange);
2356 pt_server_alignment,
2377 get_first_selected_species,
2378 get_next_selected_species,
2389 error = aligner.run();
2391 if (error && cont_on_error) {
2397 free(pt_server_alignment);
2406 if (toalign) free(toalign);
2482 AW_window_simple *aws =
new AW_window_simple;
2484 aws->init(root,
"ISLAND_HOPPING_PARA",
"Parameters for Island Hopping");
2485 aws->
load_xfig(
"faligner/islandhopping.fig");
2489 aws->create_button(
"CLOSE",
"CLOSE",
"O");
2493 aws->create_button(
"HELP",
"HELP");
2495 aws->at(
"use_secondary");
2496 aws->label(
"Use secondary structure (only for re-align)");
2501 aws->insert_default_toggle(
"Estimate",
"E", 1);
2502 aws->insert_toggle(
"Define here: ",
"D", 0);
2503 aws->update_toggle_field();
2510 int xpos[4], ypos[4];
2512 aws->button_length(1);
2515 aws->at(
"h_a"); aws->get_at_position(&xpos[0], &dummy); aws->create_button(
NULp,
"A");
2516 aws->at(
"h_c"); aws->get_at_position(&xpos[1], &dummy); aws->create_button(
NULp,
"C");
2517 aws->at(
"h_g"); aws->get_at_position(&xpos[2], &dummy); aws->create_button(
NULp,
"G");
2518 aws->at(
"h_t"); aws->get_at_position(&xpos[3], &dummy); aws->create_button(
NULp,
"T");
2520 aws->at(
"v_a"); aws->get_at_position(&dummy, &ypos[0]); aws->create_button(
NULp,
"A");
2521 aws->at(
"v_c"); aws->get_at_position(&dummy, &ypos[1]); aws->create_button(
NULp,
"C");
2522 aws->at(
"v_g"); aws->get_at_position(&dummy, &ypos[2]); aws->create_button(
NULp,
"G");
2523 aws->at(
"v_t"); aws->get_at_position(&dummy, &ypos[3]); aws->create_button(
NULp,
"T");
2526 aws->at(
"subst"); aws->create_button(
NULp,
"Substitution rate parameters:");
2541 aws->label_length(22);
2544 aws->label(
"Expected distance");
2548 aws->label(
"Structure supplement");
2552 aws->label(
"Threshold");
2555 aws->label_length(10);
2558 aws->label(
"Gap A");
2562 aws->label(
"Gap B");
2566 aws->label(
"Gap C");
2573 static AW_window_simple *aws =
NULp;
2576 aws =
new AW_window_simple;
2578 aws->init(root,
"FAMILY_PARAMS",
"Family search parameters");
2579 aws->load_xfig(
"faligner/family_settings.fig");
2583 aws->create_button(
"CLOSE",
"CLOSE",
"O");
2587 aws->create_button(
"HELP",
"HELP");
2589 aws->auto_space(5, 5);
2637 AW_window_simple *aws =
new AW_window_simple;
2640 aws->
load_xfig(
"faligner/faligner.fig");
2642 aws->label_length(10);
2643 aws->button_length(10);
2647 aws->create_button(
"CLOSE",
"CLOSE",
"O");
2651 aws->create_button(
"HELP",
"HELP");
2655 aws->insert_default_toggle(
"Fast aligner",
"F", 0);
2657 aws->insert_toggle (
"Island Hopping",
"I", 1);
2659 aws->update_toggle_field();
2661 aws->button_length(12);
2662 aws->at(
"island_para");
2665 aws->create_button(
"island_para",
"Parameters",
"");
2668 aws->button_length(10);
2670 aws->at(
"rev_compl");
2672 aws->create_button(
"reverse_complement",
"Turn now!",
"");
2676 aws->insert_toggle (
"Current Species:",
"A",
FA_CURRENT);
2677 aws->insert_default_toggle(
"Marked Species",
"M",
FA_MARKED);
2678 aws->insert_toggle (
"Selected Species",
"S",
FA_SELECTED);
2679 aws->update_toggle_field();
2688 aws->insert_default_toggle(
"Auto search by pt_server:",
"A",
FA_REF_RELATIVES);
2689 aws->update_toggle_field();
2691 aws->at(
"sagainst");
2696 aws->create_button(
"Copy",
"Copy",
"");
2698 aws->label_length(0);
2699 aws->at(
"pt_server");
2702 aws->label_length(23);
2703 aws->at(
"relrange");
2704 aws->label(
"Data from range only, plus");
2707 aws->at(
"relatives");
2708 aws->label(
"Number of relatives to use");
2711 aws->label_length(9);
2713 aws->label(
"Alignment");
2718 aws->create_autosize_button(
"Settings",
"More settings",
"");
2722 aws->label_length(10);
2729 aws->update_toggle_field();
2742 aws->at(
"protection");
2743 aws->label(
"Protection");
2745 aws->insert_default_option(
"0",
NULp, 0);
2746 aws->insert_option (
"1",
NULp, 1);
2747 aws->insert_option (
"2",
NULp, 2);
2748 aws->insert_option (
"3",
NULp, 3);
2749 aws->insert_option (
"4",
NULp, 4);
2750 aws->insert_option (
"5",
NULp, 5);
2751 aws->insert_option (
"6",
NULp, 6);
2752 aws->update_option_menu();
2757 aws->label(
"Turn check");
2759 aws->insert_option (
"Never turn sequence",
"",
FA_TURN_NEVER);
2761 aws->insert_option (
"Automatically turn sequence",
"",
FA_TURN_ALWAYS);
2762 aws->update_option_menu();
2767 aws->label(
"Report");
2771 aws->insert_default_option(
"Report to temporary entries",
"",
FA_TEMP_REPORT);
2772 aws->insert_option (
"Report to resident entries",
"",
FA_REPORT);
2774 aws->update_option_menu();
2779 aws->at(
"continue");
2782 aws->at(
"on_failure");
2783 aws->label(
"On failure");
2785 aws->insert_default_option(
"do nothing",
"",
FA_NO_ACTION);
2788 aws->update_option_menu();
2793 aws->create_button(
"GO",
"GO",
"G");
2816 typedef map<string, size_t> OligoCount;
2818 class OligoCounter {
2822 mutable OligoCount occurrence;
2824 static string removeGaps(
const char *seq) {
2825 size_t len = strlen(seq);
2827 nogaps.reserve(len);
2829 for (
size_t p = 0; p<len; ++p) {
2831 if (!
is_gap(c)) nogaps.append(1, c);
2836 void count_oligos(
const string& seq) {
2838 size_t max_pos = seq.length()-oligo_len;
2839 for (
size_t p = 0; p <= max_pos; ++p) {
2840 string oligo(seq, p, oligo_len);
2841 occurrence[oligo]++;
2850 OligoCounter(
const char *seq,
size_t oligo_len_)
2851 : oligo_len(oligo_len_)
2853 string seq_nogaps = removeGaps(seq);
2854 datasize = seq_nogaps.length();
2855 count_oligos(seq_nogaps);
2858 size_t oligo_count(
const char *oligo) {
2860 return occurrence[oligo];
2863 size_t similarity_score(
const OligoCounter& other)
const {
2865 if (oligo_len == other.oligo_len) {
2866 for (OligoCount::const_iterator o = occurrence.begin(); o != occurrence.end(); ++o) {
2867 const string& oligo = o->first;
2868 size_t count = o->second;
2870 score +=
min(count, other.occurrence[oligo]);
2876 size_t getDataSize()
const {
return datasize; }
2879 void TEST_OligoCounter() {
2880 OligoCounter oc1(
"CCAGGT", 3);
2881 OligoCounter oc2(
"GGTCCA", 3);
2882 OligoCounter oc2_gaps(
"..GGT--CCA..", 3);
2883 OligoCounter oc3(
"AGGTCC", 3);
2884 OligoCounter oc4(
"AGGTCCAGG", 3);
2890 int sc1_2 = oc1.similarity_score(oc2);
2891 int sc2_1 = oc2.similarity_score(oc1);
2894 int sc1_2gaps = oc1.similarity_score(oc2_gaps);
2897 int sc1_3 = oc1.similarity_score(oc3);
2898 int sc2_3 = oc2.similarity_score(oc3);
2899 int sc3_4 = oc3.similarity_score(oc4);
2916 map<string, OligoCounter> oligos_counted;
2921 FakeFamilyFinder(
GBDATA *gb_main_,
string ali_name_,
bool rel_matches_,
size_t oligo_len_)
2924 ali_name(ali_name_),
2925 counted_for_range(
PosRange::whole()),
2926 oligo_len(oligo_len_)
2936 OligoCounter seq_oligo_count(sequence, oligo_len);
2938 if (range != counted_for_range) {
2939 oligos_counted.clear();
2940 counted_for_range =
range;
2943 char *buffer =
NULp;
2946 bool partial_match = range.is_part();
2952 gb_species && results<max_results;
2956 if (oligos_counted.find(name) == oligos_counted.end()) {
2960 if (partial_match) {
2964 if (buffersize<range_len) {
2966 buffersize = range_len;
2967 buffer =
new char[buffersize+1];
2970 range.copy_corresponding_part(buffer, spec_seq, spec_seq_len);
2971 oligos_counted[name] = OligoCounter(buffer, oligo_len);
2974 oligos_counted[name] = OligoCounter(spec_seq, oligo_len);
2978 const OligoCounter& spec_oligo_count = oligos_counted[name];
2979 size_t score = seq_oligo_count.similarity_score(spec_oligo_count);
2981 if (score>=min_score) {
2984 newMember->
name = strdup(name.c_str());
2986 newMember->
rel_matches = score/spec_oligo_count.getDataSize();
3005 static const char *test_aliname =
"ali_test";
3007 static const char *get_aligned_data_of(
GBDATA *gb_main,
const char *species_name) {
3010 const char *data =
NULp;
3028 static const char *get_used_rels_for(
GBDATA *gb_main,
const char *species_name) {
3032 if (!gb_species) result =
GBS_global_string(
"<No such species '%s'>", species_name);
3035 if (!gb_used_rels) result =
"<No such field 'used_rels'>";
3045 gb_species && !
error;
// Shorthands for the helper functions get_aligned_data_of() and
// get_used_rels_for(): both forward 'name' as the species name and pass
// whatever variable called 'gb_main' is in scope where the macro is expanded.
// Each expands to a call returning 'const char *'.
3057 #define ALIGNED_DATA_OF(name) get_aligned_data_of(gb_main, name)
3058 #define USED_RELS_FOR(name) get_used_rels_for(gb_main, name)
3062 static GBDATA *selection_fake_gb_main =
NULp;
3063 static GBDATA *selection_fake_gb_last =
NULp;
3065 static GBDATA *fake_first_selected(
int *count) {
3066 selection_fake_gb_last =
NULp;
3070 static GBDATA *fake_next_selected() {
3071 if (!selection_fake_gb_last) {
3075 selection_fake_gb_last =
NULp;
3077 return selection_fake_gb_last;
3080 static char *fake_get_consensus(
const char*,
PosRange range) {
3081 const char *data = get_aligned_data_of(selection_fake_gb_main,
"s1");
3082 if (range.
is_whole())
return strdup(data);
3086 static void test_install_fakes(
GBDATA *gb_main) {
3087 selection_fake_gb_main =
gb_main;
3107 static struct arb_unit_test::test_alignment_data TestAlignmentData_TargetAndReferenceHandling[] = {
3108 { 0,
"s1",
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........." },
3109 { 0,
"s2",
"AUCUCCUAAACCCAACCGUAGUUCGAAUUGAGGACUGUAACUC......................................................" },
3110 { 1,
"m1",
"UAGAGGAUUUGGGUUGGCAUCAAGCUUAACUCCUGACAUUGAG......................................................" },
3111 { 1,
"m2",
"...UCCUAAACCAACCCGUAGUUCGAAUUGAGGACUGUAA........................................................." },
3112 { 1,
"m3",
"AUC---UAAACCAACCCGUAGUUCGAAUUGAGGACUG---CUC......................................................" },
3113 { 0,
"c1",
"AUCUCCUAAACCCAACC--------AAUUGAGGACUGUAACUC......................................................" },
3114 { 0,
"c2",
"AUCUCCU------AACCGUAGUUCCCCGAA------ACUGUAACUC..................................................." },
3115 { 0,
"r1",
"GAGUUACAGUCCUCAAUUCGGGGAACUACGGUUGGGUUUAGGAGAU..................................................." },
3118 void TEST_Aligner_TargetAndReferenceHandling() {
3123 GBDATA *gb_main = TEST_CREATE_DB(error, test_aliname, TestAlignmentData_TargetAndReferenceHandling,
false);
3127 SearchRelativeParams search_relative_params(
new FakeFamilyFinder(gb_main, test_aliname,
false, 8),
3131 test_install_fakes(gb_main);
3135 bool cont_on_err =
false;
3147 search_relative_params,
3153 error = aligner.run();
3166 search_relative_params,
3172 error = aligner.run();
3182 fake_first_selected,
3186 search_relative_params,
3192 error = aligner.run();
3206 search_relative_params,
3213 error = aligner.run();
3219 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s2"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3221 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m1"),
".......UAG--AGG-A------U-U-UGGGU-UG-G-C-A-U-CAA-GCU--------UAA-C-UCCUG-AC--A-UUGAG...............");
3222 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m2"),
"..............U-C------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA................");
3223 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m3"),
".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3225 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3226 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c2"),
".........A--UCU-C------C-U-AA---------C-C-G-UAG-UUC------------C-CCGAA-AC--U-GUAA-CU-C...........");
3228 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"r1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUCCCC-----GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3236 search_relative_params.maxRelatives = 5;
3239 int species_count =
ARRAY_ELEMS(TestAlignmentData_TargetAndReferenceHandling);
3240 for (
int sp = 0; sp<species_count; ++sp) {
3241 const char *name = TestAlignmentData_TargetAndReferenceHandling[sp].name;
3242 if (strcmp(name,
"r1") != 0) {
3251 search_relative_params,
3258 error = aligner.run();
3275 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3276 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s2"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3278 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m1"),
".........U--AGA-G------G---AUUUG-GG-U-U-G-G-CAU-CAAGCU-----UAA-C-UCCUG-AC--A-UUGAG---------------");
3280 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m2"),
".........U--C----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----UA-A...........");
3281 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m3"),
".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3282 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3283 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c2"),
".........A--UCU-C------C-U--------A-A-C-C-G-UAG-UUCCCC-----GA--------A-AC--U-GUAA-CU-C...........");
3288 search_relative_params.getFamilyFinder()->restrict_2_region(test_ali_params_partial.
range);
3291 for (
int sp = 0; sp<species_count; ++sp) {
3292 const char *name = TestAlignmentData_TargetAndReferenceHandling[sp].name;
3301 search_relative_params,
3303 test_ali_params_partial,
3308 error = aligner.run();
3325 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3326 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s2"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3328 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m1"),
".........U--AGA-G------G-A-UU-UG-GG-U-U-G-G-CAU-CAAGCU-----UAA-C-UCCUG-AC--A-UUGAG---------------");
3329 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m2"),
".........U--C----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----UA-A...........");
3330 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m3"),
".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3332 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3333 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c2"),
".........A--UCU-C------C---------UA-A-C-C-G-UAG-UUCCCC-----GA--------A-AC--U-GUAA-CU-C...........");
3335 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"r1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUCCCC-----GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3342 static struct arb_unit_test::test_alignment_data TestAlignmentData_checksumError[] = {
3343 { 0,
"MtnK1722",
"...G-GGC-C-G............CCC-GG--------CAAUGGGGGCGGCCCGGCGGAC----GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCC....." },
3344 { 0,
"MhnFormi",
"...A-CGA-U-C------------CUUCGG--------GGUCG-U-GG-C-GU-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCCU...." },
3345 { 0,
"MhnT1916",
"...A-CGA-A-C------------CUU-GU--------GUUCG-U-GG-C-GA-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCCU...." },
3346 { 0,
"MthVanni",
"...U-GGU-U-U------------C-------------GGCCA-U-GG-C-GG-A--C------GG--C-UCAUU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCC....." },
3347 { 0,
"ThcCeler",
"...G-GGG-C-G...CC-U---U--------GC--G--CGCAC-C-GG-C-GG-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUA-CGGC-UCGAUCACCUCCU...." },
3350 void TEST_SLOW_Aligner_checksumError() {
3358 GBDATA *gb_main = TEST_CREATE_DB(error, test_aliname, TestAlignmentData_checksumError,
false);
3360 SearchRelativeParams search_relative_params(
new FakeFamilyFinder(gb_main, test_aliname,
false, 8),
3364 test_install_fakes(gb_main);
3367 bool cont_on_err =
true;
3377 search_relative_params,
3384 error = aligner.run();
3399 const char *result =
"";
3408 void TEST_BASIC_UnalignedBases() {
3452 " 3/18 8/15 0/6 3/11 8/11 10/15 10/17",
3453 " 3/18 8/17 0/6 3/11 8/13 10/15 10/18");
3457 " 1/7 3/5 0/1 1/3 3/3 4/5 4/6",
3458 " 1/7 3/7 0/2 1/4 3/5 4/6 4/7");
3460 " 1/7 3/6 0/1 1/3 3/4 4/5 4/7",
3461 " 1/7 3/7 0/2 1/4 3/5 4/6 4/7");
3466 #endif // UNIT_TESTS
GB_ERROR GB_begin_transaction(GBDATA *gbd)
#define FA_AWAR_SUBST_PARA_CT
void AWTC_create_common_next_neighbour_fields(AW_window *aws, int scaler_length)
void delete_family_list()
const CompactedSubSequence & sequence() const
int expdPosition(int cPos) const
void restoreDots(CompactedSubSequence &slaveSequence)
void FastAligner_create_variables(AW_root *root, AW_default db1)
static LooseBases unaligned_bases
char alignQuality(char slave, char master)
void GB_warning(const char *message)
SearchRelativeParams(FamilyFinder *ff_, const char *pt_server_alignment_, int maxRelatives_)
long expdPosition() const
static int currentSequenceNumber
const FastAlignInsertion * insertion() const
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
#define FA_AWAR_ESTIMATE_BASE_FREQ
virtual ARB_ERROR align_to(GBDATA *gb_toalign) const =0
void count_aligned_base(int mismatched)
#define FA_AWAR_RELATIVE_RANGE
ARB_ERROR fast_align(const CompactedSubSequence &align_to, AlignBuffer *alignBuffer, int max_seq_length, int matchScore, int mismatchScore, FastAlignReport *report) const
GBDATA * GBT_first_marked_species_rel_species_data(GBDATA *gb_species_data)
bool may_refer_to_same_part_as(const CompactedSubSequence &other) const
static ARB_ERROR alignToGroupConsensus(GBDATA *gb_toAlign, GB_CSTR alignment, Aligner_get_consensus_func get_consensus, int max_seq_length, const AlignParams &ali_params)
return string(buffer, length)
ARB_ERROR species_not_found(GB_CSTR species_name)
#define FA_AWAR_EXPECTED_DISTANCE
GB_ERROR GB_append_exportedError(GB_ERROR error)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
void load_xfig(const char *file, bool resize=true)
virtual GB_ERROR searchFamily(const char *sequence, FF_complement compl_mode, int max_results, double min_score)=0
FamilyFinder * getFamilyFinder()
PosRange intersection(PosRange r1, PosRange r2)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
long GBT_mark_all(GBDATA *gb_main, int flag)
#define FA_AWAR_BASE_FREQ_A
int relatedBases(char base1, char base2)
void FastAligner_set_align_current(AW_root *root, AW_default db1)
Aligner_get_first_selected_species get_first_selected_species
#define FA_AWAR_SUBST_PARA_AG
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
GB_CSTR get_alignment() const
void insertGap(AlignBuffer *alignBuffer, SequencePosition &master, FastAlignReport *report)
const char * text() const
#define INTEGRATED_ALIGNERS_TITLE
range_set::const_iterator iterator
void AWT_insert_config_manager(AW_window *aww, AW_default default_file_, const char *id, const StoreConfigCallback &store_cb, const RestoreConfigCallback &load_or_reset_cb, const char *macro_id, const AWT_predefined_config *predef)
#define AWAR_DEFAULT_ALIGNMENT
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
static GBDATA * get_next_selected_species()
AW_awar * set_minmax(float min, float max)
#define FA_AWAR_SHOW_GAPS_MESSAGES
#define FA_AWAR_BASE_FREQ_T
const char * GBS_global_string(const char *templat,...)
void warning(int warning_num, const char *warning_message)
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
static char * alignment_name
void AW_POPDOWN(AW_window *window)
std::string alignment_name
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
char * ARB_strpartdup(const char *start, const char *end)
Aligner_get_next_selected_species get_next_selected_species
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
void add(const PosRange &range)
void awt_create_SAI_selection_button(GBDATA *gb_main, AW_window *aws, const char *varname, const SaiSelectionlistFilterCallback &fcb)
static void build_reverse_complement(AW_window *aw, const AlignDataAccess *data_access)
#define FA_AWAR_REFERENCE
static CompactedSubSequence * readCompactedSequence(GBDATA *gb_species, const char *ali, ARB_ERROR *errorPtr, char **dataPtr, long *seqChksum, PosRange range)
static int overallSequenceNumber
const char * name() const
#define ARRAY_ELEMS(array)
#define FA_AWAR_CONTINUE_ON_ERROR
char buffer[MESSAGE_BUFFERSIZE]
static ARB_ERROR insertClustalValigned(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, const char *masterAlignment, const char *slaveAlignment, long alignmentLength, FastAlignReport *report)
GB_ERROR GB_push_transaction(GBDATA *gbd)
ARB_ERROR bufferTooSmall()
#define AWAR_NN_REL_SCALING
GB_ERROR GB_delete(GBDATA *&source)
#define AWAR_NN_FAST_MODE
int follow_ali_change(const AliChange &change)
const char * read_char_pntr() const
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
char *(* Aligner_get_consensus_func)(const char *species_name, PosRange range)
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
WindowCallback makeHelpCallback(const char *helpfile)
#define TEST_EXPECT(cond)
long GB_read_count(GBDATA *gbd)
static GB_alignment_type global_alignmentType
void FastAligner_set_reference_species(AW_root *root)
TYPE * ARB_alloc(size_t nelem)
NOT4PERL void GBT_reverseComplementNucSequence(char *seq, long length, char T_or_U)
static AW_window * create_family_settings_window(AW_root *root)
static ARB_ERROR alignCompactedTo(CompactedSubSequence *toAlignSequence, const FastSearchSequence *alignTo, int max_seq_length, GB_CSTR alignment, long toAlignChksum, GBDATA *gb_toAlign, GBDATA *gb_alignTo, const AlignParams &ali_params)
void restrict_2_region(const PosRange &range_)
AW_awar * awar_float(const char *var_name, float default_value=0.0, AW_default default_file=AW_ROOT_DEFAULT)
#define FA_AWAR_SUBST_PARA_AT
static const char * read_name(GBDATA *gbd)
int GB_read_security_write(GBDATA *gbd)
#define FA_AWAR_PROTECTION
#define TEST_EXPECT_EQUAL__BROKEN(expr, want, got)
#define FA_AWAR_ACTION_ON_ERROR
int no_of_gaps_after(int cPos) const
const int * gapsBefore(int offset=0) const
void append(LooseBases &loose)
void FastAligner_start(AW_window *aw, const AlignDataAccess *data_access)
void copy(const char *s, char q, long len)
bool GB_is_ancestor_of(GBDATA *gb_ancestor, GBDATA *gb_descendant)
ExplicitReference(GB_CSTR alignment_, const FastSearchSequence *targetSequence_, GBDATA *gb_alignTo_, int max_seq_length_, const AlignParams &ali_params_)
void message(char *errortext)
int get_max_seq_length() const
#define TEST_REJECT(cond)
void(* refresh_display)()
GBDATA *(* Aligner_get_first_selected_species)(int *total_no_of_selected_species)
AliChange(const CompactedSubSequence &old_, const CompactedSubSequence &new_)
static void error(const char *msg)
GBDATA * GB_get_root(GBDATA *gbd)
bool uses_rel_matches() const
void insertBase(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, FastAlignReport *report)
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
#define FA_AWAR_PT_SERVER_ALIGNMENT
#define CAN_SCORE_RIGHT()
ARB_ERROR ClustalV_align(int is_dna, int weighted, const char *seq1, int length1, const char *seq2, int length2, const int *gapsBefore1, int max_seq_length, const char *&res1, const char *&res2, int &reslen, int &score)
int follow_ali_change_and_append(LooseBases &loose, const AliChange &change)
const char * text() const
GB_alignment_type GBT_get_alignment_type(GBDATA *gb_main, const char *aliname)
#define AWAR_SPECIES_NAME
void correctUnalignedPositions()
char * getHelixString() const
Aligner_get_selected_range get_selected_range
void AWTC_create_common_next_neighbour_vars(AW_root *aw_root, const RootCallback &awar_changed_cb)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
static AW_window * create_island_hopping_window(AW_root *root)
char * read_string() const
static WindowCallback simple(void(*root_cb)(AW_root *, T), T t)
bool ARB_in_novice_mode(AW_root *awr)
AW_awar * awar(const char *awar)
GB_ERROR GB_pop_transaction(GBDATA *gbd)
#define FA_AWAR_USE_ISLAND_HOPPING
#define FA_AWAR_STRUCTURE_SUPPLEMENT
int no_of_gaps_before(int cPos) const
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
FamilyList * insertSortedBy_matches(FamilyList *other)
int baseMatch(char c1, char c2)
static ARB_ERROR alignTo(GBDATA *gb_toAlign, GB_CSTR alignment, const FastSearchSequence *alignTo, GBDATA *gb_alignTo, int max_seq_length, const AlignParams &ali_params)
void awt_create_PTSERVER_selection_button(AW_window *aws, const char *varname)
void count_unaligned_base(int no_of_bases)
long GBT_count_marked_species(GBDATA *gb_main)
#define TEST_EXPECT_ZERO(cond)
void memorize(ExplicitRange range)
GB_ERROR write_as_string(const char *aw_string)
static ARB_ERROR cannot_fast_align(const CompactedSubSequence &master, long moffset, long mlength, const CompactedSubSequence &slaveSequence, long soffset, long slength, int max_seq_length, AlignBuffer *alignBuffer, FastAlignReport *report)
GB_ERROR GB_set_temporary(GBDATA *gbd) __ATTR__USERESULT
static void appendNameAndUsedBasePositions(char **toString, GBDATA *gb_species, int usedBasePositions)
int aw_question(const char *unique_id, const char *question, const char *buttons, bool sameSizeButtons, const char *helpfile)
static AWT_config_mapping_def aligner_config_mapping[]
#define FA_AWAR_SAI_RANGE_NAME
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
#define TEST_EXPECT_NULL__BROKEN(n, got)
double partSignificance(long seq1len, long seq2len, long partlen)
#define TEST_EXPECT_NULL(n)
void GB_write_flag(GBDATA *gbd, long flag)
SearchRelativesReference(SearchRelativeParams &relSearch_, int max_seq_length_, FA_turn turnAllowed_, GB_CSTR alignment_, const AlignParams &ali_params_)
AW_window * FastAligner_create_window(AW_root *root, const AlignDataAccess *data_access)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
#define FA_AWAR_SUBST_PARA_AC
#define FA_AWAR_REFERENCE_NAME
const char * text() const
virtual ~AlignmentReference()
long insertsToNextBase(AlignBuffer *alignBuffer, const SequencePosition &master)
ARB_ERROR FastAligner_delete_temp_entries(GBDATA *gb_species, const char *alignment)
#define FA_AWAR_BASE_FREQ_G
static IslandHopping * island_hopper
fa_assert(chars< MESSAGE_BUFFERSIZE)
static ARB_ERROR align_error(ARB_ERROR olderr, GBDATA *gb_toAlign, GBDATA *gb_alignTo)
static ARB_ERROR insertAligned(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, long partLength, FastAlignReport *report)
Aligner(GBDATA *gb_main_, FA_alignTarget alignWhat_, GB_CSTR alignment_, GB_CSTR toalign_, Aligner_get_first_selected_species get_first_selected_species_, Aligner_get_next_selected_species get_next_selected_species_, GB_CSTR reference_, Aligner_get_consensus_func get_consensus_, SearchRelativeParams &relSearch_, FA_turn turnAllowed_, const AlignParams &ali_params_, int maxProtection_, bool continue_on_error_, FA_errorAction error_action_)
char * GB_read_string(GBDATA *gbd)
#define AWAR_CURSOR_POSITION_LOCAL
static ARB_ERROR reverseComplement(GBDATA *gb_species, GB_CSTR ali, int max_protection)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
const FastAlignInsertion * next() const
GBDATA *(* Aligner_get_next_selected_species)(void)
GBDATA * GBT_first_species(GBDATA *gb_main)
static ARB_ERROR writeStringToAlignment(GBDATA *gb_species, GB_CSTR alignment, GB_CSTR data_name, GB_CSTR str, bool temporary)
RangeList build_RangeList_from_string(const char *SAI_data, const char *set_bytes, bool invert)
const FamilyList * getFamilyList() const
int follow(ExplicitRange &range) const
#define TEST_EXPECT_NO_ERROR(call)
void aw_message(const char *msg)
void insertSlaveBases(AlignBuffer *alignBuffer, SequencePosition &slave, int length, FastAlignReport *report)
const PosRange & get_TargetRange() const
GBDATA * GBT_next_species(GBDATA *gb_species)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Aligner_get_consensus_func get_group_consensus
static GBDATA * get_first_selected_species(int *total_no_of_selected_species)
AlignmentReference(GB_CSTR alignment_, int max_seq_length_, const AlignParams &ali_params_)
#define FA_AWAR_THRESHOLD
GB_ERROR write_string(const char *aw_string)
char * GBT_get_default_alignment(GBDATA *gb_main)
const char * GBT_get_name(GBDATA *gb_item)
const CompactedSubSequence & sequence() const
#define AWAR_NN_MISMATCHES
int GB_get_transaction_level(GBDATA *gbd)
static ARB_ERROR alignToNextRelative(SearchRelativeParams &relSearch, int max_seq_length, FA_turn turnAllowed, GB_CSTR alignment, GBDATA *gb_toAlign, const AlignParams &ali_params)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
#define FA_AWAR_SAI_RANGE_CHARS
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
void memorize_insertion(long offset, long gaps)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
#define FA_AWAR_USE_SECONDARY
char * pt_server_alignment
long calcSequenceChecksum(const char *data, long length)
#define FA_AWAR_NEXT_RELATIVES
int compPosition(int xPos) const
#define FA_AWAR_SUBST_PARA_CG
#define TEST_EXPECT_EQUAL(expr, want)
GB_ERROR write_int(long aw_int)
const AlignParams & get_ali_params() const
ConsensusReference(GB_CSTR alignment_, Aligner_get_consensus_func get_consensus_, int max_seq_length_, const AlignParams &ali_params_)
void aw_message_if(GB_ERROR error)
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
const char * quality() const
NOT4PERL GB_ERROR GBT_determine_T_or_U(GB_alignment_type alignment_type, char *T_or_U, const char *supposed_target)
#define FA_AWAR_BASE_FREQ_C
#define AWAR_NN_OLIGO_LEN
void setDotsAtEOSequence()
void copy_corresponding_part(char *dest, const char *source, size_t source_len) const
#define MIN_ALIGNMENT_RANGE
#define AWAR_NN_REL_MATCHES
#define FA_AWAR_SUBST_PARA_GT
GBDATA * GBT_get_species_data(GBDATA *gb_main)
GB_write_int const char s