102 pt_server_alignment(strdup(pt_server_alignment_)),
103 maxRelatives(maxRelatives_)
107 free(pt_server_alignment);
// Fixed name strings.
// NOTE(review): presumably the names of the entries the aligner writes its score / insert report to --
// the use sites are not part of this excerpt, confirm there.
117 #define QUALITY_NAME "ASC_ALIGNER_CLIENT_SCORE"
118 #define INSERTS_NAME "AMI_ALIGNER_MASTER_INSERTS"
// AWAR name strings of the fast aligner.
// Every name below is assembled at compile time by adjacent string-literal concatenation,
// so e.g. FA_AWAR_TO_ALIGN expands to "faligner/" "what" (== "faligner/what").
120 #define FA_AWAR_ROOT "faligner/"
121 #define FA_AWAR_TO_ALIGN FA_AWAR_ROOT "what"
122 #define FA_AWAR_REFERENCE FA_AWAR_ROOT "against"
123 #define FA_AWAR_REFERENCE_NAME FA_AWAR_ROOT "sagainst"
124 #define FA_AWAR_RANGE FA_AWAR_ROOT "range"
125 #define FA_AWAR_PROTECTION FA_AWAR_ROOT "protection"
126 #define FA_AWAR_AROUND FA_AWAR_ROOT "around"
127 #define FA_AWAR_MIRROR FA_AWAR_ROOT "mirror"
128 #define FA_AWAR_REPORT FA_AWAR_ROOT "report"
129 #define FA_AWAR_SHOW_GAPS_MESSAGES FA_AWAR_ROOT "show_gaps"
130 #define FA_AWAR_CONTINUE_ON_ERROR FA_AWAR_ROOT "continue_on_error"
131 #define FA_AWAR_ACTION_ON_ERROR FA_AWAR_ROOT "action_on_error"
132 #define FA_AWAR_USE_SECONDARY FA_AWAR_ROOT "use_secondary"
133 #define FA_AWAR_NEXT_RELATIVES FA_AWAR_ROOT "next_relatives"
134 #define FA_AWAR_RELATIVE_RANGE FA_AWAR_ROOT "relrange"
// Unlike its neighbours, this name carries an extra "tmp/" prefix in front of FA_AWAR_ROOT
// (it expands to "tmp/faligner/relative_ali").
135 #define FA_AWAR_PT_SERVER_ALIGNMENT "tmp/" FA_AWAR_ROOT "relative_ali"
136 #define FA_AWAR_SAI_RANGE_NAME FA_AWAR_ROOT "sai/sainame"
137 #define FA_AWAR_SAI_RANGE_CHARS FA_AWAR_ROOT "sai/chars"
// Names of the island hopping parameters.
// They are concatenated onto FA_AWAR_ISLAND_HOPPING_ROOT, which is "island_hopping/" and
// does NOT include FA_AWAR_ROOT -- i.e. these names do not start with "faligner/".
139 #define FA_AWAR_ISLAND_HOPPING_ROOT "island_hopping/"
140 #define FA_AWAR_USE_ISLAND_HOPPING FA_AWAR_ISLAND_HOPPING_ROOT "use"
141 #define FA_AWAR_ESTIMATE_BASE_FREQ FA_AWAR_ISLAND_HOPPING_ROOT "estimate_base_freq"
142 #define FA_AWAR_BASE_FREQ_A FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_a"
143 #define FA_AWAR_BASE_FREQ_C FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_c"
144 #define FA_AWAR_BASE_FREQ_G FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_g"
145 #define FA_AWAR_BASE_FREQ_T FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_t"
146 #define FA_AWAR_SUBST_PARA_AC FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ac"
147 #define FA_AWAR_SUBST_PARA_AG FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ag"
148 #define FA_AWAR_SUBST_PARA_AT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_at"
149 #define FA_AWAR_SUBST_PARA_CG FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_cg"
150 #define FA_AWAR_SUBST_PARA_CT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ct"
151 #define FA_AWAR_SUBST_PARA_GT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_gt"
152 #define FA_AWAR_EXPECTED_DISTANCE FA_AWAR_ISLAND_HOPPING_ROOT "expected_dist"
153 #define FA_AWAR_STRUCTURE_SUPPLEMENT FA_AWAR_ISLAND_HOPPING_ROOT "struct_suppl"
154 #define FA_AWAR_THRESHOLD FA_AWAR_ISLAND_HOPPING_ROOT "threshold"
155 #define FA_AWAR_GAP_A FA_AWAR_ISLAND_HOPPING_ROOT "gapa"
156 #define FA_AWAR_GAP_B FA_AWAR_ISLAND_HOPPING_ROOT "gapb"
157 #define FA_AWAR_GAP_C FA_AWAR_ISLAND_HOPPING_ROOT "gapc"
184 if (my_protection<=max_protection) {
218 switch (revComplWhat) {
230 error =
"There is no marked species";
248 error =
"There is no selected species!";
274 : Old(old_), New(new_)
292 range =
ExplicitRange(exp_start-gaps_before, exp_end+gaps_after);
294 return compressed.
size();
299 typedef std::list<ExplicitRange> Ranges;
309 ranges.push_front(range);
321 for (Ranges::iterator r = ranges.begin(); r != ranges.end(); ++r) {
322 basecount += change.
follow(*r);
340 if (!gbd)
return "GROUP-CONSENSUS";
342 return name ? name :
"<unnamed-species>";
354 if (slave==master) result =
'-';
355 else if (slave==
GAP_CHAR) result =
'+';
356 else if (master==
GAP_CHAR) result =
'+';
366 static char *lstr(
const char *
s,
int len) {
371 if (alloc) free(buffer);
375 memcpy(buffer, s, len);
383 inline char compareChar(
char base1,
char base2) {
384 return base1==base2 ?
'=' : (
relatedBases(base1, base2) ?
'x' :
'X');
387 #if defined(TRACE_COMPRESSED_ALIGNMENT)
389 static void dump_n_compare_one(
const char *seq1,
const char *seq2,
long len,
long offset) {
391 char compare[BUFLEN+1];
393 for (
long l=0; l<len; l++) {
394 compare[l] = (
is_ali_gap(seq1[l]) &&
is_ali_gap(seq2[l])) ?
' ' : compareChar(seq1[l], seq2[l]);
399 printf(
" %li '%s'\n", offset, lstr(seq1, len));
400 printf(
" %li '%s'\n", offset, lstr(seq2, len));
401 printf(
" %li '%s'\n", offset, compare);
404 inline void dump_rest(
const char *
seq,
long len,
int idx,
long offset) {
405 printf(
" Rest von Sequenz %i:\n", idx);
407 printf(
" %li '%s'\n", offset, lstr(seq, BUFLEN));
414 printf(
" '%s'\n", lstr(seq, len));
417 static void dump_n_compare(
const char *text,
const char *seq1,
long len1,
const char *seq2,
long len2) {
420 printf(
" Comparing %s:\n", text);
422 while (len1>0 && len2>0) {
425 if (len1>=BUFLEN && len2>=BUFLEN) {
426 dump_n_compare_one(seq1, seq2, done=BUFLEN, offset);
429 long min = len1<len2 ? len1 : len2;
430 dump_n_compare_one(seq1, seq2, done=min, offset);
440 if (len1>0) dump_rest(seq1, len1, 1, offset);
441 if (len2>0) dump_rest(seq2, len2, 2, offset);
442 printf(
" -------------------\n");
448 #endif // TRACE_COMPRESSED_ALIGNMENT
452 inline void dumpSeq(
const char *seq,
long len,
long pos) {
453 printf(
"'%s' ", lstr(seq, len));
454 printf(
"(Pos=%li,Len=%li)", pos, len);
459 double sig = partSignificance(sequence().length(), slaveSequence.length(), bestLength); \
461 printf(" Score = %li (Significance=%f)\n" \
462 " Master = ", bestScore, sig); \
463 dumpSeq(bestMasterLeft.text(), bestLength, bestMasterLeft.leftOf()); \
466 dumpSeq(bestSlaveLeft.text(), bestLength, bestSlaveLeft.leftOf()); \
477 return log(d)/log(3.0);
482 return log3((seq1len-partlen)*(seq2len-partlen)) - partlen;
486 return "Cannot align - reserved buffer is to small";
499 inserts = nextBase-alignBuffer->
offset();
508 char slaveBase = *slave.
text();
509 char masterBase = *master.
text();
531 char masterBase = *master.
text();
541 const char *masterAlignment,
const char *slaveAlignment,
long alignmentLength,
545 #define ACID '*' // contents of 'masterAlignment' and 'slaveAlignment'
551 for (pos=0; pos<alignmentLength; pos++) {
554 if (masterAlignment[pos]==
ACID) {
563 if (slaveAlignment[pos]==
ACID) {
564 insertBase(alignBuffer, master, slave, report);
576 for (slave_bases=1; pos+slave_bases<alignmentLength && masterAlignment[pos+slave_bases]==
GAP; slave_bases++) {
579 if (!baseAtLeft && insert>slave_bases) {
580 int ins_gaps = insert-slave_bases;
587 if (insert<slave_bases) {
590 else if (insert>slave_bases) {
599 pos += slave_bases-1;
621 long min_insert = insert;
625 while (insert<0 && partLength) {
626 if (insert<min_insert) min_insert = insert;
627 if (!alignBuffer->
free()) {
630 insertBase(alignBuffer, master, slave, report);
651 while (partLength--) {
660 if (!alignBuffer->
free()) {
665 insertBase(alignBuffer, master, slave, report);
678 const char *mtext = master.
text(moffset);
679 const char *stext = slaveSequence.
text(soffset);
684 #ifdef TRACE_CLUSTAL_DATA
685 printf(
"ClustalV-Align:\n");
686 printf(
" mseq = '%s'\n", lstr(mtext, mlength));
687 printf(
" sseq = '%s'\n", lstr(stext, slength));
688 #endif // TRACE_CLUSTAL_DATA
696 default: error =
"Unknown alignment type - aligner aborted";
break;
699 const char *maligned, *saligned;
705 mtext, mlength, stext, slength,
708 maligned, saligned, len, score);
712 #ifdef TRACE_CLUSTAL_DATA
713 printf(
"ClustalV returns:\n");
714 printf(
" maligned = '%s'\n", lstr(maligned, len));
715 printf(
" saligned = '%s'\n", lstr(saligned, len));
716 #endif // TRACE_CLUSTAL_DATA
723 #if (defined(DEBUG) && 0)
727 char *
cmp =
new char[len];
729 for (
int l=0; l<len; l++) {
732 if (maligned[l]==
'*') {
733 maligned[l] = *master2.text();
740 if (saligned[l]==
'*') {
741 saligned[l] = *slave2.text();
748 cmp[l] = gaps || maligned[l]==saligned[l] ?
'=' :
'X';
751 printf(
" master = '%s'\n", lstr(maligned, len));
752 printf(
" slave = '%s'\n", lstr(saligned, len));
753 printf(
" '%s'\n", lstr(cmp, len));
760 if (slength<=alignBuffer->free()) {
764 alignBuffer->
copy(slaveSequence.
text(soffset),
'?', slength);
780 #define TEST_BETTER_SCORE() \
782 if (score>bestScore) { \
784 bestLength = masterRight.text() - masterLeft.text(); \
785 bestMasterLeft = masterLeft; \
786 bestSlaveLeft = slaveLeft; \
790 #define CAN_SCORE_LEFT() (masterLeft.leftOf() && slaveLeft.leftOf()) // true only while BOTH 'masterLeft' and 'slaveLeft' (variables of the expansion site) report a non-zero leftOf(); presumably "positions remaining to the left" -- confirm
791 #define CAN_SCORE_RIGHT() (masterRight.rightOf() && slaveRight.rightOf()) // mirror image of CAN_SCORE_LEFT for 'masterRight' / 'slaveRight' and rightOf()
793 #define SCORE_LEFT() \
795 score += *(--masterLeft).text()==*(--slaveLeft).text() ? match : mismatch; \
796 TEST_BETTER_SCORE(); \
799 #define SCORE_RIGHT() \
801 score += *(++masterRight).text()==*(++slaveRight).text() ? match : mismatch; \
802 TEST_BETTER_SCORE(); \
809 int match,
int mismatch,
824 static double lowSignificance;
825 static int lowSignificanceInitialized;
827 if (!lowSignificanceInitialized) {
828 lowSignificance =
log3(0.01);
829 lowSignificanceInitialized = 1;
842 while (occurrence.found()) {
843 long score = match*3;
871 occurrence.gotoNext();
873 if (rightmostSlave<slaveRight) {
874 rightmostSlave = slaveRight;
879 if (rightmostSlave>slave) slave = rightmostSlave;
886 if (sig<lowSignificance) {
887 long masterLeftOf = bestMasterLeft.
leftOf();
888 long masterRightStart = masterLeftOf+bestLength;
889 long masterRightOf = bestMasterLeft.
rightOf()-bestLength;
890 long slaveLeftOf = bestSlaveLeft.
leftOf();
891 long slaveRightStart = slaveLeftOf+bestLength;
892 long slaveRightOf = bestSlaveLeft.
rightOf()-bestLength;
894 #define MIN_ALIGNMENT_RANGE 4
902 alignBuffer, max_seq_length, match, mismatch, report);
904 else if (slaveLeftOf>0) {
906 slaveSequence, 0, slaveLeftOf,
907 max_seq_length, alignBuffer, report);
916 #if (defined(DEBUG) && 0)
917 long offset = alignBuffer->
offset();
920 error =
insertAligned(alignBuffer, bestMasterLeft, bestSlaveLeft, bestLength, report);
921 #if (defined(DEBUG) && 0)
923 printf(
"aligned '%s' (len=%li, address=%li)\n", lstr(alignBuffer->
text()+
offset, used), used,
long(alignBuffer));
935 max_seq_length, match, mismatch, report);
937 else if (slaveRightOf>0) {
939 slaveSequence, slaveRightStart, slaveRightOf,
940 max_seq_length, alignBuffer, report);
951 if (!aligned && !error) {
953 slaveSequence, 0, slaveSequence.
length(),
954 max_seq_length, alignBuffer, report);
961 #undef TEST_BETTER_SCORE
962 #undef CAN_SCORE_LEFT
963 #undef CAN_SCORE_RIGHT
971 #if defined(WARN_TODO)
972 #warning firstColumn + lastColumn -> PosRange
998 int firstColumn = range.
start();
1000 int lastColumn = range.
end();
1006 while (firstColumn>0 &&
is_ali_gap(data[firstColumn-1])) {
1009 if (lastColumn!=-1) {
1010 while (lastColumn<(length-1) &&
is_ali_gap(data[lastColumn+1])) lastColumn++;
1014 partData = data+firstColumn;
1015 int slen = length-firstColumn;
1018 fa_assert((
size_t)slen==strlen(partData));
1020 if (lastColumn==-1) {
1024 partLength = lastColumn-firstColumn+1;
1025 if (partLength>slen) partLength = slen;
1047 if (dataPtr) *dataPtr =
NULp;
1091 const char *master_name =
read_name(gb_alignTo);
1097 if (gb_toAlign!=last_gb_toAlign) {
1098 last_gb_toAlign = gb_toAlign;
1099 currentSequenceNumber++;
1103 #ifdef TRACE_COMPRESSED_ALIGNMENT
1104 printf(
"alignCompactedTo(): master='%s' ", master_name);
1105 printf(
"slave='%s'\n", toAlignSequence->
name());
1106 #endif // TRACE_COMPRESSED_ALIGNMENT
1115 #ifdef TRACE_ISLANDHOPPER_DATA
1116 printf(
"Island-Hopper returns:\n");
1119 #endif // TRACE_ISLANDHOPPER_DATA
1128 error = alignTo->
fast_align(*toAlignSequence, &alignBuffer, max_seq_length, 2, -10, &report);
1134 if (alignBuffer.
free()) {
1140 #ifdef TRACE_COMPRESSED_ALIGNMENT
1143 dump_n_compare(
"reference vs. aligned:", alignTo->
sequence(), alignedSlave);
1145 #endif // TRACE_COMPRESSED_ALIGNMENT
1149 if (!error) error = err;
1154 error =
"Internal aligner error (sequence checksum changed) -- aborted";
1156 #ifdef TRACE_COMPRESSED_ALIGNMENT
1158 dump_n_compare(
"Old Slave vs. new Slave", *toAlignSequence, alignedSlave);
1159 #endif // TRACE_COMPRESSED_ALIGNMENT
1167 error =
"Can't find/create sequence data";
1175 int lenToCopy = ali_params.
range.
size();
1182 error =
"Internal aligner error (sequence checksum changed) -- aborted";
1183 # ifdef TRACE_COMPRESSED_ALIGNMENT
1185 dump_n_compare(
"Old seq vs. new seq (slave)", buffer_org, len, buffer, len);
1187 # endif // TRACE_COMPRESSED_ALIGNMENT
1211 int buflen = max_seq_length*2;
1212 char *buffer = ARB_alloc<char>(buflen+1);
1213 char *afterLast =
buffer;
1216 error =
"out of memory";
1219 memset(buffer,
'-', buflen);
1224 memset(buffer+inserts->
offset(),
'>', inserts->
gaps());
1225 afterLast = buffer+inserts->
offset()+inserts->
gaps();
1226 inserts = inserts->
next();
1275 const char *name_toAlign =
read_name(gb_toAlign);
1276 const char *name_alignTo =
read_name(gb_alignTo);
1281 name_toAlign, name_alignTo, olderr.
deliver());
1310 error =
alignCompactedTo(toAlignSequence, alignTo, max_seq_length, alignment, chksum, gb_toAlign, gb_alignTo, ali_params);
1311 if (error) error =
align_error(error, gb_toAlign, gb_alignTo);
1312 delete toAlignSequence;
1325 char *consensus = get_consensus(
read_name(gb_toAlign), ali_params.
range);
1326 size_t cons_len = strlen(consensus);
1330 for (
size_t i = 0; i<cons_len; ++i) {
1331 switch (consensus[i]) {
1332 case '=': consensus[i] =
'-';
break;
1341 error =
alignTo(gb_toAlign, alignment, &fast,
NULp, max_seq_length, ali_params);
1353 if (usedBasePositions<0) {
1361 char *newString =
NULp;
1366 newString = currInfo;
1370 freeset(*toString, newString);
// Returns the smaller of the two ints 'i' and 'j'
// (when both are equal the second argument is returned, which is the same value).
1374 inline int min(
int i,
int j) {
return i<j ? i : j; }
1384 bool use_different_pt_server_alignment = 0 != strcmp(relSearch.
pt_server_alignment, alignment);
1389 char **nearestRelative =
new char*[relativesToTest+1];
1394 if (use_different_pt_server_alignment) {
1398 for (next_relatives=0; next_relatives<relativesToTest; next_relatives++) {
1399 nearestRelative[next_relatives] =
NULp;
1407 char *findRelsBySeq =
NULp;
1408 if (use_different_pt_server_alignment) {
1424 delete toAlignSequence;
1428 while (next_relatives) {
1430 freenull(nearestRelative[next_relatives]);
1447 double bestScore = 0;
1450 double lastScore = -1;
1451 #if defined(TRACE_RELATIVES)
1453 #endif // TRACE_RELATIVES
1456 if (strcmp(toAlignSequence->
name(), fl->name)!=0) {
1458 double thisScore = familyFinder->
uses_rel_matches() ? fl->rel_matches : fl->matches;
1461 fa_assert(lastScore < 0 || lastScore >= thisScore);
1462 lastScore = thisScore;
1463 #if defined(TRACE_RELATIVES)
1464 fprintf(stderr,
"- %s (%5.2f)\n", fl->name, thisScore);
1465 #endif // TRACE_RELATIVES
1467 if (thisScore>=bestScore) bestScore = thisScore;
1468 if (next_relatives<(relativesToTest+1)) {
1469 nearestRelative[next_relatives] = strdup(fl->name);
1478 char *mirroredSequence = strdup(findRelsBySeq);
1479 long length = strlen(mirroredSequence);
1480 double bestMirroredScore = 0;
1490 double lastScore = -1;
1491 #if defined(TRACE_RELATIVES)
1493 #endif // TRACE_RELATIVES
1496 double thisScore = familyFinder->
uses_rel_matches() ? fl->rel_matches : fl->matches;
1499 fa_assert(lastScore < 0 || lastScore >= thisScore);
1500 lastScore = thisScore;
1501 #if defined(TRACE_RELATIVES)
1502 fprintf(stderr,
"- %s (%5.2f)\n", fl->name, thisScore);
1503 #endif // TRACE_RELATIVES
1505 if (thisScore >= bestMirroredScore) {
1506 if (strcmp(toAlignSequence->
name(), fl->name)!=0) {
1514 if (bestMirroredScore>bestScore) {
1518 message =
GBS_global_string(
"'%s' seems to be the other way round (score: %.1f%%, score if turned: %.1f%%)",
1519 toAlignSequence->
name(), bestScore*100, bestMirroredScore*100);
1522 message =
GBS_global_string(
"'%s' seems to be the other way round (score: %li, score if turned: %li)",
1523 toAlignSequence->
name(),
long(bestScore+.5),
long(bestMirroredScore+.5));
1525 turnIt =
aw_question(
"fastali_turn_sequence", message,
"Turn sequence,Leave sequence alone")==0;
1530 #if defined(TRACE_RELATIVES)
1531 fprintf(stderr,
"Using turned sequence!\n");
1532 #endif // TRACE_RELATIVES
1541 delete toAlignSequence;
1546 free(mirroredSequence);
1549 free(findRelsBySeq);
1553 if (!next_relatives) {
1555 sprintf(warning,
"No relative found for '%s'", toAlignSequence->
name());
1563 GBDATAP *gb_reference =
new GBDATAP[relSearch.
maxRelatives];
1566 for (i=0; i<relSearch.
maxRelatives && i<next_relatives; i++) {
1575 gb_reference[i] = gb_species;
1578 free(nearestRelative[i]);
1579 for (
int j = i+1; j<next_relatives; ++j) {
1580 nearestRelative[j-1] = nearestRelative[j];
1583 nearestRelative[next_relatives] =
NULp;
1589 for (; i<next_relatives; ++i) freenull(nearestRelative[i]);
1606 if (gb_ref && gb_align) {
1624 max_seq_length, alignment, chksum,
1625 gb_toAlign, gb_reference[0], ali_params);
1629 error =
align_error(error, gb_toAlign, gb_reference[0]);
1632 char *used_relatives =
NULp;
1640 if (next_relatives>1) error =
"Island hopping uses only one relative";
1646 int unaligned_positions;
1652 delete alignedSequence;
1656 int toalign_positions = toAlignSequence->
length();
1657 if (unaligned_positions<toalign_positions) {
1662 for (i=1; i<next_relatives && !
error; i++) {
1663 loose.
append(loose_for_next_relative);
1664 int unaligned_positions_for_next = 0;
1674 fa_assert(contradicted(error, toAlignPart));
1681 max_seq_length, alignment, part_chksum,
1682 gb_toAlign, gb_reference[i], loose_ali_params);
1697 fa_assert(unaligned_positions_for_next <= unaligned_positions);
1698 if (unaligned_positions_for_next<unaligned_positions) {
1700 unaligned_positions = unaligned_positions_for_next;
1710 free(used_relatives);
1713 delete alignToSequence;
1716 delete [] gb_reference;
1720 delete toAlignSequence;
1722 for (i=0; i<next_relatives; i++) freenull(nearestRelative[i]);
1723 delete [] nearestRelative;
1738 int max_seq_length_,
1740 : alignment(alignment_),
1741 max_seq_length(max_seq_length_),
1742 ali_params(ali_params_)
1754 #if defined(WARN_TODO)
1755 #warning make alignTo a member of ExplicitReference (or of AlignmentReference)
1756 #warning let alignToGroupConsensus and alignToNextRelative use ExplicitReference
1767 int max_seq_length_,
1770 targetSequence(targetSequence_),
1771 gb_alignTo(gb_alignTo_)
1779 #if defined(WARN_TODO)
1780 #warning make alignToGroupConsensus a member of ConsensusReference
1789 int max_seq_length_,
1792 get_consensus(get_consensus_)
1800 #if defined(WARN_TODO)
1801 #warning make alignToNextRelative a member of SearchRelativesReference
1810 int max_seq_length_,
1815 relSearch(relSearch_),
1816 turnAllowed(turnAllowed_)
1849 int wasNotAllowedToAlign;
1851 bool continue_on_error;
1858 typedef std::list<GBDATA*> GBDATAlist;
1859 GBDATAlist species_to_mark;
1864 ARB_ERROR alignToExplicitReference(
GBDATA *gb_species_data,
int max_seq_length);
1865 ARB_ERROR alignToConsensus(
GBDATA *gb_species_data,
int max_seq_length);
1866 ARB_ERROR alignToRelatives(
GBDATA *gb_species_data,
int max_seq_length);
1868 void triggerAction(
GBDATA *gb_species,
bool has_been_aligned) {
1870 switch (error_action) {
1875 if (mark) species_to_mark.push_back(gb_species);
1880 #if defined(WARN_TODO)
1881 #warning pass AlignmentReference from caller (replacing reference parameters)
1902 bool continue_on_error_,
1904 : gb_main(gb_main_),
1905 alignWhat(alignWhat_),
1906 alignment(alignment_),
1908 get_first_selected_species(get_first_selected_species_),
1909 get_next_selected_species(get_next_selected_species_),
1910 reference(reference_),
1911 get_consensus(get_consensus_),
1912 relSearch(relSearch_),
1913 turnAllowed(turnAllowed_),
1914 ali_params(ali_params_),
1915 maxProtection(maxProtection_),
1916 wasNotAllowedToAlign(0),
1918 continue_on_error(continue_on_error_),
1919 error_action(continue_on_error ? error_action_ :
FA_NO_ACTION)
1929 if (myProtection<=maxProtection) {
1935 if (continue_on_error) {
1942 if (!error) error = ref.
align_to(gb_toalign);
1945 if (error) err_count++;
1946 triggerAction(gb_toalign, !error);
1949 if (continue_on_error) {
1958 wasNotAllowedToAlign++;
1959 triggerAction(gb_toalign,
false);
1970 switch (alignWhat) {
1979 currentSequenceNumber = overallSequenceNumber = 1;
1980 error = alignToReference(gb_toalign, ref);
1988 arb_progress progress(
"Aligning marked species",
long(count));
1989 progress.auto_subtitles(
"Species");
1991 currentSequenceNumber = 1;
1992 overallSequenceNumber = count;
1994 while (gb_species && !error) {
1995 error = alignToReference(gb_species, ref);
1996 progress.inc_and_check_user_abort(error);
2003 GBDATA *gb_species = get_first_selected_species(&count);
2006 currentSequenceNumber = 1;
2007 overallSequenceNumber = count;
2013 arb_progress progress(
"Aligning selected species",
long(count));
2014 progress.auto_subtitles(
"Species");
2016 while (gb_species && !error) {
2017 error = alignToReference(gb_species, ref);
2018 progress.inc_and_check_user_abort(error);
2019 gb_species = get_next_selected_species();
2030 ARB_ERROR Aligner::alignToExplicitReference(
GBDATA *gb_species_data,
int max_seq_length) {
2034 if (!gb_reference) {
2038 long referenceChksum;
2042 #if defined(WARN_TODO)
2043 #warning setting island_hopper reference has to be done in called function (seems that it is NOT done for alignToConsensus and alignToRelatives). First get tests in place!
2057 #if defined(WARN_TODO)
2058 #warning do not pass FastSearchSequence to ExplicitReference, instead pass sequence and length (ExplicitReference shall create it itself)
2062 ExplicitReference target(alignment, &referenceFastSeq, gb_reference, max_seq_length, ali_params);
2064 error = alignTargetsToReference(target, gb_species_data);
2066 delete referenceSeq;
2071 ARB_ERROR Aligner::alignToConsensus(
GBDATA *gb_species_data,
int max_seq_length) {
2072 return alignTargetsToReference(
ConsensusReference(alignment, get_consensus, max_seq_length, ali_params),
2076 ARB_ERROR Aligner::alignToRelatives(
GBDATA *gb_species_data,
int max_seq_length) {
2078 return alignTargetsToReference(
SearchRelativesReference(relSearch, max_seq_length, turnAllowed, alignment, ali_params),
2087 bool search_by_pt_server = !reference && !get_consensus;
2090 wasNotAllowedToAlign = 0;
2091 species_to_mark.clear();
2093 fa_assert(!reference || !get_consensus);
2096 if ((ali_params.
range.
is_part()) || !search_by_pt_server) {
2102 if (!error && !alignment) {
2104 if (!alignment) error =
"No default alignment";
2107 if (!error && alignment) {
2109 if (search_by_pt_server) {
2112 if (pt_server_alignmentType !=
GB_AT_RNA &&
2114 error =
"pt_servers only support RNA/DNA sequences.\n"
2115 "In the aligner window you may specify a RNA/DNA alignment \n"
2116 "and use a pt_server build on that alignment.";
2125 if (reference) error = alignToExplicitReference(gb_species_data, max_seq_length);
2126 else if (get_consensus) error = alignToConsensus(gb_species_data, max_seq_length);
2127 else error = alignToRelatives(gb_species_data, max_seq_length);
2131 unaligned_bases.
clear();
2135 if (wasNotAllowedToAlign>0) {
2136 const char *mess =
GBS_global_string(
"%i species were not aligned (because of protection level)", wasNotAllowedToAlign);
2142 error =
GBS_global_string(
"Aligner produced %i error%c", err_count, err_count==1 ?
'\0' :
's');
2150 for (GBDATAlist::iterator sp = species_to_mark.begin(); sp != species_to_mark.end(); ++sp) {
2154 const char *whatsMarked = (error_action ==
FA_MARK_ALIGNED) ?
"aligned" :
"failed";
2155 size_t markCount = species_to_mark.size();
2160 (markCount == 1) ?
"has" :
"have");
2170 char *reference =
NULp;
2171 char *toalign =
NULp;
2173 int get_consensus = 0;
2174 int pt_server_id = -1;
2189 error =
"Warning: No HELIX found. Can't use secondary structure";
2217 switch (alignWhat) {
2246 error =
"Can't get group consensus here.";
2252 if (pt_server_id<0) {
2253 error =
"No pt_server selected";
2263 bool autoRestrictRange4nextRelSearch =
true;
2268 autoRestrictRange4nextRelSearch =
false;
2282 error =
"There is no selected species!";
2303 :
GBS_global_string(
"SAI '%s' has no data in alignment '%s'", sai_name, aliuse);
2327 char *editor_alignment =
NULp;
2328 long alignment_length;
2336 free(default_alignment);
2343 if (autoRestrictRange4nextRelSearch) {
2347 int region_plus = atoi(relrange);
2368 pt_server_alignment,
2389 get_first_selected_species,
2390 get_next_selected_species,
2401 error = aligner.run();
2403 if (error && cont_on_error) {
2409 free(pt_server_alignment);
2410 free(editor_alignment);
2419 if (toalign) free(toalign);
2495 AW_window_simple *aws =
new AW_window_simple;
2497 aws->init(root,
"ISLAND_HOPPING_PARA",
"Parameters for Island Hopping");
2498 aws->
load_xfig(
"faligner/islandhopping.fig");
2502 aws->create_button(
"CLOSE",
"CLOSE",
"O");
2506 aws->create_button(
"HELP",
"HELP");
2508 aws->at(
"use_secondary");
2509 aws->label(
"Use secondary structure (only for re-align)");
2514 aws->insert_default_toggle(
"Estimate",
"E", 1);
2515 aws->insert_toggle(
"Define here: ",
"D", 0);
2516 aws->update_toggle_field();
2523 int xpos[4], ypos[4];
2525 aws->button_length(1);
2528 aws->at(
"h_a"); aws->get_at_position(&xpos[0], &dummy); aws->create_button(
NULp,
"A");
2529 aws->at(
"h_c"); aws->get_at_position(&xpos[1], &dummy); aws->create_button(
NULp,
"C");
2530 aws->at(
"h_g"); aws->get_at_position(&xpos[2], &dummy); aws->create_button(
NULp,
"G");
2531 aws->at(
"h_t"); aws->get_at_position(&xpos[3], &dummy); aws->create_button(
NULp,
"T");
2533 aws->at(
"v_a"); aws->get_at_position(&dummy, &ypos[0]); aws->create_button(
NULp,
"A");
2534 aws->at(
"v_c"); aws->get_at_position(&dummy, &ypos[1]); aws->create_button(
NULp,
"C");
2535 aws->at(
"v_g"); aws->get_at_position(&dummy, &ypos[2]); aws->create_button(
NULp,
"G");
2536 aws->at(
"v_t"); aws->get_at_position(&dummy, &ypos[3]); aws->create_button(
NULp,
"T");
2539 aws->at(
"subst"); aws->create_button(
NULp,
"Substitution rate parameters:");
2554 aws->label_length(22);
2557 aws->label(
"Expected distance");
2561 aws->label(
"Structure supplement");
2565 aws->label(
"Threshold");
2568 aws->label_length(10);
2571 aws->label(
"Gap A");
2575 aws->label(
"Gap B");
2579 aws->label(
"Gap C");
2586 static AW_window_simple *aws =
NULp;
2589 aws =
new AW_window_simple;
2591 aws->init(root,
"FAMILY_PARAMS",
"Family search parameters");
2592 aws->load_xfig(
"faligner/family_settings.fig");
2596 aws->create_button(
"CLOSE",
"CLOSE",
"O");
2600 aws->create_button(
"HELP",
"HELP");
2602 aws->auto_space(5, 5);
2650 AW_window_simple *aws =
new AW_window_simple;
2653 aws->
load_xfig(
"faligner/faligner.fig");
2655 aws->label_length(10);
2656 aws->button_length(10);
2660 aws->create_button(
"CLOSE",
"CLOSE",
"O");
2664 aws->create_button(
"HELP",
"HELP");
2668 aws->insert_default_toggle(
"Fast aligner",
"F", 0);
2670 aws->insert_toggle (
"Island Hopping",
"I", 1);
2672 aws->update_toggle_field();
2674 aws->button_length(12);
2675 aws->at(
"island_para");
2678 aws->create_button(
"island_para",
"Parameters",
"");
2681 aws->button_length(10);
2683 aws->at(
"rev_compl");
2685 aws->create_button(
"reverse_complement",
"Turn now!",
"");
2689 aws->insert_toggle (
"Current Species:",
"A",
FA_CURRENT);
2690 aws->insert_default_toggle(
"Marked Species",
"M",
FA_MARKED);
2691 aws->insert_toggle (
"Selected Species",
"S",
FA_SELECTED);
2692 aws->update_toggle_field();
2701 aws->insert_default_toggle(
"Auto search by pt_server:",
"A",
FA_REF_RELATIVES);
2702 aws->update_toggle_field();
2704 aws->at(
"sagainst");
2709 aws->create_button(
"Copy",
"Copy",
"");
2711 aws->label_length(0);
2712 aws->at(
"pt_server");
2715 aws->label_length(23);
2716 aws->at(
"relrange");
2717 aws->label(
"Data from range only, plus");
2720 aws->at(
"relatives");
2721 aws->label(
"Number of relatives to use");
2724 aws->label_length(9);
2726 aws->label(
"Alignment");
2731 aws->create_autosize_button(
"Settings",
"More settings",
"");
2735 aws->label_length(10);
2742 aws->update_toggle_field();
2755 aws->at(
"protection");
2756 aws->label(
"Protection");
2758 aws->insert_default_option(
"0",
NULp, 0);
2759 aws->insert_option (
"1",
NULp, 1);
2760 aws->insert_option (
"2",
NULp, 2);
2761 aws->insert_option (
"3",
NULp, 3);
2762 aws->insert_option (
"4",
NULp, 4);
2763 aws->insert_option (
"5",
NULp, 5);
2764 aws->insert_option (
"6",
NULp, 6);
2765 aws->update_option_menu();
2770 aws->label(
"Turn check");
2772 aws->insert_option (
"Never turn sequence",
"",
FA_TURN_NEVER);
2774 aws->insert_option (
"Automatically turn sequence",
"",
FA_TURN_ALWAYS);
2775 aws->update_option_menu();
2780 aws->label(
"Report");
2784 aws->insert_default_option(
"Report to temporary entries",
"",
FA_TEMP_REPORT);
2785 aws->insert_option (
"Report to resident entries",
"",
FA_REPORT);
2787 aws->update_option_menu();
2792 aws->at(
"continue");
2795 aws->at(
"on_failure");
2796 aws->label(
"On failure");
2798 aws->insert_default_option(
"do nothing",
"",
FA_NO_ACTION);
2801 aws->update_option_menu();
2806 aws->create_button(
"GO",
"GO",
"G");
// Maps an oligo (the string key) to a size_t counter;
// count_oligos() increments the entry of every oligo it cuts out of a sequence.
2829 typedef map<string, size_t> OligoCount;
2831 class OligoCounter {
2835 mutable OligoCount occurrence;
2837 static string removeGaps(
const char *seq) {
2838 size_t len = strlen(seq);
2840 nogaps.reserve(len);
2842 for (
size_t p = 0; p<len; ++p) {
2844 if (!
is_gap(c)) nogaps.append(1, c);
2849 void count_oligos(
const string& seq) {
2851 size_t max_pos = seq.length()-oligo_len;
2852 for (
size_t p = 0; p <= max_pos; ++p) {
2853 string oligo(seq, p, oligo_len);
2854 occurrence[oligo]++;
2863 OligoCounter(
const char *seq,
size_t oligo_len_)
2864 : oligo_len(oligo_len_)
2866 string seq_nogaps = removeGaps(seq);
2867 datasize = seq_nogaps.length();
2868 count_oligos(seq_nogaps);
2871 size_t oligo_count(
const char *oligo) {
2873 return occurrence[oligo];
2876 size_t similarity_score(
const OligoCounter& other)
const {
2878 if (oligo_len == other.oligo_len) {
2879 for (OligoCount::const_iterator o = occurrence.begin(); o != occurrence.end(); ++o) {
2880 const string& oligo = o->first;
2881 size_t count = o->second;
2883 score +=
min(count, other.occurrence[oligo]);
// Returns the stored 'datasize'.
// The (const char *seq, size_t oligo_len_) constructor sets it to the length of 'seq'
// after removeGaps(), i.e. the number of characters that are not gaps.
2889 size_t getDataSize()
const {
return datasize; }
2892 void TEST_OligoCounter() {
2893 OligoCounter oc1(
"CCAGGT", 3);
2894 OligoCounter oc2(
"GGTCCA", 3);
2895 OligoCounter oc2_gaps(
"..GGT--CCA..", 3);
2896 OligoCounter oc3(
"AGGTCC", 3);
2897 OligoCounter oc4(
"AGGTCCAGG", 3);
2903 int sc1_2 = oc1.similarity_score(oc2);
2904 int sc2_1 = oc2.similarity_score(oc1);
2907 int sc1_2gaps = oc1.similarity_score(oc2_gaps);
2910 int sc1_3 = oc1.similarity_score(oc3);
2911 int sc2_3 = oc2.similarity_score(oc3);
2912 int sc3_4 = oc3.similarity_score(oc4);
2929 map<string, OligoCounter> oligos_counted;
2934 FakeFamilyFinder(
GBDATA *gb_main_,
string ali_name_,
bool rel_matches_,
size_t oligo_len_)
2937 ali_name(ali_name_),
2938 counted_for_range(
PosRange::whole()),
2939 oligo_len(oligo_len_)
2949 OligoCounter seq_oligo_count(sequence, oligo_len);
2951 if (range != counted_for_range) {
2952 oligos_counted.clear();
2953 counted_for_range =
range;
2956 char *buffer =
NULp;
2959 bool partial_match = range.is_part();
2965 gb_species && results<max_results;
2969 if (oligos_counted.find(name) == oligos_counted.end()) {
2973 if (partial_match) {
2977 if (buffersize<range_len) {
2979 buffersize = range_len;
2980 buffer =
new char[buffersize+1];
2983 range.copy_corresponding_part(buffer, spec_seq, spec_seq_len);
2984 oligos_counted[name] = OligoCounter(buffer, oligo_len);
2987 oligos_counted[name] = OligoCounter(spec_seq, oligo_len);
2991 const OligoCounter& spec_oligo_count = oligos_counted[name];
2992 size_t score = seq_oligo_count.similarity_score(spec_oligo_count);
2994 if (score>=min_score) {
2997 newMember->
name = strdup(name.c_str());
2999 newMember->
rel_matches = score/spec_oligo_count.getDataSize();
3018 static const char *test_aliname =
"ali_test";
3020 static const char *get_aligned_data_of(
GBDATA *gb_main,
const char *species_name) {
3023 const char *data =
NULp;
3041 static const char *get_used_rels_for(
GBDATA *gb_main,
const char *species_name) {
3045 if (!gb_species) result =
GBS_global_string(
"<No such species '%s'>", species_name);
3048 if (!gb_used_rels) result =
"<No such field 'used_rels'>";
3058 gb_species && !
error;
// Shorthands for get_aligned_data_of() / get_used_rels_for():
// both pass 'name' together with a variable called 'gb_main', which has to be in scope
// wherever the macro is expanded.
3070 #define ALIGNED_DATA_OF(name) get_aligned_data_of(gb_main, name)
3071 #define USED_RELS_FOR(name) get_used_rels_for(gb_main, name)
3075 static GBDATA *selection_fake_gb_main =
NULp;
3076 static GBDATA *selection_fake_gb_last =
NULp;
3078 static GBDATA *fake_first_selected(
int *count) {
3079 selection_fake_gb_last =
NULp;
3083 static GBDATA *fake_next_selected() {
3084 if (!selection_fake_gb_last) {
3088 selection_fake_gb_last =
NULp;
3090 return selection_fake_gb_last;
3093 static char *fake_get_consensus(
const char*,
PosRange range) {
3094 const char *data = get_aligned_data_of(selection_fake_gb_main,
"s1");
3095 if (range.
is_whole())
return strdup(data);
3099 static void test_install_fakes(
GBDATA *gb_main) {
3100 selection_fake_gb_main =
gb_main;
3120 static struct arb_unit_test::test_alignment_data TestAlignmentData_TargetAndReferenceHandling[] = {
3121 { 0,
"s1",
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........." },
3122 { 0,
"s2",
"AUCUCCUAAACCCAACCGUAGUUCGAAUUGAGGACUGUAACUC......................................................" },
3123 { 1,
"m1",
"UAGAGGAUUUGGGUUGGCAUCAAGCUUAACUCCUGACAUUGAG......................................................" },
3124 { 1,
"m2",
"...UCCUAAACCAACCCGUAGUUCGAAUUGAGGACUGUAA........................................................." },
3125 { 1,
"m3",
"AUC---UAAACCAACCCGUAGUUCGAAUUGAGGACUG---CUC......................................................" },
3126 { 0,
"c1",
"AUCUCCUAAACCCAACC--------AAUUGAGGACUGUAACUC......................................................" },
3127 { 0,
"c2",
"AUCUCCU------AACCGUAGUUCCCCGAA------ACUGUAACUC..................................................." },
3128 { 0,
"r1",
"GAGUUACAGUCCUCAAUUCGGGGAACUACGGUUGGGUUUAGGAGAU..................................................." },
3131 void TEST_Aligner_TargetAndReferenceHandling() {
3136 GBDATA *gb_main = TEST_CREATE_DB(error, test_aliname, TestAlignmentData_TargetAndReferenceHandling,
false);
3140 SearchRelativeParams search_relative_params(
new FakeFamilyFinder(gb_main, test_aliname,
false, 8),
3144 test_install_fakes(gb_main);
3148 bool cont_on_err =
false;
3160 search_relative_params,
3166 error = aligner.run();
3179 search_relative_params,
3185 error = aligner.run();
3195 fake_first_selected,
3199 search_relative_params,
3205 error = aligner.run();
3219 search_relative_params,
3226 error = aligner.run();
3232 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s2"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3234 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m1"),
".......UAG--AGG-A------U-U-UGGGU-UG-G-C-A-U-CAA-GCU--------UAA-C-UCCUG-AC--A-UUGAG...............");
3235 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m2"),
"..............U-C------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA................");
3236 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m3"),
".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3238 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3239 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c2"),
".........A--UCU-C------C-U-AA---------C-C-G-UAG-UUC------------C-CCGAA-AC--U-GUAA-CU-C...........");
3241 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"r1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUCCCC-----GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3249 search_relative_params.maxRelatives = 5;
3252 int species_count =
ARRAY_ELEMS(TestAlignmentData_TargetAndReferenceHandling);
3253 for (
int sp = 0; sp<species_count; ++sp) {
3254 const char *name = TestAlignmentData_TargetAndReferenceHandling[sp].name;
3255 if (strcmp(name,
"r1") != 0) {
3264 search_relative_params,
3271 error = aligner.run();
3288 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3289 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s2"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3291 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m1"),
".........U--AGA-G------G---AUUUG-GG-U-U-G-G-CAU-CAAGCU-----UAA-C-UCCUG-AC--A-UUGAG---------------");
3293 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m2"),
".........U--C----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----UA-A...........");
3294 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m3"),
".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3295 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3296 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c2"),
".........A--UCU-C------C-U--------A-A-C-C-G-UAG-UUCCCC-----GA--------A-AC--U-GUAA-CU-C...........");
3301 search_relative_params.getFamilyFinder()->restrict_2_region(test_ali_params_partial.
range);
3304 for (
int sp = 0; sp<species_count; ++sp) {
3305 const char *name = TestAlignmentData_TargetAndReferenceHandling[sp].name;
3314 search_relative_params,
3316 test_ali_params_partial,
3321 error = aligner.run();
3338 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3339 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"s2"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3341 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m1"),
".........U--AGA-G------G-A-UU-UG-GG-U-U-G-G-CAU-CAAGCU-----UAA-C-UCCUG-AC--A-UUGAG---------------");
3342 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m2"),
".........U--C----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----UA-A...........");
3343 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"m3"),
".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3345 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3346 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"c2"),
".........A--UCU-C------C---------UA-A-C-C-G-UAG-UUCCCC-----GA--------A-AC--U-GUAA-CU-C...........");
3348 TEST_EXPECT_EQUAL(ALIGNED_DATA_OF(
"r1"),
".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUCCCC-----GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3355 static struct arb_unit_test::test_alignment_data TestAlignmentData_checksumError[] = {
3356 { 0,
"MtnK1722",
"...G-GGC-C-G............CCC-GG--------CAAUGGGGGCGGCCCGGCGGAC----GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCC....." },
3357 { 0,
"MhnFormi",
"...A-CGA-U-C------------CUUCGG--------GGUCG-U-GG-C-GU-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCCU...." },
3358 { 0,
"MhnT1916",
"...A-CGA-A-C------------CUU-GU--------GUUCG-U-GG-C-GA-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCCU...." },
3359 { 0,
"MthVanni",
"...U-GGU-U-U------------C-------------GGCCA-U-GG-C-GG-A--C------GG--C-UCAUU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCC....." },
3360 { 0,
"ThcCeler",
"...G-GGG-C-G...CC-U---U--------GC--G--CGCAC-C-GG-C-GG-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUA-CGGC-UCGAUCACCUCCU...." },
3363 void TEST_SLOW_Aligner_checksumError() {
3371 GBDATA *gb_main = TEST_CREATE_DB(error, test_aliname, TestAlignmentData_checksumError,
false);
3373 SearchRelativeParams search_relative_params(
new FakeFamilyFinder(gb_main, test_aliname,
false, 8),
3377 test_install_fakes(gb_main);
3380 bool cont_on_err =
true;
3390 search_relative_params,
3397 error = aligner.run();
3412 const char *result =
"";
3421 void TEST_BASIC_UnalignedBases() {
3465 " 3/18 8/15 0/6 3/11 8/11 10/15 10/17",
3466 " 3/18 8/17 0/6 3/11 8/13 10/15 10/18");
3470 " 1/7 3/5 0/1 1/3 3/3 4/5 4/6",
3471 " 1/7 3/7 0/2 1/4 3/5 4/6 4/7");
3473 " 1/7 3/6 0/1 1/3 3/4 4/5 4/7",
3474 " 1/7 3/7 0/2 1/4 3/5 4/6 4/7");
3479 #endif // UNIT_TESTS
GB_ERROR GB_begin_transaction(GBDATA *gbd)
#define FA_AWAR_SUBST_PARA_CT
void AWTC_create_common_next_neighbour_fields(AW_window *aws, int scaler_length)
void delete_family_list()
const CompactedSubSequence & sequence() const
int expdPosition(int cPos) const
void restoreDots(CompactedSubSequence &slaveSequence)
void FastAligner_create_variables(AW_root *root, AW_default db1)
static LooseBases unaligned_bases
char alignQuality(char slave, char master)
void GB_warning(const char *message)
SearchRelativeParams(FamilyFinder *ff_, const char *pt_server_alignment_, int maxRelatives_)
long expdPosition() const
static int currentSequenceNumber
const FastAlignInsertion * insertion() const
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
#define FA_AWAR_ESTIMATE_BASE_FREQ
virtual ARB_ERROR align_to(GBDATA *gb_toalign) const =0
void count_aligned_base(int mismatched)
#define FA_AWAR_RELATIVE_RANGE
ARB_ERROR fast_align(const CompactedSubSequence &align_to, AlignBuffer *alignBuffer, int max_seq_length, int matchScore, int mismatchScore, FastAlignReport *report) const
GBDATA * GBT_first_marked_species_rel_species_data(GBDATA *gb_species_data)
bool may_refer_to_same_part_as(const CompactedSubSequence &other) const
static ARB_ERROR alignToGroupConsensus(GBDATA *gb_toAlign, GB_CSTR alignment, Aligner_get_consensus_func get_consensus, int max_seq_length, const AlignParams &ali_params)
return string(buffer, length)
ARB_ERROR species_not_found(GB_CSTR species_name)
#define FA_AWAR_EXPECTED_DISTANCE
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
void load_xfig(const char *file, bool resize=true)
virtual GB_ERROR searchFamily(const char *sequence, FF_complement compl_mode, int max_results, double min_score)=0
FamilyFinder * getFamilyFinder()
PosRange intersection(PosRange r1, PosRange r2)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
#define FA_AWAR_BASE_FREQ_A
int relatedBases(char base1, char base2)
void FastAligner_set_align_current(AW_root *root, AW_default db1)
Aligner_get_first_selected_species get_first_selected_species
#define FA_AWAR_SUBST_PARA_AG
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
GB_CSTR get_alignment() const
void insertGap(AlignBuffer *alignBuffer, SequencePosition &master, FastAlignReport *report)
const char * text() const
#define INTEGRATED_ALIGNERS_TITLE
range_set::const_iterator iterator
void AWT_insert_config_manager(AW_window *aww, AW_default default_file_, const char *id, const StoreConfigCallback &store_cb, const RestoreConfigCallback &load_or_reset_cb, const char *macro_id, const AWT_predefined_config *predef)
#define AWAR_DEFAULT_ALIGNMENT
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
static GBDATA * get_next_selected_species()
AW_awar * set_minmax(float min, float max)
#define FA_AWAR_SHOW_GAPS_MESSAGES
#define FA_AWAR_BASE_FREQ_T
const char * GBS_global_string(const char *templat,...)
void warning(int warning_num, const char *warning_message)
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
void AW_POPDOWN(AW_window *window)
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
char * ARB_strpartdup(const char *start, const char *end)
Aligner_get_next_selected_species get_next_selected_species
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
void add(const PosRange &range)
void awt_create_SAI_selection_button(GBDATA *gb_main, AW_window *aws, const char *varname, const SaiSelectionlistFilterCallback &fcb)
static void build_reverse_complement(AW_window *aw, const AlignDataAccess *data_access)
#define FA_AWAR_REFERENCE
static CompactedSubSequence * readCompactedSequence(GBDATA *gb_species, const char *ali, ARB_ERROR *errorPtr, char **dataPtr, long *seqChksum, PosRange range)
static int overallSequenceNumber
const char * name() const
#define ARRAY_ELEMS(array)
#define FA_AWAR_CONTINUE_ON_ERROR
char buffer[MESSAGE_BUFFERSIZE]
static ARB_ERROR insertClustalValigned(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, const char *masterAlignment, const char *slaveAlignment, long alignmentLength, FastAlignReport *report)
GB_ERROR GB_push_transaction(GBDATA *gbd)
ARB_ERROR bufferTooSmall()
#define AWAR_NN_REL_SCALING
GB_ERROR GB_delete(GBDATA *&source)
#define AWAR_NN_FAST_MODE
int follow_ali_change(const AliChange &change)
const char * read_char_pntr() const
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
char *(* Aligner_get_consensus_func)(const char *species_name, PosRange range)
size_t GB_read_string_count(GBDATA *gbd)
GB_ERROR GB_await_error()
WindowCallback makeHelpCallback(const char *helpfile)
#define TEST_EXPECT(cond)
long GB_read_count(GBDATA *gbd)
static GB_alignment_type global_alignmentType
void FastAligner_set_reference_species(AW_root *root)
TYPE * ARB_alloc(size_t nelem)
NOT4PERL void GBT_reverseComplementNucSequence(char *seq, long length, char T_or_U)
static AW_window * create_family_settings_window(AW_root *root)
static ARB_ERROR alignCompactedTo(CompactedSubSequence *toAlignSequence, const FastSearchSequence *alignTo, int max_seq_length, GB_CSTR alignment, long toAlignChksum, GBDATA *gb_toAlign, GBDATA *gb_alignTo, const AlignParams &ali_params)
void restrict_2_region(const PosRange &range_)
AW_awar * awar_float(const char *var_name, float default_value=0.0, AW_default default_file=AW_ROOT_DEFAULT)
#define FA_AWAR_SUBST_PARA_AT
static const char * read_name(GBDATA *gbd)
int GB_read_security_write(GBDATA *gbd)
#define FA_AWAR_PROTECTION
#define TEST_EXPECT_EQUAL__BROKEN(expr, want, got)
#define FA_AWAR_ACTION_ON_ERROR
int no_of_gaps_after(int cPos) const
const int * gapsBefore(int offset=0) const
void append(LooseBases &loose)
void FastAligner_start(AW_window *aw, const AlignDataAccess *data_access)
void copy(const char *s, char q, long len)
bool GB_is_ancestor_of(GBDATA *gb_ancestor, GBDATA *gb_descendant)
ExplicitReference(GB_CSTR alignment_, const FastSearchSequence *targetSequence_, GBDATA *gb_alignTo_, int max_seq_length_, const AlignParams &ali_params_)
void message(char *errortext)
int get_max_seq_length() const
#define TEST_REJECT(cond)
void(* refresh_display)()
GBDATA *(* Aligner_get_first_selected_species)(int *total_no_of_selected_species)
AliChange(const CompactedSubSequence &old_, const CompactedSubSequence &new_)
static void error(const char *msg)
GBDATA * GB_get_root(GBDATA *gbd)
bool uses_rel_matches() const
void insertBase(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, FastAlignReport *report)
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
#define FA_AWAR_PT_SERVER_ALIGNMENT
#define CAN_SCORE_RIGHT()
ARB_ERROR ClustalV_align(int is_dna, int weighted, const char *seq1, int length1, const char *seq2, int length2, const int *gapsBefore1, int max_seq_length, const char *&res1, const char *&res2, int &reslen, int &score)
int follow_ali_change_and_append(LooseBases &loose, const AliChange &change)
const char * text() const
GB_alignment_type GBT_get_alignment_type(GBDATA *gb_main, const char *aliname)
#define AWAR_SPECIES_NAME
void correctUnalignedPositions()
char * getHelixString() const
Aligner_get_selected_range get_selected_range
void AWTC_create_common_next_neighbour_vars(AW_root *aw_root, const RootCallback &awar_changed_cb)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
static AW_window * create_island_hopping_window(AW_root *root)
char * read_string() const
static WindowCallback simple(void(*root_cb)(AW_root *, T), T t)
bool ARB_in_novice_mode(AW_root *awr)
AW_awar * awar(const char *awar)
GB_ERROR GB_pop_transaction(GBDATA *gbd)
#define FA_AWAR_USE_ISLAND_HOPPING
#define FA_AWAR_STRUCTURE_SUPPLEMENT
int no_of_gaps_before(int cPos) const
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
FamilyList * insertSortedBy_matches(FamilyList *other)
int baseMatch(char c1, char c2)
static ARB_ERROR alignTo(GBDATA *gb_toAlign, GB_CSTR alignment, const FastSearchSequence *alignTo, GBDATA *gb_alignTo, int max_seq_length, const AlignParams &ali_params)
void awt_create_PTSERVER_selection_button(AW_window *aws, const char *varname)
void count_unaligned_base(int no_of_bases)
long GBT_count_marked_species(GBDATA *gb_main)
#define TEST_EXPECT_ZERO(cond)
void memorize(ExplicitRange range)
GB_ERROR write_as_string(const char *aw_string)
static ARB_ERROR cannot_fast_align(const CompactedSubSequence &master, long moffset, long mlength, const CompactedSubSequence &slaveSequence, long soffset, long slength, int max_seq_length, AlignBuffer *alignBuffer, FastAlignReport *report)
GB_ERROR GB_set_temporary(GBDATA *gbd) __ATTR__USERESULT
#define AWAR_EDITOR_ALIGNMENT
static void appendNameAndUsedBasePositions(char **toString, GBDATA *gb_species, int usedBasePositions)
int aw_question(const char *unique_id, const char *question, const char *buttons, bool sameSizeButtons, const char *helpfile)
static AWT_config_mapping_def aligner_config_mapping[]
#define FA_AWAR_SAI_RANGE_NAME
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
#define TEST_EXPECT_NULL__BROKEN(n, got)
double partSignificance(long seq1len, long seq2len, long partlen)
#define TEST_EXPECT_NULL(n)
void GB_write_flag(GBDATA *gbd, long flag)
SearchRelativesReference(SearchRelativeParams &relSearch_, int max_seq_length_, FA_turn turnAllowed_, GB_CSTR alignment_, const AlignParams &ali_params_)
AW_window * FastAligner_create_window(AW_root *root, const AlignDataAccess *data_access)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
#define FA_AWAR_SUBST_PARA_AC
#define FA_AWAR_REFERENCE_NAME
const char * text() const
virtual ~AlignmentReference()
long insertsToNextBase(AlignBuffer *alignBuffer, const SequencePosition &master)
ARB_ERROR FastAligner_delete_temp_entries(GBDATA *gb_species, const char *alignment)
#define FA_AWAR_BASE_FREQ_G
static IslandHopping * island_hopper
fa_assert(chars< MESSAGE_BUFFERSIZE)
static ARB_ERROR align_error(ARB_ERROR olderr, GBDATA *gb_toAlign, GBDATA *gb_alignTo)
static ARB_ERROR insertAligned(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, long partLength, FastAlignReport *report)
Aligner(GBDATA *gb_main_, FA_alignTarget alignWhat_, GB_CSTR alignment_, GB_CSTR toalign_, Aligner_get_first_selected_species get_first_selected_species_, Aligner_get_next_selected_species get_next_selected_species_, GB_CSTR reference_, Aligner_get_consensus_func get_consensus_, SearchRelativeParams &relSearch_, FA_turn turnAllowed_, const AlignParams &ali_params_, int maxProtection_, bool continue_on_error_, FA_errorAction error_action_)
char * GB_read_string(GBDATA *gbd)
#define AWAR_CURSOR_POSITION_LOCAL
static ARB_ERROR reverseComplement(GBDATA *gb_species, GB_CSTR ali, int max_protection)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
const FastAlignInsertion * next() const
GBDATA *(* Aligner_get_next_selected_species)(void)
GBDATA * GBT_first_species(GBDATA *gb_main)
static ARB_ERROR writeStringToAlignment(GBDATA *gb_species, GB_CSTR alignment, GB_CSTR data_name, GB_CSTR str, bool temporary)
RangeList build_RangeList_from_string(const char *SAI_data, const char *set_bytes, bool invert)
const FamilyList * getFamilyList() const
int follow(ExplicitRange &range) const
#define TEST_EXPECT_NO_ERROR(call)
void aw_message(const char *msg)
void insertSlaveBases(AlignBuffer *alignBuffer, SequencePosition &slave, int length, FastAlignReport *report)
const PosRange & get_TargetRange() const
GBDATA * GBT_next_species(GBDATA *gb_species)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Aligner_get_consensus_func get_group_consensus
static GBDATA * get_first_selected_species(int *total_no_of_selected_species)
AlignmentReference(GB_CSTR alignment_, int max_seq_length_, const AlignParams &ali_params_)
#define FA_AWAR_THRESHOLD
GB_ERROR write_string(const char *aw_string)
char * GBT_get_default_alignment(GBDATA *gb_main)
const char * GBT_get_name(GBDATA *gb_item)
const CompactedSubSequence & sequence() const
#define AWAR_NN_MISMATCHES
int GB_get_transaction_level(GBDATA *gbd)
void GBT_mark_all(GBDATA *gb_main, int flag)
static ARB_ERROR alignToNextRelative(SearchRelativeParams &relSearch, int max_seq_length, FA_turn turnAllowed, GB_CSTR alignment, GBDATA *gb_toAlign, const AlignParams &ali_params)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
#define FA_AWAR_SAI_RANGE_CHARS
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
void memorize_insertion(long offset, long gaps)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
#define FA_AWAR_USE_SECONDARY
char * pt_server_alignment
long calcSequenceChecksum(const char *data, long length)
#define FA_AWAR_NEXT_RELATIVES
int compPosition(int xPos) const
#define FA_AWAR_SUBST_PARA_CG
#define TEST_EXPECT_EQUAL(expr, want)
GB_ERROR write_int(long aw_int)
const AlignParams & get_ali_params() const
ConsensusReference(GB_CSTR alignment_, Aligner_get_consensus_func get_consensus_, int max_seq_length_, const AlignParams &ali_params_)
void aw_message_if(GB_ERROR error)
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
const char * quality() const
NOT4PERL GB_ERROR GBT_determine_T_or_U(GB_alignment_type alignment_type, char *T_or_U, const char *supposed_target)
#define FA_AWAR_BASE_FREQ_C
#define AWAR_NN_OLIGO_LEN
void setDotsAtEOSequence()
void copy_corresponding_part(char *dest, const char *source, size_t source_len) const
#define MIN_ALIGNMENT_RANGE
#define AWAR_NN_REL_MATCHES
#define FA_AWAR_SUBST_PARA_GT
GBDATA * GBT_get_species_data(GBDATA *gb_main)
GB_write_int const char s