// Capacity used for message formatting: a `char buffer[MESSAGE_BUFFERSIZE]` is declared with it
// and an fa_assert checks `chars < MESSAGE_BUFFERSIZE` (presumably both inside messagef() — confirm).
19 #define MESSAGE_BUFFERSIZE 300
22 inline int check_equal(
int o,
int n) {
24 fprintf(stderr,
"o=%i\nn=%i\n", o, n);
46 if (Next) Next->
append(neu);
64 int dotOffset = firstBase + strcspn(Text+firstBase,
".");
66 while (dotOffset <= lastBase) {
69 dotOffset += strspn(Text+dotOffset,
".");
70 dotOffset += strcspn(Text+dotOffset,
".");
75 myText =
new char[cLen+1];
78 gapsBeforePosition =
new int[cLen+1];
80 for (
int cPos = 0; cPos<cLen; ++cPos) {
// toupper() requires a value representable as unsigned char (or EOF); plain char may be
// negative, so convert before the call to avoid undefined behavior on non-ASCII input.
82 myText[cPos] = toupper((unsigned char)Text[xPos]);
89 myStartOffset += start_offset;
94 delete[] gapsBeforePosition;
// delete on a null pointer is a no-op, so no guard is needed.
100 delete myNext;
104 memset((
char*)myOffset, 0,
MAX_TRIPLES*
sizeof(*myOffset));
110 int tidx = triple_index(triple.
text());
113 myOffset[tidx] = top;
120 delete myOffset[tidx];
130 char *found = (
char*)memchr(qual+off,
'?', rest);
132 if (!found || (found-qual) >= rest)
break;
135 for (cnt=0; found[cnt]==
'?'; cnt++) found[cnt] =
'+';
137 long from = found-myQuality;
139 long a_off = from+cnt;
142 for (before=0; b_off>=0 && isGlobalGap(b_off); before++, b_off--) ;
143 for (after=0; a_off<used && isGlobalGap(a_off); after++, a_off++) ;
145 if (b_off<=0) before = LONG_MAX;
146 if (a_off>=used) after = LONG_MAX;
151 while (cnt--) moveUnaligned(from++, to++);
154 else if (after!=LONG_MAX) {
157 long to = from+
after;
159 while (cnt--) moveUnaligned(from--, to--);
174 while (nextDot!=-1 && rest) {
175 unsigned char gap = myBuffer[off];
177 if (nextDot==count) {
183 if (nextDot==count) {
184 messagef(
"Couldn't restore dots at offset %li of '%s' (gap removed by aligner)",
185 off, slaveSequence.
name());
198 for (
int i=0;
is_gap(myBuffer[i]) && i<used; ++i) myBuffer[i] =
'.';
199 for (
int i=used-1;
is_gap(myBuffer[i]) && i>=0; --i) myBuffer[i] =
'.';
231 int operator()(
int i)
const {
235 case 2:
return css[i];
238 case 5:
return get_dotpos(css, i);
245 #define TEST_EXPECT_CSS_SELF_REFLEXIVE(css) do { \
246 for (int b = 0; b<css.length(); ++b) { \
247 int x = css.expdPosition(b); \
248 int c = css.compPosition(x); \
249 TEST_EXPECT_EQUAL(c,b); \
// CSS_COMMON(in,offset): shared preamble of the sequence tests.
// Declares `len` (strlen of `in`), echoes `in` to stderr, constructs
// `CompactedSubSequence css(in, len, "noname", offset)`, runs
// TEST_EXPECT_CSS_SELF_REFLEXIVE on it and declares `bind_css bound_css(css)`.
// The last statement has no trailing semicolon; the user of the macro supplies it.
// `in` is expanded three times, so pass an expression without side effects.
253 #define CSS_COMMON(in,offset) \
254 int len = strlen(in); \
255 fprintf(stderr, "in='%s'\n", in); \
256 CompactedSubSequence css(in, len, "noname", offset); \
257 TEST_EXPECT_CSS_SELF_REFLEXIVE(css); \
258 bind_css bound_css(css)
// GEN_COMP_EXPD(): needs `css` and `bound_css` in scope (CSS_COMMON declares both).
// Sets bound_css.test_mode to 0 and stores the collectIntFunResults() output over
// 0 .. css.expdLength()-1 in `comp`; then sets test_mode to 1 and stores the output
// over 0 .. css.length() in `expd`.
// NOTE(review): the switch cases for modes 0 and 1 are not visible here — presumably they map
// to compPosition()/expdPosition(); confirm. `comp`/`expd` are raw char* results, presumably
// released by FREE_COMP_EXPD() — confirm ownership.
260 #define GEN_COMP_EXPD() \
261 bound_css.test_mode = 0; \
262 char *comp = collectIntFunResults(bound_css, 0, css.expdLength()-1, 3, 0, 1); \
263 bound_css.test_mode = 1; \
264 char *expd = collectIntFunResults(bound_css, 0, css.length(), 3, 0, 0)
// GEN_TEXT(in): needs `css` and `bound_css` in scope. Sets bound_css.test_mode to 2 and stores
// the collectIntFunResults() output over 0 .. css.length()-1 in `text`.
// The visible `case 2` of operator() returns css[i] (assuming that switch is on test_mode — confirm).
// The macro argument `in` is not referenced by the expansion.
266 #define GEN_TEXT(in) \
267 bound_css.test_mode = 2; \
268 char *text = collectIntFunResults(bound_css, 0, css.length()-1, 3, 0, 0)
271 bound_css.test_mode = 3; \
272 char *gaps_before = collectIntFunResults(bound_css, 0, css.length(), 3, 0, 0); \
273 bound_css.test_mode = 4; \
274 char *gaps_after = collectIntFunResults(bound_css, 0, css.length()-1, 3, 0, 1)
// GEN_DOTS(in): needs `css` and `bound_css` in scope. Sets bound_css.test_mode to 5 and stores
// the collectIntFunResults() output over 0 .. count_dotpos(css)-1 in `dots`.
// The visible `case 5` of operator() returns get_dotpos(css, i) (assuming that switch is on
// test_mode — confirm). The macro argument `in` is not referenced by the expansion.
276 #define GEN_DOTS(in) \
277 bound_css.test_mode = 5; \
278 char *dots = collectIntFunResults(bound_css, 0, count_dotpos(css)-1, 3, 0, 1)
280 #define FREE_COMP_EXPD() \
284 #define FREE_GAPS() \
288 #define COMP_EXPD_CHECK(exp_comp,exp_expd) \
290 TEST_EXPECT_EQUAL(comp, exp_comp); \
291 TEST_EXPECT_EQUAL(expd, exp_expd); \
294 #define GAPS_CHECK(exp_before,exp_after) \
296 TEST_EXPECT_EQUAL(gaps_before, exp_before); \
297 TEST_EXPECT_EQUAL(gaps_after, exp_after); \
300 #define DOTS_CHECK(exp_dots) \
302 TEST_EXPECT_EQUAL(dots, exp_dots); \
307 #define TEST_CS_EQUALS(in,exp_comp,exp_expd) do { \
309 COMP_EXPD_CHECK(exp_comp,exp_expd); \
312 #define TEST_CS_EQUALS_OFFSET(in,offset,exp_comp,exp_expd) do { \
313 CSS_COMMON(in, offset); \
314 COMP_EXPD_CHECK(exp_comp,exp_expd); \
317 #define TEST_GAPS_EQUALS_OFFSET(in,offset,exp_before,exp_after) do { \
318 CSS_COMMON(in, offset); \
319 GAPS_CHECK(exp_before,exp_after); \
322 #define TEST_DOTS_EQUALS_OFFSET(in,offset,exp_dots) do { \
323 CSS_COMMON(in, offset); \
324 DOTS_CHECK(exp_dots); \
327 #define TEST_CS_TEXT(in,exp_text) do { \
330 TEST_EXPECT_EQUAL(text, exp_text); \
334 #define TEST_CS_CBROKN(in,exp_comp,exp_expd) do { \
337 TEST_EXPECT_EQUAL__BROKEN(comp, exp_comp); \
338 TEST_EXPECT_EQUAL(expd, exp_expd); \
346 TEST_CS_EQUALS(
"-",
" 0 [0]",
" 1");
347 TEST_CS_EQUALS(
"--",
" 0 0 [0]",
" 2");
348 TEST_CS_EQUALS(
"---",
" 0 0 0 [0]",
" 3");
350 TEST_CS_TEXT(
"---",
"");
351 TEST_CS_TEXT(
"-?~",
"");
352 TEST_CS_TEXT(
"-A-",
" 65");
353 TEST_CS_TEXT(
"A-C",
" 65 67");
355 TEST_CS_EQUALS(
"----------",
" 0 0 0 0 0 0 0 0 0 0 [0]",
" 10");
358 TEST_CS_EQUALS(
"A---------",
" 0 1 1 1 1 1 1 1 1 1 [1]",
" 0 10");
359 TEST_CS_EQUALS(
"-A--------",
" 0 0 1 1 1 1 1 1 1 1 [1]",
" 1 10");
360 TEST_CS_EQUALS(
"---A------",
" 0 0 0 0 1 1 1 1 1 1 [1]",
" 3 10");
361 TEST_CS_EQUALS(
"-----A----",
" 0 0 0 0 0 0 1 1 1 1 [1]",
" 5 10");
362 TEST_CS_EQUALS(
"-------A--",
" 0 0 0 0 0 0 0 0 1 1 [1]",
" 7 10");
363 TEST_CS_EQUALS(
"---------A",
" 0 0 0 0 0 0 0 0 0 0 [1]",
" 9 10");
366 TEST_CS_EQUALS(
"AC--------",
" 0 1 2 2 2 2 2 2 2 2 [2]",
" 0 1 10");
368 TEST_CS_EQUALS(
"A-C-------",
" 0 1 1 2 2 2 2 2 2 2 [2]",
" 0 2 10");
369 TEST_CS_EQUALS(
"A--------C",
" 0 1 1 1 1 1 1 1 1 1 [2]",
" 0 9 10");
370 TEST_CS_EQUALS(
"-AC-------",
" 0 0 1 2 2 2 2 2 2 2 [2]",
" 1 2 10");
371 TEST_CS_EQUALS(
"-A------C-",
" 0 0 1 1 1 1 1 1 1 2 [2]",
" 1 8 10");
372 TEST_CS_EQUALS(
"-A-------C",
" 0 0 1 1 1 1 1 1 1 1 [2]",
" 1 9 10");
373 TEST_CS_EQUALS(
"-------A-C",
" 0 0 0 0 0 0 0 0 1 1 [2]",
" 7 9 10");
374 TEST_CS_EQUALS(
"--------AC",
" 0 0 0 0 0 0 0 0 0 1 [2]",
" 8 9 10");
377 TEST_CS_EQUALS(
"ACG-------",
" 0 1 2 3 3 3 3 3 3 3 [3]",
" 0 1 2 10");
378 TEST_CS_EQUALS(
"AC---G----",
" 0 1 2 2 2 2 3 3 3 3 [3]",
" 0 1 5 10");
379 TEST_CS_EQUALS(
"A-C--G----",
" 0 1 1 2 2 2 3 3 3 3 [3]",
" 0 2 5 10");
380 TEST_CS_EQUALS(
"A-C--G----",
" 0 1 1 2 2 2 3 3 3 3 [3]",
" 0 2 5 10");
381 TEST_CS_EQUALS(
"A--C-G----",
" 0 1 1 1 2 2 3 3 3 3 [3]",
" 0 3 5 10");
382 TEST_CS_EQUALS(
"A---CG----",
" 0 1 1 1 1 2 3 3 3 3 [3]",
" 0 4 5 10");
384 TEST_CS_EQUALS(
"A---C---G-",
" 0 1 1 1 1 2 2 2 2 3 [3]",
" 0 4 8 10");
385 TEST_CS_EQUALS(
"A---C----G",
" 0 1 1 1 1 2 2 2 2 2 [3]",
" 0 4 9 10");
386 TEST_CS_EQUALS(
"A~~~C????G",
" 0 1 1 1 1 2 2 2 2 2 [3]",
" 0 4 9 10");
389 TEST_CS_EQUALS(
"-AC-G--T--",
" 0 0 1 2 2 3 3 3 4 4 [4]",
" 1 2 4 7 10");
392 TEST_CS_EQUALS(
"-CGTACGTAC",
" 0 0 1 2 3 4 5 6 7 8 [9]",
" 1 2 3 4 5 6 7 8 9 10");
393 TEST_CS_EQUALS(
"A-GTACGTAC",
" 0 1 1 2 3 4 5 6 7 8 [9]",
" 0 2 3 4 5 6 7 8 9 10");
394 TEST_CS_EQUALS(
"ACGTA-GTAC",
" 0 1 2 3 4 5 5 6 7 8 [9]",
" 0 1 2 3 4 6 7 8 9 10");
395 TEST_CS_EQUALS(
"ACGTACGT-C",
" 0 1 2 3 4 5 6 7 8 8 [9]",
" 0 1 2 3 4 5 6 7 9 10");
396 TEST_CS_EQUALS(
"ACGTACGTA-",
" 0 1 2 3 4 5 6 7 8 9 [9]",
" 0 1 2 3 4 5 6 7 8 10");
399 TEST_CS_EQUALS(
"ACGTACGTAC",
" 0 1 2 3 4 5 6 7 8 9 [10]",
" 0 1 2 3 4 5 6 7 8 9 10");
401 TEST_CS_EQUALS_OFFSET(
"A--C-G-", 0,
" 0 1 1 1 2 2 3 [3]",
" 0 3 5 7");
402 TEST_CS_EQUALS_OFFSET(
"A--C-G-", 2,
" 0 0 0 1 1 1 2 2 3 [3]",
" 2 5 7 9");
403 TEST_CS_EQUALS_OFFSET(
"A--C-G-", 3,
" 0 0 0 0 1 1 1 2 2 3 [3]",
" 3 6 8 10");
404 TEST_CS_EQUALS_OFFSET(
"A--C-G-", 4,
" 0 0 0 0 0 1 1 1 2 2 3 [3]",
" 4 7 9 11");
407 TEST_GAPS_EQUALS_OFFSET(
"-AC---G", 0,
" 1 0 3 0",
" 0 3 0 [-1]");
408 TEST_GAPS_EQUALS_OFFSET(
".AC..-G", 0,
" 1 0 3 0",
" 0 3 0 [-1]");
409 TEST_GAPS_EQUALS_OFFSET(
"~AC?\?-G", 0,
" 1 0 3 0",
" 0 3 0 [-1]");
410 TEST_GAPS_EQUALS_OFFSET(
"A--C-G-", 0,
" 0 2 1 1",
" 2 1 1 [-1]");
411 TEST_GAPS_EQUALS_OFFSET(
"A--C-G-", 1000,
" 0 2 1 1",
" 2 1 1 [-1]");
414 TEST_DOTS_EQUALS_OFFSET(
"----ACG--T--A-C--GT----", 0,
" [-1]");
415 TEST_DOTS_EQUALS_OFFSET(
"....ACG--T--A-C--GT....", 0,
" [-1]");
416 TEST_DOTS_EQUALS_OFFSET(
"....ACG--T..A-C--GT....", 0,
" 4 [-1]");
417 TEST_DOTS_EQUALS_OFFSET(
"....ACG..T--A.C--GT....", 0,
" 3 5 [-1]");
418 TEST_DOTS_EQUALS_OFFSET(
"....ACG..T~~A.C??GT....", 0,
" 3 5 [-1]");
420 TEST_DOTS_EQUALS_OFFSET(
"AC-----GTA-----CGT", 0,
" [-1]");
422 TEST_DOTS_EQUALS_OFFSET(
"A-.-G-...-C...T", 0,
" 1 2 3 [-1]");
423 TEST_DOTS_EQUALS_OFFSET(
"A.--G...--C...T", 0,
" 1 2 3 [-1]");
424 TEST_DOTS_EQUALS_OFFSET(
"A--.G--...C...T", 0,
" 1 2 3 [-1]");
static CharPredicate pred_is_ali_gap(is_ali_gap)
int expdPosition(int cPos) const
void restoreDots(CompactedSubSequence &slaveSequence)
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
int rel_2_abs(int rel) const
CompactedSequence(const char *text, int length, const char *name, int start_offset=0)
int firstDotPosition() const
#define MESSAGE_BUFFERSIZE
const char * name() const
char buffer[MESSAGE_BUFFERSIZE]
int no_of_gaps_before(int cPos) const
AliDataPtr after(AliDataPtr data, size_t pos)
int abs_2_rel(int abs) const
int no_of_gaps_after(int cPos) const
__ATTR__FORMAT(1) static void messagef(const char *format
void correctUnalignedPositions()
int no_of_gaps_before(int cPos) const
AliDataPtr before(AliDataPtr data, size_t pos)
#define __ATTR__REDUCED_OPTIMIZE
#define IF_ASSERTION_USED(x)
const char * text() const
fa_assert(chars< MESSAGE_BUFFERSIZE)
FastSearchSequence(const CompactedSubSequence &seq)
int first_base_abspos() const
int last_base_abspos() const
int nextDotPosition() const
void storeDots(int beforePos)
int compPosition(int xPos) const
const char * quality() const
void setDotsAtEOSequence()