ARB
fast_aligner.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : fast_aligner.cxx //
4 // Purpose : A fast aligner (not a multiple aligner!) //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in 1998 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 
13 #include "fast_aligner.hxx"
14 #include "ClustalV.hxx"
15 #include "seq_search.hxx"
16 
17 #include <island_hopping.h>
18 
19 #include <awtc_next_neighbours.hxx>
20 #include <awt_sel_boxes.hxx>
21 
22 #include <aw_awars.hxx>
23 #include <aw_root.hxx>
24 #include <aw_question.hxx>
25 
26 #include <arbdbt.h>
27 #include <ad_cb.h>
28 
29 #include <arb_defs.h>
30 #include <arb_progress.h>
31 #include <RangeList.h>
32 
33 #include <cctype>
34 #include <cmath>
35 #include <climits>
36 
37 #include <list>
38 #include <awt_config_manager.hxx>
39 #include <rootAsWin.h>
40 
41 // --------------------------------------------------------------------------------
42 
43 #if defined(DEBUG)
44 // #define TRACE_CLUSTAL_DATA
45 // #define TRACE_ISLANDHOPPER_DATA
46 // #define TRACE_COMPRESSED_ALIGNMENT
47 // #define TRACE_RELATIVES
48 #endif // DEBUG
49 
50 // --------------------------------------------------------------------------------
51 
52 enum FA_report {
53  FA_NO_REPORT, // no report
54  FA_TEMP_REPORT, // report to temporary entries
55  FA_REPORT, // report to resident entries
56 };
57 
58 enum FA_range {
59  FA_WHOLE_SEQUENCE, // align whole sequence
60  FA_AROUND_CURSOR, // align xxx positions around current cursor position
61  FA_SELECTED_RANGE, // align selected range
62  FA_SAI_MULTI_RANGE, // align versus multi range define by SAI
63 };
64 
65 enum FA_turn {
66  FA_TURN_NEVER, // never try to turn sequence
67  FA_TURN_INTERACTIVE, // try to turn, but query user
68  FA_TURN_ALWAYS, // turn if score is better
69 };
70 
72  FA_REF_EXPLICIT, // reference sequence explicitly specified
73  FA_REF_CONSENSUS, // use group consensus as reference
74  FA_REF_RELATIVES, // search next relatives by PT server
75 };
76 
78  FA_CURRENT, // align current species
79  FA_MARKED, // align all marked species
80  FA_SELECTED, // align selected species (= range)
81 };
82 
84  FA_NO_ACTION, // do nothing
85  FA_MARK_FAILED, // mark failed species (unmark rest)
86  FA_MARK_ALIGNED, // mark aligned species (unmark rest)
87 };
88 
89 struct AlignParams {
91  bool showGapsMessages; // display messages about missing gaps in master?
92  PosRange range; // range to be aligned
93 };
94 
97  char *pt_server_alignment; // alignment used in pt_server (may differ from 'alignment')
98  int maxRelatives; // max # of relatives to use
99 
100  SearchRelativeParams(FamilyFinder *ff_, const char *pt_server_alignment_, int maxRelatives_)
101  : ff(ff_),
102  pt_server_alignment(strdup(pt_server_alignment_)),
103  maxRelatives(maxRelatives_)
104  {}
105 
107  free(pt_server_alignment);
108  delete(ff);
109  }
110 
112 };
113 
114 // --------------------------------------------------------------------------------
115 
116 #define GAP_CHAR '-'
117 #define QUALITY_NAME "ASC_ALIGNER_CLIENT_SCORE"
118 #define INSERTS_NAME "AMI_ALIGNER_MASTER_INSERTS"
119 
120 #define FA_AWAR_ROOT "faligner/"
121 #define FA_AWAR_TO_ALIGN FA_AWAR_ROOT "what"
122 #define FA_AWAR_REFERENCE FA_AWAR_ROOT "against"
123 #define FA_AWAR_REFERENCE_NAME FA_AWAR_ROOT "sagainst"
124 #define FA_AWAR_RANGE FA_AWAR_ROOT "range"
125 #define FA_AWAR_PROTECTION FA_AWAR_ROOT "protection"
126 #define FA_AWAR_AROUND FA_AWAR_ROOT "around"
127 #define FA_AWAR_MIRROR FA_AWAR_ROOT "mirror"
128 #define FA_AWAR_REPORT FA_AWAR_ROOT "report"
129 #define FA_AWAR_SHOW_GAPS_MESSAGES FA_AWAR_ROOT "show_gaps"
130 #define FA_AWAR_CONTINUE_ON_ERROR FA_AWAR_ROOT "continue_on_error"
131 #define FA_AWAR_ACTION_ON_ERROR FA_AWAR_ROOT "action_on_error"
132 #define FA_AWAR_USE_SECONDARY FA_AWAR_ROOT "use_secondary"
133 #define FA_AWAR_NEXT_RELATIVES FA_AWAR_ROOT "next_relatives"
134 #define FA_AWAR_RELATIVE_RANGE FA_AWAR_ROOT "relrange"
135 #define FA_AWAR_PT_SERVER_ALIGNMENT "tmp/" FA_AWAR_ROOT "relative_ali"
136 #define FA_AWAR_SAI_RANGE_NAME FA_AWAR_ROOT "sai/sainame"
137 #define FA_AWAR_SAI_RANGE_CHARS FA_AWAR_ROOT "sai/chars"
138 
139 #define FA_AWAR_ISLAND_HOPPING_ROOT "island_hopping/"
140 #define FA_AWAR_USE_ISLAND_HOPPING FA_AWAR_ISLAND_HOPPING_ROOT "use"
141 #define FA_AWAR_ESTIMATE_BASE_FREQ FA_AWAR_ISLAND_HOPPING_ROOT "estimate_base_freq"
142 #define FA_AWAR_BASE_FREQ_A FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_a"
143 #define FA_AWAR_BASE_FREQ_C FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_c"
144 #define FA_AWAR_BASE_FREQ_G FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_g"
145 #define FA_AWAR_BASE_FREQ_T FA_AWAR_ISLAND_HOPPING_ROOT "base_freq_t"
146 #define FA_AWAR_SUBST_PARA_AC FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ac"
147 #define FA_AWAR_SUBST_PARA_AG FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ag"
148 #define FA_AWAR_SUBST_PARA_AT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_at"
149 #define FA_AWAR_SUBST_PARA_CG FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_cg"
150 #define FA_AWAR_SUBST_PARA_CT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_ct"
151 #define FA_AWAR_SUBST_PARA_GT FA_AWAR_ISLAND_HOPPING_ROOT "subst_para_gt"
152 #define FA_AWAR_EXPECTED_DISTANCE FA_AWAR_ISLAND_HOPPING_ROOT "expected_dist"
153 #define FA_AWAR_STRUCTURE_SUPPLEMENT FA_AWAR_ISLAND_HOPPING_ROOT "struct_suppl"
154 #define FA_AWAR_THRESHOLD FA_AWAR_ISLAND_HOPPING_ROOT "threshold"
155 #define FA_AWAR_GAP_A FA_AWAR_ISLAND_HOPPING_ROOT "gapa"
156 #define FA_AWAR_GAP_B FA_AWAR_ISLAND_HOPPING_ROOT "gapb"
157 #define FA_AWAR_GAP_C FA_AWAR_ISLAND_HOPPING_ROOT "gapc"
158 
159 // --------------------------------------------------------------------------------
160 
161 static IslandHopping *island_hopper = NULp; // NULp -> use fast aligner; else use island hopper
162 
163 static GB_alignment_type global_alignmentType = GB_AT_UNKNOWN; // type of actually aligned sequence
164 
165 static int currentSequenceNumber; // used for counter
167 
168 // --------------------------------------------------------------------------------
169 
170 inline ARB_ERROR species_not_found(GB_CSTR species_name) {
171  return GBS_global_string("No species '%s' found!", species_name);
172 }
173 
174 static ARB_ERROR reverseComplement(GBDATA *gb_species, GB_CSTR ali, int max_protection) {
175  GBDATA *gbd = GBT_find_sequence(gb_species, ali);
176  ARB_ERROR error = NULp;
177 
178  if (!gbd) {
179  error = GBS_global_string("No 'data' found for species '%s'", GBT_get_name_or_description(gb_species));
180  }
181  else {
182  int my_protection = GB_read_security_write(gbd);
183 
184  if (my_protection<=max_protection) { // ok
185  char *seq = GB_read_string(gbd);
186  int length = GB_read_string_count(gbd);
187  GBDATA *gb_main = GB_get_root(gb_species);
188 
189  GB_alignment_type ali_type = GBT_get_alignment_type(gb_main, ali);
190  fa_assert(ali_type != GB_AT_UNKNOWN);
191 
192  char T_or_U;
193  error = GBT_determine_T_or_U(ali_type, &T_or_U, "reverse-complement");
194  if (!error) {
195  GBT_reverseComplementNucSequence(seq, length, T_or_U);
196  error = GB_write_string(gbd, seq);
197  }
198  }
199  else { // protection error
200  error = GBS_global_string("Cannot reverse-complement species '%s' because of protection level", GBT_get_name_or_description(gb_species));
201  }
202 
203  }
204 
205  return error;
206 }
207 
208 static void build_reverse_complement(AW_window *aw, const AlignDataAccess *data_access) {
209  GBDATA *gb_main = data_access->gb_main;
210 
211  GB_push_transaction(gb_main);
212 
213  AW_root *root = aw->get_root();
214  FA_alignTarget revComplWhat = static_cast<FA_alignTarget>(root->awar(FA_AWAR_TO_ALIGN)->read_int());
215  GB_CSTR alignment = data_access->alignment_name.c_str();
216  ARB_ERROR error = NULp;
217  int max_protection = root->awar(FA_AWAR_PROTECTION)->read_int();
218 
219  switch (revComplWhat) {
220  case FA_CURRENT: { // current species
221  GB_CSTR species_name = root->awar(AWAR_SPECIES_NAME)->read_string();
222  GBDATA *gb_species = GBT_find_species(gb_main, species_name);
223  if (!gb_species) error = species_not_found(species_name);
224  if (!error) error = reverseComplement(gb_species, alignment, max_protection);
225  break;
226  }
227  case FA_MARKED: { // marked species
228  GBDATA *gb_species = GBT_first_marked_species(gb_main);
229 
230  if (!gb_species) {
231  error = "There is no marked species";
232  }
233  while (gb_species) {
234  error = reverseComplement(gb_species, alignment, max_protection);
235  if (error) break;
236  gb_species = GBT_next_marked_species(gb_species);
237  }
238  break;
239  }
240  case FA_SELECTED: { // selected species (editor selection!)
241 
244 
245  int count = 0;
246  GBDATA *gb_species = get_first_selected_species(&count);
247 
248  if (!gb_species) {
249  error = "There is no selected species!";
250  }
251  while (gb_species) {
252  error = reverseComplement(gb_species, alignment, max_protection);
253  if (error) break;
254  gb_species = get_next_selected_species();
255  }
256  break;
257  }
258  default: {
259  fa_assert(0);
260  break;
261  }
262  }
263 
264  GB_end_transaction_show_error(gb_main, error, aw_message);
265 }
266 
267 // --------------------------------------------------------------------------------
268 
269 class AliChange { // describes a local alignment change
270  const CompactedSubSequence& Old;
271  const CompactedSubSequence& New;
272 
273 public:
275  : Old(old_), New(new_)
276  {
278  }
279 
280  int follow(ExplicitRange& range) const {
281  ExplicitRange compressed(Old.compPosition(range.start()),
282  Old.compPosition(range.end()));
283 
284  int exp_start = New.expdPosition(compressed.start());
285  int exp_end = New.expdPosition(compressed.end());
286 
287  int gaps_before = New.no_of_gaps_before(compressed.start());
288  int gaps_after = New.no_of_gaps_after(compressed.end());
289 
290  fa_assert(gaps_before >= 0);
291  fa_assert(gaps_after >= 0);
292 
293  range = ExplicitRange(exp_start-gaps_before, exp_end+gaps_after);
294 
295  return compressed.size(); // number of bases
296  }
297 };
298 
299 class LooseBases {
300  typedef std::list<ExplicitRange> Ranges;
301 
302  Ranges ranges;
303 
304 public:
305 
306  bool is_empty() const { return ranges.empty(); }
307  void clear() { ranges.clear(); }
308 
309  void memorize(ExplicitRange range) {
310  ranges.push_front(range);
311  }
313  ExplicitRange range = ranges.front();
314  ranges.pop_front();
315  return range;
316  }
317  int follow_ali_change(const AliChange& change) {
318  // transform positions according to changed alignment (oldSequence -> newSequence)
319  // returns the number of bases contained in 'this'
320  int basecount = 0;
321  if (!is_empty()) {
322  for (Ranges::iterator r = ranges.begin(); r != ranges.end(); ++r) {
323  basecount += change.follow(*r);
324  }
325  }
326  return basecount;
327  }
328  void append(LooseBases& loose) { ranges.splice(ranges.end(), loose.ranges); }
329  int follow_ali_change_and_append(LooseBases& loose, const AliChange& change) {
330  // returns the number of loose bases (added from 'loose')
331  int basecount = loose.follow_ali_change(change);
332  append(loose);
333  return basecount;
334  }
335 };
336 
337 static LooseBases unaligned_bases; // if fast_align cannot align (no master bases) -> stores positions here
338 
339 
340 static const char *read_name(GBDATA *gbd) {
341  if (!gbd) return "GROUP-CONSENSUS";
342  const char *name = GBT_get_name(gbd);
343  return name ? name : "<unnamed-species>";
344 }
345 
346 inline int relatedBases(char base1, char base2) {
347  return baseMatch(base1, base2)==1;
348 }
349 
350 inline char alignQuality(char slave, char master) {
351  fa_assert(slave);
352  fa_assert(master);
353 
354  char result = '#';
355  if (slave==master) result = '-'; // equal
356  else if (slave==GAP_CHAR) result = '+'; // inserted gap
357  else if (master==GAP_CHAR) result = '+'; // no gap in master
358  else if (relatedBases(slave, master)) result = '~'; // mutation (related bases)
359 
360  return result; // mutation (non-related bases)
361 }
362 
363 // -------------------------
364 // Debugging stuff
365 
366 #ifdef DEBUG
367 static char *lstr(const char *s, int len) {
368  static int alloc;
369  static char *buffer;
370 
371  if (alloc<(len+1)) {
372  if (alloc) free(buffer);
373  ARB_alloc(buffer, alloc=len+100);
374  }
375 
376  memcpy(buffer, s, len);
377  buffer[len] = 0;
378 
379  return buffer;
380 }
381 
382 #define BUFLEN 120
383 
384 inline char compareChar(char base1, char base2) {
385  return base1==base2 ? '=' : (relatedBases(base1, base2) ? 'x' : 'X');
386 }
387 
388 #if defined(TRACE_COMPRESSED_ALIGNMENT)
389 
390 static void dump_n_compare_one(const char *seq1, const char *seq2, long len, long offset) {
391  fa_assert(len<=BUFLEN);
392  char compare[BUFLEN+1];
393 
394  for (long l=0; l<len; l++) {
395  compare[l] = (is_ali_gap(seq1[l]) && is_ali_gap(seq2[l])) ? ' ' : compareChar(seq1[l], seq2[l]);
396  }
397 
398  compare[len] = 0;
399 
400  printf(" %li '%s'\n", offset, lstr(seq1, len));
401  printf(" %li '%s'\n", offset, lstr(seq2, len));
402  printf(" %li '%s'\n", offset, compare);
403 }
404 
405 inline void dump_rest(const char *seq, long len, int idx, long offset) {
406  printf(" Rest von Sequenz %i:\n", idx);
407  while (len>BUFLEN) {
408  printf(" %li '%s'\n", offset, lstr(seq, BUFLEN));
409  seq += BUFLEN;
410  len -= BUFLEN;
411  offset += BUFLEN;
412  }
413 
414  fa_assert(len>0);
415  printf(" '%s'\n", lstr(seq, len));
416 }
417 
418 static void dump_n_compare(const char *text, const char *seq1, long len1, const char *seq2, long len2) {
419  long offset = 0;
420 
421  printf(" Comparing %s:\n", text);
422 
423  while (len1>0 && len2>0) {
424  long done = 0;
425 
426  if (len1>=BUFLEN && len2>=BUFLEN) {
427  dump_n_compare_one(seq1, seq2, done=BUFLEN, offset);
428  }
429  else {
430  long min = len1<len2 ? len1 : len2;
431  dump_n_compare_one(seq1, seq2, done=min, offset);
432  }
433 
434  seq1 += done;
435  seq2 += done;
436  len1 -= done;
437  len2 -= done;
438  offset += done;
439  }
440 
441  if (len1>0) dump_rest(seq1, len1, 1, offset);
442  if (len2>0) dump_rest(seq2, len2, 2, offset);
443  printf(" -------------------\n");
444 }
445 
446 static void dump_n_compare(const char *text, const CompactedSubSequence& seq1, const CompactedSubSequence& seq2) {
447  dump_n_compare(text, seq1.text(), seq1.length(), seq2.text(), seq2.length());
448 }
449 #endif // TRACE_COMPRESSED_ALIGNMENT
450 
451 #undef BUFLEN
452 
453 inline void dumpSeq(const char *seq, long len, long pos) {
454  printf("'%s' ", lstr(seq, len));
455  printf("(Pos=%li,Len=%li)", pos, len);
456 }
457 
458 #define dump() \
459  do { \
460  double sig = partSignificance(sequence().length(), slaveSequence.length(), bestLength); \
461  \
462  printf(" Score = %li (Significance=%f)\n" \
463  " Master = ", bestScore, sig); \
464  dumpSeq(bestMasterLeft.text(), bestLength, bestMasterLeft.leftOf()); \
465  printf("\n" \
466  " Slave = "); \
467  dumpSeq(bestSlaveLeft.text(), bestLength, bestSlaveLeft.leftOf()); \
468  printf("\n"); \
469  } while (0)
470 
471 #endif //DEBUG
472 
473 
474 // ------------------------------------------------
475 // INLINE-functions used in fast_align():
476 
477 inline double log3(double d) {
478  return log(d)/log(3.0);
479 }
480 inline double partSignificance(long seq1len, long seq2len, long partlen) {
481  // returns log3 of significance of the part
482  // usage: partSignificance(...) < log3(maxAllowedSignificance)
483  return log3((seq1len-partlen)*(seq2len-partlen)) - partlen;
484 }
485 
487  return "Cannot align - reserved buffer is to small";
488 }
489 
490 inline long insertsToNextBase(AlignBuffer *alignBuffer, const SequencePosition& master) {
491  int inserts;
492  int nextBase;
493 
494  if (master.rightOf()>0) {
495  nextBase = master.expdPosition();
496  }
497  else {
498  nextBase = master.sequence().expdPosition(master.sequence().length());
499  }
500  inserts = nextBase-alignBuffer->offset();
501 
502  return inserts;
503 }
504 
505 inline void insertBase(AlignBuffer *alignBuffer,
506  SequencePosition& master, SequencePosition& slave,
507  FastAlignReport *report)
508 {
509  char slaveBase = *slave.text();
510  char masterBase = *master.text();
511 
512  alignBuffer->set(slaveBase, alignQuality(slaveBase, masterBase));
513  report->count_aligned_base(slaveBase!=masterBase);
514  ++slave;
515  ++master;
516 }
517 
518 inline void insertSlaveBases(AlignBuffer *alignBuffer,
519  SequencePosition& slave,
520  int length,
521  FastAlignReport *report)
522 {
523  alignBuffer->copy(slave.text(), alignQuality(*slave.text(), GAP_CHAR), length);
524  report->count_unaligned_base(length);
525  slave += length;
526 }
527 
528 inline void insertGap(AlignBuffer *alignBuffer,
529  SequencePosition& master,
530  FastAlignReport *report)
531 {
532  char masterBase = *master.text();
533 
534  alignBuffer->set(GAP_CHAR, alignQuality(GAP_CHAR, masterBase));
535  report->count_aligned_base(masterBase!=GAP_CHAR);
536  ++master;
537 }
538 
540  SequencePosition& master,
541  SequencePosition& slave,
542  const char *masterAlignment, const char *slaveAlignment, long alignmentLength,
543  FastAlignReport *report)
544 {
545  // inserts bases of 'slave' to 'alignBuffer' according to alignment in 'masterAlignment' and 'slaveAlignment'
546 #define ACID '*' // contents of 'masterAlignment' and 'slaveAlignment'
547 #define GAP '-'
548 
549  int pos;
550  int baseAtLeft = 0; // TRUE -> last position in alignBuffer contains a base
551 
552  for (pos=0; pos<alignmentLength; pos++) {
553  long insert = insertsToNextBase(alignBuffer, master); // in expanded seq
554 
555  if (masterAlignment[pos]==ACID) {
556  if (insert>0) {
557  if (insert>alignBuffer->free()) return bufferTooSmall();
558  alignBuffer->set(GAP_CHAR, alignQuality(GAP_CHAR, GAP_CHAR), insert);
559  fa_assert(insertsToNextBase(alignBuffer, master)==0);
560  insert = 0;
561  }
562 
563  if (!alignBuffer->free()) return bufferTooSmall();
564  if (slaveAlignment[pos]==ACID) {
565  insertBase(alignBuffer, master, slave, report);
566  baseAtLeft = 1;
567  }
568  else {
569  insertGap(alignBuffer, master, report);
570  baseAtLeft = 0;
571  }
572  }
573  else {
574  int slave_bases;
575 
576  fa_assert(masterAlignment[pos]==GAP);
577  for (slave_bases=1; pos+slave_bases<alignmentLength && masterAlignment[pos+slave_bases]==GAP; slave_bases++) {
578  ; // count gaps in master (= # of slave bases to insert)
579  }
580  if (!baseAtLeft && insert>slave_bases) {
581  int ins_gaps = insert-slave_bases;
582 
583  fa_assert(alignBuffer->free()>=ins_gaps);
584  alignBuffer->set(GAP_CHAR, alignQuality(GAP_CHAR, GAP_CHAR), ins_gaps);
585  insert -= ins_gaps;
586  }
587 
588  if (insert<slave_bases) { // master has less gaps than slave bases to insert
589  report->memorize_insertion(master.expdPosition(), slave_bases-insert);
590  }
591  else if (insert>slave_bases) { // master has more gaps than slave bases to insert
592  fa_assert(baseAtLeft);
593  }
594 
595  unaligned_bases.memorize(ExplicitRange(slave.expdPosition(), // memorize base positions without counterpart in master
596  slave.expdPosition(slave_bases-1)));
597 
598  if (slave_bases>alignBuffer->free()) return bufferTooSmall();
599  insertSlaveBases(alignBuffer, slave, slave_bases, report);
600  pos += slave_bases-1; // -1 compensates for()-increment
601  baseAtLeft = 1;
602  }
603  }
604 
605  return NULp;
606 
607 #undef GAP
608 #undef ACID
609 }
610 
611 static ARB_ERROR insertAligned(AlignBuffer *alignBuffer,
612  SequencePosition& master, SequencePosition& slave, long partLength,
613  FastAlignReport *report)
614 {
615  // insert bases of 'slave' to 'alignBuffer' according to 'master'
616  if (partLength) {
617  long insert = insertsToNextBase(alignBuffer, master);
618 
619  fa_assert(partLength>=0);
620 
621  if (insert<0) { // insert gaps into master
622  long min_insert = insert;
623 
624  report->memorize_insertion(master.expdPosition(), -insert);
625 
626  while (insert<0 && partLength) {
627  if (insert<min_insert) min_insert = insert;
628  if (!alignBuffer->free()) {
629  return bufferTooSmall();
630  }
631  insertBase(alignBuffer, master, slave, report);
632  partLength--;
633  insert = insertsToNextBase(alignBuffer, master);
634  }
635 
636  fa_assert(partLength>=0);
637  if (partLength==0) { // all inserted
638  return NULp;
639  }
640  }
641 
642  fa_assert(insert>=0);
643 
644  if (insert>0) { // insert gaps into slave
645  if (insert>alignBuffer->free()) return bufferTooSmall();
646  alignBuffer->set(GAP_CHAR, alignQuality(GAP_CHAR, GAP_CHAR), insert);
647  fa_assert(insertsToNextBase(alignBuffer, master)==0);
648  }
649 
650  fa_assert(partLength>=0);
651 
652  while (partLength--) {
653  insert = insertsToNextBase(alignBuffer, master);
654 
655  fa_assert(insert>=0);
656  if (insert>0) {
657  if (insert>=alignBuffer->free()) return bufferTooSmall();
658  alignBuffer->set(GAP_CHAR, alignQuality(GAP_CHAR, GAP_CHAR), insert);
659  }
660  else {
661  if (!alignBuffer->free()) {
662  return bufferTooSmall();
663  }
664  }
665 
666  insertBase(alignBuffer, master, slave, report);
667  }
668  }
669 
670  return NULp;
671 }
672 
673 static ARB_ERROR cannot_fast_align(const CompactedSubSequence& master, long moffset, long mlength,
674  const CompactedSubSequence& slaveSequence, long soffset, long slength,
675  int max_seq_length,
676  AlignBuffer *alignBuffer,
677  FastAlignReport *report)
678 {
679  const char *mtext = master.text(moffset);
680  const char *stext = slaveSequence.text(soffset);
681  ARB_ERROR error = NULp;
682 
683  if (slength) {
684  if (mlength) { // if slave- and master-sequences contain bases, we call the slow aligner
685 #ifdef TRACE_CLUSTAL_DATA
686  printf("ClustalV-Align:\n");
687  printf(" mseq = '%s'\n", lstr(mtext, mlength));
688  printf(" sseq = '%s'\n", lstr(stext, slength));
689 #endif // TRACE_CLUSTAL_DATA
690 
691  int is_dna = -1;
692 
693  switch (global_alignmentType) {
694  case GB_AT_RNA:
695  case GB_AT_DNA: is_dna = 1; break;
696  case GB_AT_AA: is_dna = 0; break;
697  default: error = "Unknown alignment type - aligner aborted"; break;
698  }
699 
700  const char *maligned, *saligned;
701  int len;
702  if (!error) {
703  int score; // unused
704  error = ClustalV_align(is_dna,
705  1,
706  mtext, mlength, stext, slength,
707  master.gapsBefore(moffset),
708  max_seq_length,
709  maligned, saligned, len, score);
710  }
711 
712  if (!error) {
713 #ifdef TRACE_CLUSTAL_DATA
714  printf("ClustalV returns:\n");
715  printf(" maligned = '%s'\n", lstr(maligned, len));
716  printf(" saligned = '%s'\n", lstr(saligned, len));
717 #endif // TRACE_CLUSTAL_DATA
718 
719  SequencePosition masterPos(master, moffset);
720  SequencePosition slavePos(slaveSequence, soffset);
721 
722  error = insertClustalValigned(alignBuffer, masterPos, slavePos, maligned, saligned, len, report);
723 
724 #if (defined(DEBUG) && 0)
725 
726  SequencePosition master2(master->sequence(), moffset);
727  SequencePosition slave2(slaveSequence, soffset);
728  char *cmp = new char[len];
729 
730  for (int l=0; l<len; l++) {
731  int gaps = 0;
732 
733  if (maligned[l]=='*') {
734  maligned[l] = *master2.text();
735  ++master2;
736  }
737  else {
738  gaps++;
739  }
740 
741  if (saligned[l]=='*') {
742  saligned[l] = *slave2.text();
743  ++slave2;
744  }
745  else {
746  gaps++;
747  }
748 
749  cmp[l] = gaps || maligned[l]==saligned[l] ? '=' : 'X';
750  }
751 
752  printf(" master = '%s'\n", lstr(maligned, len));
753  printf(" slave = '%s'\n", lstr(saligned, len));
754  printf(" '%s'\n", lstr(cmp, len));
755 
756  delete [] cmp;
757 #endif
758  }
759  }
760  else { // master is empty here, so we just copy in the slave bases
761  if (slength<=alignBuffer->free()) {
762  unaligned_bases.memorize(ExplicitRange(slaveSequence.expdPosition(soffset),
763  slaveSequence.expdPosition(soffset+slength-1)));
764 
765  alignBuffer->copy(slaveSequence.text(soffset), '?', slength); // '?' means not aligned (just inserted)
766  // corrected by at alignBuffer->correctUnalignedPositions()
767  report->count_unaligned_base(slength);
768  }
769  else {
770  error = bufferTooSmall();
771  }
772  }
773  }
774 
775  return error;
776 }
777 
778 // ------------------------------------
779 // #define's for fast_align()
780 
781 #define TEST_BETTER_SCORE() \
782  do { \
783  if (score>bestScore) { \
784  bestScore = score; \
785  bestLength = masterRight.text() - masterLeft.text(); \
786  bestMasterLeft = masterLeft; \
787  bestSlaveLeft = slaveLeft; \
788  } \
789  } while (0)
790 
791 #define CAN_SCORE_LEFT() (masterLeft.leftOf() && slaveLeft.leftOf())
792 #define CAN_SCORE_RIGHT() (masterRight.rightOf() && slaveRight.rightOf())
793 
794 #define SCORE_LEFT() \
795  do { \
796  score += *(--masterLeft).text()==*(--slaveLeft).text() ? match : mismatch; \
797  TEST_BETTER_SCORE(); \
798  } while (0)
799 
800 #define SCORE_RIGHT() \
801  do { \
802  score += *(++masterRight).text()==*(++slaveRight).text() ? match : mismatch; \
803  TEST_BETTER_SCORE(); \
804  } while (0)
805 
806 
808  AlignBuffer *alignBuffer,
809  int max_seq_length,
810  int match, int mismatch,
811  FastAlignReport *report) const
812 {
813  // aligns 'slaveSequence' to 'this'
814  //
815  // returns
816  // NULp -> all ok -> 'alignBuffer' contains aligned sequence
817  // or error -> failure -> no results
818 
819  ARB_ERROR error = NULp;
820  int aligned = 0;
821 
822  // set the following #if to zero to test ClustalV-Aligner (not fast_aligner)
823 #if 1
824 
825  static double lowSignificance;
826  static int lowSignificanceInitialized;
827 
828  if (!lowSignificanceInitialized) {
829  lowSignificance = log3(0.01);
830  lowSignificanceInitialized = 1;
831  }
832 
833  SequencePosition slave(slaveSequence);
834  long bestScore=0;
835  SequencePosition bestMasterLeft(sequence());
836  SequencePosition bestSlaveLeft(slaveSequence);
837  long bestLength=0;
838 
839  while (slave.rightOf()>=3) { // with all triples of slaveSequence
840  FastSearchOccurrence occurrence(*this, slave.text()); // "search" first
841  SequencePosition rightmostSlave = slave;
842 
843  while (occurrence.found()) { // with all occurrences of the triple
844  long score = match*3;
845  SequencePosition masterLeft(occurrence.sequence(), occurrence.offset());
846  SequencePosition masterRight(occurrence.sequence(), occurrence.offset()+3);
847  SequencePosition slaveLeft(slave);
848  SequencePosition slaveRight(slave, 3);
849 
850  while (score>0) {
851  if (CAN_SCORE_LEFT()) {
852  SCORE_LEFT();
853  }
854  else {
855  while (score>0 && CAN_SCORE_RIGHT()) {
856  SCORE_RIGHT();
857  }
858  break;
859  }
860 
861  if (CAN_SCORE_RIGHT()) {
862  SCORE_RIGHT();
863  }
864  else {
865  while (score>0 && CAN_SCORE_LEFT()) {
866  SCORE_LEFT();
867  }
868  break;
869  }
870  }
871 
872  occurrence.gotoNext(); // "search" next
873 
874  if (rightmostSlave<slaveRight) {
875  rightmostSlave = slaveRight;
876  rightmostSlave -= 3;
877  }
878  }
879 
880  if (rightmostSlave>slave) slave = rightmostSlave;
881  else ++slave;
882  }
883 
884  if (bestLength) {
885  double sig = partSignificance(sequence().length(), slaveSequence.length(), bestLength);
886 
887  if (sig<lowSignificance) {
888  long masterLeftOf = bestMasterLeft.leftOf();
889  long masterRightStart = masterLeftOf+bestLength;
890  long masterRightOf = bestMasterLeft.rightOf()-bestLength;
891  long slaveLeftOf = bestSlaveLeft.leftOf();
892  long slaveRightStart = slaveLeftOf+bestLength;
893  long slaveRightOf = bestSlaveLeft.rightOf()-bestLength;
894 
895 #define MIN_ALIGNMENT_RANGE 4
896 
897  if (!error) {
898  if (masterLeftOf >= MIN_ALIGNMENT_RANGE && slaveLeftOf >= MIN_ALIGNMENT_RANGE) {
899  CompactedSubSequence leftCompactedMaster(sequence(), 0, masterLeftOf);
900  FastSearchSequence leftMaster(leftCompactedMaster);
901 
902  error = leftMaster.fast_align(CompactedSubSequence(slaveSequence, 0, slaveLeftOf),
903  alignBuffer, max_seq_length, match, mismatch, report);
904  }
905  else if (slaveLeftOf>0) {
906  error = cannot_fast_align(sequence(), 0, masterLeftOf,
907  slaveSequence, 0, slaveLeftOf,
908  max_seq_length, alignBuffer, report);
909  }
910 
911  aligned = 1;
912  }
913 
914  // align part of slave sequence according to master sequence:
915 
916  if (!error) {
917 #if (defined(DEBUG) && 0)
918  long offset = alignBuffer->offset();
919  long used;
920 #endif
921  error = insertAligned(alignBuffer, bestMasterLeft, bestSlaveLeft, bestLength, report);
922 #if (defined(DEBUG) && 0)
923  used = alignBuffer->offset()-offset;
924  printf("aligned '%s' (len=%li, address=%li)\n", lstr(alignBuffer->text()+offset, used), used, long(alignBuffer));
925 #endif
926  aligned = 1;
927  }
928 
929  if (!error) {
930  if (masterRightOf >= MIN_ALIGNMENT_RANGE && slaveRightOf >= MIN_ALIGNMENT_RANGE) {
931  CompactedSubSequence rightCompactedMaster(sequence(), masterRightStart, masterRightOf);
932  FastSearchSequence rightMaster(rightCompactedMaster);
933 
934  error = rightMaster.fast_align(CompactedSubSequence(slaveSequence, slaveRightStart, slaveRightOf),
935  alignBuffer,
936  max_seq_length, match, mismatch, report);
937  }
938  else if (slaveRightOf>0) {
939  error = cannot_fast_align(sequence(), masterRightStart, masterRightOf,
940  slaveSequence, slaveRightStart, slaveRightOf,
941  max_seq_length, alignBuffer, report);
942  }
943 
944  aligned = 1;
945  }
946 
947  }
948  }
949 
950 #endif
951 
952  if (!aligned && !error) {
953  error = cannot_fast_align(sequence(), 0, sequence().length(),
954  slaveSequence, 0, slaveSequence.length(),
955  max_seq_length, alignBuffer, report);
956  }
957 
958  return error;
959 }
960 
961 #undef dump
962 #undef TEST_BETTER_SCORE
963 #undef CAN_SCORE_LEFT
964 #undef CAN_SCORE_RIGHT
965 #undef SCORE_LEFT
966 #undef SCORE_RIGHT
967 
968 inline long calcSequenceChecksum(const char *data, long length) {
969  return GB_checksum(data, length, 1, GAP_CHARS);
970 }
971 
973  const char *ali,
974  ARB_ERROR *errorPtr,
975  char **dataPtr, // if dataPtr is specified, it will be set to the WHOLE uncompacted sequence
976  long *seqChksum, // may be NULp (of part of sequence)
977  PosRange range) // if !range.is_whole() -> return only part of the sequence
978 {
979  ARB_ERROR error = NULp;
980  GBDATA *gbd;
981  CompactedSubSequence *seq = NULp;
982 
983  gbd = GBT_find_sequence(gb_species, ali); // get sequence
984 
985  if (gbd) {
986  long length = GB_read_string_count(gbd);
987  char *data = GB_read_string(gbd);
988  long partLength;
989  char *partData;
990 
991  if (dataPtr) { // make a copy of the whole uncompacted sequence (returned to caller)
992  *dataPtr = data;
993  }
994 
995  int firstColumn = range.start();
996  if (range.is_part()) { // take only part of sequence
997  int lastColumn = range.end();
998 
999  fa_assert(firstColumn>=0);
1000  fa_assert(firstColumn<=lastColumn);
1001 
1002  // include all surrounding gaps (@@@ this might cause problems with partial alignment)
1003  while (firstColumn>0 && is_ali_gap(data[firstColumn-1])) {
1004  firstColumn--;
1005  }
1006  if (lastColumn!=-1) {
1007  while (lastColumn<(length-1) && is_ali_gap(data[lastColumn+1])) lastColumn++;
1008  fa_assert(lastColumn<length);
1009  }
1010 
1011  partData = data+firstColumn;
1012  int slen = length-firstColumn;
1013 
1014  fa_assert(slen>=0);
1015  fa_assert((size_t)slen==strlen(partData));
1016 
1017  if (lastColumn==-1) { // take all till end of sequence
1018  partLength = slen;
1019  }
1020  else {
1021  partLength = lastColumn-firstColumn+1;
1022  if (partLength>slen) partLength = slen; // cut rest, if we have any
1023  }
1024  }
1025  else {
1026  partLength = length;
1027  partData = data;
1028  }
1029 
1030  if (!error) {
1031  if (seqChksum) {
1032  *seqChksum = calcSequenceChecksum(partData, partLength);
1033  }
1034 
1035  seq = new CompactedSubSequence(partData, partLength, GBT_get_name_or_description(gb_species), firstColumn);
1036  }
1037 
1038  if (!dataPtr) { // free sequence only if user has not requested to get it
1039  free(data);
1040  }
1041  }
1042  else {
1043  error = GBS_global_string("No 'data' found for species '%s'", GBT_get_name_or_description(gb_species));
1044  if (dataPtr) *dataPtr = NULp; // (user must not care to free data if we fail)
1045  }
1046 
1047  fa_assert(errorPtr);
1048  *errorPtr = error;
1049 
1050  return seq;
1051 }
1052 
1053 static ARB_ERROR writeStringToAlignment(GBDATA *gb_species, GB_CSTR alignment, GB_CSTR data_name, GB_CSTR str, bool temporary) {
1054  GBDATA *gb_ali = GB_search(gb_species, alignment, GB_DB);
1055  ARB_ERROR error = NULp;
1056  GBDATA *gb_name = GB_search(gb_ali, data_name, GB_STRING);
1057 
1058  if (gb_name) {
1059  fa_assert(GB_is_ancestor_of(gb_ali, gb_name));
1060  error = GB_write_string(gb_name, str);
1061  if (temporary && !error) error = GB_set_temporary(gb_name);
1062  }
1063  else {
1064  error = GBS_global_string("Cannot create entry '%s' for '%s'", data_name, GBT_get_name_or_description(gb_species));
1065  }
1066 
1067  return error;
1068 }
1069 
1070 // --------------------------------------------------------------------------------
1071 
1073  const FastSearchSequence *alignTo,
1074  int max_seq_length,
1075  GB_CSTR alignment,
1076  long toAlignChksum,
1077  GBDATA *gb_toAlign,
1078  GBDATA *gb_alignTo, // may be NULp
1079  const AlignParams& ali_params)
1080 {
1081  // if only part of the sequence should be aligned, then this functions already gets only the part
1082  // (i.o.w.: toAlignSequence, alignTo and toAlignChksum refer to the partial sequence)
1083  AlignBuffer alignBuffer(max_seq_length);
1084  if (ali_params.range.start()>0) {
1085  alignBuffer.set(GAP_CHAR, alignQuality(GAP_CHAR, GAP_CHAR), ali_params.range.start());
1086  }
1087 
1088  const char *master_name = read_name(gb_alignTo);
1089 
1090  FastAlignReport report(master_name, ali_params.showGapsMessages);
1091 
1092  {
1093  static GBDATA *last_gb_toAlign = NULp;
1094  if (gb_toAlign!=last_gb_toAlign) {
1095  last_gb_toAlign = gb_toAlign;
1096  currentSequenceNumber++;
1097  }
1098  }
1099 
1100 #ifdef TRACE_COMPRESSED_ALIGNMENT
1101  printf("alignCompactedTo(): master='%s' ", master_name);
1102  printf("slave='%s'\n", toAlignSequence->name());
1103 #endif // TRACE_COMPRESSED_ALIGNMENT
1104 
1105  ARB_ERROR error = GB_pop_transaction(gb_toAlign);
1106  if (!error) {
1107  if (island_hopper) {
1108  error = island_hopper->do_align();
1109  if (!error) {
1110  fa_assert(island_hopper->was_aligned());
1111 
1112 #ifdef TRACE_ISLANDHOPPER_DATA
1113  printf("Island-Hopper returns:\n");
1114  printf("maligned = '%s'\n", lstr(island_hopper->get_result_ref(), island_hopper->get_result_length()));
1115  printf("saligned = '%s'\n", lstr(island_hopper->get_result(), island_hopper->get_result_length()));
1116 #endif // TRACE_ISLANDHOPPER_DATA
1117 
1118  SequencePosition masterPos(alignTo->sequence(), 0);
1119  SequencePosition slavePos(*toAlignSequence, 0);
1120 
1121  error = insertClustalValigned(&alignBuffer, masterPos, slavePos, island_hopper->get_result_ref(), island_hopper->get_result(), island_hopper->get_result_length(), &report);
1122  }
1123  }
1124  else {
1125  error = alignTo->fast_align(*toAlignSequence, &alignBuffer, max_seq_length, 2, -10, &report); // <- here we align
1126  }
1127  }
1128 
1129  if (!error) {
1130  alignBuffer.correctUnalignedPositions();
1131  if (alignBuffer.free()) {
1132  alignBuffer.set('-', alignQuality(GAP_CHAR, GAP_CHAR), alignBuffer.free()); // rest of alignBuffer is set to '-'
1133  }
1134  alignBuffer.restoreDots(*toAlignSequence);
1135  }
1136 
1137 #ifdef TRACE_COMPRESSED_ALIGNMENT
1138  if (!error) {
1139  CompactedSubSequence alignedSlave(alignBuffer.text(), alignBuffer.length(), toAlignSequence->name());
1140  dump_n_compare("reference vs. aligned:", alignTo->sequence(), alignedSlave);
1141  }
1142 #endif // TRACE_COMPRESSED_ALIGNMENT
1143 
1144  {
1145  GB_ERROR err = GB_push_transaction(gb_toAlign);
1146  if (!error) error = err;
1147  }
1148 
1149  if (!error) {
1150  if (calcSequenceChecksum(alignBuffer.text(), alignBuffer.length())!=toAlignChksum) { // sequence-chksum changed
1151  error = "Internal aligner error (sequence checksum changed) -- aborted";
1152 
1153 #ifdef TRACE_COMPRESSED_ALIGNMENT
1154  CompactedSubSequence alignedSlave(alignBuffer.text(), alignBuffer.length(), toAlignSequence->name());
1155  dump_n_compare("Old Slave vs. new Slave", *toAlignSequence, alignedSlave);
1156 #endif // TRACE_COMPRESSED_ALIGNMENT
1157  }
1158  else {
1159  {
1160  GB_topSecurityLevel unsecured(gb_toAlign);
1161  GBDATA *gbd = GBT_add_data(gb_toAlign, alignment, "data", GB_STRING);
1162 
1163  if (!gbd) {
1164  error = "Can't find/create sequence data";
1165  }
1166  else {
1167  if (ali_params.range.is_part()) { // we aligned just a part of the sequence
1168  char *buffer = GB_read_string(gbd); // so we read old sequence data
1169  long len = GB_read_string_count(gbd);
1170  if (!buffer) error = GB_await_error();
1171  else {
1172  int lenToCopy = ali_params.range.size();
1173  long wholeChksum = calcSequenceChecksum(buffer, len);
1174 
1175  memcpy(buffer+ali_params.range.start(), alignBuffer.text()+ali_params.range.start(), lenToCopy); // copy in the aligned part
1176  // @@@ genau um 1 position zu niedrig
1177 
1178  if (calcSequenceChecksum(buffer, len) != wholeChksum) {
1179  error = "Internal aligner error (sequence checksum changed) -- aborted";
1180 # ifdef TRACE_COMPRESSED_ALIGNMENT
1181  char *buffer_org = GB_read_string(gbd);
1182  dump_n_compare("Old seq vs. new seq (slave)", buffer_org, len, buffer, len);
1183  free(buffer_org);
1184 # endif // TRACE_COMPRESSED_ALIGNMENT
1185  }
1186  else {
1187  GB_write_string(gbd, "");
1188  error = GB_write_string(gbd, buffer);
1189  }
1190  }
1191 
1192  free(buffer);
1193  }
1194  else {
1195  alignBuffer.setDotsAtEOSequence();
1196  error = GB_write_string(gbd, alignBuffer.text()); // aligned all -> write all
1197  }
1198  }
1199  }
1200 
1201  if (!error && ali_params.report != FA_NO_REPORT) {
1202  // create temp-entry for slave containing alignment quality:
1203 
1204  error = writeStringToAlignment(gb_toAlign, alignment, QUALITY_NAME, alignBuffer.quality(), ali_params.report==FA_TEMP_REPORT);
1205  if (!error) {
1206  // create temp-entry for master containing insert dots:
1207 
1208  int buflen = max_seq_length*2;
1209  char *buffer = ARB_alloc<char>(buflen+1);
1210  char *afterLast = buffer;
1211 
1212  if (!buffer) {
1213  error = "out of memory";
1214  }
1215  else {
1216  memset(buffer, '-', buflen);
1217  buffer[buflen] = 0;
1218 
1219  const FastAlignInsertion *inserts = report.insertion();
1220  while (inserts) {
1221  memset(buffer+inserts->offset(), '>', inserts->gaps());
1222  afterLast = buffer+inserts->offset()+inserts->gaps();
1223  inserts = inserts->next();
1224  }
1225 
1226  *afterLast = 0;
1227  if (gb_alignTo) {
1228  error = writeStringToAlignment(gb_alignTo, alignment, INSERTS_NAME, buffer, ali_params.report==FA_TEMP_REPORT);
1229  }
1230  }
1231  }
1232  }
1233  }
1234  }
1235  return error;
1236 }
1237 
1238 ARB_ERROR FastAligner_delete_temp_entries(GBDATA *gb_species, const char *alignment) {
1239  fa_assert(gb_species);
1240  fa_assert(alignment);
1241 
1242  GBDATA *gb_ali = GB_search(gb_species, alignment, GB_FIND);
1243  ARB_ERROR error = NULp;
1244 
1245  if (gb_ali) {
1246  GBDATA *gb_name = GB_search(gb_ali, QUALITY_NAME, GB_FIND);
1247  if (gb_name) {
1248  error = GB_delete(gb_name);
1249 #if defined(DEBUG)
1250  printf("deleted '%s' of '%s' (gb_name=%p)\n", QUALITY_NAME, GBT_get_name_or_description(gb_species), gb_name);
1251 #endif
1252  }
1253 
1254  if (!error) {
1255  gb_name = GB_search(gb_ali, INSERTS_NAME, GB_FIND);
1256  if (gb_name) {
1257  error = GB_delete(gb_name);
1258 #if defined(DEBUG)
1259  printf("deleted '%s' of '%s' (gb_name=%p)\n", INSERTS_NAME, GBT_get_name_or_description(gb_species), gb_name);
1260 #endif
1261  }
1262  }
1263  }
1264 
1265  return error;
1266 }
1267 
1268 static ARB_ERROR align_error(ARB_ERROR olderr, GBDATA *gb_toAlign, GBDATA *gb_alignTo) {
1269  // used by alignTo() and alignToNextRelative() to transform errors coming from subroutines
1270  // can be used by higher functions
1271 
1272  const char *name_toAlign = read_name(gb_toAlign);
1273  const char *name_alignTo = read_name(gb_alignTo);
1274 
1275  fa_assert(olderr);
1276 
1277  return GBS_global_string("Error while aligning '%s' to '%s':\n%s",
1278  name_toAlign, name_alignTo, olderr.deliver());
1279 }
1280 
1281 static ARB_ERROR alignTo(GBDATA *gb_toAlign,
1282  GB_CSTR alignment,
1283  const FastSearchSequence *alignTo,
1284  GBDATA *gb_alignTo, // may be NULp
1285  int max_seq_length,
1286  const AlignParams& ali_params)
1287 {
1288  ARB_ERROR error = NULp;
1289  long chksum = -1;
1290 
1291  CompactedSubSequence *toAlignSequence = readCompactedSequence(gb_toAlign, alignment, &error, NULp, &chksum, ali_params.range);
1292 
1293  if (island_hopper) {
1294  GBDATA *gb_seq = GBT_find_sequence(gb_toAlign, alignment); // get sequence
1295  if (gb_seq) {
1296  long length = GB_read_string_count(gb_seq);
1297  const char *data = GB_read_char_pntr(gb_seq);
1298 
1299  island_hopper->set_toAlign_sequence(data);
1300  island_hopper->set_alignment_length(length);
1301  }
1302  }
1303 
1304 
1305 
1306  if (!error) {
1307  error = alignCompactedTo(toAlignSequence, alignTo, max_seq_length, alignment, chksum, gb_toAlign, gb_alignTo, ali_params);
1308  if (error) error = align_error(error, gb_toAlign, gb_alignTo);
1309  delete toAlignSequence;
1310  }
1311 
1312  return error;
1313 }
1314 
1316  GB_CSTR alignment,
1317  Aligner_get_consensus_func get_consensus,
1318  int max_seq_length,
1319  const AlignParams& ali_params)
1320 {
1321  ARB_ERROR error = NULp;
1322  char *consensus = get_consensus(read_name(gb_toAlign), ali_params.range);
1323  size_t cons_len = strlen(consensus);
1324 
1325  fa_assert(cons_len);
1326 
1327  for (size_t i = 0; i<cons_len; ++i) { // translate consensus to be accepted by aligner
1328  switch (consensus[i]) {
1329  case '=': consensus[i] = '-'; break;
1330  default: break;
1331  }
1332  }
1333 
1334  CompactedSubSequence compacted(consensus, cons_len, "group consensus", ali_params.range.start());
1335 
1336  {
1337  FastSearchSequence fast(compacted);
1338  error = alignTo(gb_toAlign, alignment, &fast, NULp, max_seq_length, ali_params);
1339  }
1340 
1341  free(consensus);
1342 
1343  return error;
1344 }
1345 
1346 static void appendNameAndUsedBasePositions(char **toString, GBDATA *gb_species, int usedBasePositions) {
1347  // if usedBasePositions == -1 -> unknown;
1348 
1349  char *currInfo;
1350  if (usedBasePositions<0) {
1351  currInfo = strdup(GBT_get_name_or_description(gb_species));
1352  }
1353  else {
1354  fa_assert(usedBasePositions>0); // otherwise it should NOT be announced here!
1355  currInfo = GBS_global_string_copy("%s:%i", GBT_get_name_or_description(gb_species), usedBasePositions);
1356  }
1357 
1358  char *newString = NULp;
1359  if (*toString) {
1360  newString = GBS_global_string_copy("%s, %s", *toString, currInfo);
1361  }
1362  else {
1363  newString = currInfo;
1364  currInfo = NULp; // don't free
1365  }
1366 
1367  freeset(*toString, newString);
1368  free(currInfo);
1369 }
1370 
1371 inline int min(int i, int j) { return i<j ? i : j; }
1372 
1374  int max_seq_length,
1375  FA_turn turnAllowed,
1376  GB_CSTR alignment,
1377  GBDATA *gb_toAlign,
1378  const AlignParams& ali_params)
1379 {
1380  CompactedSubSequence *toAlignSequence = NULp;
1381  bool use_different_pt_server_alignment = 0 != strcmp(relSearch.pt_server_alignment, alignment);
1382 
1383  ARB_ERROR error;
1384  long chksum;
1385  int relativesToTest = relSearch.maxRelatives*2; // get more relatives from pt-server (needed when use_different_pt_server_alignment == true)
1386  char **nearestRelative = new char*[relativesToTest+1];
1387  int next_relatives;
1388  int i;
1389  GBDATA *gb_main = GB_get_root(gb_toAlign);
1390 
1391  if (use_different_pt_server_alignment) {
1392  turnAllowed = FA_TURN_NEVER; // makes no sense if we're using a different alignment for the pt_server
1393  }
1394 
1395  for (next_relatives=0; next_relatives<relativesToTest; next_relatives++) {
1396  nearestRelative[next_relatives] = NULp;
1397  }
1398  next_relatives = 0;
1399 
1400  bool restart = true;
1401  while (restart) {
1402  restart = false;
1403 
1404  char *findRelsBySeq = NULp;
1405  if (use_different_pt_server_alignment) {
1406  toAlignSequence = readCompactedSequence(gb_toAlign, alignment, &error, NULp, &chksum, ali_params.range);
1407 
1408  GBDATA *gbd = GBT_find_sequence(gb_toAlign, relSearch.pt_server_alignment); // use a different alignment for next relative search
1409  if (!gbd) {
1410  error = GBS_global_string("Species '%s' has no data in alignment '%s'", GBT_get_name_or_description(gb_toAlign), relSearch.pt_server_alignment);
1411  }
1412  else {
1413  findRelsBySeq = GB_read_string(gbd);
1414  }
1415  }
1416  else {
1417  toAlignSequence = readCompactedSequence(gb_toAlign, alignment, &error, &findRelsBySeq, &chksum, ali_params.range);
1418  }
1419 
1420  if (error) {
1421  delete toAlignSequence;
1422  return error; // @@@ leaks ?
1423  }
1424 
1425  while (next_relatives) {
1426  next_relatives--;
1427  freenull(nearestRelative[next_relatives]);
1428  }
1429 
1430  {
1431  // find relatives
1432  FamilyFinder *familyFinder = relSearch.getFamilyFinder();
1433  const PosRange& range = familyFinder->get_TargetRange();
1434 
1435  if (range.is_part()) {
1436  range.copy_corresponding_part(findRelsBySeq, findRelsBySeq, strlen(findRelsBySeq));
1437  turnAllowed = FA_TURN_NEVER; // makes no sense if we're using partial relative search
1438  }
1439 
1440  error = familyFinder->searchFamily(findRelsBySeq, FF_FORWARD, relativesToTest+1, 0); // @@@ make min_score configurable
1441 
1442  // @@@ case where no relative found (due to min score) handle how ? abort ? warn ?
1443 
1444  double bestScore = 0;
1445  if (!error) {
1446 #if defined(DEBUG)
1447  double lastScore = -1;
1448 #if defined(TRACE_RELATIVES)
1449  fprintf(stderr, "List of relatives used for '%s':\n", GBT_get_name_or_description(gb_toAlign));
1450 #endif // TRACE_RELATIVES
1451 #endif // DEBUG
1452  for (const FamilyList *fl = familyFinder->getFamilyList(); fl; fl=fl->next) {
1453  if (strcmp(toAlignSequence->name(), fl->name)!=0) {
1454  if (GBT_find_species(gb_main, fl->name)) { // @@@
1455  double thisScore = familyFinder->uses_rel_matches() ? fl->rel_matches : fl->matches;
1456 #if defined(DEBUG)
1457  // check whether family list is sorted correctly
1458  fa_assert(lastScore < 0 || lastScore >= thisScore);
1459  lastScore = thisScore;
1460 #if defined(TRACE_RELATIVES)
1461  fprintf(stderr, "- %s (%5.2f)\n", fl->name, thisScore);
1462 #endif // TRACE_RELATIVES
1463 #endif // DEBUG
1464  if (thisScore>=bestScore) bestScore = thisScore;
1465  if (next_relatives<(relativesToTest+1)) {
1466  nearestRelative[next_relatives] = strdup(fl->name);
1467  next_relatives++;
1468  }
1469  }
1470  }
1471  }
1472  }
1473 
1474  if (!error && turnAllowed != FA_TURN_NEVER) { // test if mirrored sequence has better relatives
1475  char *mirroredSequence = strdup(findRelsBySeq);
1476  long length = strlen(mirroredSequence);
1477  double bestMirroredScore = 0;
1478 
1479  char T_or_U;
1480  error = GBT_determine_T_or_U(global_alignmentType, &T_or_U, "reverse-complement");
1481  if (!error) {
1482  GBT_reverseComplementNucSequence(mirroredSequence, length, T_or_U);
1483  error = familyFinder->searchFamily(mirroredSequence, FF_FORWARD, relativesToTest+1, 0); // @@@ make min_score configurable
1484  }
1485  if (!error) {
1486 #if defined(DEBUG)
1487  double lastScore = -1;
1488 #if defined(TRACE_RELATIVES)
1489  fprintf(stderr, "List of relatives used for '%s' (turned seq):\n", GBT_get_name_or_description(gb_toAlign));
1490 #endif // TRACE_RELATIVES
1491 #endif // DEBUG
1492  for (const FamilyList *fl = familyFinder->getFamilyList(); fl; fl = fl->next) {
1493  double thisScore = familyFinder->uses_rel_matches() ? fl->rel_matches : fl->matches;
1494 #if defined(DEBUG)
1495  // check whether family list is sorted correctly
1496  fa_assert(lastScore < 0 || lastScore >= thisScore);
1497  lastScore = thisScore;
1498 #if defined(TRACE_RELATIVES)
1499  fprintf(stderr, "- %s (%5.2f)\n", fl->name, thisScore);
1500 #endif // TRACE_RELATIVES
1501 #endif // DEBUG
1502  if (thisScore >= bestMirroredScore) {
1503  if (strcmp(toAlignSequence->name(), fl->name)!=0) {
1504  if (GBT_find_species(gb_main, fl->name)) bestMirroredScore = thisScore; // @@@
1505  }
1506  }
1507  }
1508  }
1509 
1510  int turnIt = 0;
1511  if (bestMirroredScore>bestScore) {
1512  if (turnAllowed==FA_TURN_INTERACTIVE) {
1513  const char *message;
1514  if (familyFinder->uses_rel_matches()) {
1515  message = GBS_global_string("'%s' seems to be the other way round (score: %.1f%%, score if turned: %.1f%%)",
1516  toAlignSequence->name(), bestScore*100, bestMirroredScore*100);
1517  }
1518  else {
1519  message = GBS_global_string("'%s' seems to be the other way round (score: %li, score if turned: %li)",
1520  toAlignSequence->name(), long(bestScore+.5), long(bestMirroredScore+.5));
1521  }
1522  turnIt = aw_question("fastali_turn_sequence", message, "Turn sequence,Leave sequence alone")==0;
1523  }
1524  else {
1525  fa_assert(turnAllowed == FA_TURN_ALWAYS);
1526  turnIt = 1;
1527 #if defined(TRACE_RELATIVES)
1528  fprintf(stderr, "Using turned sequence!\n");
1529 #endif // TRACE_RELATIVES
1530  }
1531  }
1532 
1533  if (turnIt) { // write mirrored sequence
1534  GBDATA *gbd = GBT_find_sequence(gb_toAlign, alignment);
1535  GB_topSecurityLevel unsecured(gbd);
1536  error = GB_write_string(gbd, mirroredSequence);
1537  if (!error) {
1538  delete toAlignSequence;
1539  restart = true; // continue while loop
1540  }
1541  }
1542 
1543  free(mirroredSequence);
1544  }
1545  }
1546  free(findRelsBySeq);
1547  }
1548 
1549  if (!error) {
1550  if (!next_relatives) {
1551  char warning[200];
1552  sprintf(warning, "No relative found for '%s'", toAlignSequence->name());
1553  aw_message(warning);
1554  }
1555  else {
1556  // assuming relatives are sorted! (nearest to farthest)
1557 
1558  // get data pointers
1559  typedef GBDATA *GBDATAP;
1560  GBDATAP *gb_reference = new GBDATAP[relSearch.maxRelatives];
1561 
1562  {
1563  for (i=0; i<relSearch.maxRelatives && i<next_relatives; i++) {
1564  GBDATA *gb_species = GBT_find_species(gb_main, nearestRelative[i]);
1565  if (!gb_species) { // pt-server seems not up to date!
1566  error = species_not_found(nearestRelative[i]);
1567  break;
1568  }
1569 
1570  GBDATA *gb_sequence = GBT_find_sequence(gb_species, alignment);
1571  if (gb_sequence) { // if relative has sequence data in the current alignment ..
1572  gb_reference[i] = gb_species;
1573  }
1574  else { // remove this relative
1575  free(nearestRelative[i]);
1576  for (int j = i+1; j<next_relatives; ++j) {
1577  nearestRelative[j-1] = nearestRelative[j];
1578  }
1579  next_relatives--;
1580  nearestRelative[next_relatives] = NULp;
1581  i--; // redo same index
1582  }
1583  }
1584 
1585  // delete superfluous relatives
1586  for (; i<next_relatives; ++i) freenull(nearestRelative[i]);
1587 
1588  if (next_relatives>relSearch.maxRelatives) {
1589  next_relatives = relSearch.maxRelatives;
1590  }
1591  }
1592 
1593  // align
1594 
1595  if (!error) {
1596  CompactedSubSequence *alignToSequence = readCompactedSequence(gb_reference[0], alignment, &error, NULp, NULp, ali_params.range);
1597  fa_assert(alignToSequence);
1598 
1599  if (island_hopper) {
1600  GBDATA *gb_ref = GBT_find_sequence(gb_reference[0], alignment); // get reference sequence
1601  GBDATA *gb_align = GBT_find_sequence(gb_toAlign, alignment); // get sequence to align
1602 
1603  if (gb_ref && gb_align) {
1604  long length_ref = GB_read_string_count(gb_ref);
1605  const char *data;
1606 
1607  data = GB_read_char_pntr(gb_ref);
1608  island_hopper->set_ref_sequence(data);
1609 
1610  data = GB_read_char_pntr(gb_align);
1611  island_hopper->set_toAlign_sequence(data);
1612 
1613  island_hopper->set_alignment_length(length_ref);
1614  }
1615  }
1616 
1617  {
1618  FastSearchSequence referenceFastSeq(*alignToSequence);
1619 
1620  error = alignCompactedTo(toAlignSequence, &referenceFastSeq,
1621  max_seq_length, alignment, chksum,
1622  gb_toAlign, gb_reference[0], ali_params);
1623  }
1624 
1625  if (error) {
1626  error = align_error(error, gb_toAlign, gb_reference[0]);
1627  }
1628  else {
1629  char *used_relatives = NULp;
1630 
1631  if (unaligned_bases.is_empty()) {
1632  appendNameAndUsedBasePositions(&used_relatives, gb_reference[0], -1);
1633  }
1634  else {
1635  if (island_hopper) {
1636  appendNameAndUsedBasePositions(&used_relatives, gb_reference[0], -1);
1637  if (next_relatives>1) error = "Island hopping uses only one relative";
1638  }
1639  else {
1640  LooseBases loose;
1641  LooseBases loose_for_next_relative;
1642 
1643  int unaligned_positions;
1644  {
1645  CompactedSubSequence *alignedSequence = readCompactedSequence(gb_toAlign, alignment, &error, NULp, NULp, ali_params.range);
1646 
1647  unaligned_positions = loose.follow_ali_change_and_append(unaligned_bases, AliChange(*toAlignSequence, *alignedSequence));
1648  // now loose holds the unaligned (and recalculated) parts from last relative
1649  delete alignedSequence;
1650  }
1651 
1652  if (!error) {
1653  int toalign_positions = toAlignSequence->length();
1654  if (unaligned_positions<toalign_positions) {
1655  appendNameAndUsedBasePositions(&used_relatives, gb_reference[0], toalign_positions-unaligned_positions);
1656  }
1657  }
1658 
1659  for (i=1; i<next_relatives && !error; i++) {
1660  loose.append(loose_for_next_relative);
1661  int unaligned_positions_for_next = 0;
1662 
1663  while (!loose.is_empty() && !error) {
1664  ExplicitRange partRange(intersection(loose.recall(), ali_params.range));
1665  CompactedSubSequence *alignToPart = readCompactedSequence(gb_reference[i], alignment, &error, NULp, NULp, partRange);
1666 
1667  if (!error) {
1668  long part_chksum;
1669  CompactedSubSequence *toAlignPart = readCompactedSequence(gb_toAlign, alignment, &error, NULp, &part_chksum, partRange);
1670 
1671  fa_assert(contradicted(error, toAlignPart));
1672 
1673  if (!error) {
1674  AlignParams loose_ali_params = { ali_params.report, ali_params.showGapsMessages, partRange };
1675 
1676  FastSearchSequence referenceFastSeq(*alignToPart);
1677  error = alignCompactedTo(toAlignPart, &referenceFastSeq,
1678  max_seq_length, alignment, part_chksum,
1679  gb_toAlign, gb_reference[i], loose_ali_params);
1680  if (!error) {
1681  CompactedSubSequence *alignedPart = readCompactedSequence(gb_toAlign, alignment, &error, NULp, NULp, partRange);
1682  if (!error) {
1683  unaligned_positions_for_next += loose_for_next_relative.follow_ali_change_and_append(unaligned_bases, AliChange(*toAlignPart, *alignedPart));
1684  }
1685  delete alignedPart;
1686  }
1687  }
1688  delete toAlignPart;
1689  }
1690  delete alignToPart;
1691  }
1692 
1693  if (!error) {
1694  fa_assert(unaligned_positions_for_next <= unaligned_positions); // means: number of unaligned positions has increased by use of relative
1695  if (unaligned_positions_for_next<unaligned_positions) {
1696  appendNameAndUsedBasePositions(&used_relatives, gb_reference[i], unaligned_positions-unaligned_positions_for_next);
1697  unaligned_positions = unaligned_positions_for_next;
1698  }
1699  }
1700  }
1701  }
1702  }
1703 
1704  if (!error) { // write used relatives to db-field
1705  error = GBT_write_string(gb_toAlign, "used_rels", used_relatives);
1706  }
1707  free(used_relatives);
1708  }
1709 
1710  delete alignToSequence;
1711  }
1712 
1713  delete [] gb_reference;
1714  }
1715  }
1716 
1717  delete toAlignSequence;
1718 
1719  for (i=0; i<next_relatives; i++) freenull(nearestRelative[i]);
1720  delete [] nearestRelative;
1721 
1722  return error;
1723 }
1724 
1725 // ------------------------
1726 // AlignmentReference
1727 
1729  GB_CSTR alignment;
1730  int max_seq_length;
1731  const AlignParams& ali_params;
1732 
1733 public:
1735  int max_seq_length_,
1736  const AlignParams& ali_params_)
1737  : alignment(alignment_),
1738  max_seq_length(max_seq_length_),
1739  ali_params(ali_params_)
1740  {}
1741  virtual ~AlignmentReference() {}
1742 
1743  virtual ARB_ERROR align_to(GBDATA *gb_toalign) const = 0;
1744 
1745  GB_CSTR get_alignment() const { return alignment; }
1746  int get_max_seq_length() const { return max_seq_length; }
1747  const AlignParams& get_ali_params() const { return ali_params; }
1748 };
1749 
1750 
1751 // @@@ make alignTo a member of ExplicitReference (or of AlignmentReference)
1752 // @@@ let alignToGroupConsensus and alignToNextRelative use ExplicitReference
1753 
1754 
1755 class ExplicitReference: public AlignmentReference { // derived from a Noncopyable
1756  const FastSearchSequence *targetSequence;
1757  GBDATA *gb_alignTo;
1758 
1759 public:
1761  const FastSearchSequence *targetSequence_,
1762  GBDATA *gb_alignTo_,
1763  int max_seq_length_,
1764  const AlignParams& ali_params_)
1765  : AlignmentReference(alignment_, max_seq_length_, ali_params_),
1766  targetSequence(targetSequence_),
1767  gb_alignTo(gb_alignTo_)
1768  {}
1769 
1770  ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE {
1771  return alignTo(gb_toalign, get_alignment(), targetSequence, gb_alignTo, get_max_seq_length(), get_ali_params());
1772  }
1773 };
1774 
1775 // @@@ make alignToGroupConsensus a member of ConsensusReference
1776 
1778  Aligner_get_consensus_func get_consensus;
1779 
1780 public:
1782  Aligner_get_consensus_func get_consensus_,
1783  int max_seq_length_,
1784  const AlignParams& ali_params_)
1785  : AlignmentReference(alignment_, max_seq_length_, ali_params_),
1786  get_consensus(get_consensus_)
1787  {}
1788 
1789  ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE {
1790  return alignToGroupConsensus(gb_toalign, get_alignment(), get_consensus, get_max_seq_length(), get_ali_params());
1791  }
1792 };
1793 
1794 // @@@ make alignToNextRelative a member of SearchRelativesReference
1795 
1797  SearchRelativeParams& relSearch;
1798  FA_turn turnAllowed;
1799 
1800 public:
1802  int max_seq_length_,
1803  FA_turn turnAllowed_,
1804  GB_CSTR alignment_,
1805  const AlignParams& ali_params_)
1806  : AlignmentReference(alignment_, max_seq_length_, ali_params_),
1807  relSearch(relSearch_),
1808  turnAllowed(turnAllowed_)
1809  {}
1810 
1811  ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE {
1812  return alignToNextRelative(relSearch, get_max_seq_length(), turnAllowed, get_alignment(), gb_toalign, get_ali_params());
1813  }
1814 };
1815 
1816 
1817 // ----------------
1818 // Aligner
1819 
1820 class Aligner : virtual Noncopyable {
1821  GBDATA *gb_main;
1822 
1823  // define alignment target(s):
1824  FA_alignTarget alignWhat;
1825  GB_CSTR alignment; // name of alignment to use
1826  GB_CSTR toalign; // name of species to align (used if alignWhat == FA_CURRENT)
1827  Aligner_get_first_selected_species get_first_selected_species; // used if alignWhat == FA_SELECTED
1828  Aligner_get_next_selected_species get_next_selected_species; // --- "" ---
1829 
1830  // define reference sequence(s):
1831  GB_CSTR reference; // name of reference species
1832  Aligner_get_consensus_func get_consensus; // function to get consensus seq
1833  SearchRelativeParams& relSearch; // params to search for relatives
1834 
1835  // general params:
1836  FA_turn turnAllowed;
1837  const AlignParams& ali_params;
1838  int maxProtection; // protection level
1839 
1840  // -------------------- new members
1841  int wasNotAllowedToAlign; // number of failures caused by wrong protection
1842  int err_count; // count errors
1843  bool continue_on_error; /* true -> run single alignments in separate transactions.
1844  * If one target fails, continue with rest.
1845  * false -> run all in one transaction
1846  * One fails -> all fail!
1847  */
1848  FA_errorAction error_action;
1849 
1850  typedef std::list<GBDATA*> GBDATAlist;
1851  GBDATAlist species_to_mark; // species that will be marked after aligning
1852 
1853  ARB_ERROR alignToReference(GBDATA *gb_toalign, const AlignmentReference& ref);
1854  ARB_ERROR alignTargetsToReference(const AlignmentReference& ref, GBDATA *gb_species_data);
1855 
1856  ARB_ERROR alignToExplicitReference(GBDATA *gb_species_data, int max_seq_length);
1857  ARB_ERROR alignToConsensus(GBDATA *gb_species_data, int max_seq_length);
1858  ARB_ERROR alignToRelatives(GBDATA *gb_species_data, int max_seq_length);
1859 
1860  void triggerAction(GBDATA *gb_species, bool has_been_aligned) {
1861  bool mark = false;
1862  switch (error_action) {
1863  case FA_MARK_FAILED: mark = !has_been_aligned; break;
1864  case FA_MARK_ALIGNED: mark = has_been_aligned; break;
1865  case FA_NO_ACTION: mark = false; break;
1866  }
1867  if (mark) species_to_mark.push_back(gb_species);
1868  }
1869 
1870 public:
1871 
1872  // @@@ pass AlignmentReference from caller (replacing reference parameters)
1873 
1874  Aligner(GBDATA *gb_main_,
1875 
1876  // define alignment target(s):
1877  FA_alignTarget alignWhat_,
1878  GB_CSTR alignment_, // name of alignment to use
1879  GB_CSTR toalign_, // name of species to align (used if alignWhat == FA_CURRENT)
1880  Aligner_get_first_selected_species get_first_selected_species_, // used if alignWhat == FA_SELECTED
1881  Aligner_get_next_selected_species get_next_selected_species_, // --- "" ---
1882 
1883  // define reference sequence(s):
1884  GB_CSTR reference_, // name of reference species
1885  Aligner_get_consensus_func get_consensus_, // function to get consensus seq
1886  SearchRelativeParams& relSearch_, // params to search for relatives
1887 
1888  // general params:
1889  FA_turn turnAllowed_,
1890  const AlignParams& ali_params_,
1891  int maxProtection_, // protection level
1892  bool continue_on_error_,
1893  FA_errorAction error_action_)
1894  : gb_main(gb_main_),
1895  alignWhat(alignWhat_),
1896  alignment(alignment_),
1897  toalign(toalign_),
1898  get_first_selected_species(get_first_selected_species_),
1899  get_next_selected_species(get_next_selected_species_),
1900  reference(reference_),
1901  get_consensus(get_consensus_),
1902  relSearch(relSearch_),
1903  turnAllowed(turnAllowed_),
1904  ali_params(ali_params_),
1905  maxProtection(maxProtection_),
1906  wasNotAllowedToAlign(0),
1907  err_count(0),
1908  continue_on_error(continue_on_error_),
1909  error_action(continue_on_error ? error_action_ : FA_NO_ACTION)
1910  {
1911  gb_assert(alignment);
1912  }
1913 
1914  ARB_ERROR run();
1915 };
1916 
1917 ARB_ERROR Aligner::alignToReference(GBDATA *gb_toalign, const AlignmentReference& ref) {
1918  int myProtection;
1919  ARB_ERROR error;
1920  {
1921  GBDATA *gb_seq = GBT_find_sequence(gb_toalign, alignment);
1922  // if sequence not found (e.g. wrong alignment) => assume protection is low + let align_to() fail below
1923  myProtection = gb_seq ? GB_read_security_write(gb_seq) : 0;
1924  }
1925 
1926  if (myProtection<=maxProtection) {
1927  // Depending on 'continue_on_error' we either
1928  // * stop aligning if an error occurs or
1929  // * run the alignment of each species in its own transaction, ignore but report errors and
1930  // optionally mark aligned or failed species.
1931 
1932  if (continue_on_error) {
1933  fa_assert(GB_get_transaction_level(gb_main) == 1);
1934  error = GB_end_transaction(gb_main, error); // end global transaction
1935  }
1936 
1937  if (!error) {
1938  error = GB_push_transaction(gb_main);
1939  if (!error) error = ref.align_to(gb_toalign);
1940  error = GB_end_transaction(gb_main, error);
1941 
1942  if (error) err_count++;
1943  triggerAction(gb_toalign, !error);
1944  }
1945 
1946  if (continue_on_error) {
1947  if (error) {
1948  GB_warning(error.deliver());
1949  error = NULp;
1950  }
1951  error = GB_begin_transaction(gb_main); // re-open global transaction
1952  }
1953  }
1954  else {
1955  wasNotAllowedToAlign++;
1956  triggerAction(gb_toalign, false);
1957  }
1958 
1959  return error;
1960 }
1961 
1962 ARB_ERROR Aligner::alignTargetsToReference(const AlignmentReference& ref, GBDATA *gb_species_data) {
1963  ARB_ERROR error;
1964 
1965  fa_assert(GB_get_transaction_level(gb_main) == 1);
1966 
1967  switch (alignWhat) {
1968  case FA_CURRENT: { // align one sequence
1969  fa_assert(toalign);
1970 
1971  GBDATA *gb_toalign = GBT_find_species_rel_species_data(gb_species_data, toalign);
1972  if (!gb_toalign) {
1973  error = species_not_found(toalign);
1974  }
1975  else {
1976  currentSequenceNumber = overallSequenceNumber = 1;
1977  error = alignToReference(gb_toalign, ref);
1978  }
1979  break;
1980  }
1981  case FA_MARKED: { // align all marked sequences
1982  int count = GBT_count_marked_species(gb_main);
1983  GBDATA *gb_species = GBT_first_marked_species_rel_species_data(gb_species_data);
1984 
1985  arb_progress progress("Aligning marked species", long(count));
1986  progress.auto_subtitles("Species");
1987 
1988  currentSequenceNumber = 1;
1989  overallSequenceNumber = count;
1990 
1991  while (gb_species && !error) {
1992  error = alignToReference(gb_species, ref);
1993  progress.inc_and_check_user_abort(error);
1994  gb_species = GBT_next_marked_species(gb_species);
1995  }
1996  break;
1997  }
1998  case FA_SELECTED: { // align all selected species
1999  int count;
2000  GBDATA *gb_species = get_first_selected_species(&count);
2001 
2002 
2003  currentSequenceNumber = 1;
2004  overallSequenceNumber = count;
2005 
2006  if (!gb_species) {
2007  aw_message("There is no selected species!");
2008  }
2009  else {
2010  arb_progress progress("Aligning selected species", long(count));
2011  progress.auto_subtitles("Species");
2012 
2013  while (gb_species && !error) {
2014  error = alignToReference(gb_species, ref);
2015  progress.inc_and_check_user_abort(error);
2016  gb_species = get_next_selected_species();
2017  }
2018  }
2019  break;
2020  }
2021  }
2022 
2023  fa_assert(GB_get_transaction_level(gb_main) == 1);
2024  return error;
2025 }
2026 
2027 ARB_ERROR Aligner::alignToExplicitReference(GBDATA *gb_species_data, int max_seq_length) {
2028  ARB_ERROR error;
2029  GBDATA *gb_reference = GBT_find_species_rel_species_data(gb_species_data, reference);
2030 
2031  if (!gb_reference) {
2032  error = species_not_found(reference);
2033  }
2034  else {
2035  long referenceChksum;
2036  CompactedSubSequence *referenceSeq = readCompactedSequence(gb_reference, alignment, &error, NULp, &referenceChksum, ali_params.range);
2037 
2038  if (island_hopper) {
2039  // @@@ setting island_hopper reference has to be done in called function (seems that it is NOT done for alignToConsensus and alignToRelatives). First get tests in place!
2040 
2041  GBDATA *gb_seq = GBT_find_sequence(gb_reference, alignment); // get sequence
2042  if (gb_seq) {
2043  long length = GB_read_string_count(gb_seq);
2044  const char *data = GB_read_char_pntr(gb_seq);
2045 
2046  island_hopper->set_ref_sequence(data);
2047  island_hopper->set_alignment_length(length);
2048  }
2049  }
2050 
2051 
2052  if (!error) {
2053  // @@@ do not pass FastSearchSequence to ExplicitReference, instead pass sequence and length (ExplicitReference shall create it itself)
2054 
2055  FastSearchSequence referenceFastSeq(*referenceSeq);
2056  ExplicitReference target(alignment, &referenceFastSeq, gb_reference, max_seq_length, ali_params);
2057 
2058  error = alignTargetsToReference(target, gb_species_data);
2059  }
2060  delete referenceSeq;
2061  }
2062  return error;
2063 }
2064 
2065 ARB_ERROR Aligner::alignToConsensus(GBDATA *gb_species_data, int max_seq_length) {
2066  return alignTargetsToReference(ConsensusReference(alignment, get_consensus, max_seq_length, ali_params),
2067  gb_species_data);
2068 }
2069 
2070 ARB_ERROR Aligner::alignToRelatives(GBDATA *gb_species_data, int max_seq_length) {
2071  return alignTargetsToReference(SearchRelativesReference(relSearch, max_seq_length, turnAllowed, alignment, ali_params),
2072  gb_species_data);
2073 }
2074 
2076  fa_assert(GB_get_transaction_level(gb_main) == 0); // no open transaction allowed here!
2077  ARB_ERROR error = GB_push_transaction(gb_main);
2078 
2079  // if neither 'reference' nor 'get_consensus' are specified -> use next relative for (each) sequence:
2080  bool search_by_pt_server = !reference && !get_consensus;
2081 
2082  err_count = 0;
2083  wasNotAllowedToAlign = 0; // incremented for every sequence which has higher protection level (and was not aligned)
2084  species_to_mark.clear();
2085 
2086  fa_assert(!reference || !get_consensus); // can't do both modes
2087 
2088  if (turnAllowed != FA_TURN_NEVER) {
2089  if ((ali_params.range.is_part()) || !search_by_pt_server) {
2090  // if not selected 'Range/Whole sequence' or not selected 'Reference/Auto search..'
2091  turnAllowed = FA_TURN_NEVER; // then disable mirroring for the current call
2092  }
2093  }
2094 
2095  if (!error) {
2096  global_alignmentType = GBT_get_alignment_type(gb_main, alignment);
2098 
2099  if (search_by_pt_server) {
2100  GB_alignment_type pt_server_alignmentType = GBT_get_alignment_type(gb_main, relSearch.pt_server_alignment);
2101  if (pt_server_alignmentType == GB_AT_UNKNOWN) {
2102  error = GB_append_exportedError("failed to detect type of alignment to use for ptserver-search");
2103  }
2104  else if (pt_server_alignmentType != GB_AT_RNA && pt_server_alignmentType != GB_AT_DNA) {
2105  error = "pt_servers only support RNA/DNA sequences.\n"
2106  "In the aligner window you may specify a RNA/DNA alignment \n"
2107  "and use a pt_server build on that alignment.";
2108  }
2109  }
2110  }
2111 
2112  if (!error) {
2113  GBDATA *gb_species_data = GBT_get_species_data(gb_main);
2114  int max_seq_length = GBT_get_alignment_len(gb_main, alignment);
2115  fa_assert(max_seq_length>0);
2116 
2117  if (reference) error = alignToExplicitReference(gb_species_data, max_seq_length);
2118  else if (get_consensus) error = alignToConsensus(gb_species_data, max_seq_length);
2119  else error = alignToRelatives(gb_species_data, max_seq_length);
2120  }
2121 
2122  ClustalV_exit();
2123  unaligned_bases.clear();
2124 
2125  error = GB_end_transaction(gb_main, error); // close global transaction
2126 
2127  if (wasNotAllowedToAlign>0) {
2128  const char *mess = GBS_global_string("%i species were not aligned (because of protection level)", wasNotAllowedToAlign);
2129  aw_message(mess);
2130  }
2131 
2132  if (err_count) {
2133  aw_message_if(error);
2134  error = GBS_global_string("Aligner produced %i error%c", err_count, err_count==1 ? '\0' : 's');
2135  }
2136 
2137  if (error_action != FA_NO_ACTION) {
2138  fa_assert(continue_on_error);
2139 
2140  GB_transaction ta(gb_main);
2141  GBT_mark_all(gb_main, 0);
2142  for (GBDATAlist::iterator sp = species_to_mark.begin(); sp != species_to_mark.end(); ++sp) {
2143  GB_write_flag(*sp, 1);
2144  }
2145 
2146  const char *whatsMarked = (error_action == FA_MARK_ALIGNED) ? "aligned" : "failed";
2147  size_t markCount = species_to_mark.size();
2148  if (markCount>0) {
2149  const char *msg = GBS_global_string("%zu %s species %s been marked",
2150  markCount,
2151  whatsMarked,
2152  (markCount == 1) ? "has" : "have");
2153  aw_message(msg);
2154  }
2155  }
2156 
2157  return error;
2158 }
2159 
2160 void FastAligner_start(AW_window *aw, const AlignDataAccess *data_access) {
2161  AW_root *root = aw->get_root();
2162  char *reference = NULp; // align against next relatives
2163  char *toalign = NULp; // align marked species
2164  ARB_ERROR error = NULp;
2165  int get_consensus = 0;
2166  int pt_server_id = -1;
2167 
2170 
2172  if (root->awar(FA_AWAR_USE_ISLAND_HOPPING)->read_int()) {
2173  island_hopper = new IslandHopping;
2174  if (root->awar(FA_AWAR_USE_SECONDARY)->read_int()) {
2175  char *helix_string = data_access->getHelixString();
2176  if (helix_string) {
2177  island_hopper->set_helix(helix_string);
2178  free(helix_string);
2179  }
2180  else {
2181  error = "Warning: No HELIX found. Can't use secondary structure";
2182  }
2183  }
2184 
2185  if (!error) {
2186  island_hopper->set_parameters(root->awar(FA_AWAR_ESTIMATE_BASE_FREQ)->read_int(),
2199  root->awar(FA_AWAR_GAP_A)->read_float(),
2200  root->awar(FA_AWAR_GAP_B)->read_float(),
2201  root->awar(FA_AWAR_GAP_C)->read_float(),
2203  );
2204  }
2205  }
2206 
2207  FA_alignTarget alignWhat = static_cast<FA_alignTarget>(root->awar(FA_AWAR_TO_ALIGN)->read_int());
2208  if (!error) {
2209  switch (alignWhat) {
2210  case FA_CURRENT: { // align current species
2211  toalign = root->awar(AWAR_SPECIES_NAME)->read_string();
2212  break;
2213  }
2214  case FA_MARKED: { // align marked species
2215  break;
2216  }
2217  case FA_SELECTED: { // align selected species
2218  get_first_selected_species = data_access->get_first_selected_species;
2219  get_next_selected_species = data_access->get_next_selected_species;
2220  break;
2221  }
2222  default: {
2223  fa_assert(0);
2224  break;
2225  }
2226  }
2227 
2228  switch (static_cast<FA_reference>(root->awar(FA_AWAR_REFERENCE)->read_int())) {
2229  case FA_REF_EXPLICIT: // align against specified species
2230  reference = root->awar(FA_AWAR_REFERENCE_NAME)->read_string();
2231  break;
2232 
2233  case FA_REF_CONSENSUS: // align against group consensus
2234  if (data_access->get_group_consensus) {
2235  get_consensus = 1;
2236  }
2237  else {
2238  error = "Can't get group consensus here.";
2239  }
2240  break;
2241 
2242  case FA_REF_RELATIVES: // align against species searched via pt_server
2243  pt_server_id = root->awar(AWAR_PT_SERVER)->read_int();
2244  if (pt_server_id<0) {
2245  error = "No pt_server selected";
2246  }
2247  break;
2248 
2249  default: fa_assert(0);
2250  break;
2251  }
2252  }
2253 
2254  RangeList ranges;
2255  bool autoRestrictRange4nextRelSearch = true;
2256 
2257  if (!error) {
2258  switch (static_cast<FA_range>(root->awar(FA_AWAR_RANGE)->read_int())) {
2259  case FA_WHOLE_SEQUENCE:
2260  autoRestrictRange4nextRelSearch = false;
2261  ranges.add(PosRange::whole());
2262  break;
2263 
2264  case FA_AROUND_CURSOR: {
2265  int curpos = root->awar(AWAR_CURSOR_POSITION_LOCAL)->read_int();
2266  int size = root->awar(FA_AWAR_AROUND)->read_int();
2267 
2268  ranges.add(PosRange(curpos-size, curpos+size));
2269  break;
2270  }
2271  case FA_SELECTED_RANGE: {
2272  PosRange range;
2273  if (!data_access->get_selected_range(range)) {
2274  error = "There is no selected species!";
2275  }
2276  else {
2277  ranges.add(range);
2278  }
2279  break;
2280  }
2281 
2282  case FA_SAI_MULTI_RANGE: {
2283  GB_transaction ta(data_access->gb_main);
2284 
2285  char *sai_name = root->awar(FA_AWAR_SAI_RANGE_NAME)->read_string();
2286  char *aliuse = GBT_get_default_alignment(data_access->gb_main);
2287  fa_assert(aliuse);
2288 
2289  GBDATA *gb_sai = GBT_expect_SAI(data_access->gb_main, sai_name);
2290  if (!gb_sai) error = GB_await_error();
2291  else {
2292  GBDATA *gb_data = GBT_find_sequence(gb_sai, aliuse);
2293  if (!gb_data) {
2294  error = GB_have_error()
2295  ? GB_await_error()
2296  : GBS_global_string("SAI '%s' has no data in alignment '%s'", sai_name, aliuse);
2297  }
2298  else {
2299  char *sai_data = GB_read_string(gb_data); // @@@ NOT_ALL_SAI_HAVE_DATA
2300  char *set_bits = root->awar(FA_AWAR_SAI_RANGE_CHARS)->read_string();
2301 
2302  ranges = build_RangeList_from_string(sai_data, set_bits, false);
2303 
2304  free(set_bits);
2305  free(sai_data);
2306  }
2307  }
2308  free(aliuse);
2309  free(sai_name);
2310  break;
2311  }
2312  }
2313  }
2314 
2315  if (!error) {
2316  const char *alignment_name = data_access->alignment_name.c_str();
2317  for (RangeList::iterator r = ranges.begin(); r != ranges.end() && !error; ++r) {
2318  PosRange range = *r;
2319 
2320  GBDATA *gb_main = data_access->gb_main;
2321  long alignment_length;
2322  {
2323  GB_transaction ta(gb_main);
2324  alignment_length = GBT_get_alignment_len(gb_main, alignment_name);
2325  fa_assert(alignment_length>0);
2326  }
2327 
2328  char *pt_server_alignment = root->awar(FA_AWAR_PT_SERVER_ALIGNMENT)->read_string();
2329  PosRange relRange = PosRange::whole(); // unrestricted!
2330 
2331  if (autoRestrictRange4nextRelSearch) {
2332  AW_awar *awar_relrange = root->awar(FA_AWAR_RELATIVE_RANGE);
2333  const char *relrange = awar_relrange->read_char_pntr();
2334  if (relrange[0]) {
2335  int region_plus = atoi(relrange);
2336 
2337  fa_assert(range.is_part());
2338 
2339  relRange = PosRange(range.start()-region_plus, range.end()+region_plus); // restricted
2340  awar_relrange->write_as_string(GBS_global_string("%i", region_plus)); // set awar to detected value (avoid misbehavior when it contains ' ' or similar)
2341  }
2342  }
2343 
2344  if (island_hopper) {
2345  island_hopper->set_range(ExplicitRange(range, alignment_length));
2346  range = PosRange::whole();
2347  }
2348 
2349  SearchRelativeParams relSearch(new PT_FamilyFinder(gb_main,
2350  pt_server_id,
2351  root->awar(AWAR_NN_OLIGO_LEN)->read_int(),
2352  root->awar(AWAR_NN_MISMATCHES)->read_int(),
2353  root->awar(AWAR_NN_FAST_MODE)->read_int(),
2354  root->awar(AWAR_NN_REL_MATCHES)->read_int(),
2355  RSS_BOTH_MIN), // old scaling as b4 [8520] @@@ make configurable
2356  pt_server_alignment,
2358 
2359  relSearch.getFamilyFinder()->restrict_2_region(relRange);
2360 
2361  struct AlignParams ali_params = {
2362  static_cast<FA_report>(root->awar(FA_AWAR_REPORT)->read_int()),
2363  bool(root->awar(FA_AWAR_SHOW_GAPS_MESSAGES)->read_int()),
2364  range
2365  };
2366 
2367  {
2368  arb_progress progress("FastAligner");
2369  progress.allow_title_reuse();
2370 
2371  int cont_on_error = root->awar(FA_AWAR_CONTINUE_ON_ERROR)->read_int();
2372 
2373  Aligner aligner(gb_main,
2374  alignWhat,
2375  alignment_name,
2376  toalign,
2377  get_first_selected_species,
2378  get_next_selected_species,
2379 
2380  reference,
2381  get_consensus ? data_access->get_group_consensus : NULp,
2382  relSearch,
2383 
2384  static_cast<FA_turn>(root->awar(FA_AWAR_MIRROR)->read_int()),
2385  ali_params,
2386  root->awar(FA_AWAR_PROTECTION)->read_int(),
2387  cont_on_error,
2389  error = aligner.run();
2390 
2391  if (error && cont_on_error) {
2392  aw_message_if(error);
2393  error = NULp;
2394  }
2395  }
2396 
2397  free(pt_server_alignment);
2398  }
2399  }
2400 
2401  if (island_hopper) {
2402  delete island_hopper;
2403  island_hopper = NULp;
2404  }
2405 
2406  if (toalign) free(toalign);
2407  aw_message_if(error);
2408  if (data_access->do_refresh) data_access->refresh_display();
2409 }
2410 
2411 static void nullcb() { }
2412 
2414  root->awar_string(FA_AWAR_REFERENCE_NAME, "", db1);
2415 
2416  root->awar_int(FA_AWAR_TO_ALIGN, FA_CURRENT, db1);
2419 
2420  AW_awar *ali_protect = root->awar_int(FA_AWAR_PROTECTION, 0, db1);
2421  if (ARB_in_novice_mode(root)) {
2422  ali_protect->write_int(0); // reset protection for noobs
2423  }
2424 
2425  root->awar_int(FA_AWAR_AROUND, 25, db1);
2427  root->awar_int(FA_AWAR_REPORT, FA_NO_REPORT, db1);
2428  root->awar_int(FA_AWAR_SHOW_GAPS_MESSAGES, 1, db1);
2429  root->awar_int(FA_AWAR_CONTINUE_ON_ERROR, 1, db1);
2431  root->awar_int(FA_AWAR_USE_SECONDARY, 0, db1);
2432  root->awar_int(AWAR_PT_SERVER, 0, db1);
2433  root->awar_int(FA_AWAR_NEXT_RELATIVES, 1, db1)->set_minmax(1, 100);
2434 
2435  root->awar_string(FA_AWAR_RELATIVE_RANGE, "", db1);
2437 
2438  root->awar_string(FA_AWAR_SAI_RANGE_NAME, "", db1);
2439  root->awar_string(FA_AWAR_SAI_RANGE_CHARS, "x1", db1);
2440 
2441  // island hopping:
2442 
2443  root->awar_int(FA_AWAR_USE_ISLAND_HOPPING, 0, db1);
2444 
2445  root->awar_int(FA_AWAR_ESTIMATE_BASE_FREQ, 1, db1);
2446 
2447  root->awar_float(FA_AWAR_BASE_FREQ_A, 0.25, db1);
2448  root->awar_float(FA_AWAR_BASE_FREQ_C, 0.25, db1);
2449  root->awar_float(FA_AWAR_BASE_FREQ_G, 0.25, db1);
2450  root->awar_float(FA_AWAR_BASE_FREQ_T, 0.25, db1);
2451 
2452  root->awar_float(FA_AWAR_SUBST_PARA_AC, 1.0, db1);
2453  root->awar_float(FA_AWAR_SUBST_PARA_AG, 4.0, db1);
2454  root->awar_float(FA_AWAR_SUBST_PARA_AT, 1.0, db1);
2455  root->awar_float(FA_AWAR_SUBST_PARA_CG, 1.0, db1);
2456  root->awar_float(FA_AWAR_SUBST_PARA_CT, 4.0, db1);
2457  root->awar_float(FA_AWAR_SUBST_PARA_GT, 1.0, db1);
2458 
2459  root->awar_float(FA_AWAR_EXPECTED_DISTANCE, 0.3, db1);
2460  root->awar_float(FA_AWAR_STRUCTURE_SUPPLEMENT, 0.5, db1);
2461  root->awar_float(FA_AWAR_THRESHOLD, 0.005, db1);
2462 
2463  root->awar_float(FA_AWAR_GAP_A, 8.0, db1);
2464  root->awar_float(FA_AWAR_GAP_B, 4.0, db1);
2465  root->awar_float(FA_AWAR_GAP_C, 7.0, db1);
2466 
2467  AWTC_create_common_next_neighbour_vars(root, makeRootCallback(nullcb));
2468 }
2469 
2471  root->awar_int(FA_AWAR_TO_ALIGN, FA_CURRENT, db1);
2472 }
2473 
2475  // sets the aligner reference species to current species
2476  char *specName = root->awar(AWAR_SPECIES_NAME)->read_string();
2477  root->awar(FA_AWAR_REFERENCE_NAME)->write_string(specName);
2478  free(specName);
2479 }
2480 
2482  AW_window_simple *aws = new AW_window_simple;
2483 
2484  aws->init(root, "ISLAND_HOPPING_PARA", "Parameters for Island Hopping");
2485  aws->load_xfig("faligner/islandhopping.fig");
2486 
2487  aws->at("close");
2488  aws->callback(AW_POPDOWN);
2489  aws->create_button("CLOSE", "CLOSE", "O");
2490 
2491  aws->at("help");
2492  aws->callback(makeHelpCallback("islandhopping.hlp"));
2493  aws->create_button("HELP", "HELP");
2494 
2495  aws->at("use_secondary");
2496  aws->label("Use secondary structure (only for re-align)");
2497  aws->create_toggle(FA_AWAR_USE_SECONDARY);
2498 
2499  aws->at("freq");
2500  aws->create_toggle_field(FA_AWAR_ESTIMATE_BASE_FREQ, "Base freq.");
2501  aws->insert_default_toggle("Estimate", "E", 1);
2502  aws->insert_toggle("Define here: ", "D", 0);
2503  aws->update_toggle_field();
2504 
2505  aws->at("freq_a"); aws->label("A:"); aws->create_input_field(FA_AWAR_BASE_FREQ_A, 4);
2506  aws->at("freq_c"); aws->label("C:"); aws->create_input_field(FA_AWAR_BASE_FREQ_C, 4);
2507  aws->at("freq_g"); aws->label("G:"); aws->create_input_field(FA_AWAR_BASE_FREQ_G, 4);
2508  aws->at("freq_t"); aws->label("T:"); aws->create_input_field(FA_AWAR_BASE_FREQ_T, 4);
2509 
2510  int xpos[4], ypos[4];
2511  {
2512  aws->button_length(1);
2513 
2514  int dummy;
2515  aws->at("h_a"); aws->get_at_position(&xpos[0], &dummy); aws->create_button(NULp, "A");
2516  aws->at("h_c"); aws->get_at_position(&xpos[1], &dummy); aws->create_button(NULp, "C");
2517  aws->at("h_g"); aws->get_at_position(&xpos[2], &dummy); aws->create_button(NULp, "G");
2518  aws->at("h_t"); aws->get_at_position(&xpos[3], &dummy); aws->create_button(NULp, "T");
2519 
2520  aws->at("v_a"); aws->get_at_position(&dummy, &ypos[0]); aws->create_button(NULp, "A");
2521  aws->at("v_c"); aws->get_at_position(&dummy, &ypos[1]); aws->create_button(NULp, "C");
2522  aws->at("v_g"); aws->get_at_position(&dummy, &ypos[2]); aws->create_button(NULp, "G");
2523  aws->at("v_t"); aws->get_at_position(&dummy, &ypos[3]); aws->create_button(NULp, "T");
2524  }
2525 
2526  aws->at("subst"); aws->create_button(NULp, "Substitution rate parameters:");
2527 
2528 #define XOFF -25
2529 #define YOFF 0
2530 
2531  aws->at(xpos[1]+XOFF, ypos[0]+YOFF); aws->create_input_field(FA_AWAR_SUBST_PARA_AC, 4);
2532  aws->at(xpos[2]+XOFF, ypos[0]+YOFF); aws->create_input_field(FA_AWAR_SUBST_PARA_AG, 4);
2533  aws->at(xpos[3]+XOFF, ypos[0]+YOFF); aws->create_input_field(FA_AWAR_SUBST_PARA_AT, 4);
2534  aws->at(xpos[2]+XOFF, ypos[1]+YOFF); aws->create_input_field(FA_AWAR_SUBST_PARA_CG, 4);
2535  aws->at(xpos[3]+XOFF, ypos[1]+YOFF); aws->create_input_field(FA_AWAR_SUBST_PARA_CT, 4);
2536  aws->at(xpos[3]+XOFF, ypos[2]+YOFF); aws->create_input_field(FA_AWAR_SUBST_PARA_GT, 4);
2537 
2538 #undef XOFF
2539 #undef YOFF
2540 
2541  aws->label_length(22);
2542 
2543  aws->at("dist");
2544  aws->label("Expected distance");
2545  aws->create_input_field(FA_AWAR_EXPECTED_DISTANCE, 5);
2546 
2547  aws->at("supp");
2548  aws->label("Structure supplement");
2549  aws->create_input_field(FA_AWAR_STRUCTURE_SUPPLEMENT, 5);
2550 
2551  aws->at("thres");
2552  aws->label("Threshold");
2553  aws->create_input_field(FA_AWAR_THRESHOLD, 5);
2554 
2555  aws->label_length(10);
2556 
2557  aws->at("gapA");
2558  aws->label("Gap A");
2559  aws->create_input_field(FA_AWAR_GAP_A, 5);
2560 
2561  aws->at("gapB");
2562  aws->label("Gap B");
2563  aws->create_input_field(FA_AWAR_GAP_B, 5);
2564 
2565  aws->at("gapC");
2566  aws->label("Gap C");
2567  aws->create_input_field(FA_AWAR_GAP_C, 5);
2568 
2569  return aws;
2570 }
2571 
2573  static AW_window_simple *aws = NULp;
2574 
2575  if (!aws) {
2576  aws = new AW_window_simple;
2577 
2578  aws->init(root, "FAMILY_PARAMS", "Family search parameters");
2579  aws->load_xfig("faligner/family_settings.fig");
2580 
2581  aws->at("close");
2582  aws->callback(AW_POPDOWN);
2583  aws->create_button("CLOSE", "CLOSE", "O");
2584 
2585  aws->at("help");
2586  aws->callback(makeHelpCallback("next_neighbours_common.hlp"));
2587  aws->create_button("HELP", "HELP");
2588 
2589  aws->auto_space(5, 5);
2591  }
2592 
2593  return aws;
2594 }
2595 
2597  { FA_AWAR_USE_ISLAND_HOPPING, "island" },
2598  { FA_AWAR_TO_ALIGN, "target" },
2599  { FA_AWAR_REFERENCE, "ref" },
2600  { FA_AWAR_REFERENCE_NAME, "refname" },
2601  { FA_AWAR_RELATIVE_RANGE, "relrange" },
2602  { FA_AWAR_NEXT_RELATIVES, "relatives" },
2603  { FA_AWAR_PT_SERVER_ALIGNMENT, "ptali" },
2604 
2605  // relative-search specific parameters from subwindow (create_family_settings_window)
2606  // same as ../DB_UI/ui_species.cxx@RELATIVES_CONFIG
2607  { AWAR_NN_OLIGO_LEN, "oligolen" },
2608  { AWAR_NN_MISMATCHES, "mismatches" },
2609  { AWAR_NN_FAST_MODE, "fastmode" },
2610  { AWAR_NN_REL_MATCHES, "relmatches" },
2611  { AWAR_NN_REL_SCALING, "relscaling" },
2612 
2613  // island-hopping parameters (create_island_hopping_window)
2614  { FA_AWAR_USE_SECONDARY, "use2nd" },
2615  { FA_AWAR_ESTIMATE_BASE_FREQ, "estbasefreq" },
2616  { FA_AWAR_BASE_FREQ_A, "freq_a" },
2617  { FA_AWAR_BASE_FREQ_C, "freq_c" },
2618  { FA_AWAR_BASE_FREQ_G, "freq_g" },
2619  { FA_AWAR_BASE_FREQ_T, "freq_t" },
2620  { FA_AWAR_SUBST_PARA_AC, "subst_ac" },
2621  { FA_AWAR_SUBST_PARA_AG, "subst_ag" },
2622  { FA_AWAR_SUBST_PARA_AT, "subst_at" },
2623  { FA_AWAR_SUBST_PARA_CG, "subst_cg" },
2624  { FA_AWAR_SUBST_PARA_CT, "subst_ct" },
2625  { FA_AWAR_SUBST_PARA_GT, "subst_gt" },
2626  { FA_AWAR_EXPECTED_DISTANCE, "distance" },
2627  { FA_AWAR_STRUCTURE_SUPPLEMENT, "supplement" },
2628  { FA_AWAR_THRESHOLD, "threshold" },
2629  { FA_AWAR_GAP_A, "gap_a" },
2630  { FA_AWAR_GAP_B, "gap_b" },
2631  { FA_AWAR_GAP_C, "gap_c" },
2632 
2633  { NULp, NULp }
2634 };
2635 
2637  AW_window_simple *aws = new AW_window_simple;
2638 
2639  aws->init(root, "INTEGRATED_ALIGNERS", INTEGRATED_ALIGNERS_TITLE);
2640  aws->load_xfig("faligner/faligner.fig");
2641 
2642  aws->label_length(10);
2643  aws->button_length(10);
2644 
2645  aws->at("close");
2646  aws->callback(AW_POPDOWN);
2647  aws->create_button("CLOSE", "CLOSE", "O");
2648 
2649  aws->at("help");
2650  aws->callback(makeHelpCallback("faligner.hlp"));
2651  aws->create_button("HELP", "HELP");
2652 
2653  aws->at("aligner");
2654  aws->create_toggle_field(FA_AWAR_USE_ISLAND_HOPPING, "Aligner");
2655  aws->insert_default_toggle("Fast aligner", "F", 0);
2656  aws->sens_mask(AWM_EXP);
2657  aws->insert_toggle ("Island Hopping", "I", 1);
2658  aws->sens_mask(AWM_ALL);
2659  aws->update_toggle_field();
2660 
2661  aws->button_length(12);
2662  aws->at("island_para");
2663  aws->callback(create_island_hopping_window);
2664  aws->sens_mask(AWM_EXP);
2665  aws->create_button("island_para", "Parameters", "");
2666  aws->sens_mask(AWM_ALL);
2667 
2668  aws->button_length(10);
2669 
2670  aws->at("rev_compl");
2671  aws->callback(makeWindowCallback(build_reverse_complement, data_access));
2672  aws->create_button("reverse_complement", "Turn now!", "");
2673 
2674  aws->at("what");
2675  aws->create_toggle_field(FA_AWAR_TO_ALIGN, "Align what?");
2676  aws->insert_toggle ("Current Species:", "A", FA_CURRENT);
2677  aws->insert_default_toggle("Marked Species", "M", FA_MARKED);
2678  aws->insert_toggle ("Selected Species", "S", FA_SELECTED);
2679  aws->update_toggle_field();
2680 
2681  aws->at("swhat");
2682  aws->create_input_field(AWAR_SPECIES_NAME, 2);
2683 
2684  aws->at("against");
2685  aws->create_toggle_field(FA_AWAR_REFERENCE, "Reference");
2686  aws->insert_toggle ("Species by name:", "S", FA_REF_EXPLICIT);
2687  aws->insert_toggle ("Group consensus", "K", FA_REF_CONSENSUS);
2688  aws->insert_default_toggle("Auto search by pt_server:", "A", FA_REF_RELATIVES);
2689  aws->update_toggle_field();
2690 
2691  aws->at("sagainst");
2692  aws->create_input_field(FA_AWAR_REFERENCE_NAME, 2);
2693 
2694  aws->at("copy");
2696  aws->create_button("Copy", "Copy", "");
2697 
2698  aws->label_length(0);
2699  aws->at("pt_server");
2701 
2702  aws->label_length(23);
2703  aws->at("relrange");
2704  aws->label("Data from range only, plus");
2705  aws->create_input_field(FA_AWAR_RELATIVE_RANGE, 3);
2706 
2707  aws->at("relatives");
2708  aws->label("Number of relatives to use");
2709  aws->create_input_field(FA_AWAR_NEXT_RELATIVES, 3);
2710 
2711  aws->label_length(9);
2712  aws->at("use_ali");
2713  aws->label("Alignment");
2714  aws->create_input_field(FA_AWAR_PT_SERVER_ALIGNMENT, 12);
2715 
2716  aws->at("relSett");
2717  aws->callback(create_family_settings_window);
2718  aws->create_autosize_button("Settings", "More settings", "");
2719 
2720  // Range
2721 
2722  aws->label_length(10);
2723  aws->at("range");
2724  aws->create_toggle_field(FA_AWAR_RANGE, "Range");
2725  aws->insert_default_toggle("Whole sequence", "", FA_WHOLE_SEQUENCE);
2726  aws->insert_toggle ("Positions around cursor: ", "", FA_AROUND_CURSOR);
2727  aws->insert_toggle ("Selected range", "", FA_SELECTED_RANGE);
2728  aws->insert_toggle ("Multi-Range by SAI", "", FA_SAI_MULTI_RANGE);
2729  aws->update_toggle_field();
2730 
2731  aws->at("around");
2732  aws->create_input_field(FA_AWAR_AROUND, 2);
2733 
2734  aws->at("sai");
2736 
2737  aws->at("rchars");
2738  aws->create_input_field(FA_AWAR_SAI_RANGE_CHARS, 2);
2739 
2740  // Protection
2741 
2742  aws->at("protection");
2743  aws->label("Protection");
2744  aws->create_option_menu(FA_AWAR_PROTECTION);
2745  aws->insert_default_option("0", NULp, 0);
2746  aws->insert_option ("1", NULp, 1);
2747  aws->insert_option ("2", NULp, 2);
2748  aws->insert_option ("3", NULp, 3);
2749  aws->insert_option ("4", NULp, 4);
2750  aws->insert_option ("5", NULp, 5);
2751  aws->insert_option ("6", NULp, 6);
2752  aws->update_option_menu();
2753 
2754  // MirrorCheck
2755 
2756  aws->at("mirror");
2757  aws->label("Turn check");
2758  aws->create_option_menu(FA_AWAR_MIRROR);
2759  aws->insert_option ("Never turn sequence", "", FA_TURN_NEVER);
2760  aws->insert_default_option("User acknowledgment ", "", FA_TURN_INTERACTIVE);
2761  aws->insert_option ("Automatically turn sequence", "", FA_TURN_ALWAYS);
2762  aws->update_option_menu();
2763 
2764  // Report
2765 
2766  aws->at("insert");
2767  aws->label("Report");
2768  aws->create_option_menu(FA_AWAR_REPORT);
2769  aws->insert_option ("No report", "", FA_NO_REPORT);
2770  aws->sens_mask(AWM_EXP);
2771  aws->insert_default_option("Report to temporary entries", "", FA_TEMP_REPORT);
2772  aws->insert_option ("Report to resident entries", "", FA_REPORT);
2773  aws->sens_mask(AWM_ALL);
2774  aws->update_option_menu();
2775 
2776  aws->at("gaps");
2777  aws->create_toggle(FA_AWAR_SHOW_GAPS_MESSAGES);
2778 
2779  aws->at("continue");
2780  aws->create_toggle(FA_AWAR_CONTINUE_ON_ERROR);
2781 
2782  aws->at("on_failure");
2783  aws->label("On failure");
2784  aws->create_option_menu(FA_AWAR_ACTION_ON_ERROR);
2785  aws->insert_default_option("do nothing", "", FA_NO_ACTION);
2786  aws->insert_option ("mark failed", "", FA_MARK_FAILED);
2787  aws->insert_option ("mark aligned", "", FA_MARK_ALIGNED);
2788  aws->update_option_menu();
2789 
2790  aws->at("align");
2791  aws->callback(makeWindowCallback(FastAligner_start, data_access));
2792  aws->highlight();
2793  aws->create_button("GO", "GO", "G");
2794 
2795  aws->at("config");
2796  AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, "aligner", aligner_config_mapping);
2797 
2798  return aws;
2799 }
2800 
2801 // --------------------------------------------------------------------------------
2802 
2803 #ifdef UNIT_TESTS
2804 
2805 #include <test_unit.h>
2806 
2807 // ---------------------
2808 // OligoCounter
2809 
2810 #include <map>
2811 #include <string>
2812 
2813 using std::map;
2814 using std::string;
2815 
2816 typedef map<string, size_t> OligoCount;
2817 
2818 class OligoCounter {
2819  size_t oligo_len;
2820  size_t datasize;
2821 
2822  mutable OligoCount occurrence;
2823 
2824  static string removeGaps(const char *seq) {
2825  size_t len = strlen(seq);
2826  string nogaps;
2827  nogaps.reserve(len);
2828 
2829  for (size_t p = 0; p<len; ++p) {
2830  char c = seq[p];
2831  if (!is_gap(c)) nogaps.append(1, c);
2832  }
2833  return nogaps;
2834  }
2835 
2836  void count_oligos(const string& seq) {
2837  occurrence.clear();
2838  size_t max_pos = seq.length()-oligo_len;
2839  for (size_t p = 0; p <= max_pos; ++p) {
2840  string oligo(seq, p, oligo_len);
2841  occurrence[oligo]++;
2842  }
2843  }
2844 
2845 public:
2846  OligoCounter()
2847  : oligo_len(0),
2848  datasize(0)
2849  {}
2850  OligoCounter(const char *seq, size_t oligo_len_)
2851  : oligo_len(oligo_len_)
2852  {
2853  string seq_nogaps = removeGaps(seq);
2854  datasize = seq_nogaps.length();
2855  count_oligos(seq_nogaps);
2856  }
2857 
2858  size_t oligo_count(const char *oligo) {
2859  fa_assert(strlen(oligo) == oligo_len);
2860  return occurrence[oligo];
2861  }
2862 
2863  size_t similarity_score(const OligoCounter& other) const {
2864  size_t score = 0;
2865  if (oligo_len == other.oligo_len) {
2866  for (OligoCount::const_iterator o = occurrence.begin(); o != occurrence.end(); ++o) {
2867  const string& oligo = o->first;
2868  size_t count = o->second;
2869 
2870  score += min(count, other.occurrence[oligo]);
2871  }
2872  }
2873  return score;
2874  }
2875 
2876  size_t getDataSize() const { return datasize; }
2877 };
2878 
2879 void TEST_OligoCounter() {
2880  OligoCounter oc1("CCAGGT", 3);
2881  OligoCounter oc2("GGTCCA", 3);
2882  OligoCounter oc2_gaps("..GGT--CCA..", 3);
2883  OligoCounter oc3("AGGTCC", 3);
2884  OligoCounter oc4("AGGTCCAGG", 3);
2885 
2886  TEST_EXPECT_EQUAL(oc1.oligo_count("CCA"), 1);
2887  TEST_EXPECT_ZERO(oc1.oligo_count("CCG"));
2888  TEST_EXPECT_EQUAL(oc4.oligo_count("AGG"), 2);
2889 
2890  int sc1_2 = oc1.similarity_score(oc2);
2891  int sc2_1 = oc2.similarity_score(oc1);
2892  TEST_EXPECT_EQUAL(sc1_2, sc2_1);
2893 
2894  int sc1_2gaps = oc1.similarity_score(oc2_gaps);
2895  TEST_EXPECT_EQUAL(sc1_2, sc1_2gaps);
2896 
2897  int sc1_3 = oc1.similarity_score(oc3);
2898  int sc2_3 = oc2.similarity_score(oc3);
2899  int sc3_4 = oc3.similarity_score(oc4);
2900 
2901  TEST_EXPECT_EQUAL(sc1_2, 2); // common oligos (CCA GGT)
2902  TEST_EXPECT_EQUAL(sc1_3, 2); // common oligos (AGG GGT)
2903  TEST_EXPECT_EQUAL(sc2_3, 3); // common oligos (GGT GTC TCC)
2904 
2905  TEST_EXPECT_EQUAL(sc3_4, 4);
2906 }
2907 
2908 // -------------------------
2909 // FakeFamilyFinder
2910 
2911 class FakeFamilyFinder: public FamilyFinder { // derived from a Noncopyable
2912  // used by unit tests to detect next relatives instead of asking the pt-server
2913 
2914  GBDATA *gb_main;
2915  string ali_name;
2916  map<string, OligoCounter> oligos_counted; // key = species name
2917  PosRange counted_for_range;
2918  size_t oligo_len;
2919 
2920 public:
2921  FakeFamilyFinder(GBDATA *gb_main_, string ali_name_, bool rel_matches_, size_t oligo_len_)
2922  : FamilyFinder(rel_matches_, RSS_BOTH_MIN),
2923  gb_main(gb_main_),
2924  ali_name(ali_name_),
2925  counted_for_range(PosRange::whole()),
2926  oligo_len(oligo_len_)
2927  {}
2928 
2929  GB_ERROR searchFamily(const char *sequence, FF_complement compl_mode, int max_results, double min_score) OVERRIDE {
2930  // 'sequence' has to contain full sequence or part corresponding to 'range'
2931 
2932  TEST_EXPECT_EQUAL(compl_mode, FF_FORWARD); // not fit for other modes
2933 
2935 
2936  OligoCounter seq_oligo_count(sequence, oligo_len);
2937 
2938  if (range != counted_for_range) {
2939  oligos_counted.clear(); // forget results for different range
2940  counted_for_range = range;
2941  }
2942 
2943  char *buffer = NULp;
2944  int buffersize = 0;
2945 
2946  bool partial_match = range.is_part();
2947 
2948  GB_transaction ta(gb_main);
2949  int results = 0;
2950 
2951  for (GBDATA *gb_species = GBT_first_species(gb_main);
2952  gb_species && results<max_results;
2953  gb_species = GBT_next_species(gb_species))
2954  {
2955  string name = GBT_get_name_or_description(gb_species);
2956  if (oligos_counted.find(name) == oligos_counted.end()) {
2957  GBDATA *gb_data = GBT_find_sequence(gb_species, ali_name.c_str());
2958  const char *spec_seq = GB_read_char_pntr(gb_data);
2959 
2960  if (partial_match) {
2961  int spec_seq_len = GB_read_count(gb_data);
2962  int range_len = ExplicitRange(range, spec_seq_len).size();
2963 
2964  if (buffersize<range_len) {
2965  delete [] buffer;
2966  buffersize = range_len;
2967  buffer = new char[buffersize+1];
2968  }
2969 
2970  range.copy_corresponding_part(buffer, spec_seq, spec_seq_len);
2971  oligos_counted[name] = OligoCounter(buffer, oligo_len);
2972  }
2973  else {
2974  oligos_counted[name] = OligoCounter(spec_seq, oligo_len);
2975  }
2976  }
2977 
2978  const OligoCounter& spec_oligo_count = oligos_counted[name];
2979  size_t score = seq_oligo_count.similarity_score(spec_oligo_count);
2980 
2981  if (score>=min_score) {
2982  FamilyList *newMember = new FamilyList;
2983 
2984  newMember->name = strdup(name.c_str());
2985  newMember->matches = score;
2986  newMember->rel_matches = score/spec_oligo_count.getDataSize();
2987  newMember->next = NULp;
2988 
2989  family_list = newMember->insertSortedBy_matches(family_list);
2990  results++;
2991  }
2992  }
2993 
2994  delete [] buffer;
2995 
2996  return NULp;
2997  }
2998 };
2999 
3000 // ----------------------------
3001 // test_alignment_data
3002 
3003 #include <arb_unit_test.h>
3004 
3005 static const char *test_aliname = "ali_test";
3006 
3007 static const char *get_aligned_data_of(GBDATA *gb_main, const char *species_name) {
3008  GB_transaction ta(gb_main);
3009  ARB_ERROR error;
3010  const char *data = NULp;
3011 
3012  GBDATA *gb_species = GBT_find_species(gb_main, species_name);
3013  if (!gb_species) error = GB_await_error();
3014  else {
3015  GBDATA *gb_data = GBT_find_sequence(gb_species, test_aliname);
3016  if (!gb_data) error = GB_await_error();
3017  else {
3018  data = GB_read_char_pntr(gb_data);
3019  if (!data) error = GB_await_error();
3020  }
3021  }
3022 
3023  TEST_EXPECT_NULL(error.deliver());
3024 
3025  return data;
3026 }
3027 
3028 static const char *get_used_rels_for(GBDATA *gb_main, const char *species_name) {
3029  GB_transaction ta(gb_main);
3030  const char *result = NULp;
3031  GBDATA *gb_species = GBT_find_species(gb_main, species_name);
3032  if (!gb_species) result = GBS_global_string("<No such species '%s'>", species_name);
3033  else {
3034  GBDATA *gb_used_rels = GB_search(gb_species, "used_rels", GB_FIND);
3035  if (!gb_used_rels) result = "<No such field 'used_rels'>";
3036  else result = GB_read_char_pntr(gb_used_rels);
3037  }
3038  return result;
3039 }
3040 
3041 static GB_ERROR forget_used_relatives(GBDATA *gb_main) {
3042  GB_ERROR error = NULp;
3043  GB_transaction ta(gb_main);
3044  for (GBDATA *gb_species = GBT_first_species(gb_main);
3045  gb_species && !error;
3046  gb_species = GBT_next_species(gb_species))
3047  {
3048  GBDATA *gb_used_rels = GB_search(gb_species, "used_rels", GB_FIND);
3049  if (gb_used_rels) {
3050  error = GB_delete(gb_used_rels);
3051  }
3052  }
3053  return error;
3054 }
3055 
3056 
3057 #define ALIGNED_DATA_OF(name) get_aligned_data_of(gb_main, name)
3058 #define USED_RELS_FOR(name) get_used_rels_for(gb_main, name)
3059 
3060 // ----------------------------------------
3061 
3062 static GBDATA *selection_fake_gb_main = NULp;
3063 static GBDATA *selection_fake_gb_last = NULp;
3064 
3065 static GBDATA *fake_first_selected(int *count) {
3066  selection_fake_gb_last = NULp;
3067  *count = 2; // we fake two species as selected ("c1" and "c2")
3068  return GBT_find_species(selection_fake_gb_main, "c1");
3069 }
3070 static GBDATA *fake_next_selected() {
3071  if (!selection_fake_gb_last) {
3072  selection_fake_gb_last = GBT_find_species(selection_fake_gb_main, "c2");
3073  }
3074  else {
3075  selection_fake_gb_last = NULp;
3076  }
3077  return selection_fake_gb_last;
3078 }
3079 
3080 static char *fake_get_consensus(const char*, PosRange range) {
3081  const char *data = get_aligned_data_of(selection_fake_gb_main, "s1");
3082  if (range.is_whole()) return strdup(data);
3083  return ARB_strpartdup(data+range.start(), data+range.end());
3084 }
3085 
3086 static void test_install_fakes(GBDATA *gb_main) {
3087  selection_fake_gb_main = gb_main;
3088 }
3089 
3090 
3091 // ----------------------------------------
3092 
3093 static AlignParams test_ali_params = {
3094  FA_NO_REPORT,
3095  false, // showGapsMessages
3096  PosRange::whole()
3097 };
3098 
3099 static AlignParams test_ali_params_partial = {
3100  FA_NO_REPORT,
3101  false, // showGapsMessages
3102  PosRange(25, 60)
3103 };
3104 
3105 // ----------------------------------------
3106 
3107 static struct arb_unit_test::test_alignment_data TestAlignmentData_TargetAndReferenceHandling[] = {
3108  { 0, "s1", ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........." }, // reference
3109  { 0, "s2", "AUCUCCUAAACCCAACCGUAGUUCGAAUUGAGGACUGUAACUC......................................................" }, // align single sequence (same data as reference)
3110  { 1, "m1", "UAGAGGAUUUGGGUUGGCAUCAAGCUUAACUCCUGACAUUGAG......................................................" }, // align marked sequences.. (complement of reference)
3111  { 1, "m2", "...UCCUAAACCAACCCGUAGUUCGAAUUGAGGACUGUAA........................................................." },
3112  { 1, "m3", "AUC---UAAACCAACCCGUAGUUCGAAUUGAGGACUG---CUC......................................................" },
3113  { 0, "c1", "AUCUCCUAAACCCAACC--------AAUUGAGGACUGUAACUC......................................................" }, // align selected sequences..
3114  { 0, "c2", "AUCUCCU------AACCGUAGUUCCCCGAA------ACUGUAACUC..................................................." },
3115  { 0, "r1", "GAGUUACAGUCCUCAAUUCGGGGAACUACGGUUGGGUUUAGGAGAU..................................................." }, // align by faked pt_server
3116 };
3117 
3118 void TEST_Aligner_TargetAndReferenceHandling() {
3119  // performs some alignments to check logic of target and reference handling
3120 
3121  GB_shell shell;
3122  ARB_ERROR error;
3123  GBDATA *gb_main = TEST_CREATE_DB(error, test_aliname, TestAlignmentData_TargetAndReferenceHandling, false);
3124 
3125  TEST_EXPECT_NULL(error.deliver());
3126 
3127  SearchRelativeParams search_relative_params(new FakeFamilyFinder(gb_main, test_aliname, false, 8),
3128  test_aliname,
3129  2);
3130 
3131  test_install_fakes(gb_main);
3132  arb_suppress_progress silence;
3133 
3134  // bool cont_on_err = true;
3135  bool cont_on_err = false;
3136 
3137  TEST_EXPECT_EQUAL(GBT_count_marked_species(gb_main), 3); // we got 3 marked species
3138  {
3139  Aligner aligner(gb_main,
3140  FA_CURRENT,
3141  test_aliname,
3142  "s2", // toalign
3143  NULp, // get_first_selected_species
3144  NULp, // get_next_selected_species
3145  "s1", // reference species
3146  NULp, // get_consensus
3147  search_relative_params, // relative search
3149  test_ali_params,
3150  0,
3151  cont_on_err,
3152  FA_NO_ACTION);
3153  error = aligner.run();
3154  TEST_EXPECT_NULL(error.deliver());
3155  }
3156  TEST_EXPECT_EQUAL(GBT_count_marked_species(gb_main), 3); // we still got 3 marked species
3157  {
3158  Aligner aligner(gb_main,
3159  FA_MARKED,
3160  test_aliname,
3161  NULp, // toalign
3162  NULp, // get_first_selected_species
3163  NULp, // get_next_selected_species
3164  "s1", // reference species
3165  NULp, // get_consensus
3166  search_relative_params, // relative search
3168  test_ali_params,
3169  0,
3170  cont_on_err,
3171  FA_MARK_FAILED);
3172  error = aligner.run();
3173  TEST_EXPECT_NULL(error.deliver());
3174 
3175  TEST_EXPECT(!cont_on_err || GBT_count_marked_species(gb_main) == 0); // FA_MARK_FAILED (none failed -> none marked)
3176  }
3177  {
3178  Aligner aligner(gb_main,
3179  FA_SELECTED,
3180  test_aliname,
3181  NULp, // toalign
3182  fake_first_selected, // get_first_selected_species
3183  fake_next_selected, // get_next_selected_species
3184  NULp, // reference species
3185  fake_get_consensus, // get_consensus
3186  search_relative_params, // relative search
3188  test_ali_params,
3189  0,
3190  cont_on_err,
3191  FA_MARK_ALIGNED);
3192  error = aligner.run();
3193  TEST_EXPECT_NULL(error.deliver());
3194 
3195  TEST_EXPECT(!cont_on_err || GBT_count_marked_species(gb_main) == 2); // FA_MARK_ALIGNED (2 selected were aligned)
3196  }
3197  {
3198  Aligner aligner(gb_main,
3199  FA_CURRENT,
3200  test_aliname,
3201  "r1", // toalign
3202  NULp, // get_first_selected_species
3203  NULp, // get_next_selected_species
3204  NULp, // reference species
3205  NULp, // get_consensus
3206  search_relative_params, // relative search
3208  test_ali_params,
3209  0,
3210  cont_on_err,
3211  FA_MARK_ALIGNED);
3212 
3213  error = aligner.run();
3214  TEST_EXPECT_NULL(error.deliver());
3215 
3216  TEST_EXPECT(!cont_on_err || GBT_count_marked_species(gb_main) == 1);
3217  }
3218 
3219  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("s2"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C...........");
3220 
3221  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m1"), ".......UAG--AGG-A------U-U-UGGGU-UG-G-C-A-U-CAA-GCU--------UAA-C-UCCUG-AC--A-UUGAG...............");
3222  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m2"), "..............U-C------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA................");
3223  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m3"), ".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C...........");
3224 
3225  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("c1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C...........");
3226  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("c2"), ".........A--UCU-C------C-U-AA---------C-C-G-UAG-UUC------------C-CCGAA-AC--U-GUAA-CU-C...........");
3227 
3228  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("r1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUCCCC-----GAA-U-UGAGG-AC--U-GUAA-CU-C..........."); // here sequence shall be turned!
3229 
3230 
3231  TEST_EXPECT_EQUAL(USED_RELS_FOR("r1"), "s2:43, s1:1");
3232 
3233  // ----------------------------------------------
3234  // now align all others vs next relative
3235 
3236  search_relative_params.maxRelatives = 5;
3237  TEST_EXPECT_NO_ERROR(forget_used_relatives(gb_main));
3238 
3239  int species_count = ARRAY_ELEMS(TestAlignmentData_TargetAndReferenceHandling);
3240  for (int sp = 0; sp<species_count; ++sp) {
3241  const char *name = TestAlignmentData_TargetAndReferenceHandling[sp].name;
3242  if (strcmp(name, "r1") != 0) { // skip "r1" (already done above)
3243  Aligner aligner(gb_main,
3244  FA_CURRENT,
3245  test_aliname,
3246  name, // toalign
3247  NULp, // get_first_selected_species
3248  NULp, // get_next_selected_species
3249  NULp, // reference species
3250  NULp, // get_consensus
3251  search_relative_params, // relative search
3253  test_ali_params,
3254  0,
3255  cont_on_err,
3256  FA_MARK_ALIGNED);
3257 
3258  error = aligner.run();
3259  TEST_EXPECT_NULL(error.deliver());
3260 
3261  TEST_EXPECT(!cont_on_err || GBT_count_marked_species(gb_main) == 1);
3262  }
3263  }
3264 
3265  TEST_EXPECT_EQUAL(USED_RELS_FOR("s1"), "s2");
3266  TEST_EXPECT_EQUAL(USED_RELS_FOR("s2"), "s1"); // same as done manually
3267  TEST_EXPECT_EQUAL(USED_RELS_FOR("m1"), "r1:42");
3268  TEST_EXPECT_EQUAL(USED_RELS_FOR("m2"), "m3");
3269  TEST_EXPECT_EQUAL(USED_RELS_FOR("m3"), "m2");
3270  TEST_EXPECT_EQUAL(USED_RELS_FOR("c1"), "r1");
3271  TEST_EXPECT_EQUAL(USED_RELS_FOR("c2"), "r1");
3272 
3273  // range aligned below (see test_ali_params_partial)
3274  // "-------------------------XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX------------------------------------"
3275  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("s1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........."); // 1st aligning of 's1'
3276  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("s2"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........."); // same_as_above (again aligned vs 's1')
3277 
3278  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m1"), ".........U--AGA-G------G---AUUUG-GG-U-U-G-G-CAU-CAAGCU-----UAA-C-UCCUG-AC--A-UUGAG---------------"); // changed; @@@ bug: no dots at end
3279 
3280  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m2"), ".........U--C----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----UA-A..........."); // changed (1st align vs 's1', this align vs 'm3')
3281  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m3"), ".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C..........."); // same_as_above (1st align vs 's1', this align vs 'm2')
3282  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("c1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C..........."); // same_as_above
3283  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("c2"), ".........A--UCU-C------C-U--------A-A-C-C-G-UAG-UUCCCC-----GA--------A-AC--U-GUAA-CU-C..........."); // changed
3284 
3285  // --------------------------------------
3286  // test partial relative search
3287 
3288  search_relative_params.getFamilyFinder()->restrict_2_region(test_ali_params_partial.range);
3289  TEST_EXPECT_NO_ERROR(forget_used_relatives(gb_main));
3290 
3291  for (int sp = 0; sp<species_count; ++sp) {
3292  const char *name = TestAlignmentData_TargetAndReferenceHandling[sp].name;
3293  Aligner aligner(gb_main,
3294  FA_CURRENT,
3295  test_aliname,
3296  name, // toalign
3297  NULp, // get_first_selected_species
3298  NULp, // get_next_selected_species
3299  NULp, // reference species
3300  NULp, // get_consensus
3301  search_relative_params, // relative search
3302  FA_TURN_NEVER,
3303  test_ali_params_partial,
3304  0,
3305  cont_on_err,
3306  FA_MARK_ALIGNED);
3307 
3308  error = aligner.run();
3309  TEST_EXPECT_NULL(error.deliver());
3310 
3311  TEST_EXPECT(!cont_on_err || GBT_count_marked_species(gb_main) == 1);
3312  }
3313 
3314  TEST_EXPECT_EQUAL(USED_RELS_FOR("s1"), "s2");
3315  TEST_EXPECT_EQUAL(USED_RELS_FOR("s2"), "s1");
3316  TEST_EXPECT_EQUAL(USED_RELS_FOR("m1"), "r1"); // (not really differs)
3317  TEST_EXPECT_EQUAL(USED_RELS_FOR("m2"), "m3");
3318  TEST_EXPECT_EQUAL(USED_RELS_FOR("m3"), "m2");
3319  TEST_EXPECT_EQUAL(USED_RELS_FOR("c1"), "r1");
3320  TEST_EXPECT_EQUAL(USED_RELS_FOR("c2"), "r1");
3321  TEST_EXPECT_EQUAL(USED_RELS_FOR("r1"), "s2:20, c2:3");
3322 
3323  // aligned range (see test_ali_params_partial)
3324  // "-------------------------XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX------------------------------------"
3325  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("s1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........."); // same_as_above
3326  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("s2"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-GUAA-CU-C..........."); // same_as_above
3327 
3328  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m1"), ".........U--AGA-G------G-A-UU-UG-GG-U-U-G-G-CAU-CAAGCU-----UAA-C-UCCUG-AC--A-UUGAG---------------"); // changed
3329  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m2"), ".........U--C----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----UA-A..........."); // same_as_above
3330  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("m3"), ".........A--U----------C-U-AAACC-AA-C-C-C-G-UAG-UUC--------GAA-U-UGAGG-AC--U-G----CU-C..........."); // same_as_above
3331 
3332  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("c1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-------------------AA-U-UGAGG-AC--U-GUAA-CU-C..........."); // same_as_above
3333  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("c2"), ".........A--UCU-C------C---------UA-A-C-C-G-UAG-UUCCCC-----GA--------A-AC--U-GUAA-CU-C..........."); // changed
3334 
3335  TEST_EXPECT_EQUAL(ALIGNED_DATA_OF("r1"), ".........A--UCU-C------C-U-AAACC-CA-A-C-C-G-UAG-UUCCCC-----GAA-U-UGAGG-AC--U-GUAA-CU-C..........."); // same_as_above
3336 
3337  GB_close(gb_main);
3338 }
3339 
3340 // ----------------------------------------
3341 
3342 static struct arb_unit_test::test_alignment_data TestAlignmentData_checksumError[] = {
3343  { 0, "MtnK1722", "...G-GGC-C-G............CCC-GG--------CAAUGGGGGCGGCCCGGCGGAC----GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCC....." }, // gets aligned
3344  { 0, "MhnFormi", "...A-CGA-U-C------------CUUCGG--------GGUCG-U-GG-C-GU-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCCU...." }, // 1st relative
3345  { 0, "MhnT1916", "...A-CGA-A-C------------CUU-GU--------GUUCG-U-GG-C-GA-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCCU...." }, // next relative
3346  { 0, "MthVanni", "...U-GGU-U-U------------C-------------GGCCA-U-GG-C-GG-A--C------GG--C-UCAUU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUG-CGGC-UGGAUCACCUCC....." }, // next relative
3347  { 0, "ThcCeler", "...G-GGG-C-G...CC-U---U--------GC--G--CGCAC-C-GG-C-GG-A--C------GG--C-UCAGU-A---AAG-UCGUAACAA-GG-UAG-CCGU-AGGGGAA-CCUA-CGGC-UCGAUCACCUCCU...." }, // next relative
3348 };
3349 
3350 void TEST_SLOW_Aligner_checksumError() {
3351  // @@@ SLOW cause this often gets terminated in nightly builds
3352  // no idea why. normally it runs a few ms
3353 
3354  // this produces an internal aligner error
3355 
3356  GB_shell shell;
3357  ARB_ERROR error;
3358  GBDATA *gb_main = TEST_CREATE_DB(error, test_aliname, TestAlignmentData_checksumError, false);
3359 
3360  SearchRelativeParams search_relative_params(new FakeFamilyFinder(gb_main, test_aliname, false, 8),
3361  test_aliname,
3362  10); // use up to 10 relatives
3363 
3364  test_install_fakes(gb_main);
3365  arb_suppress_progress silence;
3366 
3367  bool cont_on_err = true;
3368  if (!error) {
3369  Aligner aligner(gb_main,
3370  FA_CURRENT,
3371  test_aliname,
3372  "MtnK1722", // toalign
3373  NULp, // get_first_selected_species
3374  NULp, // get_next_selected_species
3375  NULp, // reference species
3376  NULp, // get_consensus
3377  search_relative_params, // relative search
3379  test_ali_params,
3380  0,
3381  cont_on_err,
3382  FA_MARK_ALIGNED);
3383 
3384  error = aligner.run();
3385  }
3386  {
3387  GB_ERROR err = error.deliver();
3388  TEST_EXPECT_NULL__BROKEN(err, "Aligner produced 1 error");
3389  }
3390  TEST_EXPECT_EQUAL__BROKEN(USED_RELS_FOR("MtnK1722"), "???", "<No such field 'used_rels'>"); // subsequent failure
3391 
3392  GB_close(gb_main);
3393 }
3394 
3395 static const char *asstr(LooseBases& ub) {
3396  LooseBases tmp;
3397  while (!ub.is_empty()) tmp.memorize(ub.recall());
3398 
3399  const char *result = "";
3400  while (!tmp.is_empty()) {
3401  ExplicitRange r = tmp.recall();
3402  result = GBS_global_string("%s %i/%i", result, r.start(), r.end());
3403  ub.memorize(r);
3404  }
3405  return result;
3406 }
3407 
3408 void TEST_BASIC_UnalignedBases() {
3409  LooseBases ub;
3410  TEST_EXPECT(ub.is_empty());
3411  TEST_EXPECT_EQUAL(asstr(ub), "");
3412 
3413  // test add+remove
3414  ub.memorize(ExplicitRange(5, 7));
3415  TEST_REJECT(ub.is_empty());
3416  TEST_EXPECT_EQUAL(asstr(ub), " 5/7");
3417 
3418  TEST_EXPECT(ub.recall() == ExplicitRange(5, 7));
3419  TEST_EXPECT(ub.is_empty());
3420 
3421  ub.memorize(ExplicitRange(2, 4));
3422  TEST_EXPECT_EQUAL(asstr(ub), " 2/4");
3423 
3424  ub.memorize(ExplicitRange(4, 9));
3425  TEST_EXPECT_EQUAL(asstr(ub), " 2/4 4/9");
3426 
3427  ub.memorize(ExplicitRange(8, 10));
3428  ub.memorize(ExplicitRange(11, 14));
3429  ub.memorize(ExplicitRange(12, 17));
3430  TEST_EXPECT_EQUAL(asstr(ub), " 2/4 4/9 8/10 11/14 12/17");
3431  TEST_EXPECT_EQUAL(asstr(ub), " 2/4 4/9 8/10 11/14 12/17"); // check asstr has no side-effect
3432 
3433  {
3434  LooseBases toaddNrecalc;
3435 
3436  CompactedSubSequence Old("ACGTACGT", 8, "name1");
3437  CompactedSubSequence New("--A-C--G-T--A-C-G-T", 19, "name2");
3438  // ---------------------- 0123456789012345678
3439 
3440  toaddNrecalc.memorize(ExplicitRange(1, 7));
3441  toaddNrecalc.memorize(ExplicitRange(3, 5));
3442  TEST_EXPECT_EQUAL(asstr(toaddNrecalc), " 1/7 3/5");
3443 
3444  ub.follow_ali_change_and_append(toaddNrecalc, AliChange(Old, New));
3445 
3446  TEST_EXPECT_EQUAL(asstr(ub), " 3/18 8/15 2/4 4/9 8/10 11/14 12/17");
3447  TEST_EXPECT(toaddNrecalc.is_empty());
3448 
3449  LooseBases selfRecalc;
3450  selfRecalc.follow_ali_change_and_append(ub, AliChange(New, New));
3451  TEST_EXPECT_EQUAL__BROKEN(asstr(selfRecalc),
3452  " 3/18 8/15 0/6 3/11 8/11 10/15 10/17", // wanted behavior?
3453  " 3/18 8/17 0/6 3/11 8/13 10/15 10/18"); // doc wrong behavior @@@ "8/17", "8/13", "10/18" are wrong
3454 
3455  ub.follow_ali_change_and_append(selfRecalc, AliChange(New, Old));
3456  TEST_EXPECT_EQUAL__BROKEN(asstr(ub),
3457  " 1/7 3/5 0/1 1/3 3/3 4/5 4/6", // wanted behavior? (from wanted behavior above)
3458  " 1/7 3/7 0/2 1/4 3/5 4/6 4/7"); // document wrong behavior
3459  TEST_EXPECT_EQUAL__BROKEN(asstr(ub),
3460  " 1/7 3/6 0/1 1/3 3/4 4/5 4/7", // wanted behavior? (from wrong result above)
3461  " 1/7 3/7 0/2 1/4 3/5 4/6 4/7"); // document wrong behavior
3462  }
3463 }
3464 
3465 
3466 #endif // UNIT_TESTS
3467 
3468 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
#define GAP
#define FA_AWAR_SUBST_PARA_CT
void AWTC_create_common_next_neighbour_fields(AW_window *aws, int scaler_length)
const CompactedSubSequence & sequence() const
Definition: seq_search.hxx:265
int expdPosition(int cPos) const
Definition: seq_search.hxx:200
void restoreDots(CompactedSubSequence &slaveSequence)
Definition: seq_search.cxx:168
bool is_empty() const
const char * GB_ERROR
Definition: arb_core.h:25
long gaps() const
Definition: seq_search.hxx:414
#define QUALITY_NAME
void FastAligner_create_variables(AW_root *root, AW_default db1)
static LooseBases unaligned_bases
string result
bool is_whole() const
Definition: pos_range.h:71
char alignQuality(char slave, char master)
long free() const
Definition: seq_search.hxx:355
void GB_warning(const char *message)
Definition: arb_msg.cxx:530
SearchRelativeParams(FamilyFinder *ff_, const char *pt_server_alignment_, int maxRelatives_)
long expdPosition() const
Definition: seq_search.hxx:267
static int currentSequenceNumber
const FastAlignInsertion * insertion() const
Definition: seq_search.hxx:463
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:113
#define CAN_SCORE_LEFT()
#define FA_AWAR_ESTIMATE_BASE_FREQ
virtual ARB_ERROR align_to(GBDATA *gb_toalign) const =0
void count_aligned_base(int mismatched)
Definition: seq_search.hxx:465
#define FA_AWAR_RELATIVE_RANGE
ARB_ERROR fast_align(const CompactedSubSequence &align_to, AlignBuffer *alignBuffer, int max_seq_length, int matchScore, int mismatchScore, FastAlignReport *report) const
GBDATA * GBT_first_marked_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:109
#define FA_AWAR_GAP_A
bool may_refer_to_same_part_as(const CompactedSubSequence &other) const
Definition: seq_search.hxx:236
static ARB_ERROR alignToGroupConsensus(GBDATA *gb_toAlign, GB_CSTR alignment, Aligner_get_consensus_func get_consensus, int max_seq_length, const AlignParams &ali_params)
return string(buffer, length)
ARB_ERROR species_not_found(GB_CSTR species_name)
#define FA_AWAR_EXPECTED_DISTANCE
GB_ERROR GB_append_exportedError(GB_ERROR error)
Definition: arb_msg.cxx:387
ARB_ERROR run()
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
void load_xfig(const char *file, bool resize=true)
Definition: AW_window.cxx:720
virtual GB_ERROR searchFamily(const char *sequence, FF_complement compl_mode, int max_results, double min_score)=0
FamilyFinder * getFamilyFinder()
#define ACID
PosRange intersection(PosRange r1, PosRange r2)
Definition: pos_range.h:101
#define FA_AWAR_AROUND
int length() const
Definition: seq_search.hxx:187
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
long GBT_mark_all(GBDATA *gb_main, int flag)
Definition: aditem.cxx:295
#define FA_AWAR_BASE_FREQ_A
int relatedBases(char base1, char base2)
void FastAligner_set_align_current(AW_root *root, AW_default db1)
Aligner_get_first_selected_species get_first_selected_species
#define FA_AWAR_TO_ALIGN
int start() const
Definition: pos_range.h:60
#define FA_AWAR_SUBST_PARA_AG
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
long
Definition: AW_awar.cxx:152
GB_CSTR get_alignment() const
void allow_title_reuse()
Definition: arb_progress.h:319
void insertGap(AlignBuffer *alignBuffer, SequencePosition &master, FastAlignReport *report)
const char * text() const
Definition: seq_search.hxx:350
#define INTEGRATED_ALIGNERS_TITLE
range_set::const_iterator iterator
Definition: RangeList.h:46
void AWT_insert_config_manager(AW_window *aww, AW_default default_file_, const char *id, const StoreConfigCallback &store_cb, const RestoreConfigCallback &load_or_reset_cb, const char *macro_id, const AWT_predefined_config *predef)
#define AWAR_DEFAULT_ALIGNMENT
Definition: aw_awar_defs.hxx:8
void GB_end_transaction_show_error(GBDATA *gbd, GB_ERROR error, void(*error_handler)(GB_ERROR))
Definition: arbdb.cxx:2584
bool is_part() const
Definition: pos_range.h:73
long read_int() const
Definition: AW_awar.cxx:184
static GBDATA * get_next_selected_species()
Definition: ED4_root.cxx:910
AW_awar * set_minmax(float min, float max)
Definition: AW_awar.cxx:530
#define GAP_CHARS
Definition: seq_search.hxx:45
#define FA_AWAR_SHOW_GAPS_MESSAGES
bool showGapsMessages
#define FA_AWAR_BASE_FREQ_T
FA_errorAction
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
void warning(int warning_num, const char *warning_message)
Definition: util.cxx:61
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
static char * alignment_name
bool GB_have_error()
Definition: arb_msg.cxx:338
FA_reference
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
std::string alignment_name
ExplicitRange recall()
long length() const
Definition: seq_search.hxx:352
GBDATA * GBT_expect_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:184
char * ARB_strpartdup(const char *start, const char *end)
Definition: arb_string.h:51
int size() const
Definition: pos_range.h:69
Aligner_get_next_selected_species get_next_selected_species
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
Definition: adstring.cxx:319
void set(char c, char q)
Definition: seq_search.hxx:363
void add(const PosRange &range)
Definition: RangeList.h:61
static PosRange whole()
Definition: pos_range.h:52
void awt_create_SAI_selection_button(GBDATA *gb_main, AW_window *aws, const char *varname, const SaiSelectionlistFilterCallback &fcb)
static void build_reverse_complement(AW_window *aw, const AlignDataAccess *data_access)
#define FA_AWAR_REFERENCE
static CompactedSubSequence * readCompactedSequence(GBDATA *gb_species, const char *ali, ARB_ERROR *errorPtr, char **dataPtr, long *seqChksum, PosRange range)
static int overallSequenceNumber
const char * name() const
Definition: seq_search.hxx:192
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
#define FA_AWAR_CONTINUE_ON_ERROR
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
static ARB_ERROR insertClustalValigned(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, const char *masterAlignment, const char *slaveAlignment, long alignmentLength, FastAlignReport *report)
long rightOf() const
Definition: seq_search.hxx:280
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2494
FILE * seq
Definition: rns.c:46
ARB_ERROR bufferTooSmall()
#define AWAR_NN_REL_SCALING
double log3(double d)
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
#define AWAR_NN_FAST_MODE
long leftOf() const
Definition: seq_search.hxx:279
int follow_ali_change(const AliChange &change)
FamilyFinder * ff
#define GAP_CHAR
const char * read_char_pntr() const
Definition: AW_awar.cxx:168
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:597
char *(* Aligner_get_consensus_func)(const char *species_name, PosRange range)
size_t GB_read_string_count(GBDATA *gbd)
Definition: arbdb.cxx:916
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
long GB_read_count(GBDATA *gbd)
Definition: arbdb.cxx:758
#define AWAR_PT_SERVER
#define SCORE_LEFT()
static GB_alignment_type global_alignmentType
Definition: arbdb.h:78
#define INSERTS_NAME
GB_ERROR deliver() const
Definition: arb_error.h:116
void FastAligner_set_reference_species(AW_root *root)
static void nullcb()
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
NOT4PERL void GBT_reverseComplementNucSequence(char *seq, long length, char T_or_U)
Definition: adRevCompl.cxx:102
static AW_window * create_family_settings_window(AW_root *root)
static ARB_ERROR alignCompactedTo(CompactedSubSequence *toAlignSequence, const FastSearchSequence *alignTo, int max_seq_length, GB_CSTR alignment, long toAlignChksum, GBDATA *gb_toAlign, GBDATA *gb_alignTo, const AlignParams &ali_params)
void restrict_2_region(const PosRange &range_)
AW_awar * awar_float(const char *var_name, float default_value=0.0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:560
#define FA_AWAR_SUBST_PARA_AT
GBDATA * gb_species_data
Definition: adname.cxx:33
static const char * read_name(GBDATA *gbd)
int GB_read_security_write(GBDATA *gbd)
Definition: arbdb.cxx:1572
#define FA_AWAR_PROTECTION
#define TEST_EXPECT_EQUAL__BROKEN(expr, want, got)
Definition: test_unit.h:1295
#define FA_AWAR_ACTION_ON_ERROR
int no_of_gaps_after(int cPos) const
Definition: seq_search.hxx:197
const int * gapsBefore(int offset=0) const
Definition: seq_search.hxx:208
void append(LooseBases &loose)
void FastAligner_start(AW_window *aw, const AlignDataAccess *data_access)
void copy(const char *s, char q, long len)
Definition: seq_search.hxx:357
bool GB_is_ancestor_of(GBDATA *gb_ancestor, GBDATA *gb_descendant)
Definition: arbdb.cxx:1744
ExplicitReference(GB_CSTR alignment_, const FastSearchSequence *targetSequence_, GBDATA *gb_alignTo_, int max_seq_length_, const AlignParams &ali_params_)
void message(char *errortext)
int get_max_seq_length() const
#define TEST_REJECT(cond)
Definition: test_unit.h:1330
FA_report
void(* refresh_display)()
GBDATA *(* Aligner_get_first_selected_species)(int *total_no_of_selected_species)
AliChange(const CompactedSubSequence &old_, const CompactedSubSequence &new_)
int min(int i, int j)
static void error(const char *msg)
Definition: mkptypes.cxx:96
GBDATA * GB_get_root(GBDATA *gbd)
Definition: arbdb.cxx:1740
bool uses_rel_matches() const
void insertBase(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, FastAlignReport *report)
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
Definition: aditem.cxx:116
#define FA_AWAR_PT_SERVER_ALIGNMENT
iterator begin() const
Definition: RangeList.h:51
#define FA_AWAR_RANGE
#define CAN_SCORE_RIGHT()
ARB_ERROR ClustalV_align(int is_dna, int weighted, const char *seq1, int length1, const char *seq2, int length2, const int *gapsBefore1, int max_seq_length, const char *&res1, const char *&res2, int &reslen, int &score)
Definition: ClustalV.cxx:971
int follow_ali_change_and_append(LooseBases &loose, const AliChange &change)
const char * text() const
Definition: seq_search.hxx:189
iterator end() const
Definition: RangeList.h:52
Definition: align.cxx:35
GB_alignment_type GBT_get_alignment_type(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:873
#define AWAR_SPECIES_NAME
#define YOFF
void correctUnalignedPositions()
Definition: seq_search.cxx:124
void ClustalV_exit()
Definition: ClustalV.cxx:1049
char * getHelixString() const
#define cmp(h1, h2)
Definition: admap.cxx:50
Aligner_get_selected_range get_selected_range
#define FA_AWAR_GAP_C
FF_complement
bool is_gap(char c)
Definition: seq_search.hxx:48
void AWTC_create_common_next_neighbour_vars(AW_root *aw_root, const RootCallback &awar_changed_cb)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
Definition: aditem.cxx:133
FA_turn
static AW_window * create_island_hopping_window(AW_root *root)
char * read_string() const
Definition: AW_awar.cxx:198
static WindowCallback simple(void(*root_cb)(AW_root *, T), T t)
Definition: rootAsWin.h:47
bool ARB_in_novice_mode(AW_root *awr)
Definition: aw_root.hxx:193
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
GB_ERROR GB_pop_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2524
#define FA_AWAR_USE_ISLAND_HOPPING
#define FA_AWAR_STRUCTURE_SUPPLEMENT
Definition: arbdb.h:86
int no_of_gaps_before(int cPos) const
Definition: seq_search.hxx:196
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
FamilyList * insertSortedBy_matches(FamilyList *other)
FamilyList * next
PosRange range
bool is_ali_gap(char c)
Definition: seq_search.hxx:47
int baseMatch(char c1, char c2)
Definition: ClustalV.cxx:249
static ARB_ERROR alignTo(GBDATA *gb_toAlign, GB_CSTR alignment, const FastSearchSequence *alignTo, GBDATA *gb_alignTo, int max_seq_length, const AlignParams &ali_params)
void awt_create_PTSERVER_selection_button(AW_window *aws, const char *varname)
void count_unaligned_base(int no_of_bases)
Definition: seq_search.hxx:466
long GBT_count_marked_species(GBDATA *gb_main)
Definition: aditem.cxx:372
float read_float() const
Definition: AW_awar.cxx:177
#define TEST_EXPECT_ZERO(cond)
Definition: test_unit.h:1085
GB_alignment_type
Definition: arbdb_base.h:61
void memorize(ExplicitRange range)
GB_ERROR write_as_string(const char *aw_string)
static ARB_ERROR cannot_fast_align(const CompactedSubSequence &master, long moffset, long mlength, const CompactedSubSequence &slaveSequence, long soffset, long slength, int max_seq_length, AlignBuffer *alignBuffer, FastAlignReport *report)
GB_ERROR GB_set_temporary(GBDATA *gbd) __ATTR__USERESULT
Definition: arbdb.cxx:2282
static void appendNameAndUsedBasePositions(char **toString, GBDATA *gb_species, int usedBasePositions)
int aw_question(const char *unique_id, const char *question, const char *buttons, bool sameSizeButtons, const char *helpfile)
Definition: AW_question.cxx:26
static AWT_config_mapping_def aligner_config_mapping[]
#define FA_AWAR_SAI_RANGE_NAME
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
#define TEST_EXPECT_NULL__BROKEN(n, got)
Definition: test_unit.h:1323
double partSignificance(long seq1len, long seq2len, long partlen)
#define FA_AWAR_MIRROR
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1322
#define gb_assert(cond)
Definition: arbdbt.h:11
long offset() const
Definition: seq_search.hxx:413
void GB_write_flag(GBDATA *gbd, long flag)
Definition: arbdb.cxx:2773
SearchRelativesReference(SearchRelativeParams &relSearch_, int max_seq_length_, FA_turn turnAllowed_, GB_CSTR alignment_, const AlignParams &ali_params_)
AW_window * FastAligner_create_window(AW_root *root, const AlignDataAccess *data_access)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
#define OVERRIDE
Definition: cxxforward.h:110
#define FA_AWAR_SUBST_PARA_AC
#define FA_AWAR_REFERENCE_NAME
const char * text() const
Definition: seq_search.hxx:264
virtual ~AlignmentReference()
long insertsToNextBase(AlignBuffer *alignBuffer, const SequencePosition &master)
ARB_ERROR FastAligner_delete_temp_entries(GBDATA *gb_species, const char *alignment)
#define FA_AWAR_BASE_FREQ_G
static IslandHopping * island_hopper
fa_assert(chars< MESSAGE_BUFFERSIZE)
static ARB_ERROR align_error(ARB_ERROR olderr, GBDATA *gb_toAlign, GBDATA *gb_alignTo)
static ARB_ERROR insertAligned(AlignBuffer *alignBuffer, SequencePosition &master, SequencePosition &slave, long partLength, FastAlignReport *report)
Aligner(GBDATA *gb_main_, FA_alignTarget alignWhat_, GB_CSTR alignment_, GB_CSTR toalign_, Aligner_get_first_selected_species get_first_selected_species_, Aligner_get_next_selected_species get_next_selected_species_, GB_CSTR reference_, Aligner_get_consensus_func get_consensus_, SearchRelativeParams &relSearch_, FA_turn turnAllowed_, const AlignParams &ali_params_, int maxProtection_, bool continue_on_error_, FA_errorAction error_action_)
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:909
FA_range
#define AWAR_CURSOR_POSITION_LOCAL
long offset() const
Definition: seq_search.hxx:354
static ARB_ERROR reverseComplement(GBDATA *gb_species, GB_CSTR ali, int max_protection)
ARB_ERROR align_to(GBDATA *gb_toalign) const OVERRIDE
const FastAlignInsertion * next() const
Definition: seq_search.hxx:412
GBDATA *(* Aligner_get_next_selected_species)(void)
GBDATA * GBT_first_species(GBDATA *gb_main)
Definition: aditem.cxx:124
static ARB_ERROR writeStringToAlignment(GBDATA *gb_species, GB_CSTR alignment, GB_CSTR data_name, GB_CSTR str, bool temporary)
RangeList build_RangeList_from_string(const char *SAI_data, const char *set_bytes, bool invert)
Definition: RangeList.cxx:32
const FamilyList * getFamilyList() const
#define FA_AWAR_REPORT
int follow(ExplicitRange &range) const
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
void insertSlaveBases(AlignBuffer *alignBuffer, SequencePosition &slave, int length, FastAlignReport *report)
AW_root * get_root()
Definition: aw_window.hxx:354
#define SCORE_RIGHT()
const PosRange & get_TargetRange() const
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:114
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
Aligner_get_consensus_func get_group_consensus
static GBDATA * get_first_selected_species(int *total_no_of_selected_species)
Definition: ED4_root.cxx:918
int end() const
Definition: pos_range.h:64
AlignmentReference(GB_CSTR alignment_, int max_seq_length_, const AlignParams &ali_params_)
#define FA_AWAR_THRESHOLD
GB_ERROR write_string(const char *aw_string)
#define offset(field)
Definition: GLwDrawA.c:73
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
const char * GBT_get_name(GBDATA *gb_item)
Definition: aditem.cxx:468
FA_alignTarget
const CompactedSubSequence & sequence() const
Definition: seq_search.hxx:504
#define AWAR_NN_MISMATCHES
int GB_get_transaction_level(GBDATA *gbd)
Definition: arbdb.cxx:2590
#define XOFF
static ARB_ERROR alignToNextRelative(SearchRelativeParams &relSearch, int max_seq_length, FA_turn turnAllowed, GB_CSTR alignment, GBDATA *gb_toAlign, const AlignParams &ali_params)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
#define FA_AWAR_SAI_RANGE_CHARS
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
void memorize_insertion(long offset, long gaps)
Definition: seq_search.hxx:445
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
FA_report report
size_t length
#define FA_AWAR_USE_SECONDARY
long calcSequenceChecksum(const char *data, long length)
#define FA_AWAR_GAP_B
#define FA_AWAR_NEXT_RELATIVES
static int restart
Definition: arb_a2ps.c:264
const char * GB_CSTR
Definition: arbdb_base.h:25
int compPosition(int xPos) const
Definition: seq_search.hxx:205
#define FA_AWAR_SUBST_PARA_CG
#define AW_ROOT_DEFAULT
Definition: aw_base.hxx:106
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
GB_ERROR write_int(long aw_int)
const AlignParams & get_ali_params() const
ConsensusReference(GB_CSTR alignment_, Aligner_get_consensus_func get_consensus_, int max_seq_length_, const AlignParams &ali_params_)
void aw_message_if(GB_ERROR error)
Definition: aw_msg.hxx:21
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
const char * quality() const
Definition: seq_search.hxx:351
NOT4PERL GB_ERROR GBT_determine_T_or_U(GB_alignment_type alignment_type, char *T_or_U, const char *supposed_target)
Definition: adRevCompl.cxx:90
#define FA_AWAR_BASE_FREQ_C
#define AWAR_NN_OLIGO_LEN
void setDotsAtEOSequence()
Definition: seq_search.cxx:197
void copy_corresponding_part(char *dest, const char *source, size_t source_len) const
Definition: pos_range.cxx:15
#define MIN_ALIGNMENT_RANGE
#define AWAR_NN_REL_MATCHES
#define FA_AWAR_SUBST_PARA_GT
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105
GB_write_int const char s
Definition: AW_awar.cxx:154