ARB
insdel.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : insdel.cxx //
4 // Purpose : insert/delete columns //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 // AISC_MKPT_PROMOTE:#ifndef ARBDB_BASE_H
12 // AISC_MKPT_PROMOTE:#include <arbdb_base.h>
13 // AISC_MKPT_PROMOTE:#endif
14 
15 #include "insdel.h"
16 #include <RangeList.h>
17 
18 #include <arbdbt.h>
19 #include <adGene.h>
20 #include <arb_progress.h>
21 #include <arb_defs.h>
22 #include <arb_diff.h>
23 #include <algorithm>
24 
25 using namespace std;
26 
27 #define id_assert(cond) arb_assert(cond)
28 
29 // --------------------------------------------------------------------------------
30 // helper to hold any kind of unit (char, int, float)
31 
32 class UnitPtr {
33  const void *ptr;
34 public:
35  UnitPtr() : ptr(NULp) {}
36  UnitPtr(const void *ptr_)
37  : ptr(ptr_)
38  {
39  id_assert(ptr);
40  }
41 
42  void set_pointer(const void *ptr_) {
43  id_assert(!ptr);
44  ptr = ptr_;
45  }
46  const void *get_pointer() const { return ptr; }
47  const void *expect_pointer() const { id_assert(ptr); return ptr; }
48 };
49 struct UnitPair {
50  UnitPtr left, right;
51 };
52 
// Three-way comparison based on operator< and operator>.
// Returns -1 if t1<t2, 1 if t1>t2 and 0 otherwise.
template <typename T>
inline int compare_type(const T& t1, const T& t2) {
    if (t1 < t2) return -1;
    if (t1 > t2) return 1;
    return 0;
}
57 
58 // --------------------------------------------------------------------------------
59 
60 class AliData;
62 
63 // --------------------------------------------------------------------------------
64 
// Abstract interface for a sequence of 'size' equally sized units.
// Derived classes provide the storage (plain data, gaps, slices, compositions);
// this class offers copying, comparing and delete-checking on top of the
// virtual primitives operate_on_mem() and cmp_data().
65 class AliData {
66  size_t size;
    // error slot shared by ALL AliData objects (static); written by set_error(),
    // reset by clear_error() and check_delete_allowed()
67  static GB_ERROR op_error;
68 
69 public:
70  AliData(size_t size_) : size(size_) {}
71  virtual ~AliData() {}
72 
    // size of one unit in bytes
73  virtual size_t unitsize() const = 0;
    // true if this object is (or contains) a slice
74  virtual bool has_slice() const = 0;
75 
    // operations understood by operate_on_mem()
76  enum memop {
77  COPY_TO, // always returns 0
78  COMPARE_WITH, // returns compare value
79  CHECK_DELETE, // return 0 if ok to delete, otherwise op_error is set
80  };
81 
82  void clear_error() const { op_error = NULp; }
    // stores 'error' in the shared slot; asserts that no error is pending
83  void set_error(GB_ERROR error) const {
84  id_assert(error);
85  id_assert(!op_error);
86  op_error = error;
87  }
88 
    // performs 'op' on 'count' units beginning at unit 'start'.
    // 'mem' is the destination (COPY_TO) or the comparison operand (COMPARE_WITH);
    // check_delete_allowed() passes NULp for CHECK_DELETE.
89  virtual int operate_on_mem(void *mem, size_t start, size_t count, memop op) const = 0;
    // compares 'count' units at 'start' with 'count' units of 'other' at 'ostart'
90  virtual int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const = 0;
91 
92  void copyPartTo(void *mem, size_t start, size_t count) const { operate_on_mem(mem, start, count, COPY_TO); }
93  int cmpPartWith(const void *mem, size_t start, size_t count) const {
94  id_assert(is_valid_part(start, count));
95  return operate_on_mem(const_cast<void*>(mem), start, count, COMPARE_WITH); // COMPARE_WITH does not modify
96  }
    // returns NULp if the units [start, start+count) may be deleted, otherwise the
    // error reported by the CHECK_DELETE operation. 'count' is clipped to the units
    // available behind 'start'.
97  GB_ERROR check_delete_allowed(size_t start, size_t count) const {
98  op_error = NULp;
99  id_assert(start <= size);
    // NOTE(review): IF_ASSERTION_USED presumably expands its argument only when
    // assertions are compiled in, so 'forbidden' exists only for the check below
100  IF_ASSERTION_USED(int forbidden =) operate_on_mem(NULp, start, std::min(count, size-start), CHECK_DELETE);
101  id_assert(correlated(forbidden, op_error));
102  return op_error;
103  }
104 
    // handles to the units directly before / behind the position between units 'pos'
105  virtual UnitPtr unit_left_of(size_t pos) const = 0;
106  virtual UnitPtr unit_right_of(size_t pos) const = 0;
107 
    // creates gap data of 'gapsize' units; 'gapinfo' carries the neighbouring units
108  virtual AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const = 0;
    // creates a slice of 'count' units beginning at 'start'
109  virtual AliDataPtr slice_down(size_t start, size_t count) const = 0;
110 
111  size_t elems() const { return size; }
112  size_t memsize() const { return unitsize()*elems(); }
113  void copyTo(void *mem) const { copyPartTo(mem, 0, elems()); }
114  bool empty() const { return !elems(); }
115 
    // compares the common prefix first; if it is equal, the data with fewer
    // elements compares as smaller
116  int cmp_whole_data(const AliData& other) const {
117  int cmp = cmp_data(0, other, 0, std::min(elems(), other.elems()));
118  if (cmp == 0) { // prefixes are equal
119  return compare_type(elems(), other.elems());
120  }
121  return cmp;
122  }
123 
124  bool equals(const AliData& other) const {
125  if (&other == this) return true;
126  if (elems() != other.elems()) return false;
127 
128  return cmp_whole_data(other) == 0;
129  }
130  bool differs_from(const AliData& other) const { return !equals(other); }
131 
132  bool is_valid_pos(size_t pos) const { return pos < elems(); }
133  bool is_valid_between(size_t pos) const { return pos <= elems(); } // pos == 0 -> before first base; pos == elems() -> after last base
134 
    // true if [start, start+count) lies inside the data
135  bool is_valid_part(size_t start, size_t count) const {
136  return is_valid_between(start) && is_valid_between(start+count);
137  }
138 };
139 
// definition of the shared error slot (initially: no error)
140 GB_ERROR AliData::op_error = NULp;
141 
142 // --------------------------------------------------------------------------------
143 
144 class AliDataSlice : public AliData {
145  AliDataPtr from;
146  size_t offset;
147 
148  static int fix_amount(AliDataPtr from, size_t offset, size_t amount) {
149  if (amount) {
150  size_t from_size = from->elems();
151  if (offset>from_size) {
152  amount = 0;
153  }
154  else {
155  size_t last_pos = offset+amount-1;
156  size_t last_from = from->elems()-1;
157 
158  if (last_pos > last_from) {
159  id_assert(last_from >= offset);
160  amount = last_from-offset+1;
161  }
162  }
163  }
164  return amount;
165  }
166 
167  AliDataSlice(AliDataPtr from_, size_t offset_, size_t amount_)
168  : AliData(fix_amount(from_, offset_, amount_)),
169  from(from_),
170  offset(offset_)
171  {
172  id_assert(!from->has_slice()); // do not double-slice
173  }
174 
175 public:
176  static AliDataPtr make(AliDataPtr from, size_t offset, size_t amount) {
177  return (offset == 0 && amount >= from->elems())
178  ? from
179  : (from->has_slice()
180  ? from->slice_down(offset, amount)
181  : new AliDataSlice(from, offset, amount));
182  }
183 
184  size_t unitsize() const OVERRIDE { return from->unitsize(); }
185  bool has_slice() const OVERRIDE { return true; }
186 
187  AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE {
188  return from->create_gap(gapsize, gapinfo);
189  }
190  AliDataPtr slice_down(size_t start, size_t count) const OVERRIDE {
191  return new AliDataSlice(from, offset+start, std::min(count, elems()));
192  }
193  int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE {
194  id_assert(is_valid_part(start, count));
195  return from->operate_on_mem(mem, start+offset, count, op);
196  }
197  UnitPtr unit_left_of(size_t pos) const OVERRIDE {
198  id_assert(is_valid_between(pos));
199  return from->unit_left_of(pos+offset);
200  }
201  UnitPtr unit_right_of(size_t pos) const OVERRIDE {
202  id_assert(is_valid_between(pos));
203  return from->unit_right_of(pos+offset);
204  }
205  int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE {
206  id_assert(is_valid_part(start, count));
207  id_assert(other.is_valid_part(ostart, count));
208 
209  return from->cmp_data(start+offset, other, ostart, count);
210  }
211 };
212 
213 class ComposedAliData FINAL_TYPE : public AliData {
214  AliDataPtr left, right;
215  bool hasSlice;
216 
217  ComposedAliData(AliDataPtr l, AliDataPtr r)
218  : AliData(l->elems()+r->elems()),
219  left(l),
220  right(r),
221  hasSlice(left->has_slice() || right->has_slice())
222  {
223  id_assert(l->unitsize() == r->unitsize());
224  id_assert(l->elems());
225  id_assert(r->elems());
226  }
227  friend AliDataPtr concat(AliDataPtr left, AliDataPtr right); // for above ctor
228 
229  void *inc_by_units(void *mem, size_t units) const { return reinterpret_cast<char*>(mem)+units*unitsize(); }
230 
231 public:
232  size_t unitsize() const OVERRIDE { return left->unitsize(); }
233  bool has_slice() const OVERRIDE { return hasSlice; }
234 
235  AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE {
236  return left->create_gap(gapsize, gapinfo);
237  }
238  AliDataPtr slice_down(size_t start, size_t count) const OVERRIDE {
239  size_t left_elems = left->elems();
240 
241  if (left_elems <= start) { // left is before slice
242  return AliDataSlice::make(right, start-left_elems, count);
243  }
244 
245  size_t pos_behind = start+count;
246  if (left_elems >= pos_behind) { // right is behind slice
247  return AliDataSlice::make(left, start, min(count, left_elems));
248  }
249 
250  size_t take_left = left_elems-start;
251  size_t take_right = count-take_left;
252 
253  return new ComposedAliData(
254  AliDataSlice::make(left, start, take_left),
255  AliDataSlice::make(right, 0, take_right)
256  );
257  }
258  int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE {
259  size_t left_elems = left->elems();
260  size_t take_left = 0;
261  int res = 0;
262  if (start<left_elems) {
263  take_left = min(count, left_elems-start);
264  res = left->operate_on_mem(mem, start, take_left, op);
265  }
266 
267  if (res == 0) {
268  size_t take_right = count-take_left;
269  if (take_right) {
270  size_t rstart = start>left_elems ? start-left_elems : 0;
271  id_assert(right->is_valid_part(rstart, take_right));
272  res = right->operate_on_mem(inc_by_units(mem, take_left), rstart, take_right, op);
273  }
274  }
275  return res;
276  }
277  int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE {
278  size_t left_elems = left->elems();
279  size_t take_left = 0;
280  int cmp = 0;
281  if (start<left_elems) {
282  take_left = min(count, left_elems-start);
283  cmp = left->cmp_data(start, other, ostart, take_left);
284  }
285 
286  if (cmp == 0) {
287  size_t take_right = count-take_left;
288  if (take_right) {
289  size_t rstart = start>left_elems ? start-left_elems : 0;
290  size_t rostart = ostart+take_left;
291 
292  id_assert(is_valid_part(rstart, take_right));
293  id_assert(other.is_valid_part(rostart, take_right));
294 
295  cmp = right->cmp_data(rstart, other, rostart, take_right);
296  }
297  }
298  return cmp;
299  }
300 
301  UnitPtr unit_left_of(size_t pos) const OVERRIDE {
302  id_assert(is_valid_between(pos));
303  if (left->elems() == pos) { // split between left and right
304  id_assert(pos >= 1);
305  return left->unit_right_of(pos-1);
306  }
307  else if (left->elems() < pos) { // split inside or behind 'right'
308  return right->unit_left_of(pos-left->elems());
309  }
310  else { // split inside or frontof 'left'
311  return left->unit_left_of(pos);
312  }
313  }
314  UnitPtr unit_right_of(size_t pos) const OVERRIDE {
315  id_assert(is_valid_between(pos));
316  if (left->elems() == pos) { // split between left and right
317  id_assert(pos >= 1);
318  return right->unit_left_of(0);
319  }
320  else if (left->elems() < pos) { // split inside or behind 'right'
321  return right->unit_right_of(pos-left->elems());
322  }
323  else { // split inside or frontof 'left'
324  return left->unit_right_of(pos);
325  }
326  }
327 };
328 
329 // --------------------------------------------------------------------------------
330 
331 class Deletable { // define characters allowed to delete (only applicable to TypedAliData<char>)
332  bool deletable[256];
333 
334  void init(bool val) {
335  for (int i = 0; i<256; ++i) {
336  deletable[i] = val;
337  }
338  }
339 
340 public:
341  enum DeleteWhat { NOTHING, ANYTHING };
342  explicit Deletable(DeleteWhat what) {
343  switch (what) {
344  case ANYTHING: init(true); break;
345  case NOTHING: init(false); break;
346  }
347  }
348  explicit Deletable(const char *allowed) {
349  init(false);
350  for (int i = 0; allowed[i]; ++i) {
351  deletable[safeCharIndex(allowed[i])] = true;
352  }
353  }
354 
355  GB_ERROR get_delete_error(const char *data, size_t start, size_t count) const {
356  GB_ERROR error = NULp;
357  id_assert(count > 0);
358  size_t end = start+count-1;
359  for (size_t col = start; col <= end && !error; ++col) {
360  if (!deletable[safeCharIndex(data[col])]) {
361  error = GBS_global_string("You tried to delete '%c' at position %zu -> Operation aborted", data[col], col);
362  }
363  }
364  return error;
365  }
366 };
367 
368 // --------------------------------------------------------------------------------
369 
370 template<typename T>
371 class TypedAliData : public AliData {
372  T gap;
373 
374 protected:
375  static const T *typed_ptr(const UnitPtr& uptr) { return (const T*)uptr.get_pointer(); }
376  const T* std_gap_ptr() const { return &gap; }
377 
378 public:
379  TypedAliData(size_t size_, T gap_)
380  : AliData(size_),
381  gap(gap_)
382  {}
383 
384  const T& std_gap() const { return gap; }
385 
386  size_t unitsize() const OVERRIDE { return sizeof(T); }
387  bool has_slice() const OVERRIDE { return false; }
388 
389  virtual UnitPtr at_ptr(size_t pos) const = 0;
390  AliDataPtr create_gap(size_t gapsize, const UnitPair& /*gapinfo*/) const OVERRIDE;
391  __ATTR__NORETURN AliDataPtr slice_down(size_t /*start*/, size_t /*count*/) const OVERRIDE {
392  GBK_terminate("logic error: slice_down called for explicit TypedAliData");
393  }
394  UnitPtr unit_left_of(size_t pos) const OVERRIDE {
395  id_assert(is_valid_between(pos));
396  return at_ptr(pos-1);
397  }
398  UnitPtr unit_right_of(size_t pos) const OVERRIDE {
399  id_assert(is_valid_between(pos));
400  return at_ptr(pos);
401  }
402 };
403 
// Gap data: 'gapsize' units which all have the same gap value.
// NOTE(review): 'BaseType' is used below, but its declaration is not part of this
// excerpt; the base initializer implies it names TypedAliData<T> - confirm.
404 template<typename T>
405 struct SpecificGap : public TypedAliData<T> {
407 
408  SpecificGap(size_t gapsize, const T& gap_)
409  : BaseType(gapsize, gap_)
410  {}
    // COPY_TO      : fills 'mem' with 'count' gap units
    // COMPARE_WITH : compares the gap value with each of the 'count' units in 'mem';
    //                returns the first difference
    // CHECK_DELETE : always permitted
    // ('start' is only used by the assertion: all units of a gap are equal)
411  int operate_on_mem(void *mem, size_t IF_ASSERTION_USED(start), size_t count, AliData::memop op) const OVERRIDE {
412  id_assert(BaseType::is_valid_part(start, count));
413  switch (op) {
414  case AliData::COPY_TO: {
415  T *typedMem = (T*)mem;
416  for (size_t a = 0; a<count; ++a) { // LOOP_VECTORIZED =3[<10] =1 (3x up to 9.x; 1x with 10.x)
417  typedMem[a] = BaseType::std_gap();
418  }
419  break;
420  }
421  case AliData::COMPARE_WITH: {
422  const T *typedMem = (const T*)mem;
423  for (size_t a = 0; a<count; ++a) {
424  int cmp = compare_type(BaseType::std_gap(), typedMem[a]);
425  if (cmp) return cmp;
426  }
427  break;
428  }
429  case AliData::CHECK_DELETE: {
430  break; // deleting an inserted gap is always permitted
431  }
432  }
433  return 0;
434  }
    // gap vs. gap: only the gap values are compared (positions and 'count' are not used);
    // otherwise the comparison is delegated to 'other' and its result is negated
435  int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE {
436  const SpecificGap<T> *other_is_gap = dynamic_cast<const SpecificGap<T>*>(&other);
437  if (other_is_gap) {
438  return compare_type(BaseType::std_gap(), other_is_gap->std_gap());
439  }
440  return -other.cmp_data(ostart, *this, start, count);
441  }
    // every valid position refers to the one stored gap value;
    // positions >= elems() yield an empty UnitPtr
442  UnitPtr at_ptr(size_t pos) const OVERRIDE {
443  if (pos<BaseType::elems()) return UnitPtr(BaseType::std_gap_ptr());
444  return UnitPtr();
445  }
446 };
447 
448 template <typename T>
449 AliDataPtr TypedAliData<T>::create_gap(size_t gapsize, const UnitPair& /*gapinfo*/) const {
450  return new SpecificGap<T>(gapsize, std_gap());
451 }
452 
// Members of SizeAwarable (the line opening the class is not part of this excerpt).
// Stores the original alignment size and whether data may exceed the alignment size.
454  bool allows_oversize;
455  size_t org_ali_size;
456 public:
457  SizeAwarable(bool allows_oversize_, size_t ali_size_)
458  : allows_oversize(allows_oversize_),
459  org_ali_size(ali_size_)
460  {}
461 
    // Returns the size allowed for data of size 'term_size' after the alignment
    // got the size 'new_ali_size': normally 'new_ali_size'; if oversize is allowed
    // and the data exceeded the original alignment size, that excess is added.
462  size_t get_allowed_size(size_t term_size, size_t new_ali_size) const {
463  size_t allowed_size = new_ali_size;
464  if (allows_oversize && term_size>org_ali_size) {
465  size_t oversize = term_size-org_ali_size;
466  allowed_size = new_ali_size+oversize;
467  }
468  return allowed_size;
469  }
470 };
471 inline SizeAwarable dontAllowOversize(size_t ali_size) { return SizeAwarable(false, ali_size); }
472 
473 template<typename T>
474 inline GB_ERROR check_delete_allowed(const T *, size_t, size_t , const Deletable& ) {
475  return NULp; // for non-char deleting is always allowed
476 }
477 template<>
478 inline GB_ERROR check_delete_allowed(const char *data, size_t start, size_t count, const Deletable& deletable) {
479  return deletable.get_delete_error(data, start, count);
480 }
481 
// Explicit data: 'elements' units of type T stored at 'data'.
// The class only stores the pointer passed to the ctor; nothing in the code
// shown here copies or frees the data.
// NOTE(review): 'BaseType' is used below, but its declaration is not part of this
// excerpt; the base initializer implies it names TypedAliData<T> - confirm.
482 template<typename T>
483 class SpecificAliData : public TypedAliData<T>, public SizeAwarable, virtual Noncopyable {
484  const T *data;
485  Deletable deletable;
486 
487 public:
489 
490  SpecificAliData(const T *static_data, size_t elements, const T& gap_, const SizeAwarable& sizeAware, const Deletable& deletable_)
491  : BaseType(elements, gap_),
492  SizeAwarable(sizeAware),
493  data(static_data),
494  deletable(deletable_)
495  {}
496 
    // COPY_TO      : copies 'count' units beginning at 'start' into 'mem'
    // COMPARE_WITH : compares these units with the units in 'mem'; returns the first difference
    // CHECK_DELETE : asks 'deletable' (char data only); on refusal the error is
    //                stored via set_error() and 1 is returned
    // Nothing is done if 'count' is 0.
497  int operate_on_mem(void *mem, size_t start, size_t count, AliData::memop op) const OVERRIDE {
498  if (count>0) {
499  id_assert(BaseType::is_valid_part(start, count));
500  switch (op) {
501  case AliData::COPY_TO: {
502  size_t msize = BaseType::unitsize()*count;
503  id_assert(msize>0);
504  memcpy(mem, data+start, msize);
505  break;
506  }
507  case AliData::COMPARE_WITH: {
508  const T *typedMem = (const T*)mem;
509  for (size_t a = 0; a<count; ++a) {
510  int cmp = compare_type(data[start+a], typedMem[a]);
511  if (cmp) return cmp;
512  }
513  break;
514  }
515  case AliData::CHECK_DELETE: {
516  const T *typedMem = (const T*)data;
517  GB_ERROR error = check_delete_allowed<T>(typedMem, start, count, deletable);
518  if (error) {
519  BaseType::set_error(error);
520  return 1;
521  }
522  break;
523  }
524  }
525  }
526  return 0;
527  }
    // lets 'other' compare itself against the raw units of this object;
    // the result is negated, because the operands are swapped
528  int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE {
529  id_assert(BaseType::is_valid_part(start, count));
530  id_assert(other.is_valid_part(ostart, count));
531 
532  // if (&other == this && start == ostart) return true; // @@@ why does this fail tests?
533  return -other.cmpPartWith(data+start, ostart, count);
534  }
    // handle to the unit at 'pos'; positions >= elems() yield an empty UnitPtr
535  UnitPtr at_ptr(size_t pos) const OVERRIDE {
536  if (pos<BaseType::elems()) return UnitPtr(&data[pos]);
537  return UnitPtr();
538  }
539  const T *get_data() const { return data; }
540 };
541 
542 class SequenceAliData : public SpecificAliData<char> {
543  char dot;
544 
545  char preferred_gap(const char *s1, const char *s2) const {
546  if (s1 && s2) {
547  if (*s1 == std_gap() || *s2 == std_gap()) {
548  return std_gap();
549  }
550  if (*s1 == dot || *s2 == dot) {
551  return dot;
552  }
553  return std_gap();
554  }
555  else if (s1) {
556  id_assert(!s2);
557  return *s1 == std_gap() ? std_gap() : dot;
558  }
559  else if (s2) {
560  id_assert(!s1);
561  return *s2 == std_gap() ? std_gap() : dot;
562  }
563  else {
564  id_assert(!s1 && !s2);
565  return dot;
566  }
567  }
568 
569 public:
570  SequenceAliData(const char* static_data, size_t elements, char stdgap, char dotgap, const SizeAwarable& sizeAware, const Deletable& deletable_)
571  : SpecificAliData<char>(static_data, elements, stdgap, sizeAware, deletable_),
572  dot(dotgap)
573  {}
574 
575  AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE {
576  char use = preferred_gap(typed_ptr(gapinfo.left), typed_ptr(gapinfo.right));
577  return new SpecificGap<char>(gapsize, use);
578  }
579 };
580 
581 // --------------------------------------------------------------------------------
582 // @@@ move things below into a class ?
583 
584 inline AliDataPtr concat(AliDataPtr left, AliDataPtr right) {
585  return left->empty() ? right : (right->empty() ? left : new ComposedAliData(left, right));
586 }
588  return concat(left, concat(mid, right));
589 }
590 
591 inline AliDataPtr partof(AliDataPtr data, size_t pos, size_t amount) { return AliDataSlice::make(data, pos, amount); }
592 inline AliDataPtr before(AliDataPtr data, size_t pos) { return partof(data, 0, pos); }
593 inline AliDataPtr after(AliDataPtr data, size_t pos) { return partof(data, pos+1, data->elems()-pos-1); }
594 
595 inline AliDataPtr delete_from(AliDataPtr from, size_t pos, size_t amount, GB_ERROR& error) {
596  error = from->check_delete_allowed(pos, amount);
597  return concat(before(from, pos), after(from, pos+amount-1));
598 }
599 inline AliDataPtr insert_at(AliDataPtr dest, size_t pos, AliDataPtr src) {
600  return concat(before(dest, pos), src, after(dest, pos-1));
601 }
602 
603 inline AliDataPtr insert_gap(AliDataPtr data, size_t pos, size_t count) {
604  UnitPair gapinfo;
605 
606  id_assert(data->unitsize() <= sizeof(gapinfo.left));
607 
608  gapinfo.left = data->unit_left_of(pos); // @@@ do not perform ALWAYS (put into an object and lazy eval)
609  gapinfo.right = data->unit_right_of(pos);
610 
611  AliDataPtr gap = data->create_gap(count, gapinfo);
612  return insert_at(data, pos, gap);
613 }
614 
615 inline AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR& error) {
616  size_t curr_len = data->elems();
617  if (curr_len < wanted_len) {
618  data = insert_gap(data, curr_len, wanted_len-curr_len);
619  }
620  else if (curr_len > wanted_len) {
621  data = delete_from(data, wanted_len, curr_len-wanted_len, error);
622  }
623  id_assert(data->elems() == wanted_len);
624  return data;
625 }
626 
627 
628 template<typename T> inline AliDataPtr makeAliData(T*& allocated_data, size_t elems, const T& gap) {
629  return new SpecificAliData<T>(allocated_data, elems, gap, dontAllowOversize(elems), Deletable(Deletable::ANYTHING));
630 }
631 inline AliDataPtr makeAliSeqData(char*& allocated_data, size_t elems, char gap, char dot) {
632  return new SequenceAliData(allocated_data, elems, gap, dot, dontAllowOversize(elems), Deletable(Deletable::ANYTHING));
633 }
634 
635 // --------------------------------------------------------------------------------
636 
637 #ifdef UNIT_TESTS
638 #ifndef TEST_UNIT_H
639 #include <test_unit.h>
640 #endif
641 
642 template<typename T>
643 inline T*& copyof(const T* const_data, size_t elemsize, size_t elements) { // @@@ elemsize should be derived from type here (if possible)
644  static T *copy = NULp;
645 
646  size_t memsize = elemsize*elements;
647  id_assert(!copy);
648  copy = (T*)ARB_alloc<char>(memsize);
649  id_assert(copy);
650  memcpy(copy, const_data, memsize);
651  return copy;
652 }
653 
// COPYOF: heap copy of a whole array (element size and count are derived from the array)
// SIZEOF: size of a whole array in bytes
654 #define COPYOF(typedarray) copyof(typedarray, sizeof(*(typedarray)), ARRAY_ELEMS(typedarray))
655 #define SIZEOF(typedarray) (sizeof(*(typedarray))*ARRAY_ELEMS(typedarray))
656 
// Expects that 'd1' and 'd2' have the same memsize() and that their copyTo() results
// are bytewise equal. Both copies share one allocation ('copy2' starts behind 'copy1').
657 #define TEST_EXPECT_COPIES_EQUAL(d1,d2) do{ \
658  size_t s1 = (d1)->memsize(); \
659  size_t s2 = (d2)->memsize(); \
660  TEST_EXPECT_EQUAL(s1, s2); \
661  void *copy1 = ARB_alloc<char>(s1+s2); \
662  void *copy2 = reinterpret_cast<char*>(copy1)+s1; \
663  (d1)->copyTo(copy1); \
664  (d2)->copyTo(copy2); \
665  TEST_EXPECT_MEM_EQUAL(copy1, copy2, s1); \
666  free(copy1); \
667  }while(0)
668 
// Expects that 'adp' occupies 'asize' bytes and that its copyTo() result is
// bytewise equal to the plain array 'typedarray'.
// The buffer is allocated as 'size' chars, i.e. exactly memsize() bytes
// (assumes ARB_alloc<T>(n) allocates n elements of T, as its other uses in this file suggest).
#define TEST_EXPECT_COPY_EQUALS_ARRAY(adp,typedarray,asize) do{ \
        size_t size = (adp)->memsize();                         \
        TEST_EXPECT_EQUAL(size, asize);                         \
        void *ad_copy = ARB_alloc<char>(size);                  \
        (adp)->copyTo(ad_copy);                                 \
        TEST_EXPECT_MEM_EQUAL(ad_copy, typedarray, size);       \
        free(ad_copy);                                          \
    }while(0)
677 
// Expects that the copyTo() result of 'adp', terminated with a 0-byte, equals 'str'.
678 #define TEST_EXPECT_COPY_EQUALS_STRING(adp,str) do{ \
679  size_t size = (adp)->memsize(); \
680  char *ad_copy = ARB_alloc<char>(size+1); \
681  (adp)->copyTo(ad_copy); \
682  ad_copy[size] = 0; \
683  TEST_EXPECT_EQUAL(ad_copy, str); \
684  free(ad_copy); \
685  }while(0)
686 
#if defined(ENABLE_CRASH_TESTS) && defined(ASSERTION_USED)
// Concatenates data with different unit sizes (int and char);
// the ctor of ComposedAliData asserts equal unit sizes.
static void illegal_alidata_composition() {
    const int ELEMS = 5;

    int  *int_units  = ARB_alloc<int> (ELEMS);
    char *char_units = ARB_alloc<char>(ELEMS);

    AliDataPtr int_data  = makeAliData(int_units, ELEMS, 0);
    AliDataPtr char_data = makeAliData(char_units, ELEMS, '-');

    concat(int_data, char_data);
}
#endif
697 
// Expects that composing AliData with different unit sizes fails an assertion.
// NOTE(review): illegal_alidata_composition is only defined if ENABLE_CRASH_TESTS and
// ASSERTION_USED are defined; presumably the macro drops its argument otherwise - confirm.
698 void TEST_illegal_alidata__crashtest() {
699  TEST_EXPECT_CODE_ASSERTION_FAILS(illegal_alidata_composition);
700 }
701 
702 template <typename T>
703 inline T *makeCopy(AliDataPtr d) {
704  TEST_EXPECT_EQUAL(d->unitsize(), sizeof(T));
705  size_t size = d->memsize();
706  T *copy = (T*)ARB_alloc<char>(size);
707  d->copyTo(copy);
708  return copy;
709 }
710 
711 template <typename T>
712 static arb_test::match_expectation compare_works(AliDataPtr d1, AliDataPtr d2, int expected_cmp) {
713  int brute_force_compare = 0;
714  {
715  int minSize = std::min(d1->elems(), d2->elems());
716 
717  T *copy1 = makeCopy<T>(d1);
718  T *copy2 = makeCopy<T>(d2);
719 
720  for (int i = 0; i < minSize && brute_force_compare == 0; ++i) { // compare inclusive terminal zero-element
721  brute_force_compare = compare_type(copy1[i], copy2[i]);
722  }
723 
724  if (brute_force_compare == 0) {
725  brute_force_compare = compare_type(d1->elems(), d2->elems());
726  }
727 
728  free(copy2);
729  free(copy1);
730  }
731 
732  int smart_forward_compare = d1->cmp_whole_data(*d2);
733  int smart_backward_compare = d2->cmp_whole_data(*d1);
734 
735  using namespace arb_test;
736  expectation_group expected;
737 
738  expected.add(that(brute_force_compare).is_equal_to(expected_cmp));
739  expected.add(that(smart_forward_compare).is_equal_to(expected_cmp));
740  expected.add(that(smart_backward_compare).is_equal_to(-expected_cmp));
741 
742  return all().ofgroup(expected);
743 }
744 
// TEST_COMPARE_WORKS: compare_works() for character data.
745 #define TEST_COMPARE_WORKS(d1,d2,expected) TEST_EXPECTATION(compare_works<char>(d1,d2,expected))
746 
// TEST_COMPARE_WORKS_ALL_TYPES: compare_works() for the unit type selected by 'tid'
// (0 = char, 1 = GB_UINT4, 2 = float); any other 'tid' tests nothing.
747 #define TEST_COMPARE_WORKS_ALL_TYPES(tid,d1,d2,expected) \
748  switch (tid) { \
749  case 0: TEST_EXPECTATION(compare_works<char>(d1,d2,expected)); break; \
750  case 1: TEST_EXPECTATION(compare_works<GB_UINT4>(d1,d2,expected)); break; \
751  case 2: TEST_EXPECTATION(compare_works<float>(d1,d2,expected)); break; \
752  }
753 
// Dummy which only exists if crash tests are disabled (see trailing comment for the reason).
// It only calls itself; nothing in the code shown here calls it.
754 #if !defined(ENABLE_CRASH_TESTS)
755 static void avoid_INVALID_testExport() { avoid_INVALID_testExport(); } // avoids weird symbol-export-bug with stabs + !ENABLE_CRASH_TESTS (encountered with gcc 4.4.3)
756 #endif
757 
// Unit test for the AliData classes and the helpers built on top of them
// (concat, before, after, partof, delete_from, insert_at, insert_gap).
// All checks are performed for three unit types (char sequence, GB_CUINT4, float);
// checks of the resulting characters are only done for the char sequence (t == 0).
758 __ATTR__REDUCED_OPTIMIZE void TEST_AliData() {
759 #define SEQDATA "CGCAC-C-GG-C-GG.A.-C------GG-.C..UCAGU"
760  char chr_src[] = SEQDATA; // also contains trailing 0-byte!
761  GB_CUINT4 int_src[] = { 0x01, 0x1213, 0x242526, 0x37383930, 0xffffffff };
762  float flt_src[] = { 0.0, 0.5, 1.0, -5.0, 20.1 };
763 
    // one AliData per tested unit type (the char data excludes the trailing 0-byte)
764  AliDataPtr type[] = {
765  makeAliSeqData(COPYOF(chr_src), ARRAY_ELEMS(chr_src)-1, '-', '.'),
766  makeAliData(COPYOF(int_src), ARRAY_ELEMS(int_src), 0U),
767  makeAliData(COPYOF(flt_src), ARRAY_ELEMS(flt_src), 0.0F)
768  };
769  TEST_EXPECT_COPY_EQUALS_ARRAY(type[0], chr_src, SIZEOF(chr_src)-1);
770  TEST_EXPECT_COPY_EQUALS_STRING(type[0], chr_src);
771  TEST_EXPECT_COPY_EQUALS_ARRAY(type[1], int_src, SIZEOF(int_src));
772  TEST_EXPECT_COPY_EQUALS_ARRAY(type[2], flt_src, SIZEOF(flt_src));
773 
774  for (size_t t = 0; t<ARRAY_ELEMS(type); ++t) {
775  AliDataPtr data = type[t];
776  AliDataPtr dup = concat(data, data);
777  TEST_EXPECT_EQUAL(dup->elems(), 2*data->elems());
778 
    // split data into start (3 units), mid (1 unit) and end (rest)
779  AliDataPtr start = before(data, 3);
780  TEST_EXPECT_EQUAL(start->elems(), 3U);
781 
782  AliDataPtr end = after(data, 3);
783  TEST_EXPECT_EQUAL(end->elems(), data->elems()-4);
784 
785  AliDataPtr mid = partof(data, 3, 1);
786  TEST_EXPECT_COPIES_EQUAL(concat(start, mid, end), data);
787 
788  GB_ERROR error = NULp;
789  AliDataPtr del = delete_from(data, 3, 1, error);
790  TEST_EXPECT_NO_ERROR(error);
791  TEST_EXPECT_EQUAL(del->elems(), data->elems()-1);
792  TEST_EXPECT_COPIES_EQUAL(concat(start, end), del);
793 
794  AliDataPtr empty = before(data, 0);
795  TEST_EXPECT_EQUAL(empty->elems(), 0U);
796 
797  TEST_EXPECT_COPIES_EQUAL(data, concat(data, empty));
798  TEST_EXPECT_COPIES_EQUAL(data, concat(empty, data));
799  TEST_EXPECT_COPIES_EQUAL(empty, concat(empty, empty));
800 
    // deleting more than available deletes up to the end
801  AliDataPtr del_rest = delete_from(data, 3, 999, error);
802  TEST_EXPECT_NO_ERROR(error);
803  TEST_EXPECT_COPIES_EQUAL(start, del_rest);
804 
805  AliDataPtr ins = insert_at(del, 3, mid);
806  TEST_EXPECT_COPIES_EQUAL(data, ins);
807  TEST_EXPECT_COPIES_EQUAL(del, delete_from(ins, 3, 1, error));
808  TEST_EXPECT_NO_ERROR(error);
809 
810  TEST_EXPECT_COPIES_EQUAL(insert_at(del, 3, empty), del);
811  TEST_EXPECT_COPIES_EQUAL(insert_at(del, 777, empty), del); // append via insert_at
812  TEST_EXPECT_COPIES_EQUAL(insert_at(start, 777, end), del); // append via insert_at
813 
814  AliDataPtr ins_gap = insert_gap(del, 4, 5);
815  TEST_EXPECT_EQUAL(ins_gap->elems(), del->elems()+5);
816 
817  AliDataPtr gap_iseq = partof(ins_gap, 4, 5);
818 
819  TEST_EXPECT_COPIES_EQUAL(ins_gap, insert_gap(ins_gap, 7, 0)); // insert empty gap
820 
821  AliDataPtr start_gap1 = insert_gap(ins_gap, 0, 1); // insert gap at start
822  AliDataPtr start_gap3 = insert_gap(ins_gap, 0, 3); // insert gap at start
823 
824  AliDataPtr gap_iempty = insert_gap(empty, 0, 5);
825  TEST_EXPECT_EQUAL(gap_iempty->elems(), 5U);
826 
827  AliDataPtr gap_in_gap = insert_gap(gap_iempty, 3, 2);
828  TEST_EXPECT_EQUAL(gap_in_gap->elems(), 7U);
829 
830  AliDataPtr end_gap1 = insert_gap(mid, 1, 1);
831  TEST_EXPECT_EQUAL(end_gap1->elems(), 2U);
832 
    // character-level checks (sequence data only)
833  if (t == 0) {
834  AliDataPtr end_gap2 = insert_gap(end, 34, 2);
835 
836  TEST_EXPECT_COPY_EQUALS_STRING(start, "CGC");
837  TEST_EXPECT_COPY_EQUALS_STRING(end, "C-C-GG-C-GG.A.-C------GG-.C..UCAGU");
838  TEST_EXPECT_COPY_EQUALS_STRING(end_gap2, "C-C-GG-C-GG.A.-C------GG-.C..UCAGU..");
839  TEST_EXPECT_COPY_EQUALS_STRING(mid, "A");
840  TEST_EXPECT_COPY_EQUALS_STRING(end_gap1, "A-"); // '-' is ok, since before there was a C behind (but correct would be '.')
841  TEST_EXPECT_COPY_EQUALS_STRING(del, "CGCC-C-GG-C-GG.A.-C------GG-.C..UCAGU");
842  TEST_EXPECT_COPY_EQUALS_STRING(del_rest, "CGC");
843  TEST_EXPECT_COPY_EQUALS_STRING(ins, "CGCAC-C-GG-C-GG.A.-C------GG-.C..UCAGU");
844  TEST_EXPECT_COPY_EQUALS_STRING(gap_iseq, "-----"); // inserted between bases
845  TEST_EXPECT_COPY_EQUALS_STRING(gap_iempty, "....."); // inserted in empty sequence
846  TEST_EXPECT_COPY_EQUALS_STRING(gap_in_gap, "......."); // inserted gap in gap
847  TEST_EXPECT_COPY_EQUALS_STRING(ins_gap, "CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU");
848  TEST_EXPECT_COPY_EQUALS_STRING(start_gap1, ".CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU");
849  TEST_EXPECT_COPY_EQUALS_STRING(start_gap3, "...CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU");
850 
    // the inserted gap character depends on the characters next to the insert position
851  AliDataPtr bef_dot = insert_gap(ins, 15, 2);
852  AliDataPtr aft_dot = insert_gap(ins, 16, 2);
853  AliDataPtr bet_dots = insert_gap(ins, 32, 2);
854  AliDataPtr bet_dashes = insert_gap(ins, 23, 2);
855  AliDataPtr bet_dashdot = insert_gap(ins, 29, 2);
856  AliDataPtr bet_dotdash = insert_gap(ins, 18, 2);
857 
858  TEST_EXPECT_COPY_EQUALS_STRING(ins, "CGCAC-C-GG-C-GG.A.-C------GG-.C..UCAGU");
859  TEST_EXPECT_COPY_EQUALS_STRING(bef_dot, "CGCAC-C-GG-C-GG...A.-C------GG-.C..UCAGU");
860  TEST_EXPECT_COPY_EQUALS_STRING(aft_dot, "CGCAC-C-GG-C-GG...A.-C------GG-.C..UCAGU");
861  TEST_EXPECT_COPY_EQUALS_STRING(bet_dots, "CGCAC-C-GG-C-GG.A.-C------GG-.C....UCAGU");
862  TEST_EXPECT_COPY_EQUALS_STRING(bet_dashes, "CGCAC-C-GG-C-GG.A.-C--------GG-.C..UCAGU");
863  TEST_EXPECT_COPY_EQUALS_STRING(bet_dashdot,"CGCAC-C-GG-C-GG.A.-C------GG---.C..UCAGU");
864  TEST_EXPECT_COPY_EQUALS_STRING(bet_dotdash,"CGCAC-C-GG-C-GG.A.---C------GG-.C..UCAGU");
865 
866  {
867  // test comparability of AliData
868 
869  AliDataPtr same_as_start_gap1 = after(start_gap3, 1);
870 
871  TEST_COMPARE_WORKS(start_gap1, same_as_start_gap1, 0);
872 
873  TEST_EXPECT(start_gap1->differs_from(*start_gap3));
874  // TEST_EXPECT_EQUAL(strcmp(".CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU", // start_gap1
875  // "...CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU"), 1); // start_gap3
876 
877  TEST_EXPECT_EQUAL(start_gap1->cmp_whole_data(*start_gap3), 1);
878  TEST_EXPECT_EQUAL(start_gap3->cmp_whole_data(*start_gap1), -1);
879 
880  TEST_COMPARE_WORKS(end, end_gap2, -1);
881  }
882  }
883 
884  {
885  // test comparability of AliData (for all types)
886 
887  TEST_COMPARE_WORKS_ALL_TYPES(t, start_gap1, start_gap3, 1);
888  TEST_COMPARE_WORKS_ALL_TYPES(t, gap_iempty, gap_in_gap, -1);
889  TEST_COMPARE_WORKS_ALL_TYPES(t, del, ins, 1);
890  TEST_COMPARE_WORKS_ALL_TYPES(t, partof(ins_gap, 0, 17), partof(start_gap3, 3, 17), 0);
891  TEST_COMPARE_WORKS_ALL_TYPES(t, start_gap3, start_gap3, 0);
892  }
893  }
894 
895 }
896 
897 #endif // UNIT_TESTS
898 
899 // --------------------------------------------------------------------------------
900 
905 };
906 
908  "Species",
909  "SAI",
910  "SeceditStruct",
911 };
912 
913 class Alignment {
914  SmartCharPtr name; // name of alignment
915  size_t len; // length of alignment
916 public:
917  Alignment(const char *name_, size_t len_) : name(strdup(name_)), len(len_) {}
918 
919  const char *get_name() const { return &*name; }
920  size_t get_len() const { return len; }
921 };
922 
923 // --------------------------------------------------------------------------------
924 
// Base class for operations that visit all alignment-related entries of one alignment.
// Derived classes implement apply_to_terminal(); apply_to_alignment() is the public entry point.
925 class AliApplicable { // something that can be applied to the whole alignment
// called for every non-container entry reached by apply_recursive()
926  virtual GB_ERROR apply_to_terminal(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const = 0;
927 
// traversal helpers (defined below the class)
928  GB_ERROR apply_recursive(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const;
929  GB_ERROR apply_to_childs_named(GBDATA *gb_item_data, const char *item_field, TerminalType term_type, const Alignment& ali) const;
930  GB_ERROR apply_to_secstructs(GBDATA *gb_secstructs, const Alignment& ali) const;
931 
932 public:
934  virtual ~AliApplicable() {}
935 
// applies the operation to the entries of alignment 'ali' found below 'gb_main'; returns the first error (or NULp)
936  GB_ERROR apply_to_alignment(GBDATA *gb_main, const Alignment& ali) const;
937 };
938 
939 GB_ERROR AliApplicable::apply_recursive(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const {
940  GB_ERROR error = NULp;
941  GB_TYPES type = GB_read_type(gb_data);
942 
943  if (type == GB_DB) {
944  GBDATA *gb_child;
945  for (gb_child = GB_child(gb_data); gb_child && !error; gb_child = GB_nextChild(gb_child)) {
946  error = apply_recursive(gb_child, term_type, item_name, ali);
947  }
948  }
949  else {
950  error = apply_to_terminal(gb_data, term_type, item_name, ali);
951  }
952 
953  return error;
954 }
955 GB_ERROR AliApplicable::apply_to_childs_named(GBDATA *gb_item_data, const char *item_field, TerminalType term_type, const Alignment& ali) const {
956  GBDATA *gb_item;
957  GB_ERROR error = NULp;
958  long item_count = GB_number_of_subentries(gb_item_data);
959 
960  if (item_count) {
961  for (gb_item = GB_entry(gb_item_data, item_field);
962  gb_item && !error;
963  gb_item = GB_nextEntry(gb_item))
964  {
965  GBDATA *gb_ali = GB_entry(gb_item, ali.get_name());
966  if (gb_ali) {
967  char *item_name = ARB_strdup(GBT_get_name_or_description(gb_item));
968  error = apply_recursive(gb_ali, term_type, item_name, ali);
969  if (error) error = GBS_global_string("%s '%s': %s", targetTypeName[term_type], item_name, error);
970  free(item_name);
971  }
972  }
973  }
974  return error;
975 }
976 GB_ERROR AliApplicable::apply_to_secstructs(GBDATA *gb_secstructs, const Alignment& ali) const {
977  GB_ERROR error = NULp;
978  GBDATA *gb_ali = GB_entry(gb_secstructs, ali.get_name());
979 
980  if (gb_ali) {
981  long item_count = GB_number_of_subentries(gb_ali)-1;
982  if (item_count<1) item_count = 1;
983 
984  GBDATA *gb_item;
985  for (gb_item = GB_entry(gb_ali, "struct");
986  gb_item && !error;
987  gb_item = GB_nextEntry(gb_item))
988  {
989  GBDATA *gb_ref = GB_entry(gb_item, "ref");
990  if (gb_ref) {
991  error = apply_recursive(gb_ref, IDT_SECSTRUCT, "ref", ali);
992  if (error) {
993  const char *item_name = GBT_get_name_or_description(gb_item);
994  error = GBS_global_string("%s '%s': %s", targetTypeName[IDT_SECSTRUCT], item_name, error);
995  }
996  }
997  }
998  }
999  return error;
1000 }
1001 
1003  GB_ERROR error = apply_to_childs_named(GBT_find_or_create(gb_main, "extended_data", 7), "extended", IDT_SAI, ali);
1004  if (!error) error = apply_to_secstructs(GB_search(gb_main, "secedit/structs", GB_CREATE_CONTAINER), ali);
1005  if (!error) error = apply_to_childs_named(GBT_find_or_create(gb_main, "species_data", 7), "species", IDT_SPECIES, ali);
1006  return error;
1007 }
1008 
1009 // --------------------------------------------------------------------------------
1010 
1012  mutable size_t count;
1013  GB_ERROR apply_to_terminal(GBDATA *, TerminalType, const char *, const Alignment&) const OVERRIDE { count++; return NULp; }
1014 public:
1015  AliEntryCounter() : count(0) {}
1016  size_t get_entry_count() const { return count; }
1017 };
1018 
1019 // --------------------------------------------------------------------------------
1020 
1022  virtual ~AliEditCommand() {}
1023  virtual AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const = 0;
1024  virtual GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const = 0;
1025 };
1026 
1028  size_t pos; // inserts in front of pos
1029  size_t amount;
1030 public:
1031  AliInsertCommand(size_t pos_, size_t amount_) : pos(pos_), amount(amount_) {}
1032  AliDataPtr apply(AliDataPtr to, GB_ERROR& /*error*/) const OVERRIDE { return insert_gap(to, pos, amount); }
1033  GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE {
1034  size_t len = ali.get_len();
1035  if (pos>len) {
1036  return GBS_global_string("Can't insert at position %zu (exceeds length %zu of alignment '%s')",
1037  pos, len, ali.get_name());
1038  }
1039  resulting_ali_length = len+amount;
1040  return NULp;
1041  }
1042 };
1043 
1045  size_t pos;
1046  size_t amount;
1047 public:
1048  AliDeleteCommand(size_t pos_, size_t amount_)
1049  : pos(pos_),
1050  amount(amount_)
1051  {}
1052  AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const OVERRIDE { return delete_from(to, pos, amount, error); }
1053  GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE {
1054  size_t len = ali.get_len();
1055  size_t end_pos = pos+amount-1;
1056  if (end_pos >= len) {
1057  return GBS_global_string("Can't delete positions %zu-%zu (exceeds max. position %zu of alignment '%s')",
1058  pos, end_pos, len-1, ali.get_name());
1059  }
1060  resulting_ali_length = len-amount;
1061  return NULp;
1062  }
1063 };
1064 
// Edit command that formats data to the length 'wanted_len'
// (limited by what the data's SizeAwarable interface allows, see get_allowed_size() below).
1065 class AliFormatCommand FINAL_TYPE : public AliEditCommand {
1066  size_t wanted_len;
1067 
1068 public:
1069  AliFormatCommand(size_t wanted_len_) : wanted_len(wanted_len_) {}
// NOTE(review): the signature line of the following method body is not part of this listing;
// the body uses 'to' and 'error' (presumably the apply() override declared by AliEditCommand).
1071  SizeAwarable *knows_size = dynamic_cast<SizeAwarable*>(&*to);
1072 
1073  id_assert(knows_size); // format can only be applied to SpecificAliData
1074  // i.e. AliFormatCommand has to be the FIRST of a series of applied commands!
1075 
// the data itself decides which size is acceptable for the wanted length
1076  int allowed_size = knows_size->get_allowed_size(to->elems(), wanted_len);
1077  return format(to, allowed_size, error);
1078  }
// Formatting never changes the alignment length: expects (assertion only) that 'ali' already
// has the wanted length, reports that length as result and never fails.
1079  GB_ERROR check_applicable_to(const Alignment& IF_ASSERTION_USED(ali), size_t& resulting_ali_length) const OVERRIDE {
1080  id_assert(ali.get_len() == wanted_len);
1081  resulting_ali_length = wanted_len;
1082  return NULp;
1083  }
1084 };
1085 
1087  mutable SmartPtr<AliFormatCommand> cmd;
1088 public:
1090  return cmd->apply(to, error);
1091  }
1092  GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE {
1093  cmd = new AliFormatCommand(ali.get_len()); // late decision on length to format
1094  return cmd->check_applicable_to(ali, resulting_ali_length);
1095  }
1096 };
1097 
1099  AliEditCommand *first;
1100  AliEditCommand *second;
1101 public:
1102  AliCompositeCommand(AliEditCommand *cmd1_, AliEditCommand *cmd2_) // takes ownership of commands
1103  : first(cmd1_),
1104  second(cmd2_)
1105  {}
1106  ~AliCompositeCommand() OVERRIDE { delete second; delete first; }
1108  AliDataPtr tmp = first->apply(to, error);
1109  if (!error) tmp = second->apply(tmp, error);
1110  return tmp;
1111  }
1112  GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE {
1113  GB_ERROR error = first->check_applicable_to(ali, resulting_ali_length);
1114  if (!error) {
1115  Alignment tmp_ali(ali.get_name(), resulting_ali_length);
1116  error = second->check_applicable_to(tmp_ali, resulting_ali_length);
1117  }
1118  return error;
1119  }
1120 };
1121 
1122 // --------------------------------------------------------------------------------
1123 
1124 class AliEditor : public AliApplicable {
1125  const AliEditCommand& cmd;
1126  Deletable deletable;
1127 
1128  mutable arb_progress progress;
1129  mutable size_t modified_counter;
1130 
1131  GB_ERROR apply_to_terminal(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const OVERRIDE;
1132 
1133 public:
1134  static bool shall_edit(GBDATA *gb_data, TerminalType term_type) {
1135  // defines whether specific DB-elements shall be edited by any AliEditor
1136  // (true for all data, that contains alignment position specific data)
1137 
1138  const char *key = GB_read_key_pntr(gb_data);
1139  bool shall = key[0] != '_'; // general case: don't apply to keys starting with '_'
1140  if (!shall) shall = term_type == IDT_SAI && strcmp(key, "_REF") == 0; // exception (SAI:_REF needs editing)
1141  return shall;
1142  }
1143 
1144  AliEditor(const AliEditCommand& cmd_, const Deletable& deletable_, const char *progress_title, size_t progress_count)
1145  : cmd(cmd_),
1146  deletable(deletable_),
1147  progress(progress_title, progress_count),
1148  modified_counter(0)
1149  {}
1150  ~AliEditor() OVERRIDE {
1151  progress.done();
1152  }
1153 
1154  const AliEditCommand& edit_command() const { return cmd; }
1155 };
1156 
1157 // --------------------------------------------------------------------------------
1158 
1159 static char *insDelBuffer = NULp;
1160 static size_t insDelBuffer_size;
1161 
1162 inline void free_insDelBuffer() {
1163  freenull(insDelBuffer);
1164 }
1165 inline char *provide_insDelBuffer(size_t neededSpace) {
1166  if (insDelBuffer && insDelBuffer_size<neededSpace) free_insDelBuffer();
1167  if (!insDelBuffer) {
1168  insDelBuffer_size = neededSpace+10;
1169  insDelBuffer = ARB_alloc<char>(insDelBuffer_size);
1170  }
1171  return insDelBuffer;
1172 }
1173 
1174 inline GB_CSTR alidata2buffer(const AliData& data) { // @@@ DRY vs copying code (above in this file)
1175  char *buffer = provide_insDelBuffer(data.memsize()+1);
1176 
1177  data.copyTo(buffer);
1178  buffer[data.memsize()] = 0; // only needed for strings but does not harm otherwise
1179 
1180  return buffer;
1181 }
1182 
1183 // --------------------------------------------------------------------------------
1184 
1185 class EditedTerminal;
1186 
// AliData proxy that defers reading the DB entry until the data is first accessed.
// All data operations are forwarded to the loaded data.
1187 class LazyAliData : public AliData, public SizeAwarable, virtual Noncopyable {
1188  // internally transforms into SpecificAliData as soon as somebody tries to access the data.
1189  // (implements lazy loading of sequence data, esp. useful when applying AliFormatCommand; see #702)
1190 
1191  TerminalType term_type;
1192  EditedTerminal& terminal;
1193  mutable AliDataPtr loaded; // always is TypedAliData<T>
1194 
// returns the real data; triggers load_data() on first use
1195  AliDataPtr loaded_data() const {
1196  if (loaded.isNull()) load_data();
1197  return loaded;
1198  }
1199 
1200 public:
// 'size_' is the element count passed on to AliData; 'oversizable' is copied into the SizeAwarable base.
// 'terminal_' is only referenced here (stored as reference member).
1201  LazyAliData(const SizeAwarable& oversizable, size_t size_, TerminalType term_type_, EditedTerminal& terminal_)
1202  : AliData(size_),
1203  SizeAwarable(oversizable),
1204  term_type(term_type_),
1205  terminal(terminal_)
1206  {}
1207 
1208  size_t unitsize() const OVERRIDE {
1209  // Note: information also known by EditedTerminal (only depends on data-type)
1210  // Loading the data would not be necessary here (does not harm at the moment, as the data is always needed afterwards)
1211  return loaded_data()->unitsize();
1212  }
1213  bool has_slice() const OVERRIDE {
1214  id_assert(loaded_data()->has_slice() == false); // TypedAliData<T> never has_slice()!
1215  return false;
1216  }
1217 
// plain forwarders to the loaded data:
1218  int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE { return loaded_data()->operate_on_mem(mem, start, count, op); }
1219  int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE { return loaded_data()->cmp_data(start, other, ostart, count); }
1220 
1221  UnitPtr unit_left_of(size_t pos) const OVERRIDE { return loaded_data()->unit_left_of(pos); }
1222  UnitPtr unit_right_of(size_t pos) const OVERRIDE { return loaded_data()->unit_right_of(pos); }
1223 
1224  AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE { return loaded_data()->create_gap(gapsize, gapinfo); }
// never expected to be called; terminates the program
1225  __ATTR__NORETURN AliDataPtr slice_down(size_t /*start*/, size_t /*count*/) const OVERRIDE {
1226  GBK_terminate("logic error: slice_down called for explicit LazyAliData");
1227  }
1228 
1229  void load_data() const; // has to be public to be a friend of EditedTerminal
1230 };
1231 
1232 // --------------------------------------------------------------------------------
1233 
// Wraps one terminal DB entry (string, bits, bytes, ints or floats) that shall be edited.
// The entry's content is represented by a LazyAliData (created in the constructor);
// apply() runs an AliEditCommand on it and writes the result back if it differs.
1234 class EditedTerminal : virtual Noncopyable {
1235  GBDATA *gb_data;
1236  GB_TYPES type;
1237  const char *item_name; // name of SAI/species etc
1238  AliDataPtr data;
1239  Deletable deletable;
1240  GB_ERROR error;
1241 
// true if the key of the DB entry equals 'expected_key'
1242  bool has_key(const char *expected_key) const {
1243  return strcmp(GB_read_key_pntr(gb_data), expected_key) == 0;
1244  }
// true if the name of the owning item equals 'expected_name'
1245  bool has_name(const char *expected_name) const {
1246  return strcmp(item_name, expected_name) == 0;
1247  }
1248 
// string entries "ref" (of secondary structures) and "_REF" (of SAIs)
1249  bool is_ref(TerminalType term_type) const {
1250  return
1251  type == GB_STRING &&
1252  ((term_type == IDT_SECSTRUCT && has_key("ref")) ||
1253  (term_type == IDT_SAI && has_key("_REF")));
1254  }
// string entry "data" of the SAIs named "HELIX" or "HELIX_NR"
1255  bool is_helix(TerminalType term_type) const {
1256  return
1257  type == GB_STRING &&
1258  term_type == IDT_SAI &&
1259  (has_name("HELIX") || has_name("HELIX_NR")) &&
1260  has_key("data");
1261  }
1262 
// only reference strings are flagged as oversize-tolerant
1263  bool does_allow_oversize(TerminalType term_type) const { return is_ref(term_type); }
// gap character used for string data: '.' for references and helix data, '-' otherwise
1264  char get_std_string_gaptype(TerminalType term_type) const {
1265  bool prefers_dots = is_ref(term_type) || is_helix(term_type);
1266  return prefers_dots ? '.' : '-';
1267  }
1268 
// Reads the DB entry and wraps its content into an AliData matching the entry's type.
// On read failure (or unhandled type) 'error' is set; 'data' is returned in any case.
1269  AliDataPtr load_data(const SizeAwarable& oversizable, size_t size_, TerminalType term_type) {
1270  switch(type) {
1271  case GB_STRING: {
1272  const char *s = GB_read_char_pntr(gb_data);
1273  if (!s) error = GB_await_error();
1274  else {
1275  char stdgap = get_std_string_gaptype(term_type);
1276  if (stdgap == '.') data = new SpecificAliData<char>(s, size_, '.', oversizable, deletable);
1277  else data = new SequenceAliData(s, size_, stdgap, '.', oversizable, deletable);
1278  }
1279  break;
1280  }
1281  case GB_BITS: {
1282  const char *b = GB_read_bits_pntr(gb_data, '-', '+');
1283  if (!b) error = GB_await_error();
1284  else data = new SpecificAliData<char>(b, size_, '-', oversizable, deletable);
1285  break;
1286  }
1287  case GB_BYTES: {
1288  const char *b = GB_read_bytes_pntr(gb_data);
1289  if (!b) error = GB_await_error();
1290  else data = new SpecificAliData<char>(b, size_, 0, oversizable, deletable);
1291  break;
1292  }
1293  case GB_INTS: {
1294  const GB_UINT4 *ui = GB_read_ints_pntr(gb_data);
1295  if (!ui) error = GB_await_error();
1296  else data = new SpecificAliData<GB_UINT4>(ui, size_, 0, oversizable, deletable);
1297  break;
1298  }
1299  case GB_FLOATS: {
1300  const float *f = GB_read_floats_pntr(gb_data);
1301  if (!f) error = GB_await_error();
1302  else data = new SpecificAliData<float>(f, size_, 0.0, oversizable, deletable);
1303  break;
1304  }
1305 
1306  default:
1307  error = GBS_global_string("Unhandled type '%i'", type);
1308  id_assert(0);
1309  break;
1310  }
1311 
// if loading succeeded, the loaded data has to provide exactly 'size_' elements
1312  id_assert(implicated(!error, size_ == data->elems()));
1313  return data;
1314  }
1315 
1316  friend void LazyAliData::load_data() const;
1317 
1318 public:
// Does not read the entry yet: only creates a LazyAliData announcing 'size_' elements.
1319  EditedTerminal(GBDATA *gb_data_, GB_TYPES type_, const char *item_name_, size_t size_, TerminalType term_type, const Alignment& ali, const Deletable& deletable_)
1320  : gb_data(gb_data_),
1321  type(type_),
1322  item_name(item_name_),
1323  deletable(deletable_),
1324  error(NULp)
1325  {
1326  SizeAwarable oversizable(does_allow_oversize(term_type), ali.get_len());
1327  data = new LazyAliData(oversizable, size_, term_type, *this);
1328  }
1329 
// Applies 'cmd' to the data and writes the result back into the DB entry if it differs
// from the current content. 'did_modify' is set to true only if the entry was written successfully.
// Returns the error state of this terminal (NULp on success).
1330  GB_ERROR apply(const AliEditCommand& cmd, bool& did_modify) {
1331  did_modify = false;
1332  if (!error) {
1333  AliDataPtr modified_data = cmd.apply(data, error);
1334 
1335  if (!error && modified_data->differs_from(*data)) {
// serialize into the shared scratch buffer (see alidata2buffer)
1336  GB_CSTR modified = alidata2buffer(*modified_data);
1337  size_t modified_elems = modified_data->elems();
1338 
1339  switch (type) {
1340  case GB_STRING: {
1341  id_assert(strlen(modified) == modified_elems);
1342  error = GB_write_string(gb_data, modified);
1343  break;
1344  }
1345  case GB_BITS: error = GB_write_bits (gb_data, modified, modified_elems, "-"); break;
1346  case GB_BYTES: error = GB_write_bytes (gb_data, modified, modified_elems); break;
1347  case GB_INTS: error = GB_write_ints (gb_data, (GB_UINT4*)modified, modified_elems); break;
1348  case GB_FLOATS: error = GB_write_floats(gb_data, (float*)modified, modified_elems); break;
1349 
1350  default: id_assert(0); break;
1351  }
1352 
1353  if (!error) did_modify = true;
1354  }
1355  }
1356  return error;
1357  }
1358 };
1359 
1361  loaded = terminal.load_data(*this, elems(), term_type);
1362 }
1363 
1364 GB_ERROR AliEditor::apply_to_terminal(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const {
1365  GB_TYPES gbtype = GB_read_type(gb_data);
1366  GB_ERROR error = NULp;
1367  if (gbtype >= GB_BITS && gbtype != GB_OBSOLETE) {
1368  if (shall_edit(gb_data, term_type)) {
1369  EditedTerminal edited(gb_data, gbtype, item_name, GB_read_count(gb_data), term_type, ali, deletable);
1370 
1371  bool terminal_was_modified;
1372  error = edited.apply(edit_command(), terminal_was_modified);
1373  if (terminal_was_modified) {
1374  progress.subtitle(GBS_global_string("modified: %zu", ++modified_counter));
1375  }
1376  }
1377  }
1378  progress.inc_and_check_user_abort(error);
1379  return error;
1380 }
1381 
1382 // --------------------------------------------------------------------------------
1383 
// Returns the entry count reported by 'counter' after it has been applied to alignment 'ali'.
// Used below to dimension the progress of the AliEditor.
// NOTE(review): the declaration of 'counter' is not part of this listing (presumably an AliEntryCounter).
// Any error returned by apply_to_alignment() is ignored here.
1384 static size_t countAffectedEntries(GBDATA *Main, const Alignment& ali) {
1386  counter.apply_to_alignment(Main, ali);
1387  return counter.get_entry_count();
1388 }
1389 
// Central worker: checks and applies one edit command to the matching alignment(s)
// and stores the resulting alignment length. Returns the first error (or NULp).
1390 static GB_ERROR apply_command_to_alignment(const AliEditCommand& cmd, const char *cmd_description, GBDATA *Main, const char *alignment_name, const char *deletable_chars) {
1391  // applies 'cmd' to one or all alignments
1392  // (if 'alignment_name' is NULp, all alignments are affected - probably useless case)
1393  //
1394  // 'deletable_chars' is either
1395  // - NULp -> nothing may be deleted
1396  // - "%" -> anything may be deleted
1397  // - or a string containing all deletable characters
1398 
// NOTE(review): one line of the following expression (the '%'-branch) is not part of this listing
1399  Deletable deletable =
1400  deletable_chars
1401  ? ( strchr(deletable_chars, '%')
1403  : Deletable(deletable_chars))
1404  : Deletable(Deletable::NOTHING);
1405 
1406  GB_ERROR error = NULp;
1407  GBDATA *gb_presets = GBT_get_presets(Main);
1408 
// walk all alignment definitions stored in the presets
1409  for (GBDATA *gb_ali = GB_entry(gb_presets, "alignment");
1410  gb_ali && !error;
1411  gb_ali = GB_nextEntry(gb_ali))
1412  {
// name is compared ignoring case
1413  GBDATA *gb_name = GB_find_string(gb_ali, "alignment_name", alignment_name, GB_IGNORE_CASE, SEARCH_CHILD);
1414 
1415  if (gb_name) {
// NOTE(review): 'gb_len' is used unchecked - assumes every alignment has an "alignment_len" entry
1416  GBDATA *gb_len = GB_entry(gb_ali, "alignment_len");
1417  Alignment ali(GB_read_char_pntr(gb_name), GB_read_int(gb_len));
1418 
// first verify that the command fits the alignment (also yields the new length) ..
1419  size_t resulting_ali_length;
1420  error = cmd.check_applicable_to(ali, resulting_ali_length);
1421 
// .. then edit all entries and finally store the new alignment length
1422  if (!error) error = AliEditor(cmd, deletable, cmd_description, countAffectedEntries(Main, ali)).apply_to_alignment(Main, ali);
1423  if (!error) error = GB_write_int(gb_len, resulting_ali_length);
1424  }
1425  }
1426 
1428 
1429  if (!error) GB_disable_quicksave(Main, "a lot of sequences changed"); // @@@ only disable if a reasonable amount of sequences has changed!
1430 
1431  return error;
1432 }
1433 
1434 static GB_ERROR format_to_alilen(GBDATA *Main, const char *alignment_name) { // @@@ inline
1435  AliAutoFormatCommand fcmd;
1436  return apply_command_to_alignment(fcmd, "Formatting alignment", Main, alignment_name, "-.");
1437 }
1438 
1440  GB_ERROR err = NULp;
1441 
1442  if (strcmp(alignment_name, GENOM_ALIGNMENT) != 0) { // NEVER EVER format 'ali_genom'
1443  err = GBT_check_data(Main, alignment_name); // detect max. length
1444  if (!err) err = format_to_alilen(Main, alignment_name); // format sequences in alignment
1445  if (!err) err = GBT_check_data(Main, alignment_name); // sets state to "formatted"
1446  }
1447  else {
1448  err = "It's forbidden to format '" GENOM_ALIGNMENT "'!";
1449  }
1450  return err;
1451 }
1452 
// Public entry point: inserts or deletes columns in alignment 'alignment_name'.
// Returns NULp on success, otherwise an error message.
1453 GB_ERROR ARB_insdel_columns(GBDATA *Main, const char *alignment_name, long pos, long count, const char *deletable_chars) {
1454  /* if count > 0 insert 'count' characters at pos
1455  * if count < 0 delete |count| characters starting at pos (i.e. positions pos to pos+|count|-1)
1456  *
1457  * Note: deleting is only performed, if found characters in deleted range are listed in 'deletable_chars'
1458  * otherwise function returns with an error.
1459  * (if 'deletable_chars' contains a '%', any character will be deleted)
1460  *
1461  * This affects all species' and SAIs having data in given 'alignment_name' and
1462  * modifies several data entries found there
1463  * (see shall_edit() for details which fields are affected).
1464  */
1465 
1466  GB_ERROR error = NULp;
1467 
// negative positions are rejected before any command gets created
1468  if (pos<0) {
1469  error = GBS_global_string("Illegal sequence position %li", pos);
1470  }
1471  else {
1472  const char *description = NULp;
1473 
// NOTE(review): the declaration of 'idcmd' is not part of this listing
1475  if (count<0) {
1476  idcmd = new AliDeleteCommand(pos, -count);
1477  description = "Deleting columns";
1478  }
1479  else {
// count == 0 also ends up here (insert command with amount 0)
1480  idcmd = new AliInsertCommand(pos, count);
1481  description = "Inserting columns";
1482  }
1483 
1484  error = apply_command_to_alignment(*idcmd, description, Main, alignment_name, deletable_chars);
1485  }
1486  return error;
1487 }
1488 
1489 // AISC_MKPT_PROMOTE:class RangeList;
1490 // AISC_MKPT_PROMOTE:enum UseRange { RANGES, SINGLE_COLUMNS };
1491 // AISC_MKPT_PROMOTE:enum InsertWhere { INFRONTOF, BEHIND };
1492 
1493 GB_ERROR ARB_delete_columns_using_SAI(GBDATA *Main, const char *alignment_name, const RangeList& ranges, const char *deletable_chars) {
1494  // Deletes all columns defined by 'ranges'
1495  // from all members (SAIs, seqs, ..) of alignment named 'alignment_name'.
1496 
1497  GB_ERROR error;
1498  if (ranges.empty()) {
1499  error = "Done with deleting nothing :)";
1500  }
1501  else {
1502  AliEditCommand *cmd = new AliAutoFormatCommand; // @@@ use SmartPtr (here and in AliCompositeCommand)
1503  for (RangeList::reverse_iterator r = ranges.rbegin(); r != ranges.rend(); ++r) {
1504  cmd = new AliCompositeCommand(cmd, new AliDeleteCommand(r->start(), r->size()));
1505  }
1506  error = apply_command_to_alignment(*cmd, "Deleting columns using SAI", Main, alignment_name, deletable_chars);
1507  delete cmd;
1508  }
1509  return error;
1510 }
1511 
1512 GB_ERROR ARB_insert_columns_using_SAI(GBDATA *Main, const char *alignment_name, const RangeList& ranges, UseRange units, InsertWhere where, size_t amount) {
1513  // Insert 'amount' columns into all members of the alignment named 'alignment_name'.
1514  //
1515  // If units is
1516  // - RANGES, each range
1517  // - SINGLE_COLUMNS, each column of each range
1518  // is handled as a unit.
1519  //
1520  // InsertWhere specifies whether the insertion happens INFRONTOF or BEHIND
1521 
1522  GB_ERROR error;
1523  if (!amount || ranges.empty()) {
1524  error = "Done with inserting no gaps :)";
1525  }
1526  else {
1527  AliEditCommand *cmd = new AliAutoFormatCommand; // @@@ use SmartPtr (here and in AliCompositeCommand)
1528  for (RangeList::reverse_iterator r = ranges.rbegin(); r != ranges.rend(); ++r) {
1529  switch (units) {
1530  case RANGES: {
1531  int pos = 0;
1532  switch (where) {
1533  case INFRONTOF: pos = r->start(); break;
1534  case BEHIND: pos = r->end()+1; break;
1535  }
1536  cmd = new AliCompositeCommand(cmd, new AliInsertCommand(pos, amount));
1537  break;
1538  }
1539  case SINGLE_COLUMNS: {
1540  for (int pos = r->end(); pos >= r->start(); --pos) {
1541  cmd = new AliCompositeCommand(cmd, new AliInsertCommand(where == INFRONTOF ? pos : pos+1, amount));
1542  }
1543  break;
1544  }
1545  }
1546  }
1547  error = apply_command_to_alignment(*cmd, "Inserting columns using SAI", Main, alignment_name, NULp);
1548  delete cmd;
1549  }
1550  return error;
1551 }
1552 
1553 // --------------------------------------------------------------------------------
1554 
1556  bool is_ali_rel = false;
1557  GB_TYPES type = GB_read_type(gb_data);
1558  if (type != GB_DB) {
1559  GBDATA *gb_ali = GB_get_father(gb_data); // assume we are called with child of an alignment container (of SAI or species)
1560  if (gb_ali) {
1561  const char *ali_key = GB_read_key_pntr(gb_ali);
1562  if (strncmp(ali_key, "ali_", 4) == 0) { // fine, looks like an alignment container
1563  GBDATA *gb_item = GB_get_father(gb_ali);
1564  if (gb_item) {
1565  const char *item_key = GB_read_key_pntr(gb_item);
1566  bool is_species = strcmp(item_key, "species") == 0;
1567  if (is_species || strcmp(item_key, "extended") == 0) {
1568  TerminalType itemtype = is_species ? IDT_SPECIES : IDT_SAI;
1569  is_ali_rel = AliEditor::shall_edit(gb_data, itemtype);
1570  }
1571  }
1572  }
1573  }
1574  }
1575 
1576  return is_ali_rel;
1577 }
1578 
1579 // --------------------------------------------------------------------------------
1580 
1581 #ifdef UNIT_TESTS
1582 #ifndef TEST_UNIT_H
1583 #include <test_unit.h>
1584 #endif
1585 #include <arb_unit_test.h>
1586 
// Helper macros for the unit tests below.
// They declare local variables ('data', 'error', 'mod', 'res', 'cmd'),
// so each of them can only be used once per scope.

// applies 'cmd' to a SequenceAliData created from 'str'; the result is 'mod', a failure is stored in 'error'
1587 #define PLAIN_APPLY_CMD(str,cmd) \
1588  size_t str_len = strlen(str); \
1589  AliDataPtr data = new SequenceAliData(str, str_len, '-', '.', dontAllowOversize(str_len), Deletable("-.")); \
1590  GB_ERROR error = NULp; \
1591  AliDataPtr mod = cmd.apply(data, error)
1592 
// like PLAIN_APPLY_CMD, but expects success; 'res' is NULp if the data was not modified
1593 #define APPLY_CMD(str,cmd) \
1594  PLAIN_APPLY_CMD(str, cmd); \
1595  TEST_EXPECT_NO_ERROR(error); \
1596  GB_CSTR res = mod->differs_from(*data) ? alidata2buffer(*mod) : NULp
1597 
1598 #define DO_FORMAT(str,wanted_len) \
1599  AliFormatCommand cmd(wanted_len); \
1600  APPLY_CMD(str, cmd)
1601 
1602 #define DO_INSERT(str,pos,amount) \
1603  AliInsertCommand cmd(pos, amount); \
1604  APPLY_CMD(str, cmd)
1605 
// format first, then insert (both combined by an AliCompositeCommand)
1606 #define DO_FORMAT_AND_INSERT(str,wanted_len,pos,amount) \
1607  AliCompositeCommand cmd(new AliFormatCommand(wanted_len), \
1608  new AliInsertCommand(pos,amount)); \
1609  APPLY_CMD(str, cmd)
1610 
1611 #define DO_DELETE(str,pos,amount) \
1612  AliDeleteCommand cmd(pos, amount); \
1613  APPLY_CMD(str, cmd)
1614 
// Expectation macros: each runs one command on 'str' inside its own do/while scope
// and compares the result 'res' with 'expected'.
// The '__BROKEN' variants use TEST_EXPECT_EQUAL__BROKEN instead of TEST_EXPECT_EQUAL.
1615 #define TEST_FORMAT(str,wanted_alilen,expected) do { DO_FORMAT(str,wanted_alilen); TEST_EXPECT_EQUAL(res, expected); } while(0)
1616 #define TEST_FORMAT__BROKEN(str,wanted_alilen,expected) do { DO_FORMAT(str,wanted_alilen); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)
1617 
1618 #define TEST_INSERT(str,pos,amount,expected) do { DO_INSERT(str,pos,amount); TEST_EXPECT_EQUAL(res, expected); } while(0)
1619 #define TEST_INSERT__BROKEN(str,pos,amount,expected) do { DO_INSERT(str,pos,amount); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)
1620 
1621 #define TEST_DELETE(str,pos,amount,expected) do { DO_DELETE(str,pos,amount); TEST_EXPECT_EQUAL(res, expected); } while(0)
1622 #define TEST_DELETE__BROKEN(str,pos,amount,expected) do { DO_DELETE(str,pos,amount); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)
1623 
1624 #define TEST_FORMAT_AND_INSERT(str,wanted_alilen,pos,amount,expected) do { DO_FORMAT_AND_INSERT(str,wanted_alilen,pos,amount); TEST_EXPECT_EQUAL(res, expected); } while(0)
1625 #define TEST_FORMAT_AND_INSERT__BROKEN(str,wanted_alilen,pos,amount,expected) do { DO_FORMAT_AND_INSERT(str,wanted_alilen,pos,amount); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)
1626 
// Error expectation macros: the command is expected to fail with an error containing 'exp_err'.
1627 #define TEST_FORMAT_ERROR(str,wanted_alilen,exp_err) do { \
1628  AliFormatCommand cmd(wanted_alilen); \
1629  PLAIN_APPLY_CMD(str, cmd); \
1630  TEST_EXPECT_ERROR_CONTAINS(error, exp_err); \
1631  } while(0)
1632 
1633 #define TEST_DELETE_ERROR(str,pos,amount,exp_err) do { \
1634  AliDeleteCommand cmd(pos, amount); \
1635  PLAIN_APPLY_CMD(str, cmd); \
1636  TEST_EXPECT_ERROR_CONTAINS(error, exp_err); \
1637  } while(0)
1638 
1639 
1640 // --------------------------------------------------------------------------------
1641 
// Unit test for the edit commands, applied to plain strings (no database involved).
// Argument order of the macros used: (input, [wanted length,] [position, amount,] expected result).
1642 void TEST_format_insert_delete() {
1643  // this test is a bit weird.
1644  //
1645  // originally it was used to test the function gbt_insert_delete, which is gone now.
1646  // now it tests AliFormatCommand, AliInsertCommand, AliDeleteCommand and AliCompositeCommand (but quite implicit).
1647 
// the test macros yield NULp if the command did not change the data
1648  const char *UNMODIFIED = NULp;
1649 
// format to a bigger length
1650  TEST_FORMAT("xxx", 5, "xxx..");
1651  TEST_FORMAT(".x.", 5, ".x...");
1652  TEST_FORMAT(".x..", 5, ".x...");
1653  TEST_FORMAT(".x...", 5, UNMODIFIED);
1654 
// format to a smaller length (only gaps may be removed)
1655  TEST_FORMAT("xxx--", 3, "xxx");
1656  TEST_FORMAT("xxx..", 3, "xxx");
1657  TEST_FORMAT_ERROR("xxxxx", 3, "You tried to delete 'x' at position 3 -> Operation aborted");
1658  TEST_FORMAT_ERROR("xxx", 0, "You tried to delete 'x' at position 0 -> Operation aborted");
1659 
1660  // insert/delete in the middle
1661  TEST_INSERT("abcde", 3, 0, UNMODIFIED);
1662  TEST_INSERT("abcde", 3, 1, "abc-de");
1663  TEST_INSERT("abcde", 3, 2, "abc--de");
1664 
1665  TEST_DELETE("abcde", 3, 0, UNMODIFIED);
1666  TEST_DELETE("abc-de", 3, 1, "abcde");
1667  TEST_DELETE("abc--de", 3, 2, "abcde");
1668  TEST_DELETE_ERROR("abc-xde", 3, 2, "You tried to delete 'x' at position 4 -> Operation aborted");
1669 
1670  // insert/delete at end
1671  TEST_INSERT("abcde", 5, 1, "abcde.");
1672  TEST_INSERT("abcde", 5, 4, "abcde....");
1673 
1674  TEST_DELETE("abcde-", 5, 1, "abcde");
1675  TEST_DELETE("abcde----", 5, 4, "abcde");
1676 
1677  // insert/delete at start
1678  TEST_INSERT("abcde", 0, 1, ".abcde");
1679  TEST_INSERT("abcde", 0, 4, "....abcde");
1680 
1681  TEST_DELETE("-abcde", 0, 1, "abcde");
1682  TEST_DELETE("----abcde", 0, 4, "abcde");
1683 
1684  // insert behind end
1685  TEST_FORMAT_AND_INSERT("abcde", 10, 8, 1, "abcde......");
1686  TEST_FORMAT_AND_INSERT("abcde", 10, 8, 4, "abcde.........");
1687 
1688  // insert/delete all
1689  TEST_INSERT("", 0, 3, "...");
1690  TEST_DELETE("---", 0, 3, "");
1691 
1693 }
1694 
1695 // ------------------------------
1696 
1697 static struct arb_unit_test::test_alignment_data TADinsdel[] = {
1698  { 1, "MtnK1722", "...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC....." },
1699  { 1, "MhnFormi", "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU....." },
1700  { 1, "MhnT1916", "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU----" },
1701 };
1702 
1703 static struct arb_unit_test::test_alignment_data EXTinsdel[] = {
1704  { 0, "ECOLI", "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA...." },
1705  { 0, "HELIX", ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]" },
1706  { 0, "HELIX_NR", ".....1.1.........25.25.34..34.34..34...25.25...1........1" },
1707 };
1708 
1709 #define HELIX_REF ".....x..x........x...x.x....x.x....x...x...x...x.........x"
1710 #define HELIX_STRUCT "VERSION=3\nLOOP={etc.pp\n}\n"
1711 
1712 static const char *read_item_entry(GBDATA *gb_item, const char *ali_name, const char *entry_name) {
1713  const char *result = NULp;
1714  if (gb_item) {
1715  GBDATA *gb_ali = GB_find(gb_item, ali_name, SEARCH_CHILD);
1716  if (gb_ali) {
1717  GBDATA *gb_entry = GB_entry(gb_ali, entry_name);
1718  if (gb_entry) {
1719  result = GB_read_char_pntr(gb_entry);
1720  }
1721  }
1722  }
1723  if (!result) TEST_EXPECT_NO_ERROR(GB_await_error());
1724  return result;
1725 }
1726 static char *ints2string(const GB_UINT4 *ints, size_t count) {
1727  char *str = ARB_alloc<char>(count+1);
1728  for (size_t c = 0; c<count; ++c) { // IRRELEVANT_LOOP
1729  str[c] = (ints[c]<10) ? ints[c]+'0' : '?';
1730  }
1731  str[count] = 0;
1732  return str;
1733 }
1734 static GB_UINT4 *string2ints(const char *str, size_t count) {
1735  GB_UINT4 *ints = ARB_alloc<GB_UINT4>(count);
1736  for (size_t c = 0; c<count; ++c) { // IRRELEVANT_LOOP
1737  ints[c] = int(str[c]-'0');
1738  }
1739  return ints;
1740 }
1741 static char *floats2string(const float *floats, size_t count) {
1742  char *str = ARB_alloc<char>(count+1);
1743  for (size_t c = 0; c<count; ++c) { // IRRELEVANT_LOOP
1744  str[c] = char(floats[c]*64.0+0.5)+' '+1;
1745  }
1746  str[count] = 0;
1747  return str;
1748 }
1749 static float *string2floats(const char *str, size_t count) {
1750  float *floats = ARB_alloc<float>(count);
1751  for (size_t c = 0; c<count; ++c) { // IRRELEVANT_LOOP
1752  floats[c] = float(str[c]-' '-1)/64.0;
1753  }
1754  return floats;
1755 }
1756 
1757 static GBDATA *get_ali_entry(GBDATA *gb_item, const char *ali_name, const char *entry_name) {
1758  GBDATA *gb_entry = NULp;
1759  if (gb_item) {
1760  GBDATA *gb_ali = GB_find(gb_item, ali_name, SEARCH_CHILD);
1761  if (gb_ali) gb_entry = GB_entry(gb_ali, entry_name);
1762  }
1763  return gb_entry;
1764 }
1765 
1766 static char *read_item_ints_entry_as_string(GBDATA *gb_item, const char *ali_name, const char *entry_name) {
1767  char *result = NULp;
1768  GBDATA *gb_entry = get_ali_entry(gb_item, ali_name, entry_name);
1769  if (gb_entry) {
1770  GB_UINT4 *ints = GB_read_ints(gb_entry);
1771  result = ints2string(ints, GB_read_count(gb_entry));
1772  free(ints);
1773  }
1774  if (!result) TEST_EXPECT_NO_ERROR(GB_await_error());
1775  return result;
1776 }
1777 static char *read_item_floats_entry_as_string(GBDATA *gb_item, const char *ali_name, const char *entry_name) {
1778  char *result = NULp;
1779  GBDATA *gb_entry = get_ali_entry(gb_item, ali_name, entry_name);
1780  if (gb_entry) {
1781  float *floats = GB_read_floats(gb_entry);
1782  result = floats2string(floats, GB_read_count(gb_entry));
1783  free(floats);
1784  }
1785  if (!result) TEST_EXPECT_NO_ERROR(GB_await_error());
1786  return result;
1787 }
1788 
// Note: all macros below expect variables 'gb_main' and 'ali_name' in the expanding scope.

// expect that string entry 'ename' of the item located by 'find(gb_main, name)' equals 'expected'
#define TEST_ITEM_HAS_ENTRY(find,name,ename,expected)                                           \
    TEST_EXPECT_EQUAL(read_item_entry(find(gb_main, name), ali_name, ename), expected)

// same for an INTS entry (compared in its ints2string() representation)
#define TEST_ITEM_HAS_INTSENTRY(find,name,ename,expected)                                       \
    TEST_EXPECT_EQUAL(&*SmartCharPtr(read_item_ints_entry_as_string(find(gb_main, name), ali_name, ename)), expected)

// same for a FLOATS entry (compared in its floats2string() representation)
#define TEST_ITEM_HAS_FLOATSENTRY(find,name,ename,expected)                                     \
    TEST_EXPECT_EQUAL(&*SmartCharPtr(read_item_floats_entry_as_string(find(gb_main, name), ali_name, ename)), expected)

// shortcut for entry "data"
#define TEST_ITEM_HAS_DATA(find,name,expected) TEST_ITEM_HAS_ENTRY(find,name,"data",expected)

// 'ad' is an element of a test_alignment_data array (its member 'name' selects the item)
#define TEST_SPECIES_HAS_DATA(ad,sd)    TEST_ITEM_HAS_DATA(GBT_find_species,ad.name,sd)
#define TEST_SAI_HAS_DATA(ad,sd)        TEST_ITEM_HAS_DATA(GBT_find_SAI,ad.name,sd)
#define TEST_SAI_HAS_ENTRY(ad,ename,sd) TEST_ITEM_HAS_ENTRY(GBT_find_SAI,ad.name,ename,sd)

// entries "NN" (INTS) and "FF" (FLOATS) get created by add_some_SAIs()
#define TEST_SPECIES_HAS_INTS(ad,id)   TEST_ITEM_HAS_INTSENTRY(GBT_find_species,ad.name,"NN",id)
#define TEST_SPECIES_HAS_FLOATS(ad,fd) TEST_ITEM_HAS_FLOATSENTRY(GBT_find_species,ad.name,"FF",fd)
1806 
// Checks the complete state of the test database:
// - sd0..sd2  : expected "data" of the three species in TADinsdel
// - ed0..ed2  : expected "data" of the three SAIs in EXTinsdel
// - ref       : expected content of HELIX/_REF and of the stored secondary structure reference
// - ints      : expected INTS entry of the first species (as string)
// - floats    : expected FLOATS entry of the first species (as string)
// - structure : expected content of HELIX/_STRUCT
// Expects variables 'gb_main' and 'ali_name' in the expanding scope.
#define TEST_DATA(sd0,sd1,sd2,ed0,ed1,ed2,ref,ints,floats,structure) do {                           \
        TEST_SPECIES_HAS_DATA(TADinsdel[0], sd0);                                                   \
        TEST_SPECIES_HAS_DATA(TADinsdel[1], sd1);                                                   \
        TEST_SPECIES_HAS_DATA(TADinsdel[2], sd2);                                                   \
        TEST_SAI_HAS_DATA(EXTinsdel[0], ed0);                                                       \
        TEST_SAI_HAS_DATA(EXTinsdel[1], ed1);                                                       \
        TEST_SAI_HAS_DATA(EXTinsdel[2], ed2);                                                       \
        TEST_SAI_HAS_ENTRY(EXTinsdel[1], "_REF", ref);                                              \
        GBDATA *gb_ref = GB_search(gb_main, "secedit/structs/ali_mini/struct/ref", GB_FIND);        \
        TEST_EXPECT_EQUAL(GB_read_char_pntr(gb_ref), ref);                                          \
        TEST_SPECIES_HAS_INTS(TADinsdel[0], ints);                                                  \
        TEST_SPECIES_HAS_FLOATS(TADinsdel[0], floats);                                              \
        TEST_SAI_HAS_ENTRY(EXTinsdel[1], "_STRUCT", structure);                                     \
    } while(0)
1821 
1822 static int get_alignment_aligned(GBDATA *gb_main, const char *aliname) { // former GBT_get_alignment_aligned
1823  GBDATA *gb_alignment = GBT_get_alignment(gb_main, aliname);
1824  return gb_alignment ? *GBT_read_int(gb_alignment, "aligned") : -1;
1825 }
1826 
// Expects that the alignment 'ali_name' has length 'len' and that its "aligned" flag equals 'aligned'.
// Expects variables 'gb_main' and 'ali_name' in the expanding scope.
#define TEST_ALI_LEN_ALIGNED(len,aligned) do {                                          \
        TEST_EXPECT_EQUAL(GBT_get_alignment_len(gb_main, ali_name), len);               \
        TEST_EXPECT_EQUAL(get_alignment_aligned(gb_main, ali_name), aligned);           \
    } while(0)
1831 
1832 static ARB_ERROR add_some_SAIs(GBDATA *gb_main, const char *ali_name) {
1833  ARB_ERROR error;
1834  GB_transaction ta(gb_main);
1835  TEST_DB_INSERT_SAI(gb_main, error, ali_name, EXTinsdel);
1836 
1837  // add secondary structure to "HELIX"
1838  GBDATA *gb_helix = GBT_find_SAI(gb_main, "HELIX");
1839  if (!gb_helix) error = GB_await_error();
1840  else {
1841  GBDATA *gb_struct = GBT_add_data(gb_helix, ali_name, "_STRUCT", GB_STRING);
1842  if (!gb_struct) error = GB_await_error();
1843  else error = GB_write_string(gb_struct, HELIX_STRUCT);
1844 
1845  GBDATA *gb_struct_ref = GBT_add_data(gb_helix, ali_name, "_REF", GB_STRING);
1846  if (!gb_struct_ref) error = GB_await_error();
1847  else error = GB_write_string(gb_struct_ref, HELIX_REF);
1848  }
1849 
1850  // add stored secondary structure
1851  GBDATA *gb_ref = GB_search(gb_main, "secedit/structs/ali_mini/struct/ref", GB_STRING);
1852  if (!gb_ref) error = GB_await_error();
1853  else error = GB_write_string(gb_ref, HELIX_REF);
1854 
1855  // create one INTS and one FLOATS entry for first species
1856  GBDATA *gb_spec = GBT_find_species(gb_main, TADinsdel[0].name);
1857  {
1858  GBDATA *gb_ints = GBT_add_data(gb_spec, ali_name, "NN", GB_INTS);
1859  const char *intsAsStr = "9346740960354855652100942568200611650200211394358998513";
1860  size_t len = strlen(intsAsStr);
1861  GB_UINT4 *ints = string2ints(intsAsStr, len);
1862  {
1863  char *asStr = ints2string(ints, len);
1864  TEST_EXPECT_EQUAL(intsAsStr, asStr);
1865  free(asStr);
1866  }
1867  error = GB_write_ints(gb_ints, ints, len);
1868  free(ints);
1869  }
1870  {
1871  GBDATA *gb_ints = GBT_add_data(gb_spec, ali_name, "FF", GB_FLOATS);
1872  const char *floatsAsStr = "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW";
1873  size_t len = strlen(floatsAsStr);
1874  float *floats = string2floats(floatsAsStr, len);
1875  {
1876  char *asStr = floats2string(floats, len);
1877  TEST_EXPECT_EQUAL(floatsAsStr, asStr);
1878  free(asStr);
1879  }
1880  error = GB_write_floats(gb_ints, floats, len);
1881  free(floats);
1882  }
1883  return error;
1884 }
1885 
1886 __ATTR__REDUCED_OPTIMIZE__NO_GCSE static void test_insert_delete_DB() {
1887  GB_shell shell;
1888  ARB_ERROR error;
1889  const char *ali_name = "ali_mini";
1890  GBDATA *gb_main = TEST_CREATE_DB(error, ali_name, TADinsdel, false);
1891 
1892  arb_suppress_progress noProgress;
1893 
1894  if (!error) error = add_some_SAIs(gb_main, ali_name);
1895  if (!error) {
1896  GB_transaction ta(gb_main);
1897 
1898  for (int pass = 1; pass <= 2; ++pass) {
1899  if (pass == 1) TEST_ALI_LEN_ALIGNED(56, 1);
1900  if (pass == 2) TEST_ALI_LEN_ALIGNED(57, 0); // was marked as "not aligned"
1901 
1902  TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC.....",
1903  "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU.....",
1904  "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU----",
1905  "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA....",
1906  ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]",
1907  ".....1.1.........25.25.34..34.34..34...25.25...1........1",
1908  ".....x..x........x...x.x....x.x....x...x...x...x.........x",
1909  "9346740960354855652100942568200611650200211394358998513", // a INTS entry
1910  "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW", // a FLOATS entry
1911  HELIX_STRUCT);
1912 
1913  if (pass == 1) TEST_EXPECT_NO_ERROR(GBT_check_data(gb_main, ali_name));
1914  }
1915 
1916  TEST_EXPECT_NO_ERROR(ARB_format_alignment(gb_main, ali_name));
1917  TEST_ALI_LEN_ALIGNED(57, 1);
1918  TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC......",
1919  "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU.....",
1920  "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU-----", // @@@ <- should convert '-' to '.'
1921  "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA.....",
1922  ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]",
1923  ".....1.1.........25.25.34..34.34..34...25.25...1........1",
1924  ".....x..x........x...x.x....x.x....x...x...x...x.........x",
1925  "934674096035485565210094256820061165020021139435899851300",
1926  "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW!",
1927  HELIX_STRUCT);
1928 
1929 // text-editor column -> alignment column
1930 #define COL(col) ((col)-19)
1931 
1932  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(64), 2, "")); // insert in middle
1933  TEST_ALI_LEN_ALIGNED(59, 1);
1934  TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC......",
1935  "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.....",
1936  "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-----",
1937  "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.....",
1938  ".....[<[.........[..[..[<<.[..].>>]....]..]......].>......]",
1939  ".....1.1.........25.25.34..34.34..34...25.25.....1........1",
1940  ".....x..x........x...x.x....x.x....x...x...x.....x.........x",
1941  "93467409603548556521009425682006116502002113900435899851300",
1942  "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59DW!",
1943  HELIX_STRUCT);
1944 
1945  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(75), 2, "")); // insert near end
1946  TEST_ALI_LEN_ALIGNED(61, 1);
1947  TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........",
1948  "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......",
1949  "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------",
1950  "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......",
1951  ".....[<[.........[..[..[<<.[..].>>]....]..]......].>........]",
1952  ".....1.1.........25.25.34..34.34..34...25.25.....1..........1",
1953  ".....x..x........x...x.x....x.x....x...x...x.....x...........x",
1954  "9346740960354855652100942568200611650200211390043589985100300",
1955  "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
1956  HELIX_STRUCT);
1957 
1958  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(20), 2, "")); // insert near start
1959  TEST_ALI_LEN_ALIGNED(63, 1);
1960  TEST_DATA(".....G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........",
1961  "-----A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......",
1962  ".....A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------",
1963  "-----U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......",
1964  ".......[<[.........[..[..[<<.[..].>>]....]..]......].>........]",
1965  ".......1.1.........25.25.34..34.34..34...25.25.....1..........1",
1966  ".......x..x........x...x.x....x.x....x...x...x.....x...........x",
1967  "900346740960354855652100942568200611650200211390043589985100300",
1968  "O!!Du8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
1969  HELIX_STRUCT);
1970 
1971 
1972  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(26), 2, "")); // insert at left helix start
1973  TEST_ALI_LEN_ALIGNED(65, 1);
1974  TEST_DATA(".....G---GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........",
1975  "-----A---CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......",
1976  ".....A---CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------",
1977  "-----U---GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......",
1978  ".........[<[.........[..[..[<<.[..].>>]....]..]......].>........]",
1979  ".........1.1.........25.25.34..34.34..34...25.25.....1..........1",
1980  ".........x..x........x...x.x....x.x....x...x...x.....x...........x",
1981  "90034670040960354855652100942568200611650200211390043589985100300",
1982  "O!!Du8E!!Jh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
1983  HELIX_STRUCT);
1984 
1985  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(29), 2, "")); // insert behind left helix start
1986  TEST_ALI_LEN_ALIGNED(67, 1);
1987  TEST_DATA(".....G---G--GC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........",
1988  "-----A---C--GA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......",
1989  ".....A---C--GA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------",
1990  "-----U---G--CC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......",
1991  ".........[..<[.........[..[..[<<.[..].>>]....]..]......].>........]",
1992  ".........1...1.........25.25.34..34.34..34...25.25.....1..........1",
1993  ".........x....x........x...x.x....x.x....x...x...x.....x...........x",
1994  "9003467004000960354855652100942568200611650200211390043589985100300",
1995  "O!!Du8E!!J!!h60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
1996  HELIX_STRUCT);
1997 
1998  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(32), 2, "")); // insert at left helix end
1999  TEST_ALI_LEN_ALIGNED(69, 1);
2000  TEST_DATA(".....G---G--G--C-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........",
2001  "-----A---C--G--A-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......",
2002  ".....A---C--G--A-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------",
2003  "-----U---G--C--C-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......",
2004  ".........[..<..[.........[..[..[<<.[..].>>]....]..]......].>........]",
2005  ".........1.....1.........25.25.34..34.34..34...25.25.....1..........1",
2006  ".........x......x........x...x.x....x.x....x...x...x.....x...........x",
2007  "900346700400000960354855652100942568200611650200211390043589985100300",
2008  "O!!Du8E!!J!!h!!60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
2009  HELIX_STRUCT);
2010 
2011  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(35), 2, "")); // insert behind left helix end
2012  TEST_ALI_LEN_ALIGNED(71, 1);
2013  TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........",
2014  "-----A---C--G--A---U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......",
2015  ".....A---C--G--A---A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------",
2016  "-----U---G--C--C---U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......",
2017  ".........[..<..[...........[..[..[<<.[..].>>]....]..]......].>........]",
2018  ".........1.....1...........25.25.34..34.34..34...25.25.....1..........1",
2019  ".........x........x........x...x.x....x.x....x...x...x.....x...........x", // @@@ _REF gets destroyed here! (see #159)
2020  // ^ ^
2021  "90034670040000090060354855652100942568200611650200211390043589985100300",
2022  "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
2023  HELIX_STRUCT);
2024 
2025 
2026  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(59), 2, "")); // insert at right helix start
2027  TEST_ALI_LEN_ALIGNED(73, 1);
2028  TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G-CGGC-UGG--AUC--ACCUCC........",
2029  "-----A---C--G--A---U-C-----C--G--GAA-CCU--G-CGGC-UGG--AUC--ACCUCCU.......",
2030  ".....A---C--G--A---A-C.....G--G--GAA-CCU--G-CGGC-UGG--AUC--ACCUCCU-------",
2031  "-----U---G--C--C---U-G-----G--C--CCU-UAG--C-GCGG-UGG--UCC--CACCUGA.......",
2032  ".........[..<..[...........[..[..[<<.[....].>>]....]..]......].>........]",
2033  ".........1.....1...........25.25.34..34...34..34...25.25.....1..........1",
2034  ".........x........x........x...x.x....x...x....x...x...x.....x...........x",
2035  "9003467004000009006035485565210094256820000611650200211390043589985100300",
2036  "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!JxT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
2037  HELIX_STRUCT);
2038 
2039  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(62), 2, "")); // insert behind right helix start
2040  TEST_ALI_LEN_ALIGNED(75, 1);
2041  TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G---CGGC-UGG--AUC--ACCUCC........",
2042  "-----A---C--G--A---U-C-----C--G--GAA-CCU--G---CGGC-UGG--AUC--ACCUCCU.......",
2043  ".....A---C--G--A---A-C.....G--G--GAA-CCU--G---CGGC-UGG--AUC--ACCUCCU-------",
2044  "-----U---G--C--C---U-G-----G--C--CCU-UAG--C---GCGG-UGG--UCC--CACCUGA.......",
2045  ".........[..<..[...........[..[..[<<.[....]...>>]....]..]......].>........]",
2046  ".........1.....1...........25.25.34..34...3..4..34...25.25.....1..........1", // @@@ <- helix nr destroyed
2047  // ^^^^
2048  ".........x........x........x...x.x....x...x......x...x...x.....x...........x",
2049  "900346700400000900603548556521009425682000000611650200211390043589985100300",
2050  "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!J!!xT6JPiCvQrq4uC!!LDoHlWV59!!DW!",
2051  HELIX_STRUCT);
2052 
2053  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(67), 2, "")); // insert at right helix end
2054  TEST_ALI_LEN_ALIGNED(77, 1);
2055  TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G---CG--GC-UGG--AUC--ACCUCC........",
2056  "-----A---C--G--A---U-C-----C--G--GAA-CCU--G---CG--GC-UGG--AUC--ACCUCCU.......",
2057  ".....A---C--G--A---A-C.....G--G--GAA-CCU--G---CG--GC-UGG--AUC--ACCUCCU-------",
2058  "-----U---G--C--C---U-G-----G--C--CCU-UAG--C---GC--GG-UGG--UCC--CACCUGA.......",
2059  ".........[..<..[...........[..[..[<<.[....]...>>..]....]..]......].>........]",
2060  ".........1.....1...........25.25.34..34...3..4....34...25.25.....1..........1",
2061  ".........x........x........x...x.x....x...x........x...x...x.....x...........x",
2062  "90034670040000090060354855652100942568200000061100650200211390043589985100300",
2063  "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!J!!xT6!!JPiCvQrq4uC!!LDoHlWV59!!DW!",
2064  HELIX_STRUCT);
2065 
2066  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(70), 2, "")); // insert behind right helix end
2067  TEST_ALI_LEN_ALIGNED(79, 1);
2068  TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCC........",
2069  "-----A---C--G--A---U-C-----C--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU.......",
2070  ".....A---C--G--A---A-C.....G--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU-------",
2071  "-----U---G--C--C---U-G-----G--C--CCU-UAG--C---GC--G--G-UGG--UCC--CACCUGA.......",
2072  ".........[..<..[...........[..[..[<<.[....]...>>..]......]..]......].>........]",
2073  ".........1.....1...........25.25.34..34...3..4....3..4...25.25.....1..........1", // @@@ <- helix nr destroyed
2074  ".........x........x........x...x.x....x...x..........x...x...x.....x...........x", // @@@ _REF gets destroyed here! (see #159)
2075  "9003467004000009006035485565210094256820000006110060050200211390043589985100300",
2076  "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQrq4uC!!LDoHlWV59!!DW!",
2077  HELIX_STRUCT);
2078 
2079 
2080 
2081  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(44), 2, "")); // insert at gap border (between different gap types)
2082  TEST_ALI_LEN_ALIGNED(81, 1);
2083  TEST_DATA(".....G---G--G--C---C-G...----A--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCC........", // now prefers '-' here
2084  "-----A---C--G--A---U-C-------C--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU.......",
2085  ".....A---C--G--A---A-C.......G--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU-------",
2086  "-----U---G--C--C---U-G-------G--C--CCU-UAG--C---GC--G--G-UGG--UCC--CACCUGA.......",
2087  ".........[..<..[.............[..[..[<<.[....]...>>..]......]..]......].>........]",
2088  ".........1.....1.............25.25.34..34...3..4....3..4...25.25.....1..........1",
2089  ".........x........x..........x...x.x....x...x..........x...x...x.....x...........x",
2090  "900346700400000900603548500565210094256820000006110060050200211390043589985100300",
2091  "O!!Du8E!!J!!h!!6!!0e1XYLg!!xvzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQrq4uC!!LDoHlWV59!!DW!",
2092  HELIX_STRUCT);
2093 
2094 
2095  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(42), -6, "-.")); // delete gaps
2096  TEST_ALI_LEN_ALIGNED(75, 1);
2097  TEST_DATA(".....G---G--G--C---C-G.A--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCC........",
2098  "-----A---C--G--A---U-C-C--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU.......",
2099  ".....A---C--G--A---A-C.G--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU-------",
2100  "-----U---G--C--C---U-G-G--C--CCU-UAG--C---GC--G--G-UGG--UCC--CACCUGA.......",
2101  ".........[..<..[.......[..[..[<<.[....]...>>..]......]..]......].>........]",
2102  ".........1.....1.......25.25.34..34...3..4....3..4...25.25.....1..........1",
2103  ".........x........x....x...x.x....x...x..........x...x...x.....x...........x",
2104  "900346700400000900603545210094256820000006110060050200211390043589985100300",
2105  "O!!Du8E!!J!!h!!6!!0e1XYzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQrq4uC!!LDoHlWV59!!DW!",
2106  HELIX_STRUCT);
2107 
2108  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(74), -1, "-.")); // delete gap inside helix destroying helix nrs
2109  TEST_ALI_LEN_ALIGNED(74, 1);
2110  TEST_DATA(".....G---G--G--C---C-G.A--G--GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCC........",
2111  "-----A---C--G--A---U-C-C--G--GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU.......",
2112  ".....A---C--G--A---A-C.G--G--GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU-------",
2113  "-----U---G--C--C---U-G-G--C--CCU-UAG--C---GC--G--G-UGG-UCC--CACCUGA.......",
2114  ".........[..<..[.......[..[..[<<.[....]...>>..]......].]......].>........]",
2115  ".........1.....1.......25.25.34..34...3..4....3..4...2525.....1..........1", // @@@ helix nr destroyed ('25.25' -> '2525')
2116  ".........x........x....x...x.x....x...x..........x...x..x.....x...........x",
2117  "90034670040000090060354521009425682000000611006005020021390043589985100300",
2118  "O!!Du8E!!J!!h!!6!!0e1XYzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQr4uC!!LDoHlWV59!!DW!",
2119  HELIX_STRUCT);
2120 
2121 
2122  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(47), -1, "-.")); // delete gap between helices destroying helix nrs
2123  TEST_ALI_LEN_ALIGNED(73, 1);
2124  TEST_DATA(".....G---G--G--C---C-G.A--G-GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCC........",
2125  "-----A---C--G--A---U-C-C--G-GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU.......",
2126  ".....A---C--G--A---A-C.G--G-GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU-------",
2127  "-----U---G--C--C---U-G-G--C-CCU-UAG--C---GC--G--G-UGG-UCC--CACCUGA.......",
2128  ".........[..<..[.......[..[.[<<.[....]...>>..]......].]......].>........]",
2129  ".........1.....1.......25.2534..34...3..4....3..4...2525.....1..........1", // @@@ helix nr destroyed ('25.34' -> '2534')
2130  ".........x........x....x...xx....x...x..........x...x..x.....x...........x",
2131  "9003467004000009006035452100425682000000611006005020021390043589985100300",
2132  "O!!Du8E!!J!!h!!6!!0e1XYzrqmeiMAjB5E!!J!!xT6!!J!!PiCvQr4uC!!LDoHlWV59!!DW!",
2133  HELIX_STRUCT);
2134 
2135 
2136  TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(72), -5, "%")); // delete anything
2137  TEST_ALI_LEN_ALIGNED(68, 1);
2138  TEST_DATA(".....G---G--G--C---C-G.A--G-GAA-CCU--G---CG--G--C-UGG-ACCUCC........",
2139  "-----A---C--G--A---U-C-C--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU.......",
2140  ".....A---C--G--A---A-C.G--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU-------",
2141  "-----U---G--C--C---U-G-G--C-CCU-UAG--C---GC--G--G-UGG-CACCUGA.......",
2142  ".........[..<..[.......[..[.[<<.[....]...>>..]......]...].>........]",
2143  ".........1.....1.......25.2534..34...3..4....3..4...2...1..........1",
2144  ".........x........x....x...xx....x...x..........x...x...x...........x",
2145  "90034670040000090060354521004256820000006110060050200043589985100300",
2146  "O!!Du8E!!J!!h!!6!!0e1XYzrqmeiMAjB5E!!J!!xT6!!J!!PiCvQ!LDoHlWV59!!DW!",
2147  HELIX_STRUCT);
2148 
2149  }
2150 
2151  if (!error) {
2152  {
2153  GB_transaction ta(gb_main);
2154  TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, COL(35), -3, "-."), // illegal delete
2155  "SAI 'HELIX': You tried to delete 'x' at position 18 -> Operation aborted");
2156  ta.close("xxx");
2157  }
2158  {
2159  GB_transaction ta(gb_main);
2160  TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, COL(57), -3, "-."), // illegal delete
2161  "SAI 'HELIX_NR': You tried to delete '4' at position 40 -> Operation aborted");
2162  ta.close("xxx");
2163  }
2164  {
2165  GB_transaction ta(gb_main);
2166  TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, 4711, 3, "-."), // illegal insert
2167  "Can't insert at position 4711 (exceeds length 68 of alignment 'ali_mini')");
2168  ta.close("xxx");
2169  }
2170  {
2171  GB_transaction ta(gb_main);
2172  TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, 66, -3, "-."), // illegal delete
2173  "Can't delete positions 66-68 (exceeds max. position 67 of alignment 'ali_mini')");
2174  ta.close("xxx");
2175  }
2176  {
2177  GB_transaction ta(gb_main);
2178  TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, -1, 3, "-."), // illegal insert
2179  "Illegal sequence position -1");
2180  ta.close("xxx");
2181  }
2182  }
2183  if (!error) {
2184  GB_transaction ta(gb_main);
2185  TEST_DATA(".....G---G--G--C---C-G.A--G-GAA-CCU--G---CG--G--C-UGG-ACCUCC........",
2186  "-----A---C--G--A---U-C-C--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU.......",
2187  ".....A---C--G--A---A-C.G--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU-------",
2188  "-----U---G--C--C---U-G-G--C-CCU-UAG--C---GC--G--G-UGG-CACCUGA.......",
2189  ".........[..<..[.......[..[.[<<.[....]...>>..]......]...].>........]",
2190  ".........1.....1.......25.2534..34...3..4....3..4...2...1..........1",
2191  ".........x........x....x...xx....x...x..........x...x...x...........x",
2192  "90034670040000090060354521004256820000006110060050200043589985100300",
2193  "O!!Du8E!!J!!h!!6!!0e1XYzrqmeiMAjB5E!!J!!xT6!!J!!PiCvQ!LDoHlWV59!!DW!",
2194  HELIX_STRUCT);
2195  }
2196 
2197  GB_close(gb_main);
2198  TEST_EXPECT_NO_ERROR(error.deliver());
2199 }
2200 void TEST_insert_delete_DB() {
2201  test_insert_delete_DB(); // wrap test code in subroutine (otherwise nm 2.24 fails to provide source-location, even if TEST_PUBLISH is used)
2202 }
2203 
2204 __ATTR__REDUCED_OPTIMIZE void TEST_insert_delete_DB_using_SAI() {
2205  GB_shell shell;
2206  ARB_ERROR error;
2207  const char *ali_name = "ali_mini";
2208  GBDATA *gb_main = TEST_CREATE_DB(error, ali_name, TADinsdel, false);
2209 
2210  arb_suppress_progress noProgress;
2211 
2212  if (!error) error = add_some_SAIs(gb_main, ali_name);
2213  if (!error) {
2214  GB_transaction ta(gb_main);
2215 
2216  // test here is just a duplicate from TEST_insert_delete_DB() - just here to show the data
2217  TEST_EXPECT_NO_ERROR(ARB_format_alignment(gb_main, ali_name));
2218  int alilen_exp = 57;
2219  TEST_ALI_LEN_ALIGNED(alilen_exp, 1);
2220  TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC......",
2221  "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU.....",
2222  "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU-----",
2223  "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA.....",
2224  ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]",
2225  ".....1.1.........25.25.34..34.34..34...25.25...1........1",
2226  ".....x..x........x...x.x....x.x....x...x...x...x.........x",
2227  "934674096035485565210094256820061165020021139435899851300",
2228  "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW!",
2229  HELIX_STRUCT);
2230 
2232  /* */ "xxx-------x-x-xxx---------x---------x---------------xxxx-",
2233  "x", false);
2234  TEST_EXPECT_NO_ERROR(ARB_delete_columns_using_SAI(gb_main, ali_name, delRanges, ".-"));
2235  alilen_exp -= 14;
2236  TEST_ALI_LEN_ALIGNED(alilen_exp, 1);
2237  TEST_DATA("G-GGC-CG.A--G--GAACCUG-CGGCUGG--AUCACCUCC..",
2238  "A-CGA-UC-C--G--GAACCUG-CGGCUGG--AUCACCUCCU.",
2239  "A-CGA-AC.G--G--GAACCUG-CGGCUGG--AUCACCUCCU-",
2240  "U-GCC-UG-G--C--CCUUAGC-GCGGUGG--UCCCACCUGA.",
2241  "..[<[....[..[..[<<[..].>>]...]..]....].>..]",
2242  "..1.1....25.25.34.34.34..34..25.25...1....1",
2243  "..x..x...x...x.x...x.x....x..x...x...x.....x",
2244  "6740960585210094258200611652002113943589980",
2245  "8EJh60eXLzrqmeMiMAB5EJxT6JPCvQrq4uCLDoHlWV!",
2246  HELIX_STRUCT);
2247 
2248  // insert INFRONTOF each range
2250  /* */ "---xx---xxxxxxxx---------xxxx--------------",
2251  "x", false);
2252  TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, RANGES, INFRONTOF, 2));
2253  alilen_exp += 3*2;
2254  TEST_ALI_LEN_ALIGNED(alilen_exp, 1);
2255  TEST_DATA("G-G--GC-CG...A--G--GAACCUG-CG--GCUGG--AUCACCUCC..",
2256  "A-C--GA-UC---C--G--GAACCUG-CG--GCUGG--AUCACCUCCU.",
2257  "A-C--GA-AC...G--G--GAACCUG-CG--GCUGG--AUCACCUCCU-",
2258  "U-G--CC-UG---G--C--CCUUAGC-GC--GGUGG--UCCCACCUGA.",
2259  "..[..<[......[..[..[<<[..].>>..]...]..]....].>..]",
2260  "..1...1......25.25.34.34.34....34..25.25...1....1",
2261  "..x....x.....x...x.x...x.x......x..x...x...x.....x",
2262  "6740009605008521009425820061100652002113943589980",
2263  "8EJ!!h60eX!!LzrqmeMiMAB5EJxT6!!JPCvQrq4uCLDoHlWV!",
2264  HELIX_STRUCT);
2265 
2266  // insert BEHIND each range
2267  insRanges = build_RangeList_from_string(
2268  /* */ "-----------xx-x------------xxxxxx---------------x",
2269  "x", false);
2270  TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, RANGES, BEHIND, 4));
2271  alilen_exp += 4*4;
2272  TEST_ALI_LEN_ALIGNED(alilen_exp, 1);
2273  TEST_DATA("G-G--GC-CG.......A------G--GAACCUG-CG--GC----UGG--AUCACCUCC......",
2274  "A-C--GA-UC-------C------G--GAACCUG-CG--GC----UGG--AUCACCUCCU.....",
2275  "A-C--GA-AC.......G------G--GAACCUG-CG--GC----UGG--AUCACCUCCU-----",
2276  "U-G--CC-UG-------G------C--CCUUAGC-GC--GG----UGG--UCCCACCUGA.....",
2277  "..[..<[..........[......[..[<<[..].>>..].......]..]....].>..]....",
2278  "..1...1..........25.....25.34.34.34....34......25.25...1....1....",
2279  "..x....x.........x.......x.x...x.x......x......x...x...x.........x", // @@@ ref gets destroyed here
2280  "67400096050080000520000100942582006110065000020021139435899800000",
2281  "8EJ!!h60eX!!L!!!!zr!!!!qmeMiMAB5EJxT6!!JP!!!!CvQrq4uCLDoHlWV!!!!!",
2282  HELIX_STRUCT);
2283 
2284  // insert INFRONTOF each column
2285  insRanges = build_RangeList_from_string(
2286  /* */ "x----xx--------------------------------------xxx----xxxx--------x",
2287  "x", false);
2288  TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, SINGLE_COLUMNS, INFRONTOF, 1));
2289  alilen_exp += 11*1;
2290  TEST_ALI_LEN_ALIGNED(alilen_exp, 1);
2291  TEST_DATA(".G-G---G-C-CG.......A------G--GAACCUG-CG--GC-----U-G-G--AU-C-A-C-CUCC.......",
2292  ".A-C---G-A-UC-------C------G--GAACCUG-CG--GC-----U-G-G--AU-C-A-C-CUCCU......",
2293  ".A-C---G-A-AC.......G------G--GAACCUG-CG--GC-----U-G-G--AU-C-A-C-CUCCU------",
2294  ".U-G---C-C-UG-------G------C--CCUUAGC-GC--GG-----U-G-G--UC-C-C-A-CCUGA......",
2295  "...[...<.[..........[......[..[<<[..].>>..]..........]..]........].>..].....",
2296  "...1.....1..........25.....25.34.34.34....34.........25.25.......1....1.....",
2297  "...x......x.........x.......x.x...x.x......x.........x...x.......x..........x",
2298  "0674000009605008000052000010094258200611006500000200002113090403058998000000",
2299  "!8EJ!!!h!60eX!!L!!!!zr!!!!qmeMiMAB5EJxT6!!JP!!!!!C!v!Qrq4u!C!L!D!oHlWV!!!!!!",
2300  HELIX_STRUCT);
2301 
2302  // insert BEHIND each column
2303  insRanges = build_RangeList_from_string(
2304  /* */ "------------------------------xxxxxxx----------------------------xxxxxx-----",
2305  "x", false);
2306  TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, SINGLE_COLUMNS, BEHIND, 2));
2307  alilen_exp += 13*2;
2308  TEST_ALI_LEN_ALIGNED(alilen_exp, 1);
2309  TEST_DATA(".G-G---G-C-CG.......A------G--G--A--A--C--C--U--G---CG--GC-----U-G-G--AU-C-A-C-C--U--C--C.............",
2310  ".A-C---G-A-UC-------C------G--G--A--A--C--C--U--G---CG--GC-----U-G-G--AU-C-A-C-C--U--C--C--U..........",
2311  ".A-C---G-A-AC.......G------G--G--A--A--C--C--U--G---CG--GC-----U-G-G--AU-C-A-C-C--U--C--C--U----------",
2312  ".U-G---C-C-UG-------G------C--C--C--U--U--A--G--C---GC--GG-----U-G-G--UC-C-C-A-C--C--U--G--A..........",
2313  "...[...<.[..........[......[..[..<..<..[........]...>>..]..........]..]........].....>........].......",
2314  "...1.....1..........25.....25.3..4.....3..4.....3..4....34.........25.25.......1..............1.......", // @@@ helix nrs destroyed
2315  "...x......x.........x.......x.x...........x.....x........x.........x...x.......x......................x", // @@@ ref destroyed further
2316  "067400000960500800005200001009400200500800200000000611006500000200002113090403050080090090080000000000",
2317  "!8EJ!!!h!60eX!!L!!!!zr!!!!qmeMi!!M!!A!!B!!5!!E!!J!!xT6!!JP!!!!!C!v!Qrq4u!C!L!D!o!!H!!l!!W!!V!!!!!!!!!!",
2318  HELIX_STRUCT);
2319  }
2320 
2321  GB_close(gb_main);
2322  TEST_EXPECT_NO_ERROR(error.deliver());
2323 }
2324 TEST_PUBLISH(TEST_insert_delete_DB_using_SAI);
2325 
2326 #endif // UNIT_TESTS
2327 
size_t get_len() const
Definition: insdel.cxx:920
virtual bool has_slice() const =0
const char * GB_ERROR
Definition: arb_core.h:25
GB_ERROR GB_write_bits(GBDATA *gbd, const char *bits, long size, const char *c_0)
Definition: arbdb.cxx:1418
string result
GB_TYPES type
float * GB_read_floats(GBDATA *gbd)
Definition: arbdb.cxx:1053
AliDataPtr apply(AliDataPtr to, GB_ERROR &error) const OVERRIDE
Definition: insdel.cxx:1107
group_matcher all()
Definition: test_unit.h:1011
AliDataPtr slice_down(size_t start, size_t count) const OVERRIDE
Definition: insdel.cxx:190
TypedAliData< T > BaseType
Definition: insdel.cxx:406
bool has_slice() const OVERRIDE
Definition: insdel.cxx:387
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
Definition: insdel.cxx:615
void load_data() const
Definition: insdel.cxx:1360
long GB_read_int(GBDATA *gbd)
Definition: arbdb.cxx:729
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
GB_ERROR GB_write_bytes(GBDATA *gbd, const char *s, long size)
Definition: arbdb.cxx:1434
virtual AliDataPtr apply(AliDataPtr to, GB_ERROR &error) const =0
AliDataPtr delete_from(AliDataPtr from, size_t pos, size_t amount, GB_ERROR &error)
Definition: insdel.cxx:595
Definition: ali.h:11
virtual size_t unitsize() const =0
Definition: arbdb.h:69
static void dot(double **i, double **j, double **k)
Definition: trnsprob.cxx:59
AliDataPtr apply(AliDataPtr to, GB_ERROR &error) const OVERRIDE
Definition: insdel.cxx:1089
CONSTEXPR_INLINE unsigned char safeCharIndex(char c)
Definition: dupstr.h:73
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static GB_ERROR apply_command_to_alignment(const AliEditCommand &cmd, const char *cmd_description, GBDATA *Main, const char *alignment_name, const char *deletable_chars)
Definition: insdel.cxx:1390
GBDATA * GB_find(GBDATA *gbd, const char *key, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:295
bool has_slice() const OVERRIDE
Definition: insdel.cxx:1213
int operate_on_mem(void *mem, size_t start, size_t count, AliData::memop op) const OVERRIDE
Definition: insdel.cxx:497
TypedAliData< T > BaseType
Definition: insdel.cxx:488
GB_ERROR check_applicable_to(const Alignment &ali, size_t &resulting_ali_length) const OVERRIDE
Definition: insdel.cxx:1053
GBDATA * GB_nextEntry(GBDATA *entry)
Definition: adquery.cxx:339
TypedAliData(size_t size_, T gap_)
Definition: insdel.cxx:379
GBDATA * GBT_get_alignment(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:808
bool ARB_is_alignment_relative_data(GBDATA *gb_data)
Definition: insdel.cxx:1555
void GB_disable_quicksave(GBDATA *gbd, const char *reason)
Definition: arbdb.cxx:2647
virtual GB_ERROR check_applicable_to(const Alignment &ali, size_t &resulting_ali_length) const =0
TerminalType
Definition: insdel.cxx:901
UnitPtr unit_left_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:394
#define id_assert(cond)
Definition: insdel.cxx:27
const AliEditCommand & edit_command() const
Definition: insdel.cxx:1154
UseRange
Definition: insdel.h:19
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
const T * std_gap_ptr() const
Definition: insdel.cxx:376
UnitPtr unit_left_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:197
UnitPtr()
Definition: insdel.cxx:35
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
int get_len(int idx) const
Definition: ali.h:18
SizeAwarable(bool allows_oversize_, size_t ali_size_)
Definition: insdel.cxx:457
static char * alignment_name
const char * get_name() const
Definition: insdel.cxx:919
STL namespace.
int cmp_data(size_t start, const AliData &other, size_t ostart, size_t count) const OVERRIDE
Definition: insdel.cxx:1219
AliDataPtr slice_down(size_t start, size_t count) const OVERRIDE
Definition: insdel.cxx:238
UnitPtr unit_left_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:301
bool isNull() const
test if SmartPtr is NULp
Definition: smartptr.h:248
GB_ERROR ARB_insdel_columns(GBDATA *Main, const char *alignment_name, long pos, long count, const char *deletable_chars)
Definition: insdel.cxx:1453
SpecificAliData(const T *static_data, size_t elements, const T &gap_, const SizeAwarable &sizeAware, const Deletable &deletable_)
Definition: insdel.cxx:490
InsertWhere
Definition: insdel.h:20
AliDataPtr create_gap(size_t gapsize, const UnitPair &gapinfo) const OVERRIDE
Definition: insdel.cxx:187
static GB_CSTR targetTypeName[]
Definition: insdel.cxx:907
EditedTerminal(GBDATA *gb_data_, GB_TYPES type_, const char *item_name_, size_t size_, TerminalType term_type, const Alignment &ali, const Deletable &deletable_)
Definition: insdel.cxx:1319
UnitPtr unit_right_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:398
static size_t countAffectedEntries(GBDATA *Main, const Alignment &ali)
Definition: insdel.cxx:1384
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
AliDataPtr partof(AliDataPtr data, size_t pos, size_t amount)
Definition: insdel.cxx:591
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
GBDATA * GB_get_father(GBDATA *gbd)
Definition: arbdb.cxx:1722
Definition: insdel.h:19
static GB_ERROR format_to_alilen(GBDATA *Main, const char *alignment_name)
Definition: insdel.cxx:1434
static size_t insDelBuffer_size
Definition: insdel.cxx:1160
GB_ERROR ARB_delete_columns_using_SAI(GBDATA *Main, const char *alignment_name, const RangeList &ranges, const char *deletable_chars)
Definition: insdel.cxx:1493
bool empty() const
Definition: RangeList.h:59
const void * expect_pointer() const
Definition: insdel.cxx:47
AliDataPtr insert_at(AliDataPtr dest, size_t pos, AliDataPtr src)
Definition: insdel.cxx:599
static HelixNrInfo * start
int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE
Definition: insdel.cxx:258
LazyAliData(const SizeAwarable &oversizable, size_t size_, TerminalType term_type_, EditedTerminal &terminal_)
Definition: insdel.cxx:1201
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:177
unsigned int GB_UINT4
Definition: arbdb_base.h:37
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
Definition: adali.cxx:597
UnitPtr at_ptr(size_t pos) const OVERRIDE
Definition: insdel.cxx:442
GB_CSTR GB_read_bytes_pntr(GBDATA *gbd)
Definition: arbdb.cxx:964
void free_insDelBuffer()
Definition: insdel.cxx:1162
GB_ERROR ARB_insert_columns_using_SAI(GBDATA *Main, const char *alignment_name, const RangeList &ranges, UseRange units, InsertWhere where, size_t amount)
Definition: insdel.cxx:1512
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
AliDataPtr after(AliDataPtr data, size_t pos)
Definition: insdel.cxx:593
NOT4PERL long * GBT_read_int(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:327
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
GB_ERROR ARB_format_alignment(GBDATA *Main, const char *alignment_name)
Definition: insdel.cxx:1439
long GB_read_count(GBDATA *gbd)
Definition: arbdb.cxx:758
GB_ERROR apply_to_alignment(GBDATA *gb_main, const Alignment &ali) const
Definition: insdel.cxx:1002
Definition: arbdb.h:78
const T * get_data() const
Definition: insdel.cxx:539
GB_TYPES GB_read_type(GBDATA *gbd)
Definition: arbdb.cxx:1643
GB_ERROR deliver() const
Definition: arb_error.h:116
size_t get_entry_count() const
Definition: insdel.cxx:1016
GB_CSTR GB_read_key_pntr(GBDATA *gbd)
Definition: arbdb.cxx:1656
AliDataPtr concat(AliDataPtr left, AliDataPtr right)
Definition: insdel.cxx:584
bool differs_from(const AliData &other) const
Definition: insdel.cxx:130
const T & std_gap() const
Definition: insdel.cxx:384
AliDataPtr apply(AliDataPtr to, GB_ERROR &) const OVERRIDE
Definition: insdel.cxx:1032
void set_pointer(const void *ptr_)
Definition: insdel.cxx:42
long GB_number_of_subentries(GBDATA *gbd)
Definition: arbdb.cxx:2892
size_t elems() const
Definition: insdel.cxx:111
void GBK_terminate(const char *error) __ATTR__NORETURN
Definition: arb_msg.cxx:509
~AliCompositeCommand() OVERRIDE
Definition: insdel.cxx:1106
virtual ~AliApplicable()
Definition: insdel.cxx:934
int cmp_data(size_t start, const AliData &other, size_t ostart, size_t count) const OVERRIDE
Definition: insdel.cxx:435
reverse_iterator rend() const
Definition: RangeList.h:55
AliData(size_t size_)
Definition: insdel.cxx:70
virtual ~AliEditCommand()
Definition: insdel.cxx:1022
int cmp_whole_data(const AliData &other) const
Definition: insdel.cxx:116
int cmp_data(size_t start, const AliData &other, size_t ostart, size_t count) const OVERRIDE
Definition: insdel.cxx:205
UnitPtr unit_right_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:201
GB_ERROR check_applicable_to(const Alignment &ali, size_t &resulting_ali_length) const OVERRIDE
Definition: insdel.cxx:1079
UnitPtr unit_right_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:314
static void error(const char *msg)
Definition: mkptypes.cxx:96
int cmpPartWith(const void *mem, size_t start, size_t count) const
Definition: insdel.cxx:93
int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE
Definition: insdel.cxx:1218
UnitPtr unit_right_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:1222
GB_ERROR check_applicable_to(const Alignment &ali, size_t &resulting_ali_length) const OVERRIDE
Definition: insdel.cxx:1033
expectation_group & add(const expectation &e)
Definition: test_unit.h:812
char * str
Definition: defines.h:20
Deletable(const char *allowed)
Definition: insdel.cxx:348
#define that(thing)
Definition: test_unit.h:1043
AliDeleteCommand(size_t pos_, size_t amount_)
Definition: insdel.cxx:1048
bool is_valid_pos(size_t pos) const
Definition: insdel.cxx:132
SpecificGap(size_t gapsize, const T &gap_)
Definition: insdel.cxx:408
Definition: insdel.h:20
int cmp_data(size_t start, const AliData &other, size_t ostart, size_t count) const OVERRIDE
Definition: insdel.cxx:528
AliDataPtr create_gap(size_t gapsize, const UnitPair &gapinfo) const OVERRIDE
Definition: insdel.cxx:235
size_t unitsize() const OVERRIDE
Definition: insdel.cxx:1208
GB_UINT4 * GB_read_ints(GBDATA *gbd)
Definition: arbdb.cxx:1013
GB_CSTR alidata2buffer(const AliData &data)
Definition: insdel.cxx:1174
UnitPtr right
Definition: insdel.cxx:50
#define cmp(h1, h2)
Definition: admap.cxx:50
AliCompositeCommand(AliEditCommand *cmd1_, AliEditCommand *cmd2_)
Definition: insdel.cxx:1102
void copyTo(void *mem) const
Definition: insdel.cxx:113
bool is_valid_part(size_t start, size_t count) const
Definition: insdel.cxx:135
static bool shall_edit(GBDATA *gb_data, TerminalType term_type)
Definition: insdel.cxx:1134
const void * get_pointer() const
Definition: insdel.cxx:46
GB_ERROR check_delete_allowed(const T *, size_t, size_t, const Deletable &)
Definition: insdel.cxx:474
__ATTR__NORETURN AliDataPtr slice_down(size_t, size_t) const OVERRIDE
Definition: insdel.cxx:1225
#define GENOM_ALIGNMENT
Definition: adGene.h:19
GB_CUINT4 * GB_read_ints_pntr(GBDATA *gbd)
Definition: arbdb.cxx:979
AliDataPtr apply(AliDataPtr to, GB_ERROR &error) const OVERRIDE
Definition: insdel.cxx:1070
int compare_type(const T &t1, const T &t2)
Definition: insdel.cxx:54
AliDataPtr before(AliDataPtr data, size_t pos)
Definition: insdel.cxx:592
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1250
virtual AliDataPtr slice_down(size_t start, size_t count) const =0
#define is_equal_to(val)
Definition: test_unit.h:1025
#define __ATTR__REDUCED_OPTIMIZE
Definition: test_unit.h:83
bool empty() const
Definition: insdel.cxx:114
~AliEditor() OVERRIDE
Definition: insdel.cxx:1150
size_t get_allowed_size(size_t term_size, size_t new_ali_size) const
Definition: insdel.cxx:462
Definition: arbdb.h:72
UnitPtr left
Definition: insdel.cxx:50
void clear_error() const
Definition: insdel.cxx:82
static void copy(double **i, double **j)
Definition: trnsprob.cxx:32
#define IF_ASSERTION_USED(x)
Definition: arb_assert.h:308
reverse_iterator rbegin() const
Definition: RangeList.h:54
xml element
GB_ERROR close(GB_ERROR error)
Definition: arbdbpp.cxx:35
UnitPtr at_ptr(size_t pos) const OVERRIDE
Definition: insdel.cxx:535
AliFormatCommand(size_t wanted_len_)
Definition: insdel.cxx:1069
#define TEST_EXPECT_CODE_ASSERTION_FAILS(cb)
Definition: test_unit.h:1252
static AliDataPtr make(AliDataPtr from, size_t offset, size_t amount)
Definition: insdel.cxx:176
UnitPtr unit_left_of(size_t pos) const OVERRIDE
Definition: insdel.cxx:1221
bool equals(const AliData &other) const
Definition: insdel.cxx:124
bool equals(const copy< T > &t1, const copy< T > &t2)
Definition: test_unit.h:644
#define OVERRIDE
Definition: cxxforward.h:112
virtual int operate_on_mem(void *mem, size_t start, size_t count, memop op) const =0
char * provide_insDelBuffer(size_t neededSpace)
Definition: insdel.cxx:1165
int operate_on_mem(void *mem, size_t start, size_t count, AliData::memop op) const OVERRIDE
Definition: insdel.cxx:411
GB_ERROR GB_write_ints(GBDATA *gbd, const GB_UINT4 *i, long size)
Definition: arbdb.cxx:1439
SmartPtr< AliData > AliDataPtr
Definition: insdel.cxx:60
AliDataPtr create_gap(size_t gapsize, const UnitPair &) const OVERRIDE
Definition: insdel.cxx:449
static char * insDelBuffer
Definition: insdel.cxx:1159
GBDATA * GBT_find_or_create(GBDATA *father, const char *key, long delete_level)
Definition: adtools.cxx:42
AliDataPtr insert_gap(AliDataPtr data, size_t pos, size_t count)
Definition: insdel.cxx:603
size_t unitsize() const OVERRIDE
Definition: insdel.cxx:386
size_t memsize() const
Definition: insdel.cxx:112
RangeList build_RangeList_from_string(const char *SAI_data, const char *set_bytes, bool invert)
Definition: RangeList.cxx:32
GB_ERROR get_delete_error(const char *data, size_t start, size_t count) const
Definition: insdel.cxx:355
AliDataPtr create_gap(size_t gapsize, const UnitPair &gapinfo) const OVERRIDE
Definition: insdel.cxx:575
GB_ERROR check_delete_allowed(size_t start, size_t count) const
Definition: insdel.cxx:97
static ARB_init_perl_interface init
Definition: ARB_ext.c:101
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
Definition: adquery.cxx:302
SequenceAliData(const char *static_data, size_t elements, char stdgap, char dotgap, const SizeAwarable &sizeAware, const Deletable &deletable_)
Definition: insdel.cxx:570
int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE
Definition: insdel.cxx:193
GB_ERROR GB_write_floats(GBDATA *gbd, const float *f, long size)
Definition: arbdb.cxx:1457
#define NULp
Definition: cxxforward.h:116
virtual UnitPtr unit_left_of(size_t pos) const =0
GB_CSTR GB_read_bits_pntr(GBDATA *gbd, char c_0, char c_1)
Definition: arbdb.cxx:926
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
virtual AliDataPtr create_gap(size_t gapsize, const UnitPair &gapinfo) const =0
Alignment(const char *name_, size_t len_)
Definition: insdel.cxx:917
#define __ATTR__NORETURN
Definition: attributes.h:56
#define __ATTR__REDUCED_OPTIMIZE__NO_GCSE
Definition: test_unit.h:88
int cmp_data(size_t start, const AliData &other, size_t ostart, size_t count) const OVERRIDE
Definition: insdel.cxx:277
virtual UnitPtr unit_right_of(size_t pos) const =0
#define offset(field)
Definition: GLwDrawA.c:73
void set_error(GB_ERROR error) const
Definition: insdel.cxx:83
GB_TYPES
Definition: arbdb.h:62
Definition: trnsprob.h:20
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
void copyPartTo(void *mem, size_t start, size_t count) const
Definition: insdel.cxx:92
GB_CFLOAT * GB_read_floats_pntr(GBDATA *gbd)
Definition: arbdb.cxx:1019
bool has_slice() const OVERRIDE
Definition: insdel.cxx:185
GB_transaction ta(gb_var)
AliDataPtr create_gap(size_t gapsize, const UnitPair &gapinfo) const OVERRIDE
Definition: insdel.cxx:1224
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
AliDataPtr apply(AliDataPtr to, GB_ERROR &error) const OVERRIDE
Definition: insdel.cxx:1052
Definition: arbdb.h:71
static const T * typed_ptr(const UnitPtr &uptr)
Definition: insdel.cxx:375
GB_ERROR GBT_check_data(GBDATA *Main, const char *alignment_name)
Definition: adali.cxx:218
GBDATA * GBT_get_presets(GBDATA *gb_main)
Definition: adali.cxx:30
UnitPtr(const void *ptr_)
Definition: insdel.cxx:36
GB_ERROR apply(const AliEditCommand &cmd, bool &did_modify)
Definition: insdel.cxx:1330
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
range_set::const_reverse_iterator reverse_iterator
Definition: RangeList.h:47
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
virtual int cmp_data(size_t start, const AliData &other, size_t ostart, size_t count) const =0
virtual ~AliData()
Definition: insdel.cxx:71
GB_ERROR check_applicable_to(const Alignment &ali, size_t &resulting_ali_length) const OVERRIDE
Definition: insdel.cxx:1112
const unsigned int GB_CUINT4
Definition: arbdb_base.h:40
#define min(a, b)
Definition: f2c.h:153
bool has_slice() const OVERRIDE
Definition: insdel.cxx:233
AliDataPtr makeAliSeqData(char *&allocated_data, size_t elems, char gap, char dot)
Definition: insdel.cxx:631
const char * GB_CSTR
Definition: arbdb_base.h:25
AliInsertCommand(size_t pos_, size_t amount_)
Definition: insdel.cxx:1031
size_t unitsize() const OVERRIDE
Definition: insdel.cxx:232
AliEditor(const AliEditCommand &cmd_, const Deletable &deletable_, const char *progress_title, size_t progress_count)
Definition: insdel.cxx:1144
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
GB_ERROR mid(GBL_command_arguments *args, int start_index)
Definition: adlang1.cxx:907
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
bool is_valid_between(size_t pos) const
Definition: insdel.cxx:133
GB_ERROR check_applicable_to(const Alignment &ali, size_t &resulting_ali_length) const OVERRIDE
Definition: insdel.cxx:1092
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
static Score ** U
Definition: align.cxx:67
Deletable(DeleteWhat what)
Definition: insdel.cxx:342
SizeAwarable dontAllowOversize(size_t ali_size)
Definition: insdel.cxx:471
size_t unitsize() const OVERRIDE
Definition: insdel.cxx:184
AliDataPtr makeAliData(T *&allocated_data, size_t elems, const T &gap)
Definition: insdel.cxx:628
GB_write_int const char s
Definition: AW_awar.cxx:154