ARB
seq.h
Go to the documentation of this file.
1 #ifndef SEQ_H
2 #define SEQ_H
3 
4 #ifndef FUN_H
5 #include "fun.h"
6 #endif
7 #ifndef GLOBAL_H
8 #include "global.h"
9 #endif
10 #ifndef ARB_STRING_H
11 #include <arb_string.h>
12 #endif
13 #ifndef _GLIBCXX_CCTYPE
14 #include <cctype>
15 #endif
16 
17 
18 #define INITSEQ 6000
19 
20 struct BaseCounts {
21  int a;
22  int c;
23  int g;
24  int t;
25  int other;
26 
28  : a(0), c(0), g(0), t(0), other(0)
29  {}
30 
31  void add(char ch) {
32  switch (tolower(ch)) {
33  case 'a': a++; break;
34  case 'c': c++; break;
35  case 'g': g++; break;
36  case 'u':
37  case 't': t++; break;
38  default: other++; break;
39  }
40  }
41 };
42 
43 class Seq : virtual Noncopyable {
44  // - holds sequence data
45 
46  char *id;
47  int len; // sequence length
48  int max; // space allocated for 'sequence'
49  char *seq; // sequence data
50 
51  void zeroTerminate() { add(0); len--; }
52 
53  static void check_valid(char ch) {
54  if (isalpha(ch) || is_gapchar(ch)) return;
55  throw_errorf(43, "Invalid character '%c' in sequence data", ch);
56  }
57  static void check_valid(const char *s, int len) {
58  for (int i = 0; i<len; ++i) {
59  check_valid(s[i]);
60  }
61  }
62 
63 public:
64  Seq(const char *id_, const char *seq_, int len_) :
65  id(ARB_strdup(id_)),
66  len(len_),
67  max(len+1),
68  seq(strndup(seq_, len))
69  {
70  check_valid(seq, len);
71  }
72  Seq() :
73  id(NULp),
74  len(0),
75  max(INITSEQ),
76  seq(ARB_alloc<char>(INITSEQ))
77  {}
78  ~Seq() {
79  ca_assert(seq); // otherwise 'this' is useless!
80  freenull(id);
81  freenull(seq);
82  }
83 
84  void set_id(const char *id_) {
85  ca_assert(!id);
86  ca_assert(id_);
87  freedup(id, id_);
88  }
89  void replace_id(const char *id_) {
90  freenull(id);
91  set_id(id_);
92  }
93  const char *get_id() const {
94  ca_assert(id);
95  return id;
96  }
97 
98  void add(char c) {
99  if (c) check_valid(c);
100  if (len >= max) {
101  max = max*1.5+100;
102  ARB_realloc(seq, max);
103  }
104  seq[len++] = c;
105  }
106 
107  int get_len() const { return len; }
108  bool is_empty() const { return len == 0; }
109 
110  const char *get_seq() const {
111  const_cast<Seq*>(this)->zeroTerminate();
112  return seq;
113  }
114 
115  void count(BaseCounts& counter) const {
116  for (int i = 0; i<len; ++i)
117  counter.add(seq[i]);
118  }
119 
120  int count_gaps() const {
121  int gaps = 0;
122  for (int i = 0; i<len; ++i) { // @@@ loop gets vectorized with gcc 9.1 (move to cxx to be able to force check)
123  gaps += is_gapchar(seq[i]);
124  }
125  return gaps;
126  }
127 
128  void out(Writer& write, Format outType) const;
129 };
131 
132 #else
133 #error seq.h included twice
134 #endif // SEQ_H
Format
Definition: fun.h:10
int c
Definition: seq.h:22
BaseCounts()
Definition: seq.h:27
void throw_errorf(int error_num, const char *error_messagef,...) __ATTR__FORMAT(2) __ATTR__NORETURN
Definition: util.cxx:41
int t
Definition: seq.h:24
void add(char c)
Definition: seq.h:98
SmartPtr< Seq > SeqPtr
Definition: seq.h:130
#define ca_assert(cond)
Definition: global.h:33
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void replace_id(const char *id_)
Definition: seq.h:89
~Seq()
Definition: seq.h:78
void count(BaseCounts &counter) const
Definition: seq.h:115
const char * get_id() const
Definition: seq.h:93
Definition: reader.h:95
void add(char ch)
Definition: seq.h:31
TYPE * ARB_alloc(size_t nelem)
Definition: arb_mem.h:56
#define INITSEQ
Definition: seq.h:18
Generic smart pointer.
Definition: smartptr.h:149
Definition: seq.h:43
int other
Definition: seq.h:25
Seq(const char *id_, const char *seq_, int len_)
Definition: seq.h:64
int a
Definition: seq.h:21
Seq()
Definition: seq.h:72
int get_len() const
Definition: seq.h:107
bool is_empty() const
Definition: seq.h:108
Definition: seq.h:20
bool is_gapchar(char ch)
Definition: global.h:119
void ARB_realloc(TYPE *&tgt, size_t nelem)
Definition: arb_mem.h:43
int count_gaps() const
Definition: seq.h:120
#define NULp
Definition: cxxforward.h:116
int g
Definition: seq.h:23
char * strndup(const char *str, int len)
Definition: global.h:102
const char * get_seq() const
Definition: seq.h:110
void set_id(const char *id_)
Definition: seq.h:84
void out(Writer &write, Format outType) const
Definition: seq.cxx:12
GB_write_int const char s
Definition: AW_awar.cxx:154