17 while (count<seqsize && seq[count] == cmp) count++;
21 inline size_t count_char(
const unsigned char *
seq,
size_t seqsize,
unsigned char c, uint32_t c4) {
22 if (seqsize>0 && seq[0] == c) {
23 size_t count = 1+
count_uint_32((uint32_t*)(seq+1), (seqsize-1)/4, c4);
24 for (; count<seqsize && seq[count] == c; ++count) ;
46 while (count2 || count3);
52 #if defined(COUNT_COMPRESSES_BASES)
63 fputs(
"\nBaseCounter:\n", stderr);
65 fprintf(stderr,
"count[%i]=%li\n", i, count[i]);
69 void inc(
uchar base) {
79 size_t max_input_length;
81 unsigned *base_offset;
84 static bool translation_initialized;
85 static uchar translate[256];
87 #if defined(COUNT_COMPRESSES_BASES)
91 static void initTranslation() {
92 memset(translate,
PT_N, 256);
94 translate[
'A'] = translate[
'a'] =
PT_A;
95 translate[
'C'] = translate[
'c'] =
PT_C;
96 translate[
'G'] = translate[
'g'] =
PT_G;
97 translate[
'T'] = translate[
't'] =
PT_T;
98 translate[
'U'] = translate[
'u'] =
PT_T;
99 translate[
'.'] =
PT_QU;
103 translation_initialized =
true;
108 : max_input_length(max_input_length_),
109 compressed(new uchar[max_input_length+1]),
110 base_offset(new unsigned[max_input_length+1]),
113 if (!translation_initialized) initTranslation();
116 delete [] compressed;
117 delete [] base_offset;
123 size_t base_count = 0;
124 size_t last_base_offset = 0;
135 if (
offset >= length)
break;
137 c = translate[seq[
offset]];
140 #if defined(COUNT_COMPRESSES_BASES)
143 compressed[base_count] = c;
144 base_offset[base_count++] =
offset-last_base_offset;
145 last_base_offset =
offset;
148 if (base_count <= 0) {
154 if (compressed[base_count-1] !=
PT_QU) {
155 #if defined(COUNT_COMPRESSES_BASES)
156 base_counter.inc(
PT_QU);
158 compressed[base_count] =
PT_QU;
159 base_offset[base_count] = 1;
164 if (compressed[0] ==
PT_QU && base_count>1) {
166 base_offset[0] = base_offset[1]-1;
178 createFrom(reinterpret_cast<const unsigned char *>(seq), length);
183 const char *
get_seq()
const {
return reinterpret_cast<const char *
>(compressed); }
188 #error PT_compress.h included twice
189 #endif // PT_COMPRESS_H
size_t count_uint_32(uint32_t *seq, size_t seqsize, uint32_t cmp)
const unsigned * get_offsets() const
size_t get_allowed_size() const
const char * get_seq() const
size_t count_char(const unsigned char *seq, size_t seqsize, unsigned char c, uint32_t c4)
void createFrom(const unsigned char *const seq, const size_t length)
void createFrom(const char *const seq, const size_t length)
BaseCounter(const std::string &Source)
PT_compressed(size_t max_input_length_)
fputs(TRACE_PREFIX, stderr)
size_t count_dots(const unsigned char *seq, int seqsize)
size_t count_gaps(const unsigned char *seq, int seqsize)
size_t count_gaps_and_dots(const unsigned char *seq, int seqsize)