18 char *AP_sequence_parsimony::table;
20 AP_sequence_parsimony::AP_sequence_parsimony(
const AliView *aliview) :
25 AP_sequence_parsimony::~AP_sequence_parsimony() {
30 return new AP_sequence_parsimony(get_aliview());
33 int AP_sequence_parsimony::cmp_combined(
const AP_combinableSeq *other)
const {
34 const AP_sequence_parsimony *sother =
DOWNCAST(
const AP_sequence_parsimony*, other);
36 const unsigned char *s1 = get_usequence();
37 const unsigned char *s2 = sother->get_usequence();
39 size_t len = get_sequence_length();
41 for (
size_t i = 0; i<len; ++i) {
42 int comp =
int(s1[i]) -
int(s2[i]);
43 if (comp)
return comp;
49 void AP_sequence_parsimony::build_table() {
67 void AP_sequence_parsimony::set(
const char *isequence) {
68 size_t sequence_len = get_filter()->get_filtered_length();
70 memset(seq_pars,
AP_DOT, (
size_t)sequence_len+1);
72 const uchar *simplify = get_filter()->get_simplify_table();
73 if (!table) this->build_table();
77 size_t iseqlen = strlen(isequence);
79 for (
size_t i = 0; i<sequence_len; ++i) {
83 if (pos >= iseqlen)
continue;
85 unsigned char c = (
unsigned char)isequence[pos];
88 fputc(simplify[c], stdout);
91 seq_pars[i] = table[simplify[c]];
97 for (
size_t i = 0; i < sequence_len; ++i) {
98 size_t pos = base_pos[i];
99 unsigned char c = (
unsigned char)isequence[pos];
100 seq_pars[i] = table[simplify[c]];
102 #if defined(SHOW_SEQ)
103 fputc(simplify[c], stdout);
108 #if defined(SHOW_SEQ)
112 mark_sequence_set(
true);
115 void AP_sequence_parsimony::unset() {
117 mark_sequence_set(
false);
126 template <
class COUNT,
class SITE>
128 const char * __restrict p1,
129 const char * __restrict p2,
134 for (
size_t idx = 0; idx<sequence_len; ++idx) {
139 p[idx] = (c==0)?c1|c2:c;
148 template <
class COUNT>
150 const char * __restrict p1,
151 const char * __restrict p2,
154 for (
size_t idx = 0; idx<sequence_len; ++idx) {
169 void add(
size_t,
char c) {
179 void add(
size_t idx,
char c) {
180 sum += !c * weights[idx];
191 void add(
size_t idx,
char c) {
199 sites[idx] += ((c | -c) >> 7 & 1) ^ 1;
203 #define NEVER_COMBINE_ASYNC
206 # if !defined(NEVER_COMBINE_ASYNC)
207 # define ASYNC_COMBINE
211 #if defined(ASYNC_COMBINE)
225 char *mutation_per_site;
229 #if defined(ASYNC_COMBINE)
230 std::future<Mutations> f;
231 void allow_async_calc(
bool allow_async) {
234 f = std::async( [
this]() {
return calculate(); } );
244 catch (std::system_error& serr) {
245 fprintf(stderr,
"catched system_error %i: %s\n", serr.code().value(), serr.what());
250 result = calculate();
255 # if defined(NEVER_COMBINE_ASYNC)
259 # else // !NEVER_COMBINE_ASYNC
260 void allow_async_calc(
bool) {}
262 Mutations calc_result() {
return calculate(); }
266 CombinableSeq(
size_t seq_len,
const char *seq1,
const char *seq2,
char *
result,
char *mutation_per_site_,
const GB_UINT4 *weights_,
bool allow_async) :
267 sequence_len(seq_len),
272 mutation_per_site(mutation_per_site_)
276 allow_async_calc(allow_async && !mutation_per_site);
280 return calc_result();
284 Mutations CombinableSeq::calculate()
const {
296 if (mutation_per_site) {
297 mutations =
do_combine(sequence_len, s1, s2, out,
302 mutations =
do_combine(sequence_len, s1, s2, out,
309 if (mutation_per_site) {
310 mutations =
do_combine(sequence_len, s1, s2, out,
315 mutations =
do_combine(sequence_len, s1, s2, out,
325 const AP_sequence_parsimony *left =
DOWNCAST(
const AP_sequence_parsimony*, lefts);
326 const AP_sequence_parsimony *right =
DOWNCAST(
const AP_sequence_parsimony*, rights);
328 size_t sequence_len = get_sequence_length();
333 const GB_UINT4 *
weights = get_weights()->is_unweighted() ?
NULp : get_weights()->get_weights();
334 CombinableSeq cs(sequence_len, left->get_sequence(), right->get_sequence(), seq_pars, mutation_per_site,
weights,
false);
335 long result = cs.get_result();
338 mark_sequence_set(
true);
345 size_t sequence_len = get_sequence_length();
346 const GB_UINT4 *weights = get_weights()->is_unweighted() ?
NULp : get_weights()->get_weights();
348 const AP_sequence_parsimony *pother =
DOWNCAST(
const AP_sequence_parsimony*, other);
358 void AP_sequence_parsimony::partial_match(
const AP_combinableSeq *part_,
long *overlapPtr,
long *penaltyPtr)
const {
370 const AP_sequence_parsimony *part = (
const AP_sequence_parsimony *)part_;
372 const char *pf = get_sequence();
373 const char *pp = part->get_sequence();
378 for (min_end = get_sequence_length()-1; min_end >= 0; --min_end) {
379 char both = pf[min_end]|pp[min_end];
382 for (; min_end >= 0; --min_end) {
388 for (; min_end >= 0; --min_end) {
401 for (max_start = 0; max_start <= min_end; ++max_start) {
402 char both = pf[max_start]|pp[max_start];
405 for (; max_start <= min_end; ++max_start) {
411 for (; max_start <= min_end; ++max_start) {
419 if (max_start <= min_end) {
420 for (
long idx = max_start; idx <= min_end; ++idx) {
421 if ((pf[idx]&pp[idx]) == 0) {
422 penalty += weights->
weight(idx);
425 overlap = min_end-max_start+1;
429 *overlapPtr = overlap;
430 *penaltyPtr = penalty;
433 AP_FLOAT AP_sequence_parsimony::count_weighted_bases()
const {
434 static char *hits =
NULp;
437 memset(hits, 1, 256);
449 const char *p = get_sequence();
452 size_t sequence_len = get_sequence_length();
454 for (
size_t i = 0; i<sequence_len; ++i) {
462 uint32_t AP_sequence_parsimony::checksum()
const {
463 const char *
seq = get_sequence();
464 return GB_checksum(seq,
sizeof(*seq)*get_sequence_length(), 0, NULp);
const size_t * get_filterpos_2_seqpos() const
static long do_combine(size_t sequence_len, const char *__restrict p1, const char *__restrict p2, char *__restrict p, COUNT count, SITE site)
char * AP_create_dna_to_ap_bases()
CONSTEXPR_INLINE unsigned char safeCharIndex(char c)
size_t bootstrapped_seqpos(size_t bpos) const
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
#define DOWNCAST(totype, expr)
TYPE * ARB_alloc(size_t nelem)
static long do_countMutations(size_t sequence_len, const char *__restrict p1, const char *__restrict p2, COUNT count)
bool does_bootstrap() const
static int weights[MAX_BASETYPES][MAX_BASETYPES]
CombinableSeq(size_t seq_len, const char *seq1, const char *seq2, char *result, char *mutation_per_site_, const GB_UINT4 *weights_, bool allow_async)
void add(size_t idx, char c)
static int COUNT(DictTree tree)
void add(size_t idx, char c)
#define IF_ASSERTION_USED(x)
count_weighted(const GB_UINT4 *w)
GB_UINT4 weight(size_t idx) const
void ARB_alloc_aligned(TYPE *&tgt, size_t nelems)
GB_write_int const char s