15 #include <PT_server_prototypes.h>
26 inline void aisc_link(dll_public *dll, PT_family_list *family) {
aisc_link(reinterpret_cast<dllpublic_ext*>(dll), reinterpret_cast<dllheader_ext*>(family)); }
33 : id(id_), limit(limit_)
44 HitCounter() : trav_id(-1), trav_hits(0), count(0), rel_count(0.0) {}
47 if (traversal.
id != trav_id) {
48 trav_id = traversal.
id;
53 if (trav_hits<traversal.
limit) {
60 rel_count = max_poss_matches>0 ? double(count)/max_poss_matches : 0;
86 for (
size_t i = 0; i < size; i++) {
89 case RSS_SOURCE: full_length = sequence_length;
break;
94 int max_poss_matches = full_length - oligo_len + 1;
104 trav_info.
limit = hit_limit;
108 int cmp_abs(
int a,
int b)
const {
int cmp = famstat[a].
cmp_abs(famstat[b]);
return cmp ? cmp : a-b; }
109 int cmp_rel(
int a,
int b)
const {
int cmp = famstat[a].
cmp_rel(famstat[b]);
return cmp ? cmp : a-b; }
117 int needed_positions;
118 int accept_mismatches;
122 void count_match(
const AbsLoc& match)
const {
128 bool at_end()
const {
return *oligo ==
PT_QU; }
130 bool too_many_mismatches()
const {
return accept_mismatches<0; }
132 bool did_match()
const {
return needed_positions <= 0 && !too_many_mismatches(); }
133 bool need_match()
const {
return needed_positions > 0 && !too_many_mismatches(); }
134 bool match_possible()
const {
return need_match() && !at_end(); }
136 void match_one_char(
char c) {
139 if (*oligo++ != c) accept_mismatches--;
145 do match_one_char(loc[height]);
while (match_possible());
146 if (did_match()) count_match(loc);
150 inline void mark_chain_or_leaf(
POS_TREE2 *pt)
const;
155 range =
Range(start, end, oligo_len);
158 range =
Range(-1, -1, -1);
164 needed_positions(needed_positions_),
165 accept_mismatches(accept_mismatches_),
173 if (did_match()) count_match(loc);
174 else if (match_possible()) {
181 if (did_match()) count_match(loc);
182 else if (match_possible()) {
189 Range PT_Traversal::range(-1, -1, -1);
191 inline void PT_Traversal::mark_chain_or_leaf(
POS_TREE2 *pt)
const {
221 if (pt_son && !at_end()) {
223 sub.match_one_char(base);
224 if (!sub.too_many_mismatches()) {
225 if (sub.did_match()) sub.mark_all(pt_son);
232 mark_chain_or_leaf(pt);
236 void PT_Traversal::mark_all(
POS_TREE2 *pt)
const {
244 if (pt_son) mark_all(pt_son);
248 mark_chain_or_leaf(pt);
268 while (ffinder->fl) destroy_PT_family_list(ffinder->fl);
271 std::vector<int> sorted;
275 if (ffinder->min_score == 0) {
280 if (ffinder->sort_type == 0) {
281 double min_score = ffinder->min_score;
290 double min_score_rel = double(ffinder->min_score)/100.0;
298 matching_results = j;
301 bool sort_all = ffinder->sort_max == 0 || ffinder->sort_max >=
int(matching_results);
303 if (ffinder->sort_type == 0) {
305 std::sort(sorted.begin(), sorted.end(),
oligo_cmp_abs(famStat));
308 std::partial_sort(sorted.begin(), sorted.begin() + ffinder->sort_max, sorted.begin() + matching_results,
oligo_cmp_abs(famStat));
316 std::partial_sort(sorted.begin(), sorted.begin() + ffinder->sort_max, sorted.begin() + matching_results,
oligo_cmp_rel(famStat));
323 int end = (sort_all) ? matching_results : ffinder->sort_max;
324 for (
int i = 0; i < end; i++) {
329 PT_family_list *fl = create_PT_family_list();
340 ffinder->list_size = real_hits;
347 for (
int i = 0; i < oligo_len; i++) {
361 for (
int o = 0; o<oligo_len; ++o) {
362 if (p1[o] != p2[o]) {
363 isless = p1[o]<p2[o];
371 typedef std::map<const char *, int, oligo_comparator>
OligoMap;
381 OligoMap::iterator found = oligos.find(seq);
382 if (found == oligos.end()) oligos[seq] = 1;
383 else found->second++;
385 OligoIter
begin() {
return oligos.begin(); }
386 OligoIter
end() {
return oligos.end(); }
392 int oligo_len = ffinder->pr_len;
395 freedup(ffinder->ff_error,
"minimum oligo length is 1");
398 int mismatch_nr = ffinder->mis_nr;
399 int complement = ffinder->complement;
401 char *sequence = species->
data;
404 bool use_all_oligos = ffinder->only_A_probes == 0;
414 for (
int cmode = 1; cmode <= 8; cmode *= 2) {
427 if ((complement&cmode) != 0) {
428 char *
s = ARB_alloc<char>(sequence_len+1);
430 memcpy(s, sequence, sequence_len);
433 seq[seq_count++] =
s;
439 for (
int s = 0;
s<seq_count;
s++) {
440 char *last_oligo = seq[
s]+sequence_len-oligo_len;
441 for (
char *oligo = seq[
s]; oligo < last_oligo; ++oligo) {
442 if (use_all_oligos || oligo[0] ==
PT_A) {
444 occurring_oligos.
add(oligo);
450 for (OligoIter o = occurring_oligos.
begin(); o != occurring_oligos.
end(); ++o) {
451 const char *oligo = o->first;
452 int occur_count = o->second;
461 for (
int s = 0;
s<seq_count;
s++) {
struct probe_input_data * data
void count_match(size_t idx)
int get_match_count() const
oligo_comparator(int len)
OligoMap::const_iterator OligoIter
void complement_probe(char *Probe, int len)
void mark_matching(POS_TREE2 *pt) const
int operator()(const AbsLoc &loc) const
static int make_PT_family_list(PT_family *ffinder, const FamilyStat &famStat)
const double & get_rel_match_count() const
const FamilyStat & fam_stat
char * ARB_strdup(const char *str)
OligoRegistry(int oligo_len)
FamilyStat(size_t size_, RelativeScoreScaling scaling_)
int cmp_abs(int a, int b) const
void calc_rel_matches(int oligo_len, int sequence_length)
bool operator()(int a, int b)
PT_Traversal(const char *oligo_, int needed_positions_, int accept_mismatches_, FamilyStat &fam_stat_)
void reverse_probe(char *seq, int len)
static HelixNrInfo * start
void limit_hits_for_next_traversal(int hit_limit)
const FamilyStat & fam_stat
size_t probe_compress_sequence(char *seq, size_t seqsize)
int cmp_abs(const HitCounter &other) const
static void unrestrictMatchesToRegion()
CONSTEXPR_INLINE bool is_std_base(char b)
static void restrictMatchesToRegion(int start, int end, int oligo_len)
bool operator()(int a, int b)
bool operator()(const char *p1, const char *p2) const
PT * PT_read_son(PT *node, PT_base base)
void inc(const TraversalHitLimit &traversal)
const HitCounter & hits(size_t idx) const
oligo_cmp_rel(const FamilyStat &fam_stat_)
oligo_cmp_abs(const FamilyStat &fam_stat_)
TraversalHitLimit(int id_, int limit_)
bool contains_ambiguities(char *oligo, int oligo_len)
int find_family(PT_family *ffinder, bytestring *species)
int operator()(const DataLoc &loc) const
int cmp_rel(int a, int b) const
POS_TREE2 *& TREE_ROOT2()
void calc_rel_match(int max_poss_matches)
std::map< const char *, int, oligo_comparator > OligoMap
bool contains(const AbsLoc &match) const
void aisc_link(dll_public *dll, PT_family_list *family)
int cmp_rel(const HitCounter &other) const
void add(const char *seq)
GB_write_int const char s