ARB
PT_findEx.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : PT_findEx.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "probe.h"
12 #include <PT_server_prototypes.h>
13 #include "probe_tree.h"
14 #include "pt_prototypes.h"
15 #include <arb_strbuf.h>
16 
17 static bool findLeftmostProbe(POS_TREE2 *node, char *probe, int restlen, int height) {
18  if (restlen==0) return true;
19 
20  switch (node->get_type()) {
21  case PT2_NODE:
22  for (int i=PT_A; i<PT_BASES; ++i) { // Note: does not iterate probes containing N
23  POS_TREE2 *son = PT_read_son(node, PT_base(i));
24  if (son) {
25  probe[0] = PT_base(i); // write leftmost probe into result
26  bool found = findLeftmostProbe(son, probe+1, restlen-1, height+1);
27  pt_assert(implicated(found, strlen(probe) == (size_t)restlen));
28  if (found) return true;
29  }
30  }
31  break;
32 
33  case PT2_CHAIN:
34  // probe cut-off in index -> do not iterate
35  break;
36 
37  case PT2_LEAF: {
38  // here the probe-tree is cut off, because only one species matches
39  DataLoc loc(node);
40  int pos = loc.get_rel_pos() + height;
41  int name = loc.get_name();
42 
43  if (pos + restlen >= psg.data[name].get_size()) // @@@ superfluous ?
44  break; // at end-of-sequence -> no probe with wanted length here
45 
46  pt_assert(probe[restlen] == 0);
47 
48  const probe_input_data& pid = psg.data[name];
49  SmartCharPtr seq = pid.get_dataPtr();
50 
51  for (int r = 0; r<restlen; ++r) {
52  int rel_pos = pos+r;
53  int data = pid.valid_rel_pos(rel_pos) ? PT_base((&*seq)[rel_pos]) : PT_QU;
54  if (data == PT_QU || data == PT_N) return false; // ignore probes that contain 'N' or '.'
55  probe[r] = data;
56  }
57  pt_assert(probe[restlen] == 0);
58  pt_assert(strlen(probe) == (size_t)restlen);
59  return true;
60  }
61  }
62 
63  return false;
64 }
65 
66 static bool findNextProbe(POS_TREE2 *node, char *probe, int restlen, int height) {
67  // searches next probe after 'probe' ('probe' itself may not exist)
68  // returns: true if next probe was found
69  // 'probe' is modified to next probe
70 
71  if (restlen==0) return false; // in this case we found the recent probe
72  // returning false upwards takes the next after
73 
74  switch (node->get_type()) {
75  case PT2_NODE: {
76  if (!is_std_base(probe[0])) return false;
77 
78  POS_TREE2 *son = PT_read_son(node, PT_base(probe[0]));
79  bool found = son && findNextProbe(son, probe+1, restlen-1, height+1);
80 
81  pt_assert(implicated(found, strlen(probe) == (size_t)restlen));
82 
83  if (!found) {
84  for (int i=probe[0]+1; !found && i<PT_BASES; ++i) {
85  if (is_std_base(i)) {
86  son = PT_read_son(node, PT_base(i));
87  if (son) {
88  probe[0] = PT_base(i); // change probe
89  found = findLeftmostProbe(son, probe+1, restlen-1, height+1);
90  pt_assert(implicated(found, strlen(probe) == (size_t)restlen));
91  }
92  }
93  }
94  }
95  return found;
96  }
97  case PT2_CHAIN:
98  case PT2_LEAF: {
99  // species list or single species reached
100  return false;
101  }
102  }
103 
104  pt_assert(0);
105  return false;
106 }
107 
108 int PT_find_exProb(PT_exProb *pep, int) {
109  POS_TREE2 *pt = psg.TREE_ROOT2(); // start search at root
110  GBS_strstruct buf(pep->numget*(pep->plength+1)+1);
111  bool first = true;
112 
113  for (int c=0; c<pep->numget; ++c) {
114  bool found = false;
115 
116  if (pep->restart) {
117  pep->restart = 0;
118 
119  char *probe = ARB_alloc<char>(pep->plength+1);
120  memset(probe, 'N', pep->plength);
121  probe[pep->plength] = 0; // EOS marker
122 
123  compress_data(probe);
124 
125  pep->next_probe.data = probe;
126  pep->next_probe.size = pep->plength;
127 
128  found = findLeftmostProbe(pt, pep->next_probe.data, pep->plength, 0);
129  }
130  else {
131  found = findNextProbe(pt, pep->next_probe.data, pep->plength, 0);
132  }
133 
134  pt_assert(pep->next_probe.data[pep->plength] == 0);
135  pt_assert(strlen(pep->next_probe.data) == (size_t)pep->plength);
136 
137  if (!found) break;
138 
139  // append the probe to the probe list
140 
141  if (!first) buf.put((char)pep->separator);
142  first = false;
143  if (pep->readable) {
144  char *readable = readable_probe(pep->next_probe.data, pep->next_probe.size, pep->tu);
145  buf.cat(readable);
146  free(readable);
147  }
148  else {
149  buf.cat(pep->next_probe.data);
150  }
151  }
152 
153  pep->result = buf.release_memfriendly();
154 
155  return 0;
156 }
157 
struct probe_input_data * data
Definition: probe.h:356
Definition: probe.h:83
SmartCharPtr get_dataPtr() const
Definition: probe.h:165
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
Definition: probe.h:84
static bool findLeftmostProbe(POS_TREE2 *node, char *probe, int restlen, int height)
Definition: PT_findEx.cxx:17
int get_name() const
Definition: probe_tree.h:434
probe_struct_global psg
Definition: PT_main.cxx:36
Definition: probe.h:85
int get_size() const
Definition: probe.h:211
void cat(const char *from)
Definition: arb_strbuf.h:199
FILE * seq
Definition: rns.c:46
char * readable_probe(const char *compressed_probe, size_t len, char T_or_U)
Definition: PT_io.cxx:91
static bool findNextProbe(POS_TREE2 *node, char *probe, int restlen, int height)
Definition: PT_findEx.cxx:66
int compress_data(char *probestring)
Definition: PT_io.cxx:21
CONSTEXPR_INLINE bool is_std_base(char b)
Definition: probe.h:93
TYPE get_type() const
Definition: probe_tree.h:246
#define pt_assert(bed)
Definition: PT_tools.h:22
PT * PT_read_son(PT *node, PT_base base)
PT_base
Definition: probe.h:82
str readable(const copy< T > &v)
Definition: test_unit.h:345
POS_TREE2 *& TREE_ROOT2()
Definition: probe.h:390
bool valid_rel_pos(int rel_pos) const
Definition: probe.h:213
Definition: probe.h:89
int get_rel_pos() const
Definition: probe_tree.h:471
int PT_find_exProb(PT_exProb *pep, int)
Definition: PT_findEx.cxx:108
char * release_memfriendly()
Definition: arb_strbuf.h:133
void put(char c)
Definition: arb_strbuf.h:174