17 #include <sys/times.h>
// Fragment of the probe-length detection code (presumably PS_detect_probe_length;
// only original lines 32 and 40 are part of this listing).
// Outer loop: repeats for as long as __PROBE_LENGTH is still negative.
32 while (__PROBE_LENGTH < 0) {
// Inner loop: continues only while ARB_child is non-null AND __PROBE_LENGTH is
// still negative, so it stops as soon as either condition fails.
40 while (ARB_child && (__PROBE_LENGTH < 0)) {
// Fragment (presumably PS_assert_inverse_path, judging by the use of _max_depth,
// _caller_ID and _path; the lines between the samples are not shown).
// First pass: iterate over every ID stored in *_path; c is incremented in step
// with the iterator.
56 for (
IDVectorCIter i = _path->begin(); i != _path->end(); ++i, ++c) {
// Replace current_node by whatever assertChild(current_ID) returns
// (presumably the child node for that ID - confirm in the node class).
58 current_node = current_node->
assertChild(current_ID);
// Second pass: walk the contiguous ID range _caller_ID+1 .. _max_depth (inclusive),
// again incrementing c once per step.
63 for (current_ID = _caller_ID+1; current_ID <= _max_depth; ++current_ID, ++c) {
// Same assertChild step for each ID in the range.
64 current_node = current_node->
assertChild(current_ID);
// Fragment (presumably PS_assert_path; the lines between the samples are not shown).
// next_path_ID is the ID the iterator i currently points at, or -1 once i has
// reached the end of *_path.
79 next_path_ID = (i == _path->end()) ? -1 : *i;
// Visit every ID from __MIN_ID up to and including _caller_ID; c counts the steps.
80 for (
SpeciesID current_ID = __MIN_ID; current_ID <= _caller_ID; ++current_ID, ++c) {
// Only IDs that differ from next_path_ID take the assertChild step.
81 if (current_ID != next_path_ID) {
82 current_node = current_node->
assertChild(current_ID);
// Recompute next_path_ID from i using the same rule as above
// (presumably after i was advanced - the advancing line is not part of this listing).
86 next_path_ID = (i == _path->end()) ? -1 : *i;
// Fragment (presumably PS_extract_probe_data; the lines between the samples are not shown).
// A new probe starts with a fixed quality of 100 and a GC count of zero.
134 new_probe->quality = 100;
135 new_probe->GC_content = 0;
// Count buffer[i] into GC_content when it is an uppercase 'C' or 'G'
// (lowercase letters are not counted by this test).
137 if ((buffer[i] ==
'C') || (buffer[i] ==
'G')) ++(new_probe->GC_content);
// Add the probe to the set `probes`.
139 probes->insert(new_probe);
// Append every ID strictly between _parent_ID and id to _inverse_path.
// The extra (i >= 0) test also ends the loop if i is negative.
147 for (
int i=_parent_ID+1; ((i < id) && (i >= 0)); ++i) {
148 _inverse_path->push_back(i);
// Branch taken when _depth is at most half of _max_depth (>> 1 halves, rounding down).
155 if (_depth <= (_max_depth >> 1)) {
// Pass the whole probe set (begin..end) to current_node.
160 current_node->
addProbes(probes->begin(), probes->end());
// Remove every trailing ID greater than _parent_ID from _inverse_path.
// The empty() test has to be evaluated first: calling back() on an empty
// std::vector is undefined behaviour, and && only guards the second operand.
183 while ((!_inverse_path->empty()) && (_inverse_path->back() > _parent_ID)) {
184 _inverse_path->pop_back();
// Program entry point (sampled listing: many lines between the samples are not shown).
// _argv[1] is used as the input database name; the output name is derived from it
// by replacing the '.arb' suffix with '.wf'.
189 int main(
int _argc,
char *_argv[]) {
// Usage message (the condition that triggers it is not part of this listing).
195 printf(
"Missing arguments\n Usage %s <input database name>\n", _argv[0]);
196 printf(
"output database will be named like input database but with the suffix '.wf' instead of '.arb'\n");
// First command line argument is the input database name.
200 const char *DB_name = _argv[1];
207 printf(
"Opening probe-group-database '%s'..\n ", DB_name);
// Open the input database with open-mode string "rwcN"
// (flag meanings are defined by GB_open - see the ARB database API).
208 gb_main =
GB_open(DB_name,
"rwcN");
214 printf(
"..loaded database (enter to continue) ");
// Diagnostics for a database that lacks the expected 'group_tree' / 'node' entries.
220 printf(
"no 'group_tree' in database\n");
225 if (!first_level_node) {
226 printf(
"no 'node' found in group_tree\n");
// Species <-> ID map setup and a short report about it.
235 printf(
"init Species <-> ID - Map\n");
238 printf(
"%i species in the map ", species_count);
// With at least 10 species, list entries as "[ id ] name"
// (i->first is printed as the ID, i->second as the name).
239 if (species_count >= 10) {
240 printf(
"\nhere are the first 10 :\n");
243 printf(
"[ %2i ] %s\n", i->first, i->second.c_str());
248 printf(
"IDs %i .. %i\n(enter to continue) ", __MIN_ID, __MAX_ID);
// Build the output name: cut the input name at the LAST occurrence of ".arb"
// (erase removes everything from that position to the end), then append ".wf".
// NOTE(review): rfind matches ".arb" anywhere in the name, not only at the very end.
254 string output_DB_name(DB_name);
255 size_t suffix_pos = output_DB_name.rfind(
".arb");
256 if (suffix_pos != string::npos) {
257 output_DB_name.erase(suffix_pos);
259 output_DB_name.append(
".wf");
// When no ".arb" was found, ".wf" was simply appended; tell the user the resulting name.
260 if (suffix_pos == string::npos) {
261 printf(
"cannot find suffix '.arb' in database name '%s'\n", DB_name);
262 printf(
"output file will be named '%s'\n", output_DB_name.c_str());
// Create the output database object in WRITEONLY mode.
// NOTE(review): allocated with raw new; no matching delete is visible in this listing.
264 PS_Database *ps_db =
new PS_Database(output_DB_name.c_str(), PS_Database::WRITEONLY);
275 printf(
"extracting probe-data...\n");
277 printf(
"probe_length = %d\n", __PROBE_LENGTH);
// The global __ROOT is set to the root node of the output database.
279 __ROOT = ps_db->getRootNode();
// Loop over first_level_node until it becomes null; c counts the iterations and
// times() records process times at the start of each one.
283 struct tms before_first_level_node;
284 for (; first_level_node; ++c) {
286 times(&before_first_level_node);
287 printf(
"1st level node #%u ", c+1);
296 printf(
"done after %u 1st level nodes\n", c);
297 printf(
"(enter to continue) ");
// Progress messages around saving the output database.
304 printf(
"writing probe-data to %s..\n", output_DB_name.c_str());
306 printf(
"..done saving (enter to continue) ");
// Zero the user and system time fields of `before` (its declaration is not shown here).
311 before.tms_utime = 0;
312 before.tms_stime = 0;
GBDATA * PG_get_first_probe(GBDATA *pb_group)
GBDATA * GB_open(const char *path, const char *opent)
IDVector::const_iterator IDVectorCIter
GBDATA * PG_get_next_probe(GBDATA *pb_probe)
void GB_warning(const char *message)
static SpeciesID __MAX_ID
static Name2IDMap __NAME2ID_MAP
void PS_extract_probe_data(GBDATA *_ARB_node, int _max_depth, int _depth, const int _parent_ID, IDVector *_inverse_path)
static void PS_detect_probe_length(GBDATA *_ARB_node)
static SpeciesID __MIN_ID
PS_NodePtr assertChild(SpeciesID _id)
void addProbes(PS_ProbeSetCIter _begin, PS_ProbeSetCIter _end)
static ID2NameMap __ID2NAME_MAP
char buffer[MESSAGE_BUFFERSIZE]
void addProbesInverted(PS_ProbeSetCIter _begin, PS_ProbeSetCIter _end)
GB_ERROR GB_export_error(const char *error)
GB_ERROR GB_await_error()
static GB_ERROR PG_initSpeciesMaps(GBDATA *pb_main)
PS_ProbeSet * PS_ProbeSetPtr
GBDATA * PS_get_first_node(GBDATA *pb_nodecontainer)
static void error(const char *msg)
GBDATA * PS_get_next_node(GBDATA *pb_node)
std::vector< SpeciesID > IDVector
ID2NameMap::const_iterator ID2NameMapCIter
GB_transaction ta(gb_var)
PS_NodePtr PS_assert_path(const int _caller_ID, IDVector *_path)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
static int PG_NumberSpecies()
int main(int _argc, char *_argv[])
set< PS_ProbePtr, lt_probe > PS_ProbeSet
PS_NodePtr PS_assert_inverse_path(const int _max_depth, const int _caller_ID, IDVector *_path)
const char * PG_read_probe(GBDATA *pb_probe)
GBDATA * GB_entry(GBDATA *father, const char *key)
static int __PROBE_LENGTH