17 #include <sys/times.h>
23 next_candidate != _candidate_parent->
children.rend();
26 if (next_candidate->second->children.size() == 0) {
27 _leaf_candidates.insert(next_candidate->second);
41 candidate_iter != _leaf_candidates.end();
49 nodepath.insert(&(*(candidate->
node)));
50 candidate = candidate->
parent;
53 PS_Candidate2NodeSetPairByLengthMap::iterator stored_path;
55 for (stored_path = _paths.find(candidate->
depth);
56 ((stored_path != _paths.end()) &&
57 (stored_path->first == candidate->
depth) &&
60 found = stored_path->second.second == nodepath;
64 candidate_sptr = *candidate_iter;
65 candidate = &(*candidate_sptr);
72 typedef set<unsigned long>
ULSet;
73 typedef set<unsigned short>
USSet;
74 typedef map<float, float>
FFMap;
78 return 4*(_probe->length - _probe->GC_content) + 2*_probe->GC_content;
86 for (PS_NodeSet::const_iterator
node = _nodepath.begin();
87 node != _nodepath.end();
91 sums_for_node.clear();
95 probe_iter != (*node)->getProbesEnd();
101 ULSet::iterator found = temperatures.find(temperature);
102 if (found != temperatures.end())
continue;
105 temperatures.insert(temperature);
107 for (ULSet::iterator sum = _sums.begin();
110 sums_for_node.insert(*sum + temperature);
115 _sums = sums_for_node;
120 float &_best_average,
124 FSet sums_for_average;
125 float min_sum_for_node = -2.0;
126 float best_average = -2.0;
128 for (PS_NodeSet::const_iterator
node = _nodepath.begin();
129 (
node != _nodepath.end()) &&
131 (_min_sum > min_sum_for_node));
135 for (FFMap::iterator average = _averages.begin();
136 average != _averages.end();
138 if ((_min_sum > 0) && (average->second > _min_sum))
continue;
141 sums_for_average.clear();
142 temperatures.clear();
145 probe_iter != (*node)->getProbesEnd();
151 ULSet::iterator found = temperatures.find(temperature);
152 if (found != temperatures.end())
continue;
155 temperatures.insert(temperature);
156 float distance = fabsf(average->first - temperature);
157 float sum = average->second + (distance * distance);
158 sums_for_average.insert(sum);
162 average->second = *sums_for_average.begin();
166 min_sum_for_node = _averages.begin()->second;
167 best_average = _averages.begin()->first;
168 for (FFMap::iterator average = _averages.begin();
169 average != _averages.end();
171 if (average->second < min_sum_for_node) {
172 min_sum_for_node = average->second;
173 best_average = average->first;
178 if ((_min_sum < 0) || (min_sum_for_node < _min_sum)) {
179 printf(
"UPDATED _best_average (%7.3f) _min_sum (%10.3f) <- best_average (%7.3f) min_sum_for_node (%10.3f)\n", _best_average, _min_sum, best_average, min_sum_for_node);
180 _min_sum = min_sum_for_node;
181 _best_average = best_average;
185 printf(
"aborted _best_average (%7.3f) _min_sum (%10.3f) best_average (%7.3f) min_sum_for_node (%10.3f)\n", _best_average, _min_sum, best_average, min_sum_for_node);
190 float &_min_sum_of_square_distances_to_average,
191 float &_best_average) {
195 PS_Candidate2NodeSetPairByLengthMap::iterator stored_path = _paths.begin();
202 for (ULSet::iterator sum = sums.begin();
205 float avg = (float)(*sum) / stored_path->first;
209 _min_sum_of_square_distances_to_average = -1;
210 printf(
"[%p] distance: ", stored_path->second.first);
214 _min_sum_of_square_distances_to_average);
217 PS_Candidate2NodeSetPairByLengthMap::iterator best_candidate = stored_path;
219 stored_path != _paths.end();
229 for (ULSet::iterator sum = sums.begin();
232 float avg = (float)(*sum) / stored_path->first;
237 printf(
"[%p] distance: ", stored_path->second.first);
241 _min_sum_of_square_distances_to_average)) {
242 best_candidate = stored_path;
247 PS_Candidate2NodeSetPairByLengthMap::iterator to_delete = _paths.end();
248 for (stored_path = _paths.begin();
249 stored_path != _paths.end();
251 if (to_delete != _paths.end()) {
252 _paths.erase(to_delete);
253 to_delete = _paths.end();
255 if (stored_path != best_candidate) to_delete = stored_path;
257 if (to_delete != _paths.end()) {
258 _paths.erase(to_delete);
259 to_delete = _paths.end();
265 set<unsigned int> &_probe_lengths) {
266 for (PS_NodeSet::const_iterator node_iter = _nodes.begin();
267 node_iter != _nodes.end();
272 float min_distance = _average * _average;
277 if (distance < min_distance) min_distance = distance;
290 if (distance > min_distance) {
303 unsigned max_length = 0;
307 if (max_length < (*probe)->length) max_length = (*probe)->length;
318 if ((*probe)->length < max_length) {
339 printf(
"Missing argument\n Usage %s <database name> <final candidates filename>\n", argv[0]);
343 const char *input_DB_name = argv[1];
344 const char *final_candidates_filename = argv[2];
349 printf(
"Opening probe-set-database '%s'..\n", input_DB_name);
350 PS_Database *db =
new PS_Database(input_DB_name, PS_Database::READONLY);
353 unsigned long bits_in_map = ((max_id+1)*max_id)/2 + max_id+1;
354 printf(
"..loaded database (enter to continue)\n");
fflush(stdout);
359 printf(
"opening candidates-file '%s'..\n", final_candidates_filename);
362 candidates_root->
load(candidates_file, bits_in_map, db->getConstRootNode());
363 delete candidates_file;
364 printf(
"..loaded candidates file (enter to continue)\n");
fflush(stdout);
369 printf(
"\nsearching leaf candidates.. ");
372 printf(
"%zu\n", leaf_candidates.size());
377 printf(
"\ngetting node paths for leaf candidates..");
380 printf(
"%zu\n", node_paths.size());
381 for (PS_Candidate2NodeSetPairByLengthMap::const_iterator stored_path = node_paths.begin();
382 stored_path != node_paths.end();
384 printf(
"length (%3lu) candidate [%p] nodes (%3zu) ",
386 stored_path->second.first,
387 stored_path->second.second.size());
388 unsigned long sum = 0;
389 for (PS_NodeSet::const_iterator
node = stored_path->second.second.begin();
390 node != stored_path->second.second.end();
392 sum += (*node)->countProbes();
394 printf(
"probes (%6lu)\n", sum);
fflush(stdout);
400 printf(
"\nevaluating node paths of leaf candidates..\n");
401 float min_sum_of_square_distances_to_average;
404 printf(
" best candidate : average (%f) sum of square distances to average (%f)\n ", average, min_sum_of_square_distances_to_average);
405 node_paths.begin()->second.first->print();
410 printf(
"\nremoving unwanted probes from best candidate..\n");
411 set<unsigned int> probe_lengths;
417 char *final_candidates_paths_filename = (
char *)malloc(strlen(final_candidates_filename)+1+6);
418 strcpy(final_candidates_paths_filename, final_candidates_filename);
419 strcat(final_candidates_paths_filename,
".paths");
420 printf(
"Writing final candidate's paths to '%s'..\n", final_candidates_paths_filename);
424 unsigned long max_temp = min_temp;
428 final_candidates_paths_file->
put_uint(probe_lengths.size());
429 printf(
" probe lengths :");
430 for (set<unsigned int>::iterator
length = probe_lengths.begin();
431 length != probe_lengths.end();
441 if (temp < min_temp) min_temp = temp;
442 if (temp > max_temp) max_temp = temp;
443 printf(
"depth (%lu) candidate [%p] node [%p] probe (q_%+i__l_%2u__gc_%2u__temp_%3lu)\n",
449 (*probe)->GC_content,
452 final_candidates_paths_file->
put_uint((*probe)->length);
454 final_candidates_paths_file->
put_uint((*probe)->GC_content);
456 if ((*probe)->quality >= 0) {
463 final_candidates_paths_file->
put_int(*
id);
468 final_candidates_paths_file->
put_uint(db->getSpeciesCount() - c->
path.size());
473 id <= db->getMaxID();
477 path_id = (path_it == c->
path.end()) ? -1 : *path_it;
480 final_candidates_paths_file->
put_int(
id);
484 for (
unsigned int i = 0; i < (*probe)->length; ++i) {
485 final_candidates_paths_file->
put_char(
'\x00');
491 final_candidates_paths_file->
put_uint(probe_lengths.size());
492 for (set<unsigned int>::iterator
length = probe_lengths.begin();
493 length != probe_lengths.end();
498 printf(
" temperature range %lu..%lu\n", min_temp, max_temp);
499 free(final_candidates_paths_filename);
500 delete final_candidates_paths_file;
505 printf(
"cleaning up... candidates\n");
fflush(stdout);
506 delete candidates_root;
507 printf(
"cleaning up... database\n");
fflush(stdout);
void put_ulong(unsigned long int _ul)
void PS_eval_node_paths(PS_Candidate2NodeSetPairByLengthMap &_paths, float &_min_sum_of_square_distances_to_average, float &_best_average)
static const bool READONLY
void put_char(unsigned char _c)
map< float, float > FFMap
set< unsigned long > ULSet
int main(int argc, char *argv[])
void removeProbe(PS_ProbeSetCIter it)
PS_CandidateSet::const_iterator PS_CandidateSetCIter
void PS_get_leaf_candidates(PS_CandidatePtr _candidate_parent, PS_CandidateSet &_leaf_candidates)
PS_ProbeSet::const_iterator PS_ProbeSetCIter
size_t countProbes() const
multimap< unsigned long, PS_Candidate2NodeSetPair > PS_Candidate2NodeSetPairByLengthMap
bool isNull() const
test if SmartPtr is NULp
PS_ProbeSetCIter getProbesBegin() const
bool PS_calc_min_sum_of_square_distances_to_average(PS_NodeSet &_nodepath, FFMap &_averages, float &_best_average, float &_min_sum)
void put_uint(unsigned int _ui)
PS_ProbeSetCIter getProbesEnd() const
void PS_remove_bad_probes(PS_NodeSet &_nodes, float _average, set< unsigned int > &_probe_lengths)
set< PS_CandidateSPtr, cmp_candidates > PS_CandidateSet
set< unsigned short > USSet
PS_CandidateByGainMap children
pair< PS_CandidatePtr, PS_NodeSet > PS_Candidate2NodeSetPair
PS_CandidateByGainMap::reverse_iterator PS_CandidateByGainMapRIter
void PS_get_node_paths(PS_CandidateSet &_leaf_candidates, PS_Candidate2NodeSetPairByLengthMap &_paths)
set< PS_Node * > PS_NodeSet
void PS_calc_sums_for_nodepath(PS_NodeSet &_nodepath, ULSet &_sums)
unsigned long PS_calc_temp(const PS_ProbePtr &_probe)
void load(PS_FileBuffer *_file, const unsigned long _bits_in_map, const PS_NodePtr &_root_node)
static const bool WRITEONLY
IDSet::const_iterator IDSetCIter