15 #include <sys/times.h>
32 unsigned int gc_content;
36 for (
id = _path.begin();
37 (
id != _path.end()) && arb_node;
55 printf(
" ERROR : failed to get node for ID (%i)\n", *
id);
60 arb_group =
GB_entry(arb_node,
"group");
62 printf(
" ERROR : failed to get group of node");
67 printf(
" ERROR : failed to get first probe of group of node");
70 while (_probe_data[0] ==
'\x00') {
73 for (
unsigned int i = 0; i < _probe_length; ++i) {
74 if ((buffer[i] ==
'C') || (buffer[i] ==
'G')) ++gc_content;
76 if (gc_content == _GC_content) {
77 for (
unsigned int i = 0; i < _probe_length; ++i) {
78 _probe_data[i] = buffer[i];
87 printf(
" ERROR : failed to find probe with GC-content (%u)\n", _GC_content);
103 printf(
"Missing argument\n Usage %s <final candidates paths filename> <prefix to arb-databases>\n", argv[0]);
104 printf(
"Example:\n %s ~/data/850.final_candidates.paths ~/data/ssjun03_Eucarya_850.pg_\n", argv[0]);
108 char *final_candidates_paths_filename = argv[1];
109 char *arb_db_name_prefix = argv[2];
110 char *temp_filename = (
char *)malloc(strlen(final_candidates_paths_filename)+1+5);
111 strcpy(temp_filename, final_candidates_paths_filename);
112 strcat(temp_filename,
".temp");
113 unlink(temp_filename);
114 printf(
"Opening temp-file '%s'..\n", temp_filename);
120 printf(
"Opening candidates-paths-file '%s'..\n", final_candidates_paths_filename);
122 unsigned long paths_todo;
123 unsigned int probe_length_todo = 0;
124 unsigned int probe_buffer_length = 100;
125 char *probe_buffer =
NULp;
133 set<unsigned int> probe_lengths;
136 printf(
"probe lengths :");
137 for (
unsigned int i = 0; i < count; ++i) {
141 probe_lengths.insert(length);
142 printf(
" %u", length);
148 printf(
"\npaths todo (%lu)\n", paths_todo--);
149 unsigned int probe_length;
150 unsigned int probe_GC_content;
151 unsigned int path_length;
158 paths_file->
get_uint(probe_GC_content);
159 temp__file->
put_uint(probe_GC_content);
160 printf(
" probe length (%u) GC (%u)\n", probe_length, probe_GC_content);
163 printf(
" path size (%u) ( ", path_length);
164 for (
unsigned int i = 0; i < path_length; ++i) {
171 if (!probe_buffer || (probe_length > probe_buffer_length)) {
174 probe_buffer_length = 2 * probe_length;
176 probe_buffer = (
char*)calloc(probe_buffer_length,
sizeof(
char));
178 paths_file->
get(probe_buffer, probe_length);
179 probe_buffer[probe_length] =
'\x00';
182 if (probe_buffer[0] ==
'\x00') {
183 if (!probe_length_todo) {
184 printf(
"handling probes of length %u this time\n", probe_length);
185 probe_length_todo = probe_length;
188 __ARB_DB_NAME = (
char*)malloc(strlen(arb_db_name_prefix)+1+7+5);
189 sprintf(__ARB_DB_NAME,
"%s%utmp.arb", arb_db_name_prefix, probe_length);
190 printf(
"Opening ARB-Database '%s'..\n ", __ARB_DB_NAME);
191 __ARB_DB =
GB_open(__ARB_DB_NAME,
"rN");
194 printf(
"%s\n", __ARB_ERROR);
198 __ARB_GROUP_TREE =
GB_entry(__ARB_DB,
"group_tree");
199 if (!__ARB_GROUP_TREE) {
200 printf(
"no 'group_tree' in database\n");
204 if (!first_level_node) {
205 printf(
"no 'node' found in group_tree\n");
209 if (probe_length_todo == probe_length) {
214 printf(
" probe data (%s) ==> updated\n", probe_buffer);
217 printf(
" probe data (%s) --> skipped\n", probe_buffer);
221 printf(
" probe data (%s) --> finished\n", probe_buffer);
223 temp__file->
put(probe_buffer, probe_length);
225 probe_lengths.clear();
227 for (
unsigned int i = 0; i < count; ++i) {
230 if (length != probe_length_todo) probe_lengths.insert(length);
232 printf(
"remaining probe lengths :");
233 temp__file->
put_uint(probe_lengths.size());
234 for (set<unsigned int>::iterator
length = probe_lengths.begin();
235 length != probe_lengths.end();
241 if (probe_buffer) free(probe_buffer);
242 if (__ARB_DB_NAME) free(__ARB_DB_NAME);
244 printf(
"cleaning up... temp-file\n");
fflush(stdout);
246 printf(
"cleaning up... candidates-paths-file\n");
fflush(stdout);
248 printf(
"moving temp-file to candiates-paths-file\n");
fflush(stdout);
249 rename(temp_filename, final_candidates_paths_filename);
254 return probe_lengths.size()*2;
void put_ulong(unsigned long int _ul)
void get(void *_data, int _length)
static const bool READONLY
GBDATA * PG_get_first_probe(GBDATA *pb_group)
GBDATA * GB_open(const char *path, const char *opent)
GBDATA * PG_get_next_probe(GBDATA *pb_probe)
bool PS_get_probe_for_path(IDSet &_path, unsigned int _GC_content, unsigned int _probe_length, char *_probe_data)
std::set< SpeciesID > IDSet
char buffer[MESSAGE_BUFFERSIZE]
void put_uint(unsigned int _ui)
void get_uint(unsigned int &_ui)
GB_ERROR GB_await_error()
GBDATA * PS_get_first_node(GBDATA *pb_nodecontainer)
static char * __ARB_DB_NAME
void put(const void *_data, int _length)
void get_ulong(unsigned long int &_ul)
GBDATA * PS_get_next_node(GBDATA *pb_node)
int main(int argc, char *argv[])
static GB_ERROR __ARB_ERROR
GB_transaction ta(gb_var)
static GBDATA * __ARB_GROUP_TREE
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
static const bool WRITEONLY
const char * PG_read_probe(GBDATA *pb_probe)
GBDATA * GB_entry(GBDATA *father, const char *key)
IDSet::const_iterator IDSetCIter