ARB
ali_global.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ali_global.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "ali_global.hxx"
12 
// Default values for the aligner parameters.
// (The statements that use them are not visible in this listing.)
//
// None of these macros may end in ';': a trailing semicolon becomes part of
// the expansion and breaks every use inside a larger expression, e.g.
// 'INSERT_FACTOR_DEFAULT * x' or as a function argument.
#define EXCLUSIVE_FLAG_DEFAULT 1
#define MARK_FAMILY_FLAG_DEFAULT 0
#define MARK_EXTENSION_FLAG_DEFAULT 0
#define FIND_FAMILY_MODE_DEFAULT 1
#define MAX_FAMILY_SIZE_DEFAULT 10
#define MIN_FAMILY_SIZE_DEFAULT 5
#define MIN_WEIGHT_DEFAULT 0.7
#define EXT_MAX_WEIGHT_DEFAULT 0.2
#define MULTI_GAP_FACTOR_DEFAULT 0.1
#define INSERT_FACTOR_DEFAULT 2.0
#define MULTI_INSERT_FACTOR_DEFAULT 0.5
#define COST_LOW_DEFAULT 0.25
#define COST_MIDDLE_DEFAULT 0.5
#define COST_HIGH_DEFAULT 0.8
#define MAX_COST_OF_SUB_PERCENT_DEFAULT 0.5
#define MAX_COST_OF_HELIX 2.0
#define ERROR_COUNT_DEFAULT 2
#define MAX_NUMBER_OF_MAPS_DEFAULT 10 // 100
#define MAX_NUMBER_OF_MAPS_ALIGNER_DEFAULT 2
#define INTERVAL_BORDER_DEFAULT 5
#define INTERVAL_CENTER_DEFAULT 5

// ATTENTION: the following values still have to be made settable via parameters
#define MATCHES_MIN_DEFAULT 1000
#define PERCENT_MIN_DEFAULT 0.75
#define FAM_LIST_MAX_DEFAULT 5
#define EXT_LIST_MAX_DEFAULT 10
#define USE_SPECIFIED_FAMILY_DEFAULT NULp
41 
42 
// Default values for the 5x5 substitute matrix.
// Rows and columns are both ordered a, c, g, u, '-' (see the labels below).
// The table is symmetric and has 0.0 on the whole diagonal.
43 static double default_substitute_matrix[5][5] = {
44  // a c g u -
45  { 0.0, 3.0, 1.0, 3.0, 5.0 }, // a
46  { 3.0, 0.0, 3.0, 1.0, 5.0 }, // c
47  { 1.0, 3.0, 0.0, 3.0, 5.0 }, // g
48  { 3.0, 1.0, 3.0, 0.0, 5.0 }, // u
49  { 5.0, 5.0, 5.0, 5.0, 0.0 } // -
50 };
51 
// Default values for the 5x5 binding matrix.
// Rows and columns are both ordered a, c, g, u, '-' (see the labels below).
// The table is symmetric; 9.9 is the largest entry and is used for most
// combinations, only a-g, a-u, c-g, g-g, g-u and '-'-'-' have smaller values.
52 static double default_binding_matrix[5][5] = {
53  // a c g u -
54  { 9.9, 9.9, 2.0, 0.9, 9.9 }, // a
55  { 9.9, 9.9, 0.6, 9.9, 9.9 }, // c
56  { 2.0, 0.6, 5.0, 1.1, 9.9 }, // g
57  { 0.9, 9.9, 1.1, 9.9, 9.9 }, // u
58  { 9.9, 9.9, 9.9, 9.9, 0.0 } // -
59 };
60 
61 
// Set up the global aligner state from the command line: fetch the common ARB
// parameters, evaluate the aligner options, run consistency checks, open the
// database and create the ALI_PT object.
//
// NOTE(review): this listing is incomplete. The embedded source line numbers
// skip (e.g. 78 -> 81, 82 -> 95 -> 107 -> 113, 118 -> 120, 122 -> 124,
// 352 -> 354), so the assignment of the defaults, several option bodies and
// parts of the consistency conditions are not shown. The comments below only
// describe the visible lines.
//
// argc, argv: the command line. It is first passed to arb_trace_argv(); every
//             option recognized below is then removed from argv and *argc is
//             decremented accordingly.
62 void ALI_GLOBAL::init(int *argc, const char *argv[]) {
   // kill: index of the argument recognized in the current pass (0 = none)
   // pos:  start of the value text that follows an option name
   // fl:   scratch value for the matrix options (-msub / -mbind)
63  int kill, i, h, j, ret;
64  const char *pos;
65  arb_params *params;
66  float fl;
67 
68  params = arb_trace_argv(argc, argv);
69 
   // take over program name and the values delivered by arb_trace_argv()
70  prog_name = argv[0];
71  species_name = params->species_name;
72  default_file = params->default_file;
73  db_server = params->db_server;
74 
75  // Set the defaults
76 
   // NOTE(review): the body of this 5x5 loop (source lines 79-80) and the
   // following default assignments are missing from this listing.
77  for (i = 0; i < 5; i++)
78  for (j = 0; j < 5; j++) {
81  }
82 
95 
107 
113 
114  // evaluate the parameters
115 
   // Each pass examines argv[i]. The tests are tried in order; the first match
   // sets kill and thereby disables all later tests (they all require
   // kill == 0). Options tested with strcmp must match exactly. Options tested
   // with strncmp only need the given prefix; their value is read from the
   // characters directly following the option name in the same argument.
   //
   // For the single-value options a failed sscanf() issues a warning and the
   // 'break' leaves this option loop: the remaining arguments are not
   // evaluated and the offending argument stays in argv.
116  for (i = 1; i < *argc;) {
117  kill = 0;
   // NOTE(review): the statements executed for -nx, -f, -mf and -mfe are
   // missing from this listing (source lines 119, 123, 131, 135).
118  if (strcmp("-nx", argv[i]) == 0 && kill == 0) {
120  kill = i;
121  }
   // prefix test: matches every argument that starts with "-f"
122  if (strncmp("-f", argv[i], 2) == 0 && kill == 0) {
124  kill = i;
125  }
126  if (strcmp("-ms", argv[i]) == 0 && kill == 0) {
127  mark_species_flag = 1;
128  kill = i;
129  }
130  if (strcmp("-mf", argv[i]) == 0 && kill == 0) {
132  kill = i;
133  }
134  if (strcmp("-mfe", argv[i]) == 0 && kill == 0) {
136  kill = i;
137  }
138  if (strncmp("-mgf", argv[i], 4) == 0 && kill == 0) {
139  kill = i;
140  pos = argv[i] + 4;
141  ret = sscanf(pos, "%f", &prof_context.multi_gap_factor);
142  if (ret != 1) {
143  ali_warning("Wrong format for -mgf");
144  break;
145  }
146  }
147  if (strncmp("-if", argv[i], 3) == 0 && kill == 0) {
148  kill = i;
149  pos = argv[i] + 3;
150  ret = sscanf(pos, "%f", &prof_context.insert_factor);
151  if (ret != 1) {
152  ali_warning("Wrong format for -if");
153  break;
154  }
155  }
156  if (strncmp("-mif", argv[i], 4) == 0 && kill == 0) {
157  kill = i;
158  pos = argv[i] + 4;
159  ret = sscanf(pos, "%f", &prof_context.multi_insert_factor);
160  if (ret != 1) {
161  ali_warning("Wrong format for -mif");
162  break;
163  }
164  }
   // -m sets mark_species_flag; source line 167 is missing from this listing
165  if (strcmp("-m", argv[i]) == 0 && kill == 0) {
166  mark_species_flag = 1;
168  kill = i;
169  }
   // -msub: up to 25 comma separated values, stored row by row into
   // prof_context.substitute_matrix.
   // NOTE(review): both 'break's only leave the inner j loop, the h loop goes on.
   //  - after "wrong format" pos is unchanged, so the same text is parsed (and
   //    rejected) again for each remaining row;
   //  - after "Not enought values" pos is NULL and is passed to sscanf() in the
   //    next row;
   //  - for the last value (h == 4, j == 4) without a trailing ',' the final
   //    pos++ is applied to a NULL pointer.
170  if (strncmp("-msub", argv[i], 5) == 0 && kill == 0) {
171  kill = i;
172  pos = argv[i] + 5;
173  for (h = 0; h < 5; h++)
174  for (j = 0; j < 5; j++) {
175  ret = sscanf(pos, "%f", &fl);
176  if (ret != 1) {
177  ali_warning("wrong format for -msub");
178  break;
179  }
180  else
181  prof_context.substitute_matrix[h][j] = (double) fl;
182  pos = strchr(pos, ',');
183  if ((h != 4 || j != 4) && !pos) {
184  ali_warning("Not enought values for -msub");
185  break;
186  }
187  pos++;
188  }
189  }
   // -mbind: same scheme as -msub, filling prof_context.binding_matrix.
   // NOTE(review): the same inner-loop 'break' / NULL pos problems as for
   // -msub apply here.
190  if (strncmp("-mbind", argv[i], 6) == 0 && kill == 0) {
191  kill = i;
192  pos = argv[i] + 6;
193  for (h = 0; h < 5; h++)
194  for (j = 0; j < 5; j++) {
195  ret = sscanf(pos, "%f", &fl);
196  if (ret != 1) {
197  ali_warning("Wrong format for -mbind");
198  break;
199  }
200  else
201  prof_context.binding_matrix[h][j] = (double) fl;
202  pos = strchr(pos, ',');
203  if ((h != 4 || j != 4) && !pos) {
204  ali_warning("Not enought values for -mbind");
205  break;
206  }
207  pos++;
208  }
209  }
210  if (strncmp("-maxf", argv[i], 5) == 0 && kill == 0) {
211  kill = i;
212  pos = argv[i] + 5;
213  ret = sscanf(pos, "%d", &prof_context.max_family_size);
214  if (ret != 1) {
215  ali_warning("Wrong format for -maxf");
216  break;
217  }
218  }
219  if (strncmp("-minf", argv[i], 5) == 0 && kill == 0) {
220  kill = i;
221  pos = argv[i] + 5;
222  ret = sscanf(pos, "%d", &prof_context.min_family_size);
223  if (ret != 1) {
224  ali_warning("Wrong format for -minf");
225  break;
226  }
227  }
228  if (strncmp("-minw", argv[i], 5) == 0 && kill == 0) {
229  kill = i;
230  pos = argv[i] + 5;
231  ret = sscanf(pos, "%f", &prof_context.min_weight);
232  if (ret != 1) {
233  ali_warning("Wrong format for -minw");
234  break;
235  }
236  }
   // NOTE(review): the warning below names -minw although this branch
   // handles -maxew (looks like a copy/paste slip).
237  if (strncmp("-maxew", argv[i], 6) == 0 && kill == 0) {
238  kill = i;
239  pos = argv[i] + 6;
240  ret = sscanf(pos, "%f", &prof_context.ext_max_weight);
241  if (ret != 1) {
242  ali_warning("Wrong format for -minw");
243  break;
244  }
245  }
246 
247  // ATTENTION: Unused BEGIN
248  if (strncmp("-cl", argv[i], 3) == 0 && kill == 0) {
249  kill = i;
250  pos = argv[i] + 3;
251  ret = sscanf(pos, "%f", &cost_low);
252  if (ret != 1) {
253  ali_warning("Wrong format for -cl");
254  break;
255  }
256  }
257  if (strncmp("-cm", argv[i], 3) == 0 && kill == 0) {
258  kill = i;
259  pos = argv[i] + 3;
260  ret = sscanf(pos, "%f", &cost_middle);
261  if (ret != 1) {
262  ali_warning("Wrong format for -cm");
263  break;
264  }
265  }
   // NOTE(review): the 3 character prefix "-ch" also matches every argument
   // starting with "-chel". Such an argument sets kill here, its value is
   // parsed starting at "el...", and this branch warns and breaks; the -chel
   // test further down can therefore never match.
266  if (strncmp("-ch", argv[i], 3) == 0 && kill == 0) {
267  kill = i;
268  pos = argv[i] + 3;
269  ret = sscanf(pos, "%f", &cost_high);
270  if (ret != 1) {
271  ali_warning("Wrong format for -ch");
272  break;
273  }
274  }
275  // ATTENTION: Unused END
276 
277  if (strncmp("-csub", argv[i], 5) == 0 && kill == 0) {
278  kill = i;
279  pos = argv[i] + 5;
280  ret = sscanf(pos, "%f", &preali_context.max_cost_of_sub_percent);
281  if (ret != 1) {
282  ali_warning("Wrong format for -csub");
283  break;
284  }
285  }
   // unreachable for "-chel..." arguments, see the note at -ch above
286  if (strncmp("-chel", argv[i], 5) == 0 && kill == 0) {
287  kill = i;
288  pos = argv[i] + 5;
289  ret = sscanf(pos, "%f", &preali_context.max_cost_of_helix);
290  if (ret != 1) {
291  ali_warning("Wrong format for -chel");
292  break;
293  }
294  }
   // -mma has to be tested before -mm, because "-mm" is a prefix of "-mma"
295  if (strncmp("-mma", argv[i], 4) == 0 && kill == 0) {
296  kill = i;
297  pos = argv[i] + 4;
298  ret = sscanf(pos, "%ld", &preali_context.max_number_of_maps_aligner);
299  if (ret != 1) {
300  ali_warning("Wrong format for -mma");
301  break;
302  }
303  }
304  if (strncmp("-mm", argv[i], 3) == 0 && kill == 0) {
305  kill = i;
306  pos = argv[i] + 3;
307  ret = sscanf(pos, "%ld", &preali_context.max_number_of_maps);
308  if (ret != 1) {
309  ali_warning("Wrong format for -mm");
310  break;
311  }
312  }
313  if (strncmp("-ec", argv[i], 3) == 0 && kill == 0) {
314  kill = i;
315  pos = argv[i] + 3;
316  ret = sscanf(pos, "%ld", &preali_context.error_count);
317  if (ret != 1) {
318  ali_warning("Wrong format for -ec");
319  break;
320  }
321  }
322  if (strncmp("-ib", argv[i], 3) == 0 && kill == 0) {
323  kill = i;
324  pos = argv[i] + 3;
325  ret = sscanf(pos, "%d", &preali_context.interval_border);
326  if (ret != 1) {
327  ali_warning("Wrong format for -ib");
328  break;
329  }
330  }
331  if (strncmp("-ic", argv[i], 3) == 0 && kill == 0) {
332  kill = i;
333  pos = argv[i] + 3;
334  ret = sscanf(pos, "%d", &preali_context.interval_center);
335  if (ret != 1) {
336  ali_warning("Wrong format for -ic");
337  break;
338  }
339  }
340 
   // A recognized argument is removed: all following arguments move down by
   // one, *argc shrinks, and i is reset to kill so that the argument which
   // now occupies this index is examined next. Unrecognized arguments are
   // skipped and stay in argv.
341  if (kill > 0) {
342  for (i++; i < *argc; i++)
343  argv[i-1] = argv[i];
344  (*argc)--;
345  i = kill;
346  }
347  else
348  i++;
349  }
350 
351  // Check for consistency
352 
   // NOTE(review): the 'if' lines of the first two checks (source lines 353
   // and 357) and the first line of the cost check (365) are missing from
   // this listing; only the warnings and the tail of the last condition are
   // visible. The visible checks only warn.
354  ali_warning("minf <= maxf");
355  }
356 
358  ali_warning("0 <= maxew <= 1.0");
359  }
360 
361  if (prof_context.min_weight < 0 || prof_context.min_weight > 1.0) {
362  ali_warning("0 <= minw <= 1.0");
363  }
364 
366  cost_low < 0 || cost_high > 1.0) {
367  ali_warning("0 <= cl <= cm <= ch <= 1.0");
368  }
369 
370  // Open Database and Pt server
371 
372  ali_message("Connecting to Database server");
   // a non-zero result of arbdb.open() is reported via ali_error()
373  if (arbdb.open(db_server) != 0) {
374  ali_error("Can't connect to Database server");
375  }
376  ali_message("Connection established");
378 
   // NOTE(review): source lines 377 and 380 are missing from this listing.
379  pt_context.servername = params->pt_server;
381 
   // create the ALI_PT object from pt_context and hand it to the profile context
382  pt = new ALI_PT(&pt_context);
383 
384  prof_context.pt = pt;
385 }
#define INTERVAL_CENTER_DEFAULT
Definition: ali_global.cxx:33
#define MAX_FAMILY_SIZE_DEFAULT
Definition: ali_global.cxx:17
static double default_substitute_matrix[5][5]
Definition: ali_global.cxx:43
#define INTERVAL_BORDER_DEFAULT
Definition: ali_global.cxx:32
#define MAX_NUMBER_OF_MAPS_ALIGNER_DEFAULT
Definition: ali_global.cxx:31
#define EXT_LIST_MAX_DEFAULT
Definition: ali_global.cxx:39
#define FAM_LIST_MAX_DEFAULT
Definition: ali_global.cxx:38
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
#define COST_MIDDLE_DEFAULT
Definition: ali_global.cxx:25
float cost_high
Definition: ali_global.hxx:39
#define MIN_WEIGHT_DEFAULT
Definition: ali_global.cxx:19
const char * prog_name
Definition: ali_global.hxx:23
#define COST_HIGH_DEFAULT
Definition: ali_global.cxx:26
#define INSERT_FACTOR_DEFAULT
Definition: ali_global.cxx:22
float percent_min
Definition: ali_pt.hxx:34
ALI_PREALIGNER_CONTEXT preali_context
Definition: ali_global.hxx:44
char * db_server
Definition: servercntrl.h:26
#define ERROR_COUNT_DEFAULT
Definition: ali_global.cxx:29
#define MAX_COST_OF_SUB_PERCENT_DEFAULT
Definition: ali_global.cxx:27
arb_params * arb_trace_argv(int *argc, const char **argv)
#define PERCENT_MIN_DEFAULT
Definition: ali_global.cxx:37
#define MARK_EXTENSION_FLAG_DEFAULT
Definition: ali_global.cxx:15
void init(int *argc, const char *argv[])
Definition: ali_global.cxx:62
GBDATA * gb_main
Definition: ali_arbdb.hxx:24
ALI_PT_CONTEXT pt_context
Definition: ali_global.hxx:42
int mark_species_flag
Definition: ali_global.hxx:34
char * species_name
Definition: servercntrl.h:16
float cost_middle
Definition: ali_global.hxx:38
void ali_error(const char *message, const char *func)
Definition: ali_main.cxx:90
#define USE_SPECIFIED_FAMILY_DEFAULT
Definition: ali_global.cxx:40
char * default_file
Definition: ali_global.hxx:25
#define MARK_FAMILY_FLAG_DEFAULT
Definition: ali_global.cxx:14
#define FIND_FAMILY_MODE_DEFAULT
Definition: ali_global.cxx:16
char * servername
Definition: ali_pt.hxx:30
#define COST_LOW_DEFAULT
Definition: ali_global.cxx:24
#define EXT_MAX_WEIGHT_DEFAULT
Definition: ali_global.cxx:20
char * species_name
Definition: ali_global.hxx:24
ALI_ARBDB arbdb
Definition: ali_global.hxx:30
#define MULTI_GAP_FACTOR_DEFAULT
Definition: ali_global.cxx:21
#define MAX_COST_OF_HELIX
Definition: ali_global.cxx:28
GBDATA * gb_main
Definition: ali_pt.hxx:31
void ali_warning(const char *message)
Definition: ali_misc.hxx:47
static double default_binding_matrix[5][5]
Definition: ali_global.cxx:52
unsigned long ext_list_max
Definition: ali_pt.hxx:36
#define EXCLUSIVE_FLAG_DEFAULT
Definition: ali_global.cxx:13
void ali_message(const char *message)
Definition: ali_misc.hxx:46
#define MIN_FAMILY_SIZE_DEFAULT
Definition: ali_global.cxx:18
float cost_low
Definition: ali_global.hxx:37
char * pt_server
Definition: servercntrl.h:28
ALI_PT * pt
Definition: ali_global.hxx:31
int open(char *name, char *use_alignment=NULp)
Definition: ali_arbdb.cxx:24
char * db_server
Definition: ali_global.hxx:26
char * use_specified_family
Definition: ali_pt.hxx:38
char * default_file
Definition: servercntrl.h:19
#define MATCHES_MIN_DEFAULT
Definition: ali_global.cxx:36
double binding_matrix[5][5]
Definition: ali_profile.hxx:49
#define MULTI_INSERT_FACTOR_DEFAULT
Definition: ali_global.cxx:23
int matches_min
Definition: ali_pt.hxx:33
unsigned long fam_list_max
Definition: ali_pt.hxx:35
#define MAX_NUMBER_OF_MAPS_DEFAULT
Definition: ali_global.cxx:30
ALI_PROFILE_CONTEXT prof_context
Definition: ali_global.hxx:43
double substitute_matrix[5][5]
Definition: ali_profile.hxx:48