ARB
arb_export_rates.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_export_rates.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arbdbt.h>
12 #include <aw_awar_defs.hxx>
13 
14 /* Input: SAI name from CL
15  * fastdnaml-args from CL
16  * ALINAME: AWAR_DEFAULT_ALIGNMENT
17  * FILTER: AWAR_GDE_EXPORT_FILTER
18  *
19  * Output:
20  * If SAI + Sequence is found: rates in fastdnaml-format (can be piped into arb_convert_aln)
21  * Otherwise : just forward args
22  *
23  * If flag '-r' is used, weights are always printed. If no SAI given, every alignment-column
24  * is given the same weight (1).
25  */
26 
27 #define CATSCALE 0.71 // downscale factor for rates
28 #define MIO 1000000 // factor to scale rate-values to integers (for RAxML)
29 
30 int ARB_main(int argc, char *argv[]) {
31  argc--; argv++;
32 
33  if (argc<1 || strcmp(argv[0], "--help") == 0) {
34  fprintf(stderr,
35  "\n"
36  "arb_export_rates: Add a line to phylip format which can be used by fastdnaml for rates\n"
37  "syntax: arb_export_rates [--arb-notify] [-d dbname] [SAI_NAME|'--none'] [other_fastdnaml_args]*\n"
38  "if other_fastdnaml_args are given they are inserted to the output\n"
39  "\n"
40  "or\n"
41  "\n"
42  "arb_export_rates: Write a weightsfile for RAxML\n"
43  "syntax: arb_export_rates [--arb-notify] [-d dbname] -r [SAI_NAME|'--none']\n"
44  "\n"
45  "Note: if no DB is specified using -d, the default server ':' will be used.\n"
46  );
47  return EXIT_FAILURE;
48  }
49 
50  bool RAxML_mode = false;
51  bool use_arb_message = false;
52 
53  const char *dbname = ":";
54  const char *SAI_name = NULp;
55 
56  if (argc >= 2) {
57  if (strcmp(argv[0], "--arb-notify") == 0) {
58  use_arb_message = true;
59  argc--; argv++;
60  }
61  }
62  if (argc >= 2) {
63  if (strcmp(argv[0], "-d") == 0) {
64  dbname = argv[1];
65  argc -= 2;
66  argv += 2;
67  }
68  }
69  if (argc >= 2) {
70  if (strcmp(argv[0], "-r") == 0) {
71  RAxML_mode = true;
72  argc--; argv++;
73  }
74  }
75 
77  if (argc >= 1) {
78  SAI_name = argv[0];
79  argc--; argv++;
80  }
81  else {
82  error = "Missing argument 'SAI_NAME'";
83  }
84 
85  if (!error) {
86  GB_shell shell;
87  GBDATA *gb_main = GBT_open(dbname, "r");
88  if (!gb_main) {
89  error = GB_await_error();
90  }
91  else {
92  char *seq = NULp;
93  char *filter = NULp;
94  int seq_len = 0;
95  int filter_len = 0;
96  long ali_len = 0;
97 
98  {
99  GB_transaction ta(gb_main);
100 
101  char *ali_name = GBT_get_default_alignment(gb_main);
102  if (!ali_name) {
103  error = GB_await_error();
104  }
105  else {
106  ali_len = GBT_get_alignment_len(gb_main, ali_name);
107  arb_assert(ali_len>0);
108 
109  filter = GBT_read_string(gb_main, AWAR_GDE_EXPORT_FILTER);
110  if (!filter) {
111  error = "Expected entry '" AWAR_GDE_EXPORT_FILTER "' does not exist";
112  }
113  else {
114  filter_len = strlen(filter);
115 
116  bool have_no_sai = SAI_name[0] == 0 || strcmp(SAI_name, "--none") == 0;
117  if (!have_no_sai) {
118  GBDATA *gb_sai = GBT_find_SAI(gb_main, SAI_name);
119  if (gb_sai) {
120  GBDATA *gb_data = GBT_find_sequence(gb_sai, ali_name);
121  if (gb_data) {
122  seq = GB_read_string(gb_data); // @@@ NOT_ALL_SAI_HAVE_DATA
123  if (!seq) {
124  error = GBS_global_string("SAI '%s' has no data in '%s'", SAI_name, ali_name);
125  }
126  else {
127  seq_len = strlen(seq);
128  }
129  }
130  }
131  else {
132  error = GBS_global_string("No such SAI '%s'", SAI_name);
133  }
134  }
135  }
136  free(ali_name);
137  }
138  }
139 
140  if (!error) {
141  if (!RAxML_mode) {
142 #define APPEARS_IN_HEADER(c) (!strchr(not_in_header, (c)))
143  const char *not_in_header = "Y"; // these flags don't appear in header and they should be written directly after header
144 
145  {
146  char *firstline = ARB_strdup("");
147  for (int arg = 0; arg<argc; ++arg) { // put all fastdnaml arguments to header
148  char command_char = argv[arg][0];
149  if (!command_char) continue; // skip empty arguments
150 
151  if (APPEARS_IN_HEADER(command_char)) {
152  freeset(firstline, GBS_global_string_copy("%s %c", firstline, command_char));
153  }
154  }
155 
156  // print header
157  if (seq) fputc('C', stdout); // prefix with categories
158  fputs(firstline, stdout); // then other_fastdnaml_args
159  free(firstline);
160  }
161 
162  // print other_fastdnaml_args in reverse order
163  // (first those which do not appear in header, rest afterwards)
164  for (int appears_in_header = 0; appears_in_header <= 1; ++appears_in_header) {
165  for (int arg = 0; arg < argc; ++arg) { // print [other_fastdnaml_args]*
166  if (!argv[arg][0]) continue; // skip empty arguments
167  if (!argv[arg][1]) continue; // don't print single character commands again on a own line
168  if (APPEARS_IN_HEADER(argv[arg][0]) != appears_in_header) continue;
169  fputc('\n', stdout);
170  fputs(argv[arg], stdout);
171  }
172  }
173 #undef APPEARS_IN_HEADER
174 
175  if (seq) { // if SAI was found
176  printf("\nC 35 ");
177  double rate = 1.0;
178  int i;
179  for (i=0; i<35; i++) {
180  printf("%f ", rate);
181  rate *= CATSCALE;
182  }
183  printf("\nCategories ");
184 
185  for (i=0; i<seq_len; i++) {
186  if (i>filter_len || filter[i] != '0') {
187  int c = seq[i];
188 
189  arb_assert(c != '0'); // only 35 cats (1-9 and A-Z)
190 
191  if ((c < '0' || c>'9') && (c < 'A' || c>'Z')) c = '1';
192  putchar(c);
193  }
194  }
195  for (; i<ali_len; i++) {
196  putchar('1');
197  }
198  }
199  }
200  else {
201  // write RAxML weightsfile content
202  int cnt = 0;
203 
204  char *weight['Z'+1];
205  int i;
206 
207  for (i = 0; i <= 'Z'; i++) weight[i] = NULp;
208 
209  double rate = 1.0;
210 
211  for (i = '1'; i <= '9'; i++) {
212  weight[i] = GBS_global_string_copy(" %i", int(rate*MIO+0.5));
213  rate *= CATSCALE;
214  }
215  for (i = 'A'; i <= 'Z'; i++) {
216  weight[i] = GBS_global_string_copy(" %i", int(rate*MIO+0.5));
217  rate *= CATSCALE;
218  }
219 
220  if (!seq) { // no SAI selected -> unique weights
221  seq = (char*)malloc(ali_len+1);
222  memset(seq, '1', ali_len);
223  seq[ali_len] = 0;
224 
225  seq_len = ali_len;
226 
227  freedup(weight['1'], " 1");
228  }
229 
230  for (i=0; i<seq_len; i++) {
231  if (i>filter_len || filter[i] != '0') {
232  int c = seq[i];
233  if ((c < '0' || c>'9') && (c < 'A' || c>'Z')) c = '1';
234  fputs(weight[c], stdout);
235  if (++cnt>30) {
236  fputc('\n', stdout);
237  cnt = 0;
238  }
239  }
240  }
241 
242  for (; i<ali_len; i++) {
243  if (i>filter_len || filter[i] != '0') {
244  fputs(weight['1'], stdout);
245  if (++cnt>30) {
246  fputc('\n', stdout);
247  cnt = 0;
248  }
249  }
250  }
251 
252  for (i = 0; i <= 'Z'; i++) free(weight[i]);
253 
254  fputc('\n', stdout);
255  }
256 
257  free(filter);
258  free(seq);
259  GB_close(gb_main);
260  }
261  }
262  }
263 
264  if (error) {
265  fprintf(stderr, "Error in arb_export_rates: %s\n", error);
266  if (use_arb_message) {
267  char *quotedErrorMsg = GBK_singlequote(GBS_global_string("Error in arb_export_rates: %s", error));
268  GB_ERROR msgerror = GBK_system(GBS_global_string("arb_message %s &", quotedErrorMsg)); // send async to avoid deadlock
269  if (msgerror) fprintf(stderr, "Error: %s\n", msgerror);
270  free(quotedErrorMsg);
271  }
272  return EXIT_FAILURE;
273  }
274  return EXIT_SUCCESS;
275 }
GB_ERROR GBK_system(const char *system_command)
Definition: arb_msg.cxx:571
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
#define MIO
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
#define APPEARS_IN_HEADER(c)
FILE * seq
Definition: rns.c:46
#define AWAR_GDE_EXPORT_FILTER
GBDATA * GBT_open(const char *path, const char *opent)
Definition: adtools.cxx:524
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:177
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
static int weight[maxsites+1]
static void error(const char *msg)
Definition: mkptypes.cxx:96
fputc('\n', stderr)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
#define CATSCALE
fputs(TRACE_PREFIX, stderr)
char * GBK_singlequote(const char *arg)
Definition: arb_msg.cxx:599
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:909
#define NULp
Definition: cxxforward.h:116
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
int ARB_main(int argc, char *argv[])
GB_transaction ta(gb_var)
GBDATA * gb_main
Definition: adname.cxx:32
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655