ARB
arb_export_rates.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_export_rates.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arbdbt.h>
12 #include <aw_awar_defs.hxx>
13 
14 /* Input: SAI name from CL
15  * fastdnaml-args from CL
16  * ALINAME: AWAR_DEFAULT_ALIGNMENT
17  * FILTER: AWAR_GDE_EXPORT_FILTER
18  *
19  * Output:
20  * If SAI + Sequence is found: rates in fastdnaml-format (can be piped into arb_convert_aln)
21  * Otherwise : just forward args
22  *
23  * If flag '-r' is used, weights are always printed. If no SAI given, every alignment-column
24  * is given the same weight (1).
25  */
26 
27 #define CATSCALE 0.71 // downscale factor for rates
28 #define MIO 1000000 // factor to scale rate-values to integers (for RAxML)
29 
30 int ARB_main(int argc, char *argv[]) {
31  argc--; argv++;
32 
33  if (argc<1 || strcmp(argv[0], "--help") == 0) {
34  fprintf(stderr,
35  "\n"
36  "arb_export_rates: Add a line to phylip format which can be used by fastdnaml for rates\n"
37  "syntax: arb_export_rates [--arb-notify] [-d dbname] [SAI_NAME|'--none'] [other_fastdnaml_args]*\n"
38  "if other_fastdnaml_args are given they are inserted to the output\n"
39  "\n"
40  "or\n"
41  "\n"
42  "arb_export_rates: Write a weightsfile for RAxML\n"
43  "syntax: arb_export_rates [--arb-notify] [-d dbname] -r [SAI_NAME|'--none']\n"
44  "\n"
45  "Note: if no DB is specified using -d, the default server ':' will be used.\n"
46  );
47  return EXIT_FAILURE;
48  }
49 
50  bool RAxML_mode = false;
51  bool use_arb_message = false;
52 
53  const char *dbname = ":";
54  const char *SAI_name = NULp;
55 
56  if (argc >= 2) {
57  if (strcmp(argv[0], "--arb-notify") == 0) {
58  use_arb_message = true;
59  argc--; argv++;
60  }
61  }
62  if (argc >= 2) {
63  if (strcmp(argv[0], "-d") == 0) {
64  dbname = argv[1];
65  argc -= 2;
66  argv += 2;
67  }
68  }
69  if (argc >= 2) {
70  if (strcmp(argv[0], "-r") == 0) {
71  RAxML_mode = true;
72  argc--; argv++;
73  }
74  }
75 
77  if (argc >= 1) {
78  SAI_name = argv[0];
79  argc--; argv++;
80  }
81  else {
82  error = "Missing argument 'SAI_NAME'";
83  }
84 
85  if (!error) {
86  GB_shell shell;
87  GBDATA *gb_main = GBT_open(dbname, "r");
88  if (!gb_main) {
89  error = GB_await_error();
90  }
91  else {
92  char *seq = NULp;
93  char *filter = NULp;
94  int seq_len = 0;
95  int filter_len = 0;
96  long ali_len = 0;
97 
98  {
99  GB_transaction ta(gb_main);
100 
101  char *ali_name = GBT_get_default_alignment(gb_main);
102  ali_len = GBT_get_alignment_len(gb_main, ali_name);
103 
104  filter = GBT_read_string(gb_main, AWAR_GDE_EXPORT_FILTER);
105  if (!filter) {
106  error = "Expected entry '" AWAR_GDE_EXPORT_FILTER "' does not exist";
107  }
108  else {
109  filter_len = strlen(filter);
110 
111  bool have_no_sai = SAI_name[0] == 0 || strcmp(SAI_name, "--none") == 0;
112  if (!have_no_sai) {
113  GBDATA *gb_sai = GBT_find_SAI(gb_main, SAI_name);
114  if (gb_sai) {
115  GBDATA *gb_data = GBT_find_sequence(gb_sai, ali_name);
116  if (gb_data) {
117  seq = GB_read_string(gb_data); // @@@ NOT_ALL_SAI_HAVE_DATA
118  if (!seq) {
119  error = GBS_global_string("SAI '%s' has no data in '%s'", SAI_name, ali_name);
120  }
121  else {
122  seq_len = strlen(seq);
123  }
124  }
125  }
126  else {
127  error = GBS_global_string("No such SAI '%s'", SAI_name);
128  }
129  }
130  }
131  free(ali_name);
132  }
133 
134  if (!RAxML_mode) {
135 #define APPEARS_IN_HEADER(c) (!strchr(not_in_header, (c)))
136  const char *not_in_header = "Y"; // these flags don't appear in header and they should be written directly after header
137 
138  {
139  char *firstline = ARB_strdup("");
140  for (int arg = 0; arg<argc; ++arg) { // put all fastdnaml arguments to header
141  char command_char = argv[arg][0];
142  if (!command_char) continue; // skip empty arguments
143 
144  if (APPEARS_IN_HEADER(command_char)) {
145  freeset(firstline, GBS_global_string_copy("%s %c", firstline, command_char));
146  }
147  }
148 
149  // print header
150  if (seq) fputc('C', stdout); // prefix with categories
151  fputs(firstline, stdout); // then other_fastdnaml_args
152  free(firstline);
153  }
154 
155  // print other_fastdnaml_args in reverse order
156  // (first those which do not appear in header, rest afterwards)
157  for (int appears_in_header = 0; appears_in_header <= 1; ++appears_in_header) {
158  for (int arg = 0; arg < argc; ++arg) { // print [other_fastdnaml_args]*
159  if (!argv[arg][0]) continue; // skip empty arguments
160  if (!argv[arg][1]) continue; // don't print single character commands again on a own line
161  if (APPEARS_IN_HEADER(argv[arg][0]) != appears_in_header) continue;
162  fputc('\n', stdout);
163  fputs(argv[arg], stdout);
164  }
165  }
166 #undef APPEARS_IN_HEADER
167 
168  if (seq) { // if SAI was found
169  printf("\nC 35 ");
170  double rate = 1.0;
171  int i;
172  for (i=0; i<35; i++) {
173  printf("%f ", rate);
174  rate *= CATSCALE;
175  }
176  printf("\nCategories ");
177 
178  for (i=0; i<seq_len; i++) {
179  if (i>filter_len || filter[i] != '0') {
180  int c = seq[i];
181 
182  arb_assert(c != '0'); // only 35 cats (1-9 and A-Z)
183 
184  if ((c < '0' || c>'9') && (c < 'A' || c>'Z')) c = '1';
185  putchar(c);
186  }
187  }
188  for (; i<ali_len; i++) {
189  putchar('1');
190  }
191  }
192  }
193  else {
194  // write RAxML weightsfile content
195  int cnt = 0;
196 
197  char *weight['Z'+1];
198  int i;
199 
200  for (i = 0; i <= 'Z'; i++) weight[i] = NULp;
201 
202  double rate = 1.0;
203 
204  for (i = '1'; i <= '9'; i++) {
205  weight[i] = GBS_global_string_copy(" %i", int(rate*MIO+0.5));
206  rate *= CATSCALE;
207  }
208  for (i = 'A'; i <= 'Z'; i++) {
209  weight[i] = GBS_global_string_copy(" %i", int(rate*MIO+0.5));
210  rate *= CATSCALE;
211  }
212 
213  if (!seq) { // no SAI selected -> unique weights
214  seq = (char*)malloc(ali_len+1);
215  memset(seq, '1', ali_len);
216  seq[ali_len] = 0;
217 
218  seq_len = ali_len;
219 
220  freedup(weight['1'], " 1");
221  }
222 
223  for (i=0; i<seq_len; i++) {
224  if (i>filter_len || filter[i] != '0') {
225  int c = seq[i];
226  if ((c < '0' || c>'9') && (c < 'A' || c>'Z')) c = '1';
227  fputs(weight[c], stdout);
228  if (++cnt>30) {
229  fputc('\n', stdout);
230  cnt = 0;
231  }
232  }
233  }
234 
235  for (; i<ali_len; i++) {
236  if (i>filter_len || filter[i] != '0') {
237  fputs(weight['1'], stdout);
238  if (++cnt>30) {
239  fputc('\n', stdout);
240  cnt = 0;
241  }
242  }
243  }
244 
245  for (i = 0; i <= 'Z'; i++) free(weight[i]);
246 
247  fputc('\n', stdout);
248  }
249 
250  free(filter);
251  free(seq);
252  GB_close(gb_main);
253  }
254  }
255 
256  if (error) {
257  fprintf(stderr, "Error in arb_export_rates: %s\n", error);
258  if (use_arb_message) {
259  char *quotedErrorMsg = GBK_singlequote(GBS_global_string("Error in arb_export_rates: %s", error));
260  GB_ERROR msgerror = GBK_system(GBS_global_string("arb_message %s &", quotedErrorMsg)); // send async to avoid deadlock
261  if (msgerror) fprintf(stderr, "Error: %s\n", msgerror);
262  free(quotedErrorMsg);
263  }
264  return EXIT_FAILURE;
265  }
266  return EXIT_SUCCESS;
267 }
GB_ERROR GBK_system(const char *system_command)
Definition: arb_msg.cxx:519
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:718
#define MIO
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
#define APPEARS_IN_HEADER(c)
FILE * seq
Definition: rns.c:46
#define AWAR_GDE_EXPORT_FILTER
GBDATA * GBT_open(const char *path, const char *opent)
Definition: adtools.cxx:524
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:177
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
char * GBT_read_string(GBDATA *gb_container, const char *fieldpath)
Definition: adtools.cxx:267
static int weight[maxsites+1]
static void error(const char *msg)
Definition: mkptypes.cxx:96
fputc('\n', stderr)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:682
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
#define CATSCALE
fputs(TRACE_PREFIX, stderr)
char * GBK_singlequote(const char *arg)
Definition: arb_msg.cxx:547
char * GB_read_string(GBDATA *gbd)
Definition: arbdb.cxx:903
#define NULp
Definition: cxxforward.h:97
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:687
int ARB_main(int argc, char *argv[])
GB_transaction ta(gb_var)
GBDATA * gb_main
Definition: adname.cxx:33
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:649