ARB
main.cxx
Go to the documentation of this file.
1 // ------------------------------------------------------------
2 //
3 // Format Conversion Program.
4 //
5 // Woese Lab., Dept. of Microbiology, UIUC
6 // Modified for use in ARB by ARB team
7 //
8 // ------------------------------------------------------------
9 
10 #include "defs.h"
11 #include "fun.h"
12 #include "global.h"
13 #include <arb_msg.h>
14 
// Definition of the static member Convaln_exception::thrown; it starts out as NULp.
15 Convaln_exception *Convaln_exception::thrown = NULp;
16 
17 struct TypeSwitch { const char *switchtext; Format format; };
18 
19 static TypeSwitch convertible_type[] = { // see fconv.cxx@format_spec
20  { "GenBank", GENBANK },
21  { "EMBL", EMBL },
22  { "AE2", MACKE },
23  { "SwissProt", SWISSPROT },
24  { "NEXUS", NEXUS },
25  { "PHYLIP", PHYLIP },
26  { "FASTDNAML", FASTDNAML },
27  { "GCG", GCG },
28  { "PRINTABLE", PRINTABLE },
29 };
30 
static void show_command_line_usage() {
    // Print the command line synopsis to stderr.
    static const char usage_text[] =
        "Command line usage:\n"
        " $ arb_convert_aln [--arb-notify] -INFMT input_file -OUTFMT output_file\n"
        "\n"
        " where\n"
        " INFMT may be 'GenBank', 'EMBL', 'AE2' or 'SwissProt' and\n"
        " OUTFMT may be 'GenBank', 'EMBL', 'AE2', 'NEXUS', 'PHYLIP', 'FASTDNAML', 'GCG' or 'Printable'\n"
        " (Note: you may abbreviate the format names)\n"
        "\n"
        " FASTDNAML writes a PHYLIP file with content from STDIN appended at end of first line (used for arb_fastdnaml).\n"
        "\n"
        " if argument '--arb-notify' is given, arb_convert_aln assumes it has been started by ARB\n"
        " and reports errors using the 'arb_message' script.\n";

    fputs(usage_text, stderr);
}
46 
47 static void valid_name_or_die(const char *file_name) {
48  if (str0len(file_name) <= 0) {
49  throw_errorf(152, "illegal file name: %s", file_name);
50  }
51 }
static bool file_exists(const char *file_name) {
    // A file counts as existing when it can be opened for reading.
    FILE *probe = fopen(file_name, "r");
    if (!probe) return false;

    fclose(probe);
    return true;
}
59 
60 static void change_file_suffix(const char *old_file, char *file_name, int type) {
61  // Define the default file name by changing suffix.
62  int indi, indj;
63 
64  for (indi = str0len(old_file) - 1; indi >= 0 && old_file[indi] != '.'; indi--)
65  if (indi == 0)
66  strcpy(file_name, old_file);
67  else {
68  for (indj = 0; indj < (indi - 1); indj++)
69  file_name[indj] = old_file[indj];
70  file_name[indj] = '\0';
71  }
72  switch (type) {
73  case GENBANK:
74  strcat(file_name, ".GB");
75  break;
76  case MACKE:
77  strcat(file_name, ".aln");
78  break;
79  case NEXUS:
80  strcat(file_name, ".NEXUS");
81  break;
82  case PHYLIP:
83  strcat(file_name, ".PHY");
84  break;
85  case EMBL:
86  strcat(file_name, ".EMBL");
87  break;
88  case PRINTABLE:
89  strcat(file_name, ".prt");
90  break;
91  default:
92  strcat(file_name, ".???");
93  }
94 }
95 
97  char temp[LINESIZE];
98  char choice[LINESIZE];
99 
100  fputs("---------------------------------------------------------------\n"
101  "\n"
102  " convert_aln - an alignment and file converter written by\n"
103  " WenMin Kuan for the RDP database project.\n"
104  "\n"
105  " Modified for use in ARB by Oliver Strunk & Ralf Westram\n"
106  " Report errors or deficiencies to devel@arb-home.de\n"
107  "\n"
108  , stderr);
110  fputs("\n"
111  "---------------------------------------------------------------\n"
112  "\n"
113  "Select input format (<CR> means default)\n"
114  "\n"
115  " (1) GenBank [default]\n"
116  " (2) EMBL\n"
117  " (3) AE2\n"
118  " (4) SwissProt\n"
119  " (5) Quit\n"
120  " ? "
121  , stderr);
122 
123  Getstr(choice, LINESIZE);
124  {
125  Format inType = UNKNOWN;
126  switch (choice[0]) {
127  case '\0': // [default]
128  case '1': inType = GENBANK; break;
129  case '2': inType = EMBL; break;
130  case '3': inType = MACKE; break;
131  case '4': inType = SWISSPROT; break;
132  case '5': exit(0); // ok - interactive mode only
133  default: throw_errorf(16, "Unknown input format selection '%s'", choice);
134  }
135 
136  fputs("\nInput file name? ", stderr);
137  Getstr(temp, LINESIZE);
138  in.init(temp, inType);
139  }
140 
141  valid_name_or_die(temp);
142  if (!file_exists(temp)) throw_error(77, "Input file not found");
143 
144  // output file information
145  fputs("\n"
146  "Select output format (<CR> means default)\n"
147  "\n"
148  " (1) GenBank\n"
149  " (2) EMBL\n"
150  " (3) AE2 [default]\n"
151  " (4) NEXUS (Paup)\n"
152  " (5) PHYLIP\n"
153  " (6) GCG\n"
154  " (7) Printable\n"
155  " (8) Quit\n"
156  " ? ", stderr);
157 
158  Getstr(choice, LINESIZE);
159  {
160  Format ouType = UNKNOWN;
161  switch (choice[0]) {
162  case '1': ouType = GENBANK; break;
163  case '2': ouType = EMBL; break;
164  case '\0': // [default]
165  case '3': ouType = MACKE; break;
166  case '4': ouType = NEXUS; break;
167  case '5': ouType = PHYLIP; break;
168  case '6': ouType = GCG; break;
169  case '7': ouType = PRINTABLE; break;
170  case '8': exit(0); // ok - interactive mode only
171  default: throw_errorf(66, "Unknown output format selection '%s'", choice);
172  }
173  change_file_suffix(in.name(), temp, ouType);
174  if (ouType != GCG) {
175  fprintf(stderr, "\nOutput file name [%s]? ", temp);
176  Getstr(temp, LINESIZE);
177  if (str0len(temp) == 0)
178  change_file_suffix(in.name(), temp, ouType);
179  }
180  out.init(temp, ouType);
181  }
182 }
183 
static int strcasecmp_start(const char *s1, const char *s2) {
    // Case-insensitive test whether 's1' is a prefix of 's2'.
    //
    // Returns 0 if every character of 's1' matches the corresponding character
    // of 's2' (an empty 's1' therefore always yields 0). Otherwise returns the
    // difference of the first two differing (lowercased) characters.
    //
    // Characters are converted to unsigned char before they are passed to
    // tolower(): handing a negative plain char to tolower() is undefined.

    for (size_t pos = 0; ; ++pos) {
        if (!s1[pos]) return 0; // reached end of s1 without mismatch

        int diff = tolower((unsigned char)s1[pos]) - tolower((unsigned char)s2[pos]);
        if (diff) return diff;  // also taken when s2 ends first (its terminator differs)
    }
}
192 
193 static bool is_abbrev_switch(const char *arg, const char *switchtext) {
194  return arg[0] == '-' && strcasecmp_start(arg+1, switchtext) == 0;
195 }
196 
197 static Format parse_type(const char *arg) {
198  for (size_t i = 0; i<ARRAY_ELEMS(convertible_type); ++i) {
199  const TypeSwitch& type = convertible_type[i];
200  if (is_abbrev_switch(arg, type.switchtext)) {
201  return type.format;
202  }
203  }
204  return UNKNOWN;
205 }
206 
207 static Format parse_intype(const char *arg) {
208  Format type = parse_type(arg);
209  if (!is_input_format(type)) throw_errorf(65, "Unsupported input file type '%s'", arg);
210  if (type == UNKNOWN) throw_errorf(67, "UNKNOWN input file type '%s'", arg);
211  return type;
212 }
213 
214 static Format parse_outtype(const char *arg) {
215  Format type = parse_type(arg);
216  if (type == UNKNOWN) throw_errorf(68, "UNKNOWN output file type '%s'", arg);
217  return type;
218 }
219 
static bool is_help_req(const char *arg) {
    // True if 'arg' is exactly one of the help switches (case-sensitive).
    static const char *const help_switches[] = { "-h", "--help" };
    const size_t switch_count = sizeof(help_switches)/sizeof(help_switches[0]);

    for (size_t i = 0; i < switch_count; ++i) {
        if (strcmp(arg, help_switches[i]) == 0) return true;
    }
    return false;
}
223 static bool command_line_conversion(int argc, const char * const *argv, FormattedFile& in, FormattedFile& out) {
224  for (int c = 1; c<argc; c++) {
225  if (is_help_req(argv[c])) {
227  return false;
228  }
229  }
230 
231  if (argc != 5) throw_errorf(69, "arb_convert_aln expects exactly 4 parameters (you specified %i). Try '--help'", argc-1);
232 
233  in.init(argv[2], parse_intype(argv[1]));
234  out.init(argv[4], parse_outtype(argv[3]));
235 
236  return true;
237 }
238 
239 static void do_conversion(const FormattedFile& in, const FormattedFile& out) {
240 #ifdef CALOG
241  fprintf(stderr, "\n\nConvert file %s to file %s.\n", in.name(), out.name());
242 #endif
243 
244  // check if output file exists and filename's validation
245  valid_name_or_die(out.name());
246  if (file_exists(out.name())) warningf(151, "Output file %s exists, will be overwritten.", out.name());
247 
248  // file format transfer...
249  convert(in, out);
250 }
251 
252 int ARB_main(int argc, char *argv[]) {
253  int exitcode = EXIT_SUCCESS;
254  bool use_arb_message = false;
255  try {
256  FormattedFile in;
257  FormattedFile out;
258 
259  if (argc>1 && strcmp(argv[1], "--arb-notify") == 0) {
260  use_arb_message = true;
261  argc--; argv++;
262  }
263 
264  if (argc < 2) {
265  ask_for_conversion_params(in, out);
266  do_conversion(in, out);
267  }
268  else {
269  if (command_line_conversion(argc, argv, in, out)) {
270  do_conversion(in, out);
271  }
272  }
273  }
274  catch (Convaln_exception& err) {
275  fprintf(stderr, "ERROR(%d): %s\n", err.get_code(), err.get_msg());
276  if (use_arb_message) {
277  char *quotedErrorMsg = GBK_singlequote(GBS_global_string("Error: %s (in arb_convert_aln; code=%d)", err.get_msg(), err.get_code()));
278  GB_ERROR error = GBK_system(GBS_global_string("arb_message %s &", quotedErrorMsg)); // send async to avoid deadlock
279  if (error) fprintf(stderr, "Error: %s\n", error);
280  free(quotedErrorMsg);
281  }
282  exitcode = EXIT_FAILURE;
283  }
284  return exitcode;
285 }
286 
287 // --------------------------------------------------------------------------------
288 
289 #ifdef UNIT_TESTS
290 #include <test_unit.h>
291 
// Unit test for the switch matching helpers strcasecmp_start() and is_abbrev_switch().
292 void TEST_BASIC_switch_parsing() {
// strcasecmp_start: equal strings, differing case and a plain prefix all yield zero
293  TEST_EXPECT_ZERO(strcasecmp_start("GenBank", "GenBank"));
294  TEST_EXPECT_ZERO(strcasecmp_start("GEnbaNK", "genBANK"));
295  TEST_EXPECT_ZERO(strcasecmp_start("Ge", "GenBank"));
// strcasecmp_start: the sign of the result follows the first differing character
296  TEST_EXPECT(strcasecmp_start("GenBank", "NEXUS") < 0);
297  TEST_EXPECT(strcasecmp_start("NEXUS", "GenBank") > 0);
298 
// is_abbrev_switch: rejects arguments without leading '-', arguments longer than
// the switch text and abbreviations of a different switch
299  TEST_REJECT(is_abbrev_switch("notAswitch", "notAswitch"));
300  TEST_REJECT(is_abbrev_switch("-GenbankPlus", "Genbank"));
301  TEST_REJECT(is_abbrev_switch("-Ge", "NEXUS"));
302 
// is_abbrev_switch: accepts abbreviated and complete switches
303  TEST_EXPECT(is_abbrev_switch("-Ge", "Genbank"));
304  TEST_EXPECT(is_abbrev_switch("-N", "NEXUS"));
305  TEST_EXPECT(is_abbrev_switch("-NEXUS", "NEXUS"));
306 
310 }
311 
312 #endif // UNIT_TESTS
CONSTEXPR_INLINE int str0len(const char *str)
Definition: global.h:98
GB_ERROR GBK_system(const char *system_command)
Definition: arb_msg.cxx:519
const char * GB_ERROR
Definition: arb_core.h:25
Format
Definition: fun.h:10
GB_TYPES type
void throw_errorf(int error_num, const char *error_messagef,...) __ATTR__FORMAT(2) __ATTR__NORETURN
Definition: util.cxx:41
Definition: fun.h:22
void init(const char *Name, Format Type)
Definition: util.cxx:292
void warningf(int warning_num, const char *warning_messagef,...) __ATTR__FORMAT(2)
Definition: util.cxx:66
static void change_file_suffix(const char *old_file, char *file_name, int type)
Definition: main.cxx:60
Definition: fun.h:25
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
void convert(const FormattedFile &in, const FormattedFile &out)
Definition: convert.cxx:156
const char * get_msg() const
Definition: global.h:63
static bool file_exists(const char *file_name)
Definition: main.cxx:52
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
Definition: fun.h:19
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
static void do_conversion(const FormattedFile &in, const FormattedFile &out)
Definition: main.cxx:239
const char * name() const
Definition: fun.h:61
#define TEST_EXPECT(cond)
Definition: test_unit.h:1313
int get_code() const
Definition: global.h:62
Definition: fun.h:12
#define TEST_REJECT(cond)
Definition: test_unit.h:1315
static int strcasecmp_start(const char *s1, const char *s2)
Definition: main.cxx:184
static void error(const char *msg)
Definition: mkptypes.cxx:96
static Format parse_type(const char *arg)
Definition: main.cxx:197
static bool command_line_conversion(int argc, const char *const *argv, FormattedFile &in, FormattedFile &out)
Definition: main.cxx:223
CONSTEXPR_INLINE bool is_input_format(Format inType)
Definition: fun.h:28
Definition: fun.h:14
#define cmp(h1, h2)
Definition: admap.cxx:50
Format format
Definition: main.cxx:17
Definition: fun.h:13
static void ask_for_conversion_params(FormattedFile &in, FormattedFile &out)
Definition: main.cxx:96
static TypeSwitch convertible_type[]
Definition: main.cxx:19
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
#define TEST_EXPECT_ZERO(cond)
Definition: test_unit.h:1074
fputs(TRACE_PREFIX, stderr)
Definition: fun.h:15
void Getstr(char *line, int linenum)
Definition: util.cxx:92
char * GBK_singlequote(const char *arg)
Definition: arb_msg.cxx:547
static Format parse_outtype(const char *arg)
Definition: main.cxx:214
void throw_error(int error_num, const char *error_message) __ATTR__NORETURN
Definition: util.cxx:23
static bool is_help_req(const char *arg)
Definition: main.cxx:220
Definition: fun.h:23
Definition: fun.h:21
static bool is_abbrev_switch(const char *arg, const char *switchtext)
Definition: main.cxx:193
static void valid_name_or_die(const char *file_name)
Definition: main.cxx:47
static void show_command_line_usage()
Definition: main.cxx:31
#define NULp
Definition: cxxforward.h:97
int ARB_main(int argc, char *argv[])
Definition: main.cxx:252
const char * switchtext
Definition: main.cxx:17
#define LINESIZE
Definition: defs.h:16
Definition: fun.h:20
static Format parse_intype(const char *arg)
Definition: main.cxx:207
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283