ARB
fconv.cxx
Go to the documentation of this file.
1 // ------------- File format converting subroutine -------------
2 
3 #include "defs.h"
4 #include "fun.h"
5 #include "global.h"
6 #include <static_assert.h>
7 #include <unistd.h>
8 #include <arb_diff.h>
9 
10 static const char *format2name(Format type) {
11  switch (type) {
12  case EMBL: return "EMBL";
13  case GCG: return "GCG";
14  case GENBANK: return "GENBANK";
15  case MACKE: return "MACKE";
16  case NEXUS: return "NEXUS";
17  case PHYLIP: return "PHYLIP";
18  case FASTDNAML: return "FASTDNAML";
19  case PRINTABLE: return "PRINTABLE";
20  case SWISSPROT: return "SWISSPROT";
21 
22  case UNKNOWN: ca_assert(0);
23  }
24  return NULp;
25 }
26 
27 void throw_conversion_not_supported(Format inType, Format ouType) { // __ATTR__NORETURN
28  throw_errorf(90, "Conversion from %s to %s is not supported",
29  format2name(inType), format2name(ouType));
30 }
31 void throw_conversion_failure(Format inType, Format ouType) { // __ATTR__NORETURN
32  throw_errorf(91, "Conversion from %s to %s fails",
33  format2name(inType), format2name(ouType));
34 }
35 void throw_conversion_not_implemented(Format inType, Format ouType) { // __ATTR__NORETURN
36  throw_errorf(92, "Conversion from %s to %s is not implemented (but is expected to be here)",
37  format2name(inType), format2name(ouType));
38 }
39 void throw_unsupported_input_format(Format inType) { // __ATTR__NORETURN
40  throw_errorf(93, "Unsupported input format %s", format2name(inType));
41 }
42 
43 void throw_incomplete_entry() { // __ATTR__NORETURN
44  throw_error(84, "Reached EOF before complete entry has been read");
45 }
46 
static int log_processed_counter = 0; // number of log_processed() calls so far
static int log_seq_counter       = 0; // sum of all 'seqCount' values reported so far

// Book-keeping hook: records that 'seqCount' sequences have been processed.
// Increments the call counter, adds 'seqCount' to the sequence counter and
// (only if CALOG is defined) prints a summary line to stderr.
void log_processed(int seqCount) {
    log_seq_counter       += seqCount;
    log_processed_counter += 1;

#if defined(CALOG)
    fprintf(stderr, "Total %d sequences have been processed\n", seqCount);
#endif // CALOG
}
58 
59 // --------------------------------------------------------------------------------
60 
61 #ifdef UNIT_TESTS
62 #include <arbdbt.h> // before test_unit.h!
63 #include <arb_file.h>
64 #include <test_unit.h>
65 
66 
67 #define TEST_THROW // comment out to temp. disable intentional throws
68 
69 struct FormatSpec {
70  Format type; // GENBANK, MACKE, ...
71  const char *name;
72  const char *testfile; // existing testfile (or NULp)
73  int sequence_count; // number of sequences in 'testfile'
74 };
75 
76 #define FORMATSPEC_OUT_ONLY(tag) { tag, #tag, NULp, 1 }
77 #define FORMATSPEC_GOT______(tag,file) { tag, #tag, "impexp/" file ".eft.exported", 1 }
78 #define FORMATSPEC_GOT_PLAIN(tag,file,seqcount) { tag, #tag, "impexp/" file, seqcount }
79 
80 static FormatSpec format_spec[] = {
81  // input formats
82  // FORMATSPEC_GOT______(GENBANK, "genbank"),
83  FORMATSPEC_GOT_PLAIN(GENBANK, "genbank.input", 3),
84  FORMATSPEC_GOT_PLAIN(EMBL, "embl.input", 5),
85  FORMATSPEC_GOT_PLAIN(MACKE, "macke.input", 5),
86  FORMATSPEC_GOT_PLAIN(SWISSPROT, "swissprot.input", 1), // SWISSPROT
87 
88  // output formats
89  FORMATSPEC_OUT_ONLY(GCG),
90  FORMATSPEC_OUT_ONLY(NEXUS),
91  FORMATSPEC_OUT_ONLY(PHYLIP),
92  FORMATSPEC_OUT_ONLY(PRINTABLE),
93 };
94 static const int fcount = ARRAY_ELEMS(format_spec);
95 
// Index of each format inside 'format_spec' (has to use the same order as that table).
enum FormatNum {
    // formats with test input
    NUM_GENBANK,
    NUM_EMBL,
    NUM_MACKE,
    NUM_SWISSPROT,

    // output-only formats
    NUM_GCG,
    NUM_NEXUS,
    NUM_PHYLIP,

    NUM_PRINTABLE,

    FORMATNUM_COUNT, // number of indices defined above
};
110 
// Test expectations for one conversion (one cell of the from/to matrix).
struct Capabilities {
    bool supported;    // conversion is expected to work (default: true)
    bool neverReturns; // conversion is excluded from testing and reported as "would never return" (default: false)

    Capabilities() : supported(true), neverReturns(false) {}

    // Decides whether the conversion gets executed by the test:
    // - never, if it is flagged 'neverReturns'
    // - unsupported conversions are only executed if TEST_THROW is defined
    bool shall_be_tested() {
        bool wanted = !neverReturns;
#if !defined(TEST_THROW)
        wanted = wanted && supported;
#endif
        return wanted;
    }
};
128 
129 static Capabilities cap[fcount][fcount];
130 #define CAP(from,to) (cap[NUM_##from][NUM_##to])
131 
132 #define TYPE(f) format_spec[f].type
133 #define NAME(f) format_spec[f].name
134 #define INPUT(f) format_spec[f].testfile
135 #define EXSEQ(f) format_spec[f].sequence_count
136 
137 // ----------------------------------
138 // update .expected files ?
139 
140 // #define TEST_AUTO_UPDATE // never does update if undefined
141 // #define UPDATE_ONLY_IF_MISSING
142 #define UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS
143 
144 inline bool more_than_date_differs(const char *file, const char *expected) {
146 }
147 
// Decides whether the .expected file shall be overwritten with the current result.
//
// - w/o TEST_AUTO_UPDATE the answer is always 'false'
// - otherwise the answer is 'true', unless one of the following restrictions vetoes:
//   - UPDATE_ONLY_IF_MISSING:                'expected' is already a regular file
//   - UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS: more_than_date_differs(file, expected) is false
#if defined(TEST_AUTO_UPDATE)
inline bool want_auto_update(const char *file, const char *expected) {
    bool shall_update = true;

    // mark parameters as used (none of the restrictions below might be compiled in)
    (void)file;
    (void)expected;

#if defined(UPDATE_ONLY_IF_MISSING)
    shall_update = shall_update && !GB_is_regularfile(expected);
#endif
#if defined(UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS)
    shall_update = shall_update && more_than_date_differs(file, expected);
#endif
    return shall_update;
}
#else // !TEST_AUTO_UPDATE
inline bool want_auto_update(const char * /* file */, const char * /* expected */) {
    return false;
}
#endif
168 
169 static void test_expected_conversion(const char *file, const char *flavor) {
170  char *expected;
171  if (flavor) expected = GBS_global_string_copy("%s.%s.expected", file, flavor);
172  else expected = GBS_global_string_copy("%s.expected", file);
173 
174  bool shall_update = want_auto_update(file, expected);
175  if (shall_update) {
176  // TEST_EXPECT(0); // completely avoid real update
177  TEST_EXPECT_ZERO_OR_SHOW_ERRNO(system(GBS_global_string("cp %s %s", file, expected)));
178  }
179  else {
180  TEST_REJECT(more_than_date_differs(file, expected));
181  }
182  free(expected);
183 }
184 
185 static const char *test_convert(const char *inf, const char *outf, Format inType, Format ouType) {
186  const char *error = NULp;
187  try {
188  convert(FormattedFile(inf ? inf : "infilename", inType),
189  FormattedFile(outf ? outf : "outfilename", ouType));
190  }
191  catch (Convaln_exception& exc) { error = GBS_global_string("%s (#%i)", exc.get_msg(), exc.get_code()); }
192  return error;
193 }
194 
195 static void test_convert_by_format_num(int from, int to) {
196  char *toFile = GBS_global_string_copy("impexp/conv.%s_2_%s", NAME(from), NAME(to));
197  if (GB_is_regularfile(toFile)) TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(toFile));
198 
199  int old_processed_counter = log_processed_counter;
200  int old_seq_counter = log_seq_counter;
201 
202  const char *error = test_convert(INPUT(from), toFile, TYPE(from), TYPE(to));
203 
204  int converted_seqs = log_seq_counter-old_seq_counter;
205  int expected_seqs = EXSEQ(from);
206  if (to == NUM_GCG) expected_seqs = 1; // we stop after first file (useless to generate numerous files)
207 
208  Capabilities& me = cap[from][to];
209 
210  if (me.supported) {
211  if (error) TEST_ERROR("convert() reports error: '%s' (for supported conversion)", error);
213  TEST_EXPECT_EQUAL(converted_seqs, expected_seqs);
214  TEST_EXPECT_EQUAL(log_processed_counter, old_processed_counter+1);
215 
216  TEST_EXPECT_LESS_EQUAL(10, GB_size_of_file(toFile)); // less than 10 bytes
217  test_expected_conversion(toFile, NULp);
218  TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(toFile));
219  }
220  else {
221  if (!error) TEST_ERROR("No error for unsupported conversion '%s'", GBS_global_string("%s -> %s", NAME(from), NAME(to)));
222  TEST_REJECT_NULL(strstr(error, "supported")); // wrong error
223  TEST_REJECT(GB_is_regularfile(toFile)); // unsupported produced output
224  }
225  TEST_EXPECT_EQUAL(me.supported, !error);
226 
227 #if defined(TEST_THROW)
228  {
229  // test if conversion from empty and text file fails
230 
231  const char *fromFile = "general/empty.input";
232 
233  error = test_convert(fromFile, toFile, TYPE(from), TYPE(to));
234  TEST_REJECT_NULL(error);
235 
236  fromFile = "general/text.input";
237  error = test_convert(fromFile, toFile, TYPE(from), TYPE(to));
238  TEST_REJECT_NULL(error);
239  }
240 #endif
241 
242  free(toFile);
243 }
244 
245 inline bool isInputFormat(int num) { return is_input_format(TYPE(num)); }
246 
247 static void init_cap() {
248  for (int from = 0; from<fcount; from++) {
249  for (int to = 0; to<fcount; to++) {
250  Capabilities& me = cap[from][to];
251  if (!isInputFormat(from)) me.supported = false;
252  }
253  }
254 }
255 
// Flags the conversion t1 -> t2 as unsupported (after checking that t1 is an input format).
// Wrapped into do/while(0), so both statements stay together when the macro
// is used as the body of an unbraced 'if' or loop.
#define NOT_SUPPORTED(t1,t2)                        \
    do {                                            \
        TEST_EXPECT(isInputFormat(NUM_##t1));       \
        cap[NUM_##t1][NUM_##t2].supported = false;  \
    } while (0)
257 
258 static int will_convert(int from) {
259  int will = 0;
260  for (int to = 0; to<fcount; to++) {
261  Capabilities& me = cap[from][to];
262  if (me.supported && me.shall_be_tested()) {
263  will++;
264  }
265  }
266  return will;
267 }
268 
269 void TEST_SLOW_converter() {
270  STATIC_ASSERT(FORMATNUM_COUNT == fcount);
271 
272  init_cap();
273 
274  NOT_SUPPORTED(GENBANK, SWISSPROT);
275  NOT_SUPPORTED(EMBL, SWISSPROT);
276  NOT_SUPPORTED(SWISSPROT, GENBANK);
277  NOT_SUPPORTED(SWISSPROT, EMBL);
278 
279  int possible = 0;
280  int tested = 0;
281  int unsupported = 0;
282  int neverReturns = 0;
283 
284  for (int from = 0; from<fcount; from++) {
285  TEST_ANNOTATE(GBS_global_string("while converting from '%s'", NAME(from)));
286  if (isInputFormat(from)) {
287  if (will_convert(from)<1) {
288  TEST_ERROR("Conversion from %s seems unsupported", NAME(from));
289  }
290  }
291  for (int to = 0; to<fcount; to++) {
292  possible++;
293  Capabilities& me = cap[from][to];
294 
295  if (me.shall_be_tested()) {
296  TEST_ANNOTATE(GBS_global_string("while converting %s -> %s", NAME(from), NAME(to)));
297  test_convert_by_format_num(from, to);
298  tested++;
299  }
300 
301  unsupported += !me.supported;
302  neverReturns += me.neverReturns;
303  }
304  }
305  TEST_ANNOTATE(NULp);
306 
307  fprintf(stderr,
308  "Conversion test summary:\n"
309  " - formats: %3i\n"
310  " - conversions: %3i (possible)\n"
311  " - unsupported: %3i\n"
312  " - tested: %3i\n"
313  " - neverReturns: %3i (would never return - not checked)\n"
314  " - converted: %3i\n",
315  fcount,
316  possible,
317  unsupported,
318  tested,
319  neverReturns,
320  tested-unsupported);
321 
322  int untested = possible - tested;
323  TEST_EXPECT_EQUAL(untested, neverReturns);
324 }
325 
326 #endif // UNIT_TESTS
Format
Definition: fun.h:10
GB_TYPES type
void throw_errorf(int error_num, const char *error_messagef,...) __ATTR__FORMAT(2) __ATTR__NORETURN
Definition: util.cxx:41
Definition: fun.h:22
TextDiffMode
Definition: arb_diff.h:19
#define ca_assert(cond)
Definition: global.h:33
Definition: fun.h:25
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
void convert(const FormattedFile &in, const FormattedFile &out)
Definition: convert.cxx:156
#define TEST_EXPECT_LESS_EQUAL(val, ref)
Definition: test_unit.h:1295
const char * get_msg() const
Definition: global.h:63
void throw_conversion_not_implemented(Format inType, Format ouType)
Definition: fconv.cxx:35
Definition: fun.h:19
long GB_size_of_file(const char *path)
Definition: arb_file.cxx:28
void throw_incomplete_entry()
Definition: fconv.cxx:43
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
static int log_seq_counter
Definition: fconv.cxx:48
static const char * format2name(Format type)
Definition: fconv.cxx:10
#define TEST_EXPECT(cond)
Definition: test_unit.h:1313
#define true
Definition: ureadseq.h:14
int get_code() const
Definition: global.h:62
#define false
Definition: ureadseq.h:13
Definition: fun.h:12
#define TEST_REJECT(cond)
Definition: test_unit.h:1315
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1310
static void error(const char *msg)
Definition: mkptypes.cxx:96
CONSTEXPR_INLINE bool is_input_format(Format inType)
Definition: fun.h:28
PT1_TYPE TYPE
Definition: probe_tree.h:41
Definition: fun.h:14
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
Definition: test_unit.h:1079
void throw_conversion_failure(Format inType, Format ouType)
Definition: fconv.cxx:31
Definition: fun.h:13
static int log_processed_counter
Definition: fconv.cxx:47
bool ARB_textfiles_have_difflines(const char *file1, const char *file2, int expected_difflines, TextDiffMode tdmode)
Definition: arb_diff.cxx:250
void throw_unsupported_input_format(Format inType)
Definition: fconv.cxx:39
void throw_conversion_not_supported(Format inType, Format ouType)
Definition: fconv.cxx:27
Definition: fun.h:15
void throw_error(int error_num, const char *error_message) __ATTR__NORETURN
Definition: util.cxx:23
#define TEST_ERROR(format, strarg)
Definition: test_unit.h:1059
Definition: fun.h:23
Definition: fun.h:21
#define NULp
Definition: cxxforward.h:97
bool GB_is_regularfile(const char *path)
Definition: arb_file.cxx:76
void log_processed(int seqCount)
Definition: fconv.cxx:50
#define STATIC_ASSERT(const_expression)
Definition: static_assert.h:36
Definition: fun.h:20
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195