ARB
convert.cxx
Go to the documentation of this file.
1 // ================================================================
2 //
3 // File : convert.cxx
4 // Purpose : some helpers for global data handling
5 //
6 // ================================================================
7 
8 
9 #include "embl.h"
10 #include "genbank.h"
11 #include "macke.h"
12 
14  switch (in.type()) {
15  case GENBANK: return new GenbankReader(in.name());
16  case SWISSPROT:
17  case EMBL: return new EmblSwissprotReader(in.name());
18  case MACKE: return new MackeReader(in.name());
19  default: throw_unsupported_input_format(in.type());
20  }
21  return NULp;
22 }
23 
25  const InputFormat& in;
26 
27  mutable OutputFormatPtr embl;
28  mutable OutputFormatPtr gbk;
29  mutable OutputFormatPtr macke;
30 
31 public:
32  ConvertibleData(const InputFormat& in_) : in(in_) {}
33 
34  Format format() const { return in.format(); }
35 
36  const Embl& to_embl(const Seq& seq) const;
37  const GenBank& to_genbank(const Seq& seq) const;
38  const Macke& to_macke(const Seq& seq) const;
39 };
40 
41 #if defined(WARN_TODO)
42 #warning refactor below
43 #endif
44 
45 const Embl& ConvertibleData::to_embl(const Seq& seq) const {
46  Format inType = in.format();
47 
48  if (inType == EMBL) return dynamic_cast<const Embl&>(in);
49 
50  embl = new Embl;
51 
52  bool ok = true;
53  switch (inType) {
54  case GENBANK: ok = gtoe(dynamic_cast<const GenBank&>(in), dynamic_cast<Embl&>(*embl), seq); break;
55  case MACKE: ok = mtoe(dynamic_cast<const Macke&>(in), dynamic_cast<Embl&>(*embl), seq); break;
56  default: throw_conversion_not_implemented(inType, EMBL);
57  }
58  if (!ok) throw_conversion_failure(inType, EMBL);
59 
60  ca_assert(!embl.isNull());
61  return dynamic_cast<const Embl&>(*embl);
62 }
63 
64 const GenBank& ConvertibleData::to_genbank(const Seq& seq) const {
65  Format inType = in.format();
66 
67  if (inType == GENBANK) return dynamic_cast<const GenBank&>(in);
68 
69  gbk = new GenBank;
70 
71  bool ok = true;
72  switch (inType) {
73  case EMBL: ok = etog(dynamic_cast<const Embl&>(in), dynamic_cast<GenBank&>(*gbk), seq); break;
74  case MACKE: ok = mtog(dynamic_cast<const Macke&>(in), dynamic_cast<GenBank&>(*gbk), seq); break;
75  default: throw_conversion_not_implemented(inType, GENBANK);
76  }
77  if (!ok) throw_conversion_failure(inType, GENBANK);
78 
79  ca_assert(!gbk.isNull());
80  return dynamic_cast<const GenBank&>(*gbk);
81 }
82 
83 const Macke& ConvertibleData::to_macke(const Seq& seq) const {
84  Format inType = in.format();
85 
86  if (inType == MACKE) return dynamic_cast<const Macke&>(in);
87 
88  macke = new Macke;
89 
90  bool ok = true;
91  switch (inType) {
92  case EMBL: ok = etom(dynamic_cast<const Embl&>(in), dynamic_cast<Macke&>(*macke), seq); break;
93  case GENBANK: ok = gtom(dynamic_cast<const GenBank&>(in), dynamic_cast<Macke&>(*macke)); break;
94  default: throw_conversion_not_implemented(inType, MACKE);
95  }
96  if (!ok) throw_conversion_failure(inType, MACKE);
97 
98  ca_assert(!macke.isNull());
99  return dynamic_cast<const Macke&>(*macke);
100 }
101 
102 static void write_to_embl(FileWriter& write, const ConvertibleData& data, const Seq& seq) {
103  const Embl& embl = data.to_embl(seq);
104  embl_out(embl, seq, write);
105  write.seq_done();
106 }
107 static void write_to_genbank(FileWriter& write, const ConvertibleData& data, const Seq& seq) {
108  const GenBank& gbk = data.to_genbank(seq);
109  genbank_out(gbk, seq, write);
110  write.seq_done();
111 }
112 static void write_to_macke(FileWriter& write, const ConvertibleData& data, const Seq& seq, int phase, bool first) {
113  Format inType = data.format();
114  const Macke& macke = data.to_macke(seq);
115  switch (phase) {
116  case 0:
117  macke_seq_display_out(macke, write, inType, first);
118  break;
119  case 1:
120  macke_seq_info_out(macke, write);
121  break;
122  case 2:
123  macke_seq_data_out(seq, macke, write);
124  write.seq_done(); // count only in one and last phase!
125  break;
126  }
127 }
128 
129 static void to_macke(const FormattedFile& in, const char *outf) {
131  FileWriter write(outf);
132  SmartPtr<Warnings> suppress;
133 
134  macke_out_header(write);
135 
136  for (int phase = 0; phase<3; ++phase) {
137  bool first = true;
138  while (1) {
139  Seq seq;
140  if (!reader->read_one_entry(seq)) break;
141  write_to_macke(write, reader->get_data(), seq, phase, first);
142 
143  first = false;
144  }
145  if (phase<2) {
146  if (phase == 0) {
147  write.out("#-\n");
148  suppress = new Warnings; // no warning messages for phase 1+2
149  }
150  reader->rewind();
151  }
152  }
153  write.expect_written();
154 }
155 
156 void convert(const FormattedFile& in, const FormattedFile& out) {
157  if (str_equal(in.name(), out.name()))
158  throw_error(30, "Input file and output file must be different file");
159 
160  bool converted = true;
161  switch (out.type()) {
162  case EMBL:
163  case SWISSPROT:
164  case GENBANK: {
165  Format inType = in.type();
166  Format ouType = out.type();
167  if ((inType == GENBANK && ouType == SWISSPROT) ||
168  (inType == SWISSPROT && ouType == GENBANK) ||
169  (inType == EMBL && ouType == SWISSPROT) ||
170  (inType == SWISSPROT && ouType == EMBL))
171  {
172  converted = false;
173  break;
174  }
175 
177  FileWriter write(out.name());
178 
179  while (1) {
180  Seq seq;
181  if (!reader->read_one_entry(seq)) break;
182  if (ouType == GENBANK) {
183  write_to_genbank(write, reader->get_data(), seq);
184  }
185  else {
186  write_to_embl(write, reader->get_data(), seq);
187  }
188  }
189  write.expect_written();
190  break;
191  }
192  case MACKE: to_macke(in, out.name()); break;
193  case GCG: to_gcg(in, out.name()); break;
194  case NEXUS: to_paup(in, out.name()); break;
195  case PHYLIP: to_phylip(in, out.name(), false); break;
196  case FASTDNAML: to_phylip(in, out.name(), true); break;
197  case PRINTABLE: to_printable(in, out.name()); break;
198 
199  default: converted = false; break;
200  }
201  if (!converted) {
203  }
204 }
205 
Format
Definition: fun.h:10
int mtog(const Macke &macke, GenBank &gbk, const Seq &seq)
Definition: mg.cxx:414
void seq_done()
Definition: reader.h:148
void macke_seq_display_out(const Macke &macke, Writer &write, Format inType, bool first_sequence)
Definition: macke.cxx:85
void genbank_out(const GenBank &gbk, const Seq &seq, Writer &write)
Definition: genbank.cxx:450
void out(char ch) FINAL_OVERRIDE
Definition: reader.h:139
int gtom(const GenBank &gbk, Macke &macke)
Definition: mg.cxx:454
Definition: fun.h:22
const Macke & to_macke(const Seq &seq) const
Definition: convert.cxx:83
Format type() const
Definition: fun.h:62
void embl_out(const Embl &embl, const Seq &seq, Writer &write)
Definition: embl.cxx:360
#define ca_assert(cond)
Definition: global.h:33
void expect_written()
Definition: reader.cxx:97
static void write_to_macke(FileWriter &write, const ConvertibleData &data, const Seq &seq, int phase, bool first)
Definition: convert.cxx:112
void macke_seq_data_out(const Seq &seq, const Macke &macke, Writer &write)
Definition: macke.cxx:196
void to_gcg(const FormattedFile &in, const char *outf)
Definition: gcg.cxx:199
void convert(const FormattedFile &in, const FormattedFile &out)
Definition: convert.cxx:156
int mtoe(const Macke &macke, Embl &embl, const Seq &seq)
Definition: embl.cxx:772
void throw_conversion_not_implemented(Format inType, Format ouType)
Definition: fconv.cxx:35
bool isNull() const
test if SmartPtr is NULp
Definition: smartptr.h:248
static void write_to_embl(FileWriter &write, const ConvertibleData &data, const Seq &seq)
Definition: convert.cxx:102
Definition: fun.h:19
void to_paup(const FormattedFile &in, const char *outf)
Definition: paup.cxx:88
int etom(const Embl &embl, Macke &macke, const Seq &seq)
Definition: embl.cxx:543
FILE * seq
Definition: rns.c:46
Format format() const OVERRIDE=0
const char * name() const
Definition: fun.h:61
void macke_out_header(Writer &write)
Definition: macke.cxx:78
void to_phylip(const FormattedFile &in, const char *outf, bool for_fastdnaml)
Definition: phylip.cxx:64
static SmartPtr< FormatReader > create(const FormattedFile &in)
Definition: convert.cxx:13
Generic smart pointer.
Definition: smartptr.h:149
Definition: fun.h:12
Definition: seq.h:43
const Embl & to_embl(const Seq &seq) const
Definition: convert.cxx:45
static void write_to_genbank(FileWriter &write, const ConvertibleData &data, const Seq &seq)
Definition: convert.cxx:107
Format format() const
Definition: convert.cxx:34
int gtoe(const GenBank &gbk, Embl &embl, const Seq &seq)
Definition: embl.cxx:677
Definition: fun.h:14
void throw_conversion_failure(Format inType, Format ouType)
Definition: fconv.cxx:31
Definition: fun.h:13
void throw_unsupported_input_format(Format inType)
Definition: fconv.cxx:39
void throw_conversion_not_supported(Format inType, Format ouType)
Definition: fconv.cxx:27
Definition: fun.h:15
void to_printable(const FormattedFile &in, const char *outf)
Definition: printable.cxx:37
void throw_error(int error_num, const char *error_message) __ATTR__NORETURN
Definition: util.cxx:23
CONSTEXPR_INLINE bool str_equal(const char *s1, const char *s2)
Definition: global.h:95
static void to_macke(const FormattedFile &in, const char *outf)
Definition: convert.cxx:129
Definition: fun.h:23
Definition: fun.h:21
const GenBank & to_genbank(const Seq &seq) const
Definition: convert.cxx:64
#define NULp
Definition: cxxforward.h:97
ConvertibleData(const InputFormat &in_)
Definition: convert.cxx:32
Definition: fun.h:20
void macke_seq_info_out(const Macke &macke, Writer &write)
Definition: macke.cxx:162
int etog(const Embl &embl, GenBank &gbk, const Seq &seq)
Definition: embl.cxx:488