ARB
convert.cxx
Go to the documentation of this file.
1 // ================================================================
2 //
3 // File : convert.cxx
4 // Purpose : some helpers for global data handling
5 //
6 // ================================================================
7 
8 
9 #include "embl.h"
10 #include "genbank.h"
11 #include "macke.h"
12 
14  switch (in.type()) {
15  case GENBANK: return new GenbankReader(in.name());
16  case SWISSPROT:
17  case EMBL: return new EmblSwissprotReader(in.name());
18  case MACKE: return new MackeReader(in.name());
19  default: throw_unsupported_input_format(in.type());
20  }
21  return NULp;
22 }
23 
25  const InputFormat& in;
26 
27  mutable OutputFormatPtr embl;
28  mutable OutputFormatPtr gbk;
29  mutable OutputFormatPtr macke;
30 
31 public:
32  ConvertibleData(const InputFormat& in_) : in(in_) {}
33 
34  Format format() const { return in.format(); }
35 
36  const Embl& to_embl(const Seq& seq) const;
37  const GenBank& to_genbank(const Seq& seq) const;
38  const Macke& to_macke(const Seq& seq) const;
39 };
40 
41 // @@@ refactor below
42 
43 const Embl& ConvertibleData::to_embl(const Seq& seq) const {
44  Format inType = in.format();
45 
46  if (inType == EMBL) return dynamic_cast<const Embl&>(in);
47 
48  embl = new Embl;
49 
50  bool ok = true;
51  switch (inType) {
52  case GENBANK: ok = gtoe(dynamic_cast<const GenBank&>(in), dynamic_cast<Embl&>(*embl), seq); break;
53  case MACKE: ok = mtoe(dynamic_cast<const Macke&>(in), dynamic_cast<Embl&>(*embl), seq); break;
54  default: throw_conversion_not_implemented(inType, EMBL);
55  }
56  if (!ok) throw_conversion_failure(inType, EMBL);
57 
58  ca_assert(!embl.isNull());
59  return dynamic_cast<const Embl&>(*embl);
60 }
61 
62 const GenBank& ConvertibleData::to_genbank(const Seq& seq) const {
63  Format inType = in.format();
64 
65  if (inType == GENBANK) return dynamic_cast<const GenBank&>(in);
66 
67  gbk = new GenBank;
68 
69  bool ok = true;
70  switch (inType) {
71  case EMBL: ok = etog(dynamic_cast<const Embl&>(in), dynamic_cast<GenBank&>(*gbk), seq); break;
72  case MACKE: ok = mtog(dynamic_cast<const Macke&>(in), dynamic_cast<GenBank&>(*gbk), seq); break;
73  default: throw_conversion_not_implemented(inType, GENBANK);
74  }
75  if (!ok) throw_conversion_failure(inType, GENBANK);
76 
77  ca_assert(!gbk.isNull());
78  return dynamic_cast<const GenBank&>(*gbk);
79 }
80 
81 const Macke& ConvertibleData::to_macke(const Seq& seq) const {
82  Format inType = in.format();
83 
84  if (inType == MACKE) return dynamic_cast<const Macke&>(in);
85 
86  macke = new Macke;
87 
88  bool ok = true;
89  switch (inType) {
90  case EMBL: ok = etom(dynamic_cast<const Embl&>(in), dynamic_cast<Macke&>(*macke), seq); break;
91  case GENBANK: ok = gtom(dynamic_cast<const GenBank&>(in), dynamic_cast<Macke&>(*macke)); break;
92  default: throw_conversion_not_implemented(inType, MACKE);
93  }
94  if (!ok) throw_conversion_failure(inType, MACKE);
95 
96  ca_assert(!macke.isNull());
97  return dynamic_cast<const Macke&>(*macke);
98 }
99 
100 static void write_to_embl(FileWriter& write, const ConvertibleData& data, const Seq& seq) {
101  const Embl& embl = data.to_embl(seq);
102  embl_out(embl, seq, write);
103  write.seq_done();
104 }
105 static void write_to_genbank(FileWriter& write, const ConvertibleData& data, const Seq& seq) {
106  const GenBank& gbk = data.to_genbank(seq);
107  genbank_out(gbk, seq, write);
108  write.seq_done();
109 }
110 static void write_to_macke(FileWriter& write, const ConvertibleData& data, const Seq& seq, int phase, bool first) {
111  Format inType = data.format();
112  const Macke& macke = data.to_macke(seq);
113  switch (phase) {
114  case 0:
115  macke_seq_display_out(macke, write, inType, first);
116  break;
117  case 1:
118  macke_seq_info_out(macke, write);
119  break;
120  case 2:
121  macke_seq_data_out(seq, macke, write);
122  write.seq_done(); // count only in one and last phase!
123  break;
124  }
125 }
126 
127 static void to_macke(const FormattedFile& in, const char *outf) {
129  FileWriter write(outf);
130  SmartPtr<Warnings> suppress;
131 
132  macke_out_header(write);
133 
134  for (int phase = 0; phase<3; ++phase) {
135  bool first = true;
136  while (1) {
137  Seq seq;
138  if (!reader->read_one_entry(seq)) break;
139  write_to_macke(write, reader->get_data(), seq, phase, first);
140 
141  first = false;
142  }
143  if (phase<2) {
144  if (phase == 0) {
145  write.out("#-\n");
146  suppress = new Warnings; // no warning messages for phase 1+2
147  }
148  reader->rewind();
149  }
150  }
151  write.expect_written();
152 }
153 
154 void convert(const FormattedFile& in, const FormattedFile& out) {
155  if (str_equal(in.name(), out.name()))
156  throw_error(30, "Input file and output file must be different file");
157 
158  bool converted = true;
159  switch (out.type()) {
160  case EMBL:
161  case SWISSPROT:
162  case GENBANK: {
163  Format inType = in.type();
164  Format ouType = out.type();
165  if ((inType == GENBANK && ouType == SWISSPROT) ||
166  (inType == SWISSPROT && ouType == GENBANK) ||
167  (inType == EMBL && ouType == SWISSPROT) ||
168  (inType == SWISSPROT && ouType == EMBL))
169  {
170  converted = false;
171  break;
172  }
173 
175  FileWriter write(out.name());
176 
177  while (1) {
178  Seq seq;
179  if (!reader->read_one_entry(seq)) break;
180  if (ouType == GENBANK) {
181  write_to_genbank(write, reader->get_data(), seq);
182  }
183  else {
184  write_to_embl(write, reader->get_data(), seq);
185  }
186  }
187  write.expect_written();
188  break;
189  }
190  case MACKE: to_macke(in, out.name()); break;
191  case GCG: to_gcg(in, out.name()); break;
192  case NEXUS: to_paup(in, out.name()); break;
193  case PHYLIP: to_phylip(in, out.name(), false); break;
194  case FASTDNAML: to_phylip(in, out.name(), true); break;
195  case PRINTABLE: to_printable(in, out.name()); break;
196 
197  default: converted = false; break;
198  }
199  if (!converted) {
201  }
202 }
203 
Format
Definition: fun.h:10
int mtog(const Macke &macke, GenBank &gbk, const Seq &seq)
Definition: mg.cxx:414
void seq_done()
Definition: reader.h:148
void macke_seq_display_out(const Macke &macke, Writer &write, Format inType, bool first_sequence)
Definition: macke.cxx:85
void genbank_out(const GenBank &gbk, const Seq &seq, Writer &write)
Definition: genbank.cxx:450
void out(char ch) FINAL_OVERRIDE
Definition: reader.h:139
int gtom(const GenBank &gbk, Macke &macke)
Definition: mg.cxx:454
Definition: fun.h:22
const Macke & to_macke(const Seq &seq) const
Definition: convert.cxx:81
Format type() const
Definition: fun.h:62
void embl_out(const Embl &embl, const Seq &seq, Writer &write)
Definition: embl.cxx:360
#define ca_assert(cond)
Definition: global.h:33
void expect_written()
Definition: reader.cxx:97
static void write_to_macke(FileWriter &write, const ConvertibleData &data, const Seq &seq, int phase, bool first)
Definition: convert.cxx:110
void macke_seq_data_out(const Seq &seq, const Macke &macke, Writer &write)
Definition: macke.cxx:196
void to_gcg(const FormattedFile &in, const char *outf)
Definition: gcg.cxx:199
void convert(const FormattedFile &in, const FormattedFile &out)
Definition: convert.cxx:154
int mtoe(const Macke &macke, Embl &embl, const Seq &seq)
Definition: embl.cxx:772
void throw_conversion_not_implemented(Format inType, Format ouType)
Definition: fconv.cxx:35
bool isNull() const
test if SmartPtr is NULp
Definition: smartptr.h:248
static void write_to_embl(FileWriter &write, const ConvertibleData &data, const Seq &seq)
Definition: convert.cxx:100
Definition: fun.h:19
void to_paup(const FormattedFile &in, const char *outf)
Definition: paup.cxx:88
int etom(const Embl &embl, Macke &macke, const Seq &seq)
Definition: embl.cxx:543
FILE * seq
Definition: rns.c:46
Format format() const OVERRIDE=0
const char * name() const
Definition: fun.h:61
void macke_out_header(Writer &write)
Definition: macke.cxx:78
void to_phylip(const FormattedFile &in, const char *outf, bool for_fastdnaml)
Definition: phylip.cxx:64
static SmartPtr< FormatReader > create(const FormattedFile &in)
Definition: convert.cxx:13
Generic smart pointer.
Definition: smartptr.h:149
Definition: fun.h:12
Definition: seq.h:43
const Embl & to_embl(const Seq &seq) const
Definition: convert.cxx:43
static void write_to_genbank(FileWriter &write, const ConvertibleData &data, const Seq &seq)
Definition: convert.cxx:105
Format format() const
Definition: convert.cxx:34
int gtoe(const GenBank &gbk, Embl &embl, const Seq &seq)
Definition: embl.cxx:677
Definition: fun.h:14
void throw_conversion_failure(Format inType, Format ouType)
Definition: fconv.cxx:31
Definition: fun.h:13
void throw_unsupported_input_format(Format inType)
Definition: fconv.cxx:39
void throw_conversion_not_supported(Format inType, Format ouType)
Definition: fconv.cxx:27
Definition: fun.h:15
void to_printable(const FormattedFile &in, const char *outf)
Definition: printable.cxx:37
void throw_error(int error_num, const char *error_message) __ATTR__NORETURN
Definition: util.cxx:23
CONSTEXPR_INLINE bool str_equal(const char *s1, const char *s2)
Definition: global.h:95
static void to_macke(const FormattedFile &in, const char *outf)
Definition: convert.cxx:127
Definition: fun.h:23
Definition: fun.h:21
const GenBank & to_genbank(const Seq &seq) const
Definition: convert.cxx:62
#define NULp
Definition: cxxforward.h:116
ConvertibleData(const InputFormat &in_)
Definition: convert.cxx:32
Definition: fun.h:20
void macke_seq_info_out(const Macke &macke, Writer &write)
Definition: macke.cxx:162
int etog(const Embl &embl, GenBank &gbk, const Seq &seq)
Definition: embl.cxx:488