ARB
phylip.cxx
Go to the documentation of this file.
1 #include "input_format.h"
2 #include "reader.h"
3 #include "ali.h"
4 
5 #include <cerrno>
6 
7 static void phylip_print_line(const Seq& seq, int index, Writer& write) {
8  // Print phylip line.
9  ca_assert(seq.get_len()>0);
10 
11  int length;
12  if (index == 0) {
13  int bnum;
14  const char *name = seq.get_id();
15  int nlen = str0len(name);
16  if (nlen > 10) {
17  // truncate id length of sequence ID is greater than 10
18  for (int indi = 0; indi < 10; indi++) write.out(name[indi]);
19  bnum = 1;
20  }
21  else {
22  write.out(name);
23  bnum = 10 - nlen + 1;
24  }
25  // fill in blanks to make up 10 chars for ID.
26  for (int indi = 0; indi < bnum; indi++) write.out(' ');
27  length = SEQLINE - 10;
28  }
29  else if (index >= seq.get_len()) {
30  length = 0;
31  }
32  else {
33  length = SEQLINE;
34  }
35 
36  const char *sequence = seq.get_seq();
37  for (int indi = 0, indj = 0; indi < length; indi++) {
38  if ((index + indi) < seq.get_len()) {
39  char c = sequence[index + indi];
40 
41  if (c == '.')
42  c = '?';
43  write.out(c);
44  indj++;
45  if (indj == 10 && (index + indi) < (seq.get_len() - 1) && indi < (length - 1)) {
46  write.out(' ');
47  indj = 0;
48  }
49  }
50  else
51  break;
52  }
53  write.out('\n');
54 }
55 
56 static void inject_STDIN(FileWriter& write) {
57  while (1) {
58  int ch = fgetc(stdin);
59  if (ch == EOF) break;
60  write.out(ch);
61  }
62 }
63 
64 void to_phylip(const FormattedFile& in, const char *outf, bool for_fastdnaml) {
65  // Convert from some format to PHYLIP format.
66  //
67  // if 'for_fastdnaml' is true, then
68  // - read extra parameters from STDIN and merge them at end of first line.
69  // Info on STDIN normally is generated by arb_export_rates and
70  // is expected by arb_fastdnaml.
71 
72  if (!is_input_format(in.type())) {
74  }
75 
76  FileWriter write(outf);
77 
78  if (write.get_FILE() == stdout) {
79  ca_assert(0); // can't use stdout (because rewind is used below)
80  throw_error(140, "Cannot write to standard output");
81  }
82 
83  Alignment ali;
84  read_alignment(ali, in);
85 
86  int maxsize = ali.get_max_len();
87  int total_seq = ali.get_count();
88  int current = 0;
89  int headersize1 = write.outf("%8d %8d", maxsize, current);
90 
91  if (for_fastdnaml) inject_STDIN(write);
92  write.out('\n');
93 
94  while (maxsize > current) {
95  for (int indi = 0; indi < total_seq; indi++) {
96  phylip_print_line(ali.get(indi), current, write);
97  }
98  if (current == 0)
99  current += (SEQLINE - 10);
100  else
101  current += SEQLINE;
102  if (maxsize > current)
103  write.out('\n');
104  }
105  // rewrite output header
106  errno = 0;
107  rewind(write.get_FILE());
108  ca_assert(errno == 0);
109  if (errno) {
110  perror("rewind error");
111  throw_errorf(141, "Failed to rewind file (errno=%i)", errno);
112  }
113 
114  int headersize2 = write.outf("%8d %8d", total_seq, maxsize);
115 
116  if (headersize1 != headersize2) {
117  ca_assert(0);
118  throw_errorf(142, "Failed to rewrite header (headersize differs: %i != %i)", headersize1, headersize2);
119  }
120 
121  write.seq_done(ali.get_count());
122  write.expect_written();
123 }
124 
CONSTEXPR_INLINE int str0len(const char *str)
Definition: global.h:98
void seq_done()
Definition: reader.h:148
int get_count() const
Definition: ali.h:15
void out(char ch) FINAL_OVERRIDE
Definition: reader.h:139
void throw_errorf(int error_num, const char *error_messagef,...) __ATTR__FORMAT(2) __ATTR__NORETURN
Definition: util.cxx:41
Definition: ali.h:11
Format type() const
Definition: fun.h:62
#define ca_assert(cond)
Definition: global.h:33
void expect_written()
Definition: reader.cxx:97
const char * get_id() const
Definition: seq.h:93
Definition: reader.h:95
static void phylip_print_line(const Seq &seq, int index, Writer &write)
Definition: phylip.cxx:7
FILE * seq
Definition: rns.c:46
int get_max_len() const
Definition: ali.h:22
void to_phylip(const FormattedFile &in, const char *outf, bool for_fastdnaml)
Definition: phylip.cxx:64
Definition: seq.h:43
virtual void out(char ch)=0
FILE * get_FILE()
Definition: reader.h:136
CONSTEXPR_INLINE bool is_input_format(Format inType)
Definition: fun.h:28
const Seq & get(int idx) const
Definition: ali.h:19
void throw_conversion_not_supported(Format inType, Format ouType)
Definition: fconv.cxx:27
int get_len() const
Definition: seq.h:107
void throw_error(int error_num, const char *error_message) __ATTR__NORETURN
Definition: util.cxx:23
static void inject_STDIN(FileWriter &write)
Definition: phylip.cxx:56
Definition: fun.h:21
#define SEQLINE
Definition: defs.h:14
void read_alignment(Alignment &ali, const FormattedFile &in)
Definition: seq.cxx:46
const char * get_seq() const
Definition: seq.h:110
int outf(const char *format,...) OVERRIDE __ATTR__FORMAT_MEMBER(1)
Definition: reader.cxx:112
size_t length