ARB
paup.cxx
Go to the documentation of this file.
1 #include "input_format.h"
2 #include "reader.h"
3 #include "paup.h"
4 #include "ali.h"
5 
6 static void paup_verify_name(char*& Str) {
7  // Verify short_id in NEXUS format.
8  if (strpbrk(Str, "*(){/,;_=:\\\'")) {
9  char temp[TOKENSIZE];
10  temp[0] = '\'';
11 
12  int len = str0len(Str);
13  int indi = 0;
14  int index = 1;
15  for (; indi < len; indi++, index++) {
16  temp[index] = Str[indi];
17  if (Str[indi] == '\'') temp[++index] = '\'';
18  }
19  temp[index++] = '\'';
20  temp[index] = '\0';
21 
22  freedup(Str, temp);
23  }
24 }
25 
26 static void paup_print_line(const Seq& seq, int offset, int first_line, Writer& write) {
27  // print paup file.
28  int length = SEQLINE - 10;
29  write.out(" ");
30 
31  int indi;
32 
33  const char *id = seq.get_id();
34  for (indi = 0; indi < 10 && id[indi]; indi++) // truncate id to 10 characters
35  write.out(id[indi]);
36 
37  if (offset < seq.get_len()) {
38  for (; indi < 11; indi++) write.out(' ');
39 
40  const char *sequence = seq.get_seq();
41 
42  int indj = 0;
43  for (indi = indj = 0; indi < length; indi++) {
44  if ((offset + indi) < seq.get_len()) {
45  write.out(sequence[offset + indi]);
46  indj++;
47  if (indj == 10 && indi < (length - 1) && (indi + offset) < (seq.get_len() - 1)) {
48  write.out(' ');
49  indj = 0;
50  }
51  }
52  else
53  break;
54  }
55  }
56 
57  if (first_line)
58  write.outf(" [%d - %d]", offset + 1, (offset + indi));
59 
60  write.out('\n');
61 }
62 
63 static void paup_print_headerstart(Writer& write) {
64  write.out("#NEXUS\n");
65  write.outf("[! RDP - the Ribosomal Database Project, (%s).]\n", today_date());
66  write.out("[! To get started, send HELP to rdp@info.mcs.anl.gov ]\n");
67  write.out("BEGIN DATA;\n DIMENSIONS\n");
68 }
69 
70 static void paup_print_header_counters(Writer& write) {
71  write.outf(" NTAX = %6s\n NCHAR = %6s\n ;\n", "", "");
72 }
73 static void paup_print_header_counters(Writer& write, int total_seq, int maxsize) {
74  write.outf(" NTAX = %6d\n NCHAR = %6d\n ;\n", total_seq, maxsize);
75 }
76 
77 static void paup_print_header(const Paup& paup, Writer& write) {
78  // Print out the header of each paup format.
81 
82  write.out(" FORMAT\n LABELPOS = LEFT\n");
83  write.outf(" MISSING = .\n EQUATE = \"%s\"\n", paup.equate);
84  write.outf(" INTERLEAVE\n DATATYPE = RNA\n GAP = %c\n ;\n", paup.gap);
85  write.out(" OPTIONS\n GAPMODE = MISSING\n ;\n MATRIX\n");
86 }
87 
88 void to_paup(const FormattedFile& in, const char *outf) {
89  // Convert from some format to NEXUS format.
90  if (!is_input_format(in.type())) {
92  }
93 
94  FileWriter write(outf);
95  Paup paup;
96 
97  paup_print_header(paup, write);
98 
99  Alignment ali;
100  read_alignment(ali, in);
101 
102  for (int i = 0; i<ali.get_count(); ++i) {
103  SeqPtr seq = ali.getSeqPtr(i);
104  char *name = ARB_strdup(seq->get_id());
105  paup_verify_name(name);
106  seq->replace_id(name);
107  ca_assert(seq->get_id());
108  free(name);
109  }
110 
111  int maxsize = ali.get_max_len();
112  int total_seq = ali.get_count();
113  int current = 0;
114 
115  while (maxsize > current) {
116  int first_line = 0;
117  for (int indi = 0; indi < total_seq; indi++) {
118  if (current < ali.get_len(indi))
119  first_line++;
120  paup_print_line(ali.get(indi), current, (first_line == 1), write);
121 
122  // Avoid repeating
123  if (first_line == 1)
124  first_line++;
125  }
126  current += (SEQLINE - 10);
127  if (maxsize > current) write.out('\n');
128  }
129 
130  write.out(" ;\nENDBLOCK;\n");
131 
132  // rewrite output header
133  rewind(write.get_FILE());
134  paup_print_headerstart(write);
135  paup_print_header_counters(write, total_seq, maxsize);
136 
137  write.seq_done(ali.get_count());
138  write.expect_written();
139 }
CONSTEXPR_INLINE int str0len(const char *str)
Definition: global.h:98
void seq_done()
Definition: reader.h:148
int get_count() const
Definition: ali.h:15
void out(char ch) FINAL_OVERRIDE
Definition: reader.h:139
Definition: ali.h:11
Format type() const
Definition: fun.h:62
#define ca_assert(cond)
Definition: global.h:33
void expect_written()
Definition: reader.cxx:97
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
int get_len(int idx) const
Definition: ali.h:18
const char * get_id() const
Definition: seq.h:93
Definition: reader.h:95
void to_paup(const FormattedFile &in, const char *outf)
Definition: paup.cxx:88
FILE * seq
Definition: rns.c:46
Definition: paup.h:4
int get_max_len() const
Definition: ali.h:22
SeqPtr getSeqPtr(int idx)
Definition: ali.h:20
Generic smart pointer.
Definition: smartptr.h:149
char gap
Definition: paup.h:8
Definition: seq.h:43
virtual void out(char ch)=0
FILE * get_FILE()
Definition: reader.h:136
CONSTEXPR_INLINE bool is_input_format(Format inType)
Definition: fun.h:28
virtual int outf(const char *format,...) __ATTR__FORMAT_MEMBER(1)
Definition: reader.cxx:121
static void paup_print_line(const Seq &seq, int offset, int first_line, Writer &write)
Definition: paup.cxx:26
const Seq & get(int idx) const
Definition: ali.h:19
void throw_conversion_not_supported(Format inType, Format ouType)
Definition: fconv.cxx:27
int get_len() const
Definition: seq.h:107
static void paup_verify_name(char *&Str)
Definition: paup.cxx:6
const char * today_date()
Definition: date.cxx:214
static void paup_print_header(const Paup &paup, Writer &write)
Definition: paup.cxx:77
#define TOKENSIZE
Definition: defs.h:18
#define SEQLINE
Definition: defs.h:14
static void paup_print_headerstart(Writer &write)
Definition: paup.cxx:63
static void paup_print_header_counters(Writer &write)
Definition: paup.cxx:70
void read_alignment(Alignment &ali, const FormattedFile &in)
Definition: seq.cxx:46
#define offset(field)
Definition: GLwDrawA.c:73
const char * get_seq() const
Definition: seq.h:110
const char * equate
Definition: paup.h:7
size_t length
Definition: fun.h:20