ARB
macke.cxx
Go to the documentation of this file.
1 // -------------------- Macke related subroutines --------------
2 
3 #include "macke.h"
4 #include "wrap.h"
5 #include "parser.h"
6 #include "rdp_info.h"
7 
8 
9 #define MACKELIMIT 10000
10 
11 static int macke_abbrev(const char *line, char *key, int index) {
12  // Get the key from a macke line.
13  // returns index behind delimiting ':'
14  index = Skip_white_space(line, index);
15  int len = parse_key_word(line+index, key, " :\t\n");
16  return index+len+1;
17 }
18 
19 static void macke_continue_line(const char *key, char *oldname, char*& var, Reader& reader) {
20  // Append macke continue line.
21 
22  for (++reader; reader.line(); ++reader) {
23  if (has_content(reader.line())) {
24  char name[TOKENSIZE];
25  int index = macke_abbrev(reader.line(), name, 0);
26 
27  if (!str_equal(name, oldname)) break;
28 
29  char newkey[TOKENSIZE];
30  index = macke_abbrev(reader.line(), newkey, index);
31  if (!str_equal(newkey, key)) break;
32 
33  skip_eolnl_and_append_spaced(var, reader.line() + index);
34  }
35  }
36 }
37 static void macke_one_entry_in(Reader& reader, const char *key, char *oldname, char*& var, int index) {
38  // Get one Macke entry.
39  if (has_content(var))
40  skip_eolnl_and_append_spaced(var, reader.line() + index);
41  else
42  freedup(var, reader.line() + index);
43 
44  macke_continue_line(key, oldname, var, reader);
45 }
46 
47 static void macke_read_seq(Seq& seq, char*& seqabbr, Reader& reader) {
48  ca_assert(seq.is_empty());
49  for (; reader.line(); ++reader) { // read in sequence data line by line
50  if (!has_content(reader.line())) continue;
51 
52  char name[TOKENSIZE];
53  int index = macke_abbrev(reader.line(), name, 0);
54 
55  if (seqabbr) {
56  if (!str_equal(seqabbr, name)) break; // stop if reached different abbrev
57  }
58  else {
59  seqabbr = nulldup(name);
60  }
61 
62  int seqnum;
63  char data[LINESIZE];
64  int scanned = sscanf(reader.line() + index, "%d%s", &seqnum, data);
65  if (scanned != 2) throw_errorf(80, "Failed to parse '%s'", reader.line());
66 
67  for (int indj = seq.get_len(); indj < seqnum; indj++) seq.add('.');
68  for (int indj = 0; data[indj] != '\n' && data[indj] != '\0'; indj++) seq.add(data[indj]);
69  }
70 }
71 
72 void macke_origin(Seq& seq, char*& seqabbr, Reader& reader) {
73  // Read in sequence data in macke file.
74  ca_assert(seqabbr); // = macke.seqabbr
75  macke_read_seq(seq, seqabbr, reader);
76 }
77 
78 void macke_out_header(Writer& write) {
79  // Output the Macke format header.
80  write.out("#-\n#-\n#-\teditor\n");
81  const char *date = today_date();
82  write.outf("#-\t%s\n#-\n#-\n", date);
83 }
84 
85 void macke_seq_display_out(const Macke& macke, Writer& write, Format inType, bool first_sequence) {
86  // Output the Macke format each sequence format (wot?)
87  if (first_sequence) {
88  write.outf("#-\tReference sequence: %s\n", macke.seqabbr);
89  write.out("#-\tAttributes:\n");
90  }
91 
92  write.out("#=\t\t");
93  if (write.out(macke.seqabbr)<8) write.out('\t');
94  write.out("\tin out vis prt ord ");
95  if (inType == SWISSPROT) {
96  write.out("pro lin n>c");
97  }
98  else {
99  write.out(macke.rna_or_dna == 'r' ? "rna" : "dna");
100  write.out(" lin 5>3");
101  }
102  write.out(" func");
103  if (first_sequence) write.out(" ref");
104  write.out('\n');
105 }
106 
107 static void macke_print_line(Writer& write, const char *prefix, const char *content) {
108  // print a macke line and wrap around line after MACKEMAXLINE column.
109  WrapMode(true).print(write, prefix, prefix, content, MACKEMAXLINE);
110 }
111 
112 static void macke_print_prefixed_line(const Macke& macke, Writer& write, const char *tag, const char *content) {
113  ca_assert(has_content(content));
114 
115  char prefix[LINESIZE];
116  sprintf(prefix, "#:%s:%s:", macke.seqabbr, tag);
117 
118  macke_print_line(write, prefix, content);
119 }
120 
121 static bool macke_print_prefixed_line_if_content(const Macke& macke, Writer& write, const char *tag, const char *content) {
122  if (!has_content(content)) return false;
123  macke_print_prefixed_line(macke, write, tag, content);
124  return true;
125 }
126 
// Key words recognized at the start of a remark by macke_is_genbank_entry_comment().
// Remarks starting with one of them are written as keyworded remark lines.
static const char *genbankEntryComments[] = {
    "KEYWORDS",
    "GenBank ACCESSION",

    "auth", "title", "jour",
    "standard",

    "Source of strain",
    "Former name", "Alternate name", "Common name",
    "Host organism",
    "RDP ID",
    "Sequencing methods",

    "3' end complete",
    "5' end complete",
};
144 
145 static bool macke_is_genbank_entry_comment(const char *Str) {
146  char keyword[TOKENSIZE];
147  macke_key_word(Str, 0, keyword);
148 
149  return lookup_keyword(keyword, genbankEntryComments) >= 0;
150 }
151 
152 static void macke_print_keyword_rem(const Macke& macke, int index, Writer& write) {
153  // Print out keyworded remark line in Macke file with wrap around functionality.
154  // (Those keywords are defined in GenBank COMMENTS by RDP group)
155  char first[LINESIZE]; sprintf(first, "#:%s:rem:", macke.seqabbr);
156  char other[LINESIZE+3]; sprintf(other, "%s:%*s", first, RDP_SUBKEY_INDENT, "");
157  const char *remark = macke.get_rem(index);
158 
159  WrapMode(true).print(write, first, other, remark, MACKEMAXLINE);
160 }
161 
162 void macke_seq_info_out(const Macke& macke, Writer& write) {
163  // Output sequence information
164 
165  macke_print_prefixed_line_if_content(macke, write, "name", macke.name);
166  macke_print_prefixed_line_if_content(macke, write, "strain", macke.strain);
167  macke_print_prefixed_line_if_content(macke, write, "subsp", macke.subspecies);
168  macke_print_prefixed_line_if_content(macke, write, "atcc", macke.atcc);
169  macke_print_prefixed_line_if_content(macke, write, "rna", macke.rna); // old version entry
170  macke_print_prefixed_line_if_content(macke, write, "date", macke.date);
171  macke_print_prefixed_line_if_content(macke, write, "acs", macke.acs)
172  || macke_print_prefixed_line_if_content(macke, write, "acs", macke.nbk); // old version entry
173  macke_print_prefixed_line_if_content(macke, write, "auth", macke.author);
174  macke_print_prefixed_line_if_content(macke, write, "jour", macke.journal);
175  macke_print_prefixed_line_if_content(macke, write, "title", macke.title);
176  macke_print_prefixed_line_if_content(macke, write, "who", macke.who);
177 
178  // print out remarks, wrap around if more than MACKEMAXLINE columns
179  for (int indi = 0; indi < macke.get_rem_count(); indi++) {
180  if (macke_is_genbank_entry_comment(macke.get_rem(indi))) {
181  macke_print_keyword_rem(macke, indi, write);
182  }
183  else { // general comment
184  macke_print_prefixed_line(macke, write, "rem", macke.get_rem(indi));
185  }
186  }
187 }
188 
189 int macke_key_word(const char *line, int index, char *key) {
190  // Find the key in Macke line.
191  // return position behind ':' delimiter
192  int len = parse_key_word(line+index, key, ":\n");
193  return len ? index+len+1 : index;
194 }
195 
196 void macke_seq_data_out(const Seq& seq, const Macke& macke, Writer& write) {
197  // Output Macke format sequence data
198  int indj, indk;
199 
200  if (seq.get_len() > MACKELIMIT) {
201  warningf(145, "Length of sequence data is %d over AE2's limit %d.", seq.get_len(), MACKELIMIT);
202  }
203 
204  const char *sequence = seq.get_seq();
205  for (indk = indj = 0; indk < seq.get_len(); indk++) {
206  if (indj == 0)
207  write.outf("%s%6d ", macke.seqabbr, indk);
208 
209  write.out(sequence[indk]);
210 
211  indj++;
212  if (indj == 50) {
213  indj = 0;
214  write.out('\n');
215  }
216  }
217 
218  if (indj != 0)
219  write.out('\n');
220  // every sequence
221 }
222 
223 void MackeReader::read_to_start() {
224  r1->skipOverLinesThat(Not(isMackeSeqInfo)); // skip to #:; where the sequence information is
225  r2->skipOverLinesThat(isMackeNonSeq); // skip to where sequence data starts
226  r3->skipOverLinesThat(Not(isMackeSeqHeader)); // skip to #=; where sequence first appears
227 }
228 
229 
230 MackeReader::MackeReader(const char *inName_)
231  : inName(ARB_strdup(inName_)),
232  seqabbr(dummy),
233  dummy(NULp),
234  r1(new Reader(inName)),
235  r2(new Reader(inName)),
236  r3(new Reader(inName)),
237  using_reader(&r1)
238 {
239  read_to_start();
240 }
241 
243  char *msg = NULp;
245 
246  ca_assert(using_reader);
247  delete *using_reader; *using_reader = NULp;
248 
249  // avoid that all 3 readers decorate the error
250  if (exc) msg = ARB_strdup(exc->get_msg());
251  delete r3; r3 = NULp;
252  delete r2; r2 = NULp;
253  delete r1; r1 = NULp;
254  if (exc) { exc->replace_msg(msg); free(msg); }
255 
256  free(inName);
257 }
258 
259 class MackeParser : public Parser {
260  Macke& macke;
261 
262 public:
263  MackeParser(Macke& macke_, Seq& seq_, Reader& reader_) : Parser(seq_, reader_), macke(macke_) {}
264 
266  ca_assert(0); // @@@ unused yet
267  }
268 };
269 
270 
271 bool MackeReader::macke_in(Macke& macke) {
272  // Read in one sequence data from Macke file.
273  char oldname[TOKENSIZE], name[TOKENSIZE];
274  char key[TOKENSIZE];
275  int index;
276 
277  // r1 points to sequence information
278  // r2 points to sequence data
279  // r3 points to sequence names
280 
281  usingReader(r3);
282  if (!r3->line() || !isMackeSeqHeader(r3->line())) return false;
283 
284  // skip to next "#:" line or end of file
285  usingReader(r1);
287 
288  // read in sequence name
289  usingReader(r3);
290  index = macke_abbrev(r3->line(), oldname, 2);
291  freedup(macke.seqabbr, oldname);
292  seqabbr = macke.seqabbr;
293 
294  // read sequence information
295  usingReader(r1);
296  for (index = macke_abbrev(r1->line(), name, 2);
297  r1->line() && isMackeSeqInfo(r1->line()) && str_equal(name, oldname);
298  )
299  {
300  index = macke_abbrev(r1->line(), key, index);
301  if (str_equal(key, "name")) {
302  macke_one_entry_in(*r1, "name", oldname, macke.name, index);
303  }
304  else if (str_equal(key, "atcc")) {
305  macke_one_entry_in(*r1, "atcc", oldname, macke.atcc, index);
306  }
307  else if (str_equal(key, "rna")) {
308  // old version entry
309  macke_one_entry_in(*r1, "rna", oldname, macke.rna, index);
310  }
311  else if (str_equal(key, "date")) {
312  macke_one_entry_in(*r1, "date", oldname, macke.date, index);
313  }
314  else if (str_equal(key, "nbk")) {
315  // old version entry
316  macke_one_entry_in(*r1, "nbk", oldname, macke.nbk, index);
317  }
318  else if (str_equal(key, "acs")) {
319  macke_one_entry_in(*r1, "acs", oldname, macke.acs, index);
320  }
321  else if (str_equal(key, "subsp")) {
322  macke_one_entry_in(*r1, "subsp", oldname, macke.subspecies, index);
323  }
324  else if (str_equal(key, "strain")) {
325  macke_one_entry_in(*r1, "strain", oldname, macke.strain, index);
326  }
327  else if (str_equal(key, "auth")) {
328  macke_one_entry_in(*r1, "auth", oldname, macke.author, index);
329  }
330  else if (str_equal(key, "title")) {
331  macke_one_entry_in(*r1, "title", oldname, macke.title, index);
332  }
333  else if (str_equal(key, "jour")) {
334  macke_one_entry_in(*r1, "jour", oldname, macke.journal, index);
335  }
336  else if (str_equal(key, "who")) {
337  macke_one_entry_in(*r1, "who", oldname, macke.who, index);
338  }
339  else if (str_equal(key, "rem")) {
340  macke.add_remark(r1->line()+index);
341  ++(*r1);
342  }
343  else {
344  warningf(144, "Unidentified AE2 key word #%s#", key);
345  ++(*r1);
346  }
347  if (r1->line() && isMackeSeqInfo(r1->line())) index = macke_abbrev(r1->line(), name, 2);
348  else index = 0;
349  }
350 
351  ++(*r3);
352 
353  return true;
354 }
355 
357  data.reinit();
358  if (!macke_in(data) || !read_seq_data(seq)) abort();
359  seq.set_id(data.get_id());
360  return ok();
361 }
362 
363 
static bool macke_is_genbank_entry_comment(const char *Str)
Definition: macke.cxx:145
Definition: reader.h:21
Format
Definition: fun.h:10
void macke_seq_display_out(const Macke &macke, Writer &write, Format inType, bool first_sequence)
Definition: macke.cxx:85
void throw_errorf(int error_num, const char *error_messagef,...) __ATTR__FORMAT(2) __ATTR__NORETURN
Definition: util.cxx:41
~MackeReader() OVERRIDE
Definition: macke.cxx:242
#define MACKEMAXLINE
Definition: defs.h:23
void warningf(int warning_num, const char *warning_messagef,...) __ATTR__FORMAT(2)
Definition: util.cxx:66
void add(char c)
Definition: seq.h:98
#define ca_assert(cond)
Definition: global.h:33
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void macke_seq_data_out(const Seq &seq, const Macke &macke, Writer &write)
Definition: macke.cxx:196
static void macke_print_prefixed_line(const Macke &macke, Writer &write, const char *tag, const char *content)
Definition: macke.cxx:112
Definition: macke.h:201
const char * get_msg() const
Definition: global.h:63
static const char * genbankEntryComments[]
Definition: macke.cxx:127
Definition: wrap.h:4
#define MACKELIMIT
Definition: macke.cxx:9
Definition: reader.h:95
int macke_key_word(const char *line, int index, char *key)
Definition: macke.cxx:189
CONSTEXPR_INLINE bool isMackeSeqInfo(const char *line)
Definition: macke.h:197
FILE * seq
Definition: rns.c:46
CONSTEXPR_INLINE bool has_content(const char *field)
Definition: global.h:127
static bool macke_print_prefixed_line_if_content(const Macke &macke, Writer &write, const char *tag, const char *content)
Definition: macke.cxx:121
MackeReader(const char *inName_)
Definition: macke.cxx:230
void parse_section() OVERRIDE
Definition: macke.cxx:265
void macke_origin(Seq &seq, char *&seqabbr, Reader &reader)
Definition: macke.cxx:72
void macke_out_header(Writer &write)
Definition: macke.cxx:78
void print(Writer &write, const char *first_prefix, const char *other_prefix, const char *content, int max_width) const
Definition: wrap.cxx:52
static void macke_print_line(Writer &write, const char *prefix, const char *content)
Definition: macke.cxx:107
static void macke_print_keyword_rem(const Macke &macke, int index, Writer &write)
Definition: macke.cxx:152
Definition: seq.h:43
virtual void out(char ch)=0
static void macke_read_seq(Seq &seq, char *&seqabbr, Reader &reader)
Definition: macke.cxx:47
void skip_eolnl_and_append_spaced(char *&string1, const char *string2)
Definition: util.cxx:135
virtual int outf(const char *format,...) __ATTR__FORMAT_MEMBER(1)
Definition: reader.cxx:121
static void macke_continue_line(const char *key, char *oldname, char *&var, Reader &reader)
Definition: macke.cxx:19
static void macke_one_entry_in(Reader &reader, const char *key, char *oldname, char *&var, int index)
Definition: macke.cxx:37
static int macke_abbrev(const char *line, char *key, int index)
Definition: macke.cxx:11
void replace_msg(const char *new_msg) const
Definition: global.h:64
int Skip_white_space(const char *line, int index)
Definition: util.cxx:84
int get_len() const
Definition: seq.h:107
bool is_empty() const
Definition: seq.h:108
Definition: fun.h:15
static const Convaln_exception * exception_thrown()
Definition: global.h:71
const char * today_date()
Definition: date.cxx:214
#define OVERRIDE
Definition: cxxforward.h:112
#define lookup_keyword(keyword, table)
Definition: global.h:144
CONSTEXPR_INLINE bool str_equal(const char *s1, const char *s2)
Definition: global.h:95
virtual const char * name() const =0
CONSTEXPR_INLINE bool isMackeSeqHeader(const char *line)
Definition: macke.h:196
#define TOKENSIZE
Definition: defs.h:18
const char * line() const
Definition: reader.h:43
static int line
Definition: arb_a2ps.c:296
#define NULp
Definition: cxxforward.h:116
#define RDP_SUBKEY_INDENT
Definition: defs.h:30
int parse_key_word(const char *line, char *key, const char *separator)
Definition: util.cxx:265
const char * get_seq() const
Definition: seq.h:110
void set_id(const char *id_)
Definition: seq.h:84
bool read_one_entry(Seq &seq) OVERRIDE __ATTR__USERESULT
Definition: macke.cxx:356
MackeParser(Macke &macke_, Seq &seq_, Reader &reader_)
Definition: macke.cxx:263
#define LINESIZE
Definition: defs.h:16
CONSTEXPR_INLINE bool isMackeNonSeq(const char *line)
Definition: macke.h:199
void macke_seq_info_out(const Macke &macke, Writer &write)
Definition: macke.cxx:162
void skipOverLinesThat(const PRED &match_condition)
Definition: reader.h:58