13 for (++reader; reader.
line(); ++reader) {
20 strcpy(temp, reader.
line() + ind);
29 freedup(entry, reader.
line() + index);
36 freedup(embl.dateu, reader.
line() + index);
44 freedup(embl.datec, reader.line() + index);
48 if (!reader.line())
break;
55 warning(33,
"one DT line is missing");
65 if (len > 2 && (ref.
title[0] !=
'"' || ref.
title[len - 3] !=
'"')) {
67 if (ref.
title[0] !=
'"')
72 if ((len > 2 && ref.
title[len - 3]
78 freedup(ref.
title, temp);
88 if (line[len] ==
':') {
102 freedup(datastring, reader.
line() + index);
113 strcpy(temp, reader.
line() + index);
142 for (++reader; reader.
line(); ++reader) {
153 parse_keyed_section(key);
166 for (
int idx = 5; line[idx]; ++idx) {
168 if (ch ==
' ' || ch ==
'\n')
continue;
169 if (idx>70)
continue;
175 void EmblParser::parse_keyed_section(
const char *key) {
195 if (!
has_content(embl.keywords)) freedup(embl.keywords,
".\n");
202 Emblref& ref = embl.get_latest_ref();
207 Emblref& ref = embl.get_latest_ref();
212 Emblref& ref = embl.get_latest_ref();
217 Emblref& ref = embl.get_latest_ref();
221 embl.resize_refs(embl.get_refcount()+1);
253 if (followed_by_spacer) write.
out(
"XX\n");
270 if (compX ==
' ')
return;
272 write.
outf(
"CC %c' end complete: %s\n", X, compX ==
'y' ?
"Yes" :
"No");
278 const OrgInfo& orginf = embl.comments.orginf;
280 write.
out(
"CC Organism information\n");
290 const SeqInfo& seqinf = embl.comments.seqinf;
292 write.
outf(
"CC Sequence information (bases 1 to %d)\n", seq.
get_len());
309 write.
outf(
"SQ Sequence %d BP; %d A; %d C; %d G; %d T; %d other;\n",
325 if (dt1 || dt2) write.
out(
"XX\n");
333 write.
out(
"OC No information.\n");
338 for (
int indi = 0; indi < embl.get_refcount(); indi++) {
339 const Emblref& ref = embl.get_ref(indi);
341 write.
outf(
"RN [%d]\n", indi + 1);
346 else write.
out(
"RT ;\n");
368 int indi, indk, len, index;
372 for (indi = index = 0, len =
str0len(Str) - 1; indi < len; indi++, index++) {
373 if (Str[indi] ==
',' || Str[indi] ==
';') {
374 token[index--] =
'\0';
376 Append(author, (Str[indi] ==
',') ?
"," :
" and");
379 for (indk = 0; index > 0 && indk == 0; index--)
380 if (token[index] ==
' ') {
388 token[index] = Str[indi];
395 char *new_journal =
NULp;
401 int len = strlen(eJournal);
403 new_journal =
strndup(eJournal, len-2);
404 Append(new_journal,
"\n");
407 const char *colon = strchr(eJournal,
':');
410 const char *p1 = strchr(colon+1,
'(');
412 const char *p2 = strchr(p1+1,
')');
413 if (p2 && strcmp(p2+1,
".\n") == 0) {
416 int l1 = colon-eJournal;
420 char *pos = new_journal;
422 memcpy(pos, eJournal, l1); pos += l1;
423 memcpy(pos,
", ", 2); pos += 2;
424 memcpy(pos, colon+1, l2); pos += l2;
425 memcpy(pos,
" ", 1); pos += 1;
426 memcpy(pos, p1, l3); pos += l3;
427 memcpy(pos,
"\n", 2);
433 warningf(148,
"Removed unknown journal format: %s", eJournal);
442 int indi, len,
start, end;
445 gbk.resize_refs(embl.get_refcount());
447 for (indi = 0; indi < embl.get_refcount(); indi++) {
448 const Emblref& ref = embl.get_ref(indi);
452 sscanf(ref.
processing,
"%d %d", &start, &end) == 2)
455 sprintf(temp,
"%d (bases %d to %d)\n", (indi + 1), start, end);
458 sprintf(temp,
"%d\n", (indi + 1));
461 freedup(gref.
ref, temp);
466 if (len > 2 && ref.
title[0] ==
'"' && ref.
title[len - 2] ==
';' && ref.
title[len - 3] ==
'"') {
467 ref.
title[len - 3] =
'\n';
468 ref.
title[len - 2] =
'\0';
470 ref.
title[len - 3] =
'"';
471 ref.
title[len - 2] =
';';
498 ASSERT_RESULT(
int, 3, sscanf(embl.dr,
"%s %s %s", t1, t2, t3));
511 for (indi =
str0len(temp); indi < 13; temp[indi++] =
' ') {}
514 sprintf((temp + 10),
"%7d bp RNA RNA %s\n",
518 freedup(gbk.locus, temp);
525 freedup(gbk.organism, embl.os);
527 freedup(gbk.definition, embl.os);
533 if (
has_content(embl.keywords) && embl.keywords[0] !=
'.') {
534 freedup(gbk.keywords, embl.keywords);
538 gbk.comments.set_content_from(embl.comments);
546 return etog(embl, gbk, seq) &&
gtom(gbk, macke);
554 #define TEST_EXPECT_ETOG_JOURNAL_PARSES(i,o) \
556 char *dup = ARB_strdup(i); \
557 char *res = etog_journal(dup); \
558 TEST_EXPECT_EQUAL(res, o); \
563 void TEST_BASIC_etog_journal() {
565 TEST_EXPECT_ETOG_JOURNAL_PARSES(
"Gene 134:283-287(1993).\n",
566 "Gene 134, 283-287 (1993)\n");
567 TEST_EXPECT_ETOG_JOURNAL_PARSES(
"J. Exp. Med. 179:1809-1821(1994).\n",
568 "J. Exp. Med. 179, 1809-1821 (1994)\n");
569 TEST_EXPECT_ETOG_JOURNAL_PARSES(
"Unpublished whatever.\n",
570 "Unpublished whatever\n");
571 TEST_EXPECT_ETOG_JOURNAL_PARSES(
"bla bla bla.\n",
573 TEST_EXPECT_ETOG_JOURNAL_PARSES(
"bla bla bla\n",
581 int indi, len, index, odd;
585 auth = nulldup(author);
591 Append(Str, auth + index + 4);
594 Str = nulldup(author);
596 for (indi = 0, len =
str0len(Str), odd = 1; indi < len; indi++) {
597 if (Str[indi] ==
',') {
614 int indi, indj, index, len;
618 journal = nulldup(Str);
624 journal = nulldup(Str);
625 for (indi = indj = index = 0, len =
str0len(journal); indi < len; indi++, indj++) {
626 if (journal[indi] ==
',') {
631 else if (journal[indi] ==
' ' && index) {
635 journal[indj] = journal[indi];
638 journal[indj] =
'\0';
644 if (gbk.has_refs()) {
645 embl.resize_refs(gbk.get_refcount());
648 for (
int indi = 0; indi < gbk.get_refcount(); indi++) {
649 Emblref& ref = embl.get_ref(indi);
662 int refnum,
start = 0, end = 0;
665 if (!gref.
ref || sscanf(gref.
ref,
"%d %s %d %s %d %s", &refnum, t1, &start, t2, &end, t3) != 6) {
681 strcpy(temp, gbk.get_id());
684 for (
int indi =
min(
str0len(temp), 9); indi < 10; indi++)
687 sprintf(temp + 10,
"preliminary; RNA; UNA; %d BP.\n", seq.
get_len());
688 freedup(embl.ID, temp);
694 freedup(embl.accession, gbk.accession);
698 char *date = gbk.get_date();
700 freeset(embl.dateu,
strf(
"%s (Rel. 1, Last updated, Version 1)\n", date));
701 freeset(embl.datec,
strf(
"%s (Rel. 1, Created)\n", date));
713 freedup(embl.keywords,
".\n");
729 sprintf(temp,
"RDP; %s; %s.\n", rdpid, token);
732 sprintf(temp,
"RDP; %s.\n", token);
734 freedup(embl.dr, temp);
736 embl.comments.set_content_from(gbk.comments);
743 char*& others = embl.comments.others;
747 bool have_strain = ridx >= 0 &&
stristr(others+ridx,
"strain=");
751 Append(others,
"*source: strain=");
752 Append(others, macke.strain);
763 Append(others,
"*source: subspecies=");
764 Append(others, macke.subspecies);
CONSTEXPR_INLINE int str0len(const char *str)
int mtog(const Macke &macke, GenBank &gbk, const Seq &seq)
static void embl_origin(Seq &seq, Reader &reader)
int gtom(const GenBank &gbk, Macke &macke)
static void etog_convert_references(const Embl &embl, GenBank &gbk)
void embl_out(const Embl &embl, const Seq &seq, Writer &write)
void warningf(int warning_num, const char *warning_messagef,...) __ATTR__FORMAT(2)
static void embl_one_entry(Reader &reader, char *&entry, const char *key)
const char * stristr(const char *str, const char *substring)
void(* RDP_comment_parser)(char *&datastring, int start_index, Reader &reader)
int find_pattern(const char *text, const char *pattern)
void embl_print_completeness(Writer &write, char compX, char X)
void skip_eolnl_and_append(char *&string1, const char *string2)
#define ASSERT_RESULT(Type, Expected, Expr)
char * ARB_strdup(const char *str)
void warning(int warning_num, const char *warning_message)
char * strf(const char *format,...) __ATTR__FORMAT(1)
static void embl_print_comment_if_content(Writer &write, const char *key, const char *content)
int mtoe(const Macke &macke, Embl &embl, const Seq &seq)
static void embl_out_origin(const Seq &seq, Writer &write)
bool read_one_entry(Seq &seq) OVERRIDE __ATTR__USERESULT
void count(BaseCounts &counter) const
static char * gtoe_journal(char *Str)
static char * gtoe_author(char *author)
const char * genbank_date(const char *other_date)
int etom(const Embl &embl, Macke &macke, const Seq &seq)
CONSTEXPR_INLINE bool has_content(const char *field)
static HelixNrInfo * start
int comment_subkey(const char *line, char *key)
static void gtoe_reference(const GenBank &gbk, Embl &embl)
static void embl_out_comments(const Embl &embl, const Seq &seq, Writer &write)
void print(Writer &write, const char *first_prefix, const char *other_prefix, const char *content, int max_width) const
bool copy_content(char *&entry, const char *content)
int find_subspecies(const char *str, char expect_behind)
static void embl_comments(Embl &embl, Reader &reader)
static char * etog_author(char *Str)
virtual void out(char ch)=0
static void embl_skip_unidentified(const char *pattern, Reader &reader)
void skip_eolnl_and_append_spaced(char *&string1, const char *string2)
CONSTEXPR_INLINE bool is_end_mark(char ch)
virtual int outf(const char *format,...) __ATTR__FORMAT_MEMBER(1)
bool is_embl_comment(const char *line)
int gtoe(const GenBank &gbk, Embl &embl, const Seq &seq)
static bool embl_print_lines_if_content(Writer &write, const char *key, const char *content, const WrapMode &wrapMode, bool followed_by_spacer)
void embl_key_word(const char *line, int index, char *key)
void scan_token_or_die(char *to, const char *from)
int Skip_white_space(const char *line, int index)
void embl_out_header(const Embl &embl, const Seq &seq, Writer &write)
static int partial_mtoe(const Macke &macke, Embl &embl)
void parse_section() OVERRIDE
static void embl_correct_title(Emblref &ref)
const char * today_date()
void Append(char *&string1, const char *string2)
static void embl_date(Embl &embl, Reader &reader)
void ARB_realloc(TYPE *&tgt, size_t nelem)
bool scan_token(char *to, const char *from) __ATTR__USERESULT
CONSTEXPR_INLINE bool str_equal(const char *s1, const char *s2)
bool parse_RDP_comment(RDP_comments &comments, RDP_comment_parser one_comment_entry, const char *key, int index, Reader &reader)
#define RDP_CONTINUED_INDENT
static void embl_one_comment_entry(char *&datastring, int start_index, Reader &reader)
static int pattern[maxsites+1]
static char * etog_journal(const char *eJournal)
const char * line() const
CONSTEXPR_INLINE int count_spaces(const char *str)
#define RDP_SUBKEY_INDENT
int parse_key_word(const char *line, char *key, const char *separator)
char * strndup(const char *str, int len)
int skip_pattern(const char *text, const char *pattern)
CONSTEXPR_INLINE bool is_sequence_terminator(const char *str)
void out(Writer &write, Format outType) const
void terminate_with(char *&str, char ch)
static void embl_continue_line(const char *pattern, char *&Str, Reader &reader)
int etog(const Embl &embl, GenBank &gbk, const Seq &seq)
static void embl_print_lines(Writer &write, const char *key, const char *content, const WrapMode &wrapMode)