13     for (++reader; reader.
line(); ++reader) {
 
   20             strcpy(temp, reader.
line() + ind);
 
   29     freedup(entry, reader.
line() + index);
 
   36     freedup(embl.dateu, reader.
line() + index);
 
   44         freedup(embl.datec, reader.line() + index);
 
   48             if (!reader.line()) 
break;
 
   55         warning(33, 
"one DT line is missing");
 
   65     if (len > 2 && (ref.
title[0] != 
'"' || ref.
title[len - 3] != 
'"')) {
 
   67         if (ref.
title[0] != 
'"')
 
   72         if ((len > 2 && ref.
title[len - 3]
 
   78         freedup(ref.
title, temp);
 
   88     if (line[len] == 
':') {
 
  102     freedup(datastring, reader.
line() + index);
 
  113         strcpy(temp, reader.
line() + index);
 
  142     for (++reader; reader.
line(); ++reader) {
 
  153     parse_keyed_section(key);
 
  166         for (
int idx = 5; line[idx]; ++idx) {
 
  168             if (ch == 
' ' || ch == 
'\n') 
continue;
 
  169             if (idx>70) 
continue;
 
  175 void EmblParser::parse_keyed_section(
const char *key) {
 
  195         if (!
has_content(embl.keywords)) freedup(embl.keywords, 
".\n");
 
  202         Emblref& ref = embl.get_latest_ref();
 
  207         Emblref& ref = embl.get_latest_ref();
 
  212         Emblref& ref = embl.get_latest_ref();
 
  217         Emblref& ref = embl.get_latest_ref();
 
  221         embl.resize_refs(embl.get_refcount()+1);
 
  253         if (followed_by_spacer) write.
out(
"XX\n");
 
  270     if (compX == 
' ') 
return;
 
  272     write.
outf(
"CC     %c' end complete: %s\n", X, compX == 
'y' ? 
"Yes" : 
"No");
 
  278     const OrgInfo& orginf = embl.comments.orginf;
 
  280         write.
out(
"CC   Organism information\n");
 
  290     const SeqInfo& seqinf = embl.comments.seqinf;
 
  292         write.
outf(
"CC   Sequence information (bases 1 to %d)\n", seq.
get_len());
 
  309     write.
outf(
"SQ   Sequence %d BP; %d A; %d C; %d G; %d T; %d other;\n",
 
  325         if (dt1 || dt2) write.
out(
"XX\n");
 
  333         write.
out(
"OC   No information.\n");
 
  338     for (
int indi = 0; indi < embl.get_refcount(); indi++) {
 
  339         const Emblref& ref = embl.get_ref(indi);
 
  341         write.
outf(
"RN   [%d]\n", indi + 1);
 
  346         else write.
out(
"RT   ;\n");
 
  368     int  indi, indk, len, index;
 
  372     for (indi = index = 0, len = 
str0len(Str) - 1; indi < len; indi++, index++) {
 
  373         if (Str[indi] == 
',' || Str[indi] == 
';') {
 
  374             token[index--] = 
'\0';
 
  376                 Append(author, (Str[indi] == 
',') ? 
"," : 
" and");
 
  379             for (indk = 0; index > 0 && indk == 0; index--)
 
  380                 if (token[index] == 
' ') {
 
  388             token[index] = Str[indi];
 
  395     char *new_journal = 
NULp;
 
  401         int len = strlen(eJournal);
 
  403         new_journal = 
strndup(eJournal, len-2);
 
  404         Append(new_journal, 
"\n");
 
  407         const char *colon = strchr(eJournal, 
':');
 
  410             const char *p1 = strchr(colon+1, 
'(');
 
  412                 const char *p2 = strchr(p1+1, 
')');
 
  413                 if (p2 && strcmp(p2+1, 
".\n") == 0) {
 
  416                     int l1 = colon-eJournal;
 
  420                     char *pos = new_journal;
 
  422                     memcpy(pos, eJournal, l1); pos += l1;
 
  423                     memcpy(pos, 
", ",     2);  pos += 2;
 
  424                     memcpy(pos, colon+1,  l2); pos += l2;
 
  425                     memcpy(pos, 
" ",      1);  pos += 1;
 
  426                     memcpy(pos, p1,       l3); pos += l3;
 
  427                     memcpy(pos, 
"\n",     2);
 
  433             warningf(148, 
"Removed unknown journal format: %s", eJournal);
 
  442     int  indi, len, 
start, end;
 
  445     gbk.resize_refs(embl.get_refcount());
 
  447     for (indi = 0; indi < embl.get_refcount(); indi++) {
 
  448         const Emblref& ref  = embl.get_ref(indi);
 
  452             sscanf(ref.
processing, 
"%d %d", &start, &end) == 2)
 
  455             sprintf(temp, 
"%d  (bases %d to %d)\n", (indi + 1), start, end);
 
  458             sprintf(temp, 
"%d\n", (indi + 1));
 
  461         freedup(gref.
ref, temp);
 
  466             if (len > 2 && ref.
title[0] == 
'"' && ref.
title[len - 2] == 
';' && ref.
title[len - 3] == 
'"') {
 
  467                 ref.
title[len - 3] = 
'\n';
 
  468                 ref.
title[len - 2] = 
'\0';
 
  470                 ref.
title[len - 3] = 
'"';
 
  471                 ref.
title[len - 2] = 
';';
 
  498         ASSERT_RESULT(
int, 3, sscanf(embl.dr, 
"%s %s %s", t1, t2, t3));
 
  511     for (indi = 
str0len(temp); indi < 13; temp[indi++] = 
' ') {}
 
  514         sprintf((temp + 10), 
"%7d bp    RNA             RNA       %s\n",
 
  518     freedup(gbk.locus, temp);
 
  525         freedup(gbk.organism, embl.os);
 
  527             freedup(gbk.definition, embl.os);
 
  533     if (
has_content(embl.keywords) && embl.keywords[0] != 
'.') {
 
  534         freedup(gbk.keywords, embl.keywords);
 
  538     gbk.comments.set_content_from(embl.comments);
 
  546     return etog(embl, gbk, seq) && 
gtom(gbk, macke);
 
  554 #define TEST_EXPECT_ETOG_JOURNAL_PARSES(i,o)              \ 
  556         char *dup = ARB_strdup(i);                        \ 
  557         char *res = etog_journal(dup);                    \ 
  558         TEST_EXPECT_EQUAL(res, o);                        \ 
  563 void TEST_BASIC_etog_journal() {
 
  565     TEST_EXPECT_ETOG_JOURNAL_PARSES(
"Gene 134:283-287(1993).\n",
 
  566                                     "Gene 134, 283-287 (1993)\n");
 
  567     TEST_EXPECT_ETOG_JOURNAL_PARSES(
"J. Exp. Med. 179:1809-1821(1994).\n",
 
  568                                     "J. Exp. Med. 179, 1809-1821 (1994)\n");
 
  569     TEST_EXPECT_ETOG_JOURNAL_PARSES(
"Unpublished whatever.\n",
 
  570                                     "Unpublished whatever\n");
 
  571     TEST_EXPECT_ETOG_JOURNAL_PARSES(
"bla bla bla.\n",
 
  573     TEST_EXPECT_ETOG_JOURNAL_PARSES(
"bla bla bla\n",
 
  581     int   indi, len, index, odd;
 
  585     auth = nulldup(author);
 
  591         Append(Str, auth + index + 4);
 
  594         Str = nulldup(author);
 
  596     for (indi = 0, len = 
str0len(Str), odd = 1; indi < len; indi++) {
 
  597         if (Str[indi] == 
',') {
 
  614     int  indi, indj, index, len;
 
  618             journal = nulldup(Str);
 
  624     journal = nulldup(Str);
 
  625     for (indi = indj = index = 0, len = 
str0len(journal); indi < len; indi++, indj++) {
 
  626         if (journal[indi] == 
',') {
 
  631         else if (journal[indi] == 
' ' && index) {
 
  635             journal[indj] = journal[indi];
 
  638     journal[indj] = 
'\0';
 
  644     if (gbk.has_refs()) {
 
  645         embl.resize_refs(gbk.get_refcount());
 
  648     for (
int indi = 0; indi < gbk.get_refcount(); indi++) {
 
  649         Emblref&          ref  = embl.get_ref(indi);
 
  662         int refnum, 
start = 0, end = 0;
 
  665         if (!gref.
ref || sscanf(gref.
ref, 
"%d %s %d %s %d %s", &refnum, t1, &start, t2, &end, t3) != 6) {
 
  681         strcpy(temp, gbk.get_id());
 
  684         for (
int indi = 
min(
str0len(temp), 9); indi < 10; indi++)
 
  687         sprintf(temp + 10, 
"preliminary; RNA; UNA; %d BP.\n", seq.
get_len());
 
  688         freedup(embl.ID, temp);
 
  694         freedup(embl.accession, gbk.accession);
 
  698         char *date = gbk.get_date();
 
  700         freeset(embl.dateu, 
strf(
"%s (Rel. 1, Last updated, Version 1)\n", date));
 
  701         freeset(embl.datec, 
strf(
"%s (Rel. 1, Created)\n", date));
 
  713         freedup(embl.keywords, 
".\n");
 
  729             sprintf(temp, 
"RDP; %s; %s.\n", rdpid, token);
 
  732             sprintf(temp, 
"RDP; %s.\n", token);
 
  734         freedup(embl.dr, temp);
 
  736     embl.comments.set_content_from(gbk.comments);
 
  743     char*& others = embl.comments.others;
 
  747         bool have_strain = ridx >= 0 && 
stristr(others+ridx, 
"strain=");
 
  751             Append(others, 
"*source: strain=");
 
  752             Append(others, macke.strain);
 
  763             Append(others, 
"*source: subspecies=");
 
  764             Append(others, macke.subspecies);
 
CONSTEXPR_INLINE int str0len(const char *str)
int mtog(const Macke &macke, GenBank &gbk, const Seq &seq)
static void embl_origin(Seq &seq, Reader &reader)
int gtom(const GenBank &gbk, Macke &macke)
static void etog_convert_references(const Embl &embl, GenBank &gbk)
void embl_out(const Embl &embl, const Seq &seq, Writer &write)
void warningf(int warning_num, const char *warning_messagef,...) __ATTR__FORMAT(2)
static void embl_one_entry(Reader &reader, char *&entry, const char *key)
const char * stristr(const char *str, const char *substring)
void(* RDP_comment_parser)(char *&datastring, int start_index, Reader &reader)
int find_pattern(const char *text, const char *pattern)
void embl_print_completeness(Writer &write, char compX, char X)
void skip_eolnl_and_append(char *&string1, const char *string2)
#define ASSERT_RESULT(Type, Expected, Expr)
char * ARB_strdup(const char *str)
void warning(int warning_num, const char *warning_message)
char * strf(const char *format,...) __ATTR__FORMAT(1)
static void embl_print_comment_if_content(Writer &write, const char *key, const char *content)
int mtoe(const Macke &macke, Embl &embl, const Seq &seq)
static void embl_out_origin(const Seq &seq, Writer &write)
bool read_one_entry(Seq &seq) OVERRIDE __ATTR__USERESULT
void count(BaseCounts &counter) const 
static char * gtoe_journal(char *Str)
static char * gtoe_author(char *author)
const char * genbank_date(const char *other_date)
int etom(const Embl &embl, Macke &macke, const Seq &seq)
CONSTEXPR_INLINE bool has_content(const char *field)
static HelixNrInfo * start
int comment_subkey(const char *line, char *key)
static void gtoe_reference(const GenBank &gbk, Embl &embl)
static void embl_out_comments(const Embl &embl, const Seq &seq, Writer &write)
void print(Writer &write, const char *first_prefix, const char *other_prefix, const char *content, int max_width) const 
CONSTEXPR_INLINE_NC bool str_equal(const char *s1, const char *s2)
bool copy_content(char *&entry, const char *content)
int find_subspecies(const char *str, char expect_behind)
static void embl_comments(Embl &embl, Reader &reader)
static char * etog_author(char *Str)
virtual void out(char ch)=0
static void embl_skip_unidentified(const char *pattern, Reader &reader)
void skip_eolnl_and_append_spaced(char *&string1, const char *string2)
CONSTEXPR_INLINE bool is_end_mark(char ch)
virtual int outf(const char *format,...) __ATTR__FORMAT_MEMBER(1)
bool is_embl_comment(const char *line)
int gtoe(const GenBank &gbk, Embl &embl, const Seq &seq)
static bool embl_print_lines_if_content(Writer &write, const char *key, const char *content, const WrapMode &wrapMode, bool followed_by_spacer)
void embl_key_word(const char *line, int index, char *key)
void scan_token_or_die(char *to, const char *from)
int Skip_white_space(const char *line, int index)
void embl_out_header(const Embl &embl, const Seq &seq, Writer &write)
static int partial_mtoe(const Macke &macke, Embl &embl)
void parse_section() OVERRIDE
static void embl_correct_title(Emblref &ref)
const char * today_date()
void Append(char *&string1, const char *string2)
static void embl_date(Embl &embl, Reader &reader)
void ARB_realloc(TYPE *&tgt, size_t nelem)
bool scan_token(char *to, const char *from) __ATTR__USERESULT
bool parse_RDP_comment(RDP_comments &comments, RDP_comment_parser one_comment_entry, const char *key, int index, Reader &reader)
#define RDP_CONTINUED_INDENT
static void embl_one_comment_entry(char *&datastring, int start_index, Reader &reader)
static int pattern[maxsites+1]
static char * etog_journal(const char *eJournal)
const char * line() const 
CONSTEXPR_INLINE int count_spaces(const char *str)
#define RDP_SUBKEY_INDENT
int parse_key_word(const char *line, char *key, const char *separator)
char * strndup(const char *str, int len)
int skip_pattern(const char *text, const char *pattern)
CONSTEXPR_INLINE bool is_sequence_terminator(const char *str)
void out(Writer &write, Format outType) const 
void terminate_with(char *&str, char ch)
static void embl_continue_line(const char *pattern, char *&Str, Reader &reader)
int etog(const Embl &embl, GenBank &gbk, const Seq &seq)
static void embl_print_lines(Writer &write, const char *key, const char *content, const WrapMode &wrapMode)