19 int blank = 1, indi, indk;
21 for (indi = 0; blank && indi < numb; indi++) {
22 if (line[indi] !=
' ' && line[indi] !=
'\t')
24 if (line[indi] ==
'\t') {
43 if (reader.
line()[0] !=
'\n') {
48 strcpy(temp, (reader.
line() + ind));
73 freedup(gbk.organism, reader.
line() + indent);
100 if (refnum <= gbk.get_refcount()) {
101 warningf(17,
"Might redefine reference %d", refnum);
105 gbk.resize_refs(refnum);
111 for (; reader.
line() && reader.
line()[0] ==
' ' && reader.
line()[1] ==
' ';) {
132 warningf(18,
"Unidentified REFERENCE subkeyword: %s#", key);
144 if (!reader.
line())
return;
151 for (
int indi = 0; indi <
GBINDENT; line[indi++] =
' ') {}
158 if (reader.
line()[0] ==
'\n') {
184 if (
str_equal(gbk.accession,
"No information\n"))
return;
186 char *new_acc =
NULp;
187 const char *sep =
" \t\n;";
188 SmartCharPtr req_fail;
192 for (
char *acc = strtok(&*copy, sep); acc && req_fail.isNull(); acc = strtok(
NULp, sep)) {
194 if (!isalpha(acc[0])) req_fail =
ARB_strdup(
"has to start with a letter");
196 for (
int i = 0; acc[i]; ++i) {
198 req_fail =
strf(
"invalid char '%c'", acc[i]);
204 if (new_acc)
Append(new_acc,
' ');
208 if (req_fail.isNull() && count>9) {
209 req_fail =
strf(
"No more than 9 accession number allowed (found %i)", count);
212 if (!req_fail.isNull()) {
214 throw_errorf(15,
"Invalid accession number '%s' (%s)", gbk.accession, &*req_fail);
218 freeset(gbk.accession, new_acc);
222 int indi, count, len;
227 for (indi = count = 0, len =
str0len(gbk.keywords); indi < len; indi++)
228 if (gbk.keywords[indi] ==
'.')
234 fprintf(stderr,
"\nKEYWORDS: %s", gbk.keywords);
235 warning(141,
"No more than one period is allowed in KEYWORDS line.");
242 parse_keyed_section(key);
252 for (
int index = 9; reader.
line()[index] !=
'\n' && reader.
line()[index] !=
'\0'; index++) {
253 if (reader.
line()[index] !=
' ')
260 void GenbankParser::parse_keyed_section(
const char *key) {
263 if (!gbk.locus_contains_date()) {
264 static bool alreadyWarned =
false;
265 if (!alreadyWarned) {
266 warning(14,
"LOCUS data might be incomplete (no date seen)");
267 alreadyWarned =
true;
310 ca_assert(content[strlen(content)-1] ==
'\n');
322 content = period ?
"No information.\n" :
"No information\n";
331 const char *r = gbk_ref.
ref;
335 sprintf(refnum,
"%d\n", gbk_ref_num);
363 if (compX ==
' ')
return;
365 write.
outf(
" %c' end complete: %s\n", X, compX ==
'y' ?
"Yes" :
"No");
379 if (gbk.has_refs()) {
380 for (indi = 0; indi < gbk.get_refcount(); indi++) {
393 write.
out(
"COMMENTS ");
396 write.
out(
"Organism information\n");
410 write.
outf(
"Sequence information (bases 1 to %d)\n", seq.
get_len());
428 for (indi = 0; indi <
length; indi++) {
432 if (comments.
others[indi] ==
'\n' && comments.
others[indi + 1] !=
'\0') {
443 write.
outf(
"BASE COUNT %6d a %6d c %6d g %6d t", bases.
a, bases.
c, bases.
g, bases.
t);
455 write.
out(
"ORIGIN\n");
CONSTEXPR_INLINE int str0len(const char *str)
static void genbank_one_comment_entry(char *&datastring, int start_index, Reader &reader)
static void genbank_one_entry_in(char *&datastring, Reader &reader)
void genbank_out(const GenBank &gbk, const Seq &seq, Writer &write)
void throw_errorf(int error_num, const char *error_messagef,...) __ATTR__FORMAT(2) __ATTR__NORETURN
static void genbank_print_comment_if_content(Writer &write, const char *key, const char *content)
bool valid_acc_char(char ch)
static int genbank_check_blanks(const char *line, int numb)
void warningf(int warning_num, const char *warning_messagef,...) __ATTR__FORMAT(2)
void(* RDP_comment_parser)(char *&datastring, int start_index, Reader &reader)
void skip_eolnl_and_append(char *&string1, const char *string2)
void repeated(char ch, int repeat)
#define ASSERT_RESULT(Type, Expected, Expr)
char * ARB_strdup(const char *str)
void warning(int warning_num, const char *warning_message)
char * strf(const char *format,...) __ATTR__FORMAT(1)
void count(BaseCounts &counter) const
static void genbank_origin(Seq &seq, Reader &reader)
static void genbank_out_one_reference(Writer &write, const GenbankRef &gbk_ref, int gbk_ref_num)
static void genbank_skip_unidentified(Reader &reader, int blank_num)
CONSTEXPR_INLINE bool has_content(const char *field)
static void genbank_verify_accession(GenBank &gbk)
int comment_subkey(const char *line, char *key)
static void genbank_source(GenBank &gbk, Reader &reader)
static void genbank_reference(GenBank &gbk, Reader &reader)
void print(Writer &write, const char *first_prefix, const char *other_prefix, const char *content, int max_width) const
static void genbank_comments(GenBank &gbk, Reader &reader)
virtual void out(char ch)=0
void skip_eolnl_and_append_spaced(char *&string1, const char *string2)
static void genbank_out_origin(const Seq &seq, Writer &write)
virtual int outf(const char *format,...) __ATTR__FORMAT_MEMBER(1)
static void genbank_out_one_entry(Writer &write, const char *key, const char *content, const WrapMode &wrapMode, int period)
void genbank_out_base_count(const Seq &seq, Writer &write)
void genbank_key_word(const char *line, int index, char *key)
int Skip_white_space(const char *line, int index)
startsWithBlanks(int blanks_)
static void copy(double **i, double **j)
void genbank_print_completeness(Writer &write, char compX, char X)
void Append(char *&string1, const char *string2)
bool read_one_entry(Seq &seq) OVERRIDE __ATTR__USERESULT
static void genbank_print_lines(Writer &write, const char *key, const char *content, const WrapMode &wrapMode)
bool operator()(const char *line) const
void genbank_out_header(const GenBank &gbk, const Seq &seq, Writer &write)
CONSTEXPR_INLINE bool str_equal(const char *s1, const char *s2)
bool parse_RDP_comment(RDP_comments &comments, RDP_comment_parser one_comment_entry, const char *key, int index, Reader &reader)
#define RDP_CONTINUED_INDENT
static void genbank_continue_line(char *&Str, int numb, Reader &reader)
static void genbank_verify_keywords(GenBank &gbk)
const char * line() const
#define RDP_SUBKEY_INDENT
int parse_key_word(const char *line, char *key, const char *separator)
void parse_section() OVERRIDE
void set_line(const char *new_line)
CONSTEXPR_INLINE bool is_sequence_terminator(const char *str)
void out(Writer &write, Format outType) const
void terminate_with(char *&str, char ch)
void skipOverLinesThat(const PRED &match_condition)