ARB
Importer.h
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : Importer.h //
4 // Purpose : Declares the flatfile importers (Genebank, EMBL) and the feature-line types they use //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ================================================================ //
11 #ifndef IMPORTER_H
12 #define IMPORTER_H
13 
14 #ifndef BUFFEREDFILEREADER_H
15 #include <BufferedFileReader.h>
16 #endif
17 #ifndef SMARTPTR_H
18 #include <smartptr.h>
19 #endif
20 #ifndef METATAG_H
21 #include "MetaTag.h"
22 #endif
23 
24 class DBwriter;
25 class Feature;
26 
28  FL_START = 1, // start of feature (e.g. 'CDS 352120..353193'). starts at offset 5
29  // all types below start at offset 21 (or higher):
30  FL_QUALIFIER = 2, // start of qualifier (e.g. '/codon_start=1')
31  FL_QUALIFIER_NODATA = 4, // start of qualifier w/o data (e.g. '/pseudo')
32  FL_QUALIFIER_QUOTED = 8, // start of qualifier with quoted data (e.g. '/product="phosphate"')
33  FL_QUALIFIER_QUOTE_OPENED = 16, // start of qualifier with quoted data where the closing quote is missing on this line (e.g. '/product="phosphate')
34  FL_CONTINUED_QUOTE_CLOSED = 32, // continuation line terminated by a quote ('"')
35  FL_CONTINUED = 64, // any other continuation line
36 
37  // meta types:
40 };
41 
42 class FeatureLine { // one feature table line, split into 'name' and 'rest'
43  void interpret_as_continued_line();
44 public:
45  string name; // feature or qualifier name (only valid for FL_START, FL_QUALIFIER...)
46  string rest; // rest of line (behind '=' for FL_QUALIFIER..., not for FL_QUALIFIER_NODATA)
47  string orgLine; // presumably the unmodified input line -- TODO confirm against Importer.cxx
49 
50  FeatureLine(const string& line); // defined in Importer.cxx; NOTE(review): not 'explicit', so a string converts implicitly
52 };
53 
56 typedef std::vector<FeatureLinePtr> FeatureLines; // sequence of feature line smart pointers
57 
58 class Importer : virtual Noncopyable { // abstract base class (declares pure virtual members) of the format-specific importers
59 protected:
63  FeatureLines pushedFeatureLines; // pushed back feature lines
65  long expectedSeqLength; // length read from LOCUS or ID line ( = 0 -> no length info found)
66 
67  void expectLine(string& line) { if (!flatfile.getLine(line)) throw flatfile.lineError("Unexpected EOF"); } // reads next line into 'line'; throws the flatfile's line error if getLine() fails
68  const MetaTag *findTag(const string& tag) { return tagTranslator.get(tag); } // forwards the lookup of 'tag' to tagTranslator
69 
70  virtual bool readFeatureTableLine(string& line) = 0; // pure virtual: implemented by each derived importer
71 
72  FeatureLinePtr getFeatureTableLine();
73  void backFeatureTableLine(FeatureLinePtr& fline) { pushedFeatureLines.push_back(fline); } // appends 'fline' to pushedFeatureLines
74 
75  FeatureLinePtr getUnwrappedFeatureTableLine();
76 
77  FeaturePtr parseFeature();
78  void parseFeatureTable();
79 
80  virtual void import_section() = 0; // pure virtual: implemented by each derived importer
81 
82  void show_warnings(const string& import_of_what);
83 
84  void check_base_counters(const SequenceBuffer& seqData, const BaseCounter *headerCount);
85 
86 public:
87  Importer(LineReader& Flatfile, DBwriter& DB_writer, const MetaTag *meta_description);
88  virtual ~Importer() {} // virtual, because the class serves as polymorphic base
89 
90  void import();
91  void warning(const char *msg); // add a warning
92 };
93 
94 
95 class GenebankImporter : public Importer { // concrete Importer; by its name the importer for Genebank flatfiles
96  void import_section() OVERRIDE; // implements Importer::import_section()
97  bool readFeatureTableLine(string& line) OVERRIDE; // implements Importer::readFeatureTableLine()
98  void parseSequence(const string& tag, const string& headerline); // private helper; unlike EmblImporter::parseSequence it also takes the tag
99 
100 public:
101  GenebankImporter(LineReader& Flatfile, DBwriter& DB_writer);
102  ~GenebankImporter() OVERRIDE {} // empty body
103 
104 };
105 
106 
107 class EmblImporter : public Importer { // concrete Importer; by its name the importer for EMBL flatfiles
108  void import_section() OVERRIDE; // implements Importer::import_section()
109  bool readFeatureTableLine(string& line) OVERRIDE; // implements Importer::readFeatureTableLine()
110  void parseSequence(const string& headerline); // private helper; takes only the header line (no tag, unlike GenebankImporter)
111 
112 public:
113  EmblImporter(LineReader& Flatfile, DBwriter& DB_writer);
114  ~EmblImporter() OVERRIDE {} // empty body
115 };
116 
117 
118 #else
119 #error Importer.h included twice
120 #endif // IMPORTER_H
121 
void backFeatureTableLine(FeatureLinePtr &fline)
Definition: Importer.h:73
void show_warnings(const string &import_of_what)
Definition: Importer.cxx:222
void check_base_counters(const SequenceBuffer &seqData, const BaseCounter *headerCount)
Definition: Importer.cxx:259
FeatureLine(const string &line)
Definition: Importer.cxx:33
bool getLine(string &line)
std::vector< FeatureLinePtr > FeatureLines
Definition: Importer.h:56
void expectLine(string &line)
Definition: Importer.h:67
virtual bool readFeatureTableLine(string &line)=0
std::vector< std::string > stringVector
Definition: types.h:30
GenebankImporter(LineReader &Flatfile, DBwriter &DB_writer)
Definition: Importer.cxx:352
Importer(LineReader &Flatfile, DBwriter &DB_writer, const MetaTag *meta_description)
Definition: Importer.cxx:121
~GenebankImporter() OVERRIDE
Definition: Importer.h:102
void warning(const char *msg)
Definition: Importer.cxx:128
~EmblImporter() OVERRIDE
Definition: Importer.h:114
FeatureLineType type
Definition: Importer.h:48
FeatureLinePtr getFeatureTableLine()
Definition: Importer.cxx:132
virtual void import_section()=0
SmartPtr< FeatureLine > FeatureLinePtr
Definition: Importer.h:55
Generic smart pointer.
Definition: smartptr.h:149
LineReader & flatfile
Definition: Importer.h:61
FeatureLineType
Definition: Importer.h:27
string lineError(const string &msg) const
stringVector warnings
Definition: Importer.h:64
virtual ~Importer()
Definition: Importer.h:88
EmblImporter(LineReader &Flatfile, DBwriter &DB_writer)
Definition: Importer.cxx:505
FeatureLines pushedFeatureLines
Definition: Importer.h:63
FeatureLinePtr getUnwrappedFeatureTableLine()
Definition: Importer.cxx:146
const MetaTag * findTag(const string &tag)
Definition: Importer.h:68
long expectedSeqLength
Definition: Importer.h:65
const MetaTag * get(const std::string &tag) const
Definition: MetaTag.h:51
string rest
Definition: Importer.h:46
MetaTagTranslator tagTranslator
Definition: Importer.h:62
string name
Definition: Importer.h:45
SmartPtr< Feature > FeaturePtr
Definition: Importer.h:54
FeaturePtr parseFeature()
Definition: Importer.cxx:191
void parseFeatureTable()
Definition: Importer.cxx:211
#define OVERRIDE
Definition: cxxforward.h:93
static int line
Definition: arb_a2ps.c:296
string orgLine
Definition: Importer.h:47
DBwriter & db_writer
Definition: Importer.h:60
bool reinterpret_as_continued_line()
Definition: Importer.cxx:105