ARB
Importer.h
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : Importer.h //
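4 // Purpose : Declares the flatfile importers (GenebankImporter, EmblImporter) and the feature-table line types they share //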
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ================================================================ //
11 #ifndef IMPORTER_H
12 #define IMPORTER_H
13 
14 #ifndef BUFFEREDFILEREADER_H
15 #include <BufferedFileReader.h>
16 #endif
17 #ifndef SMARTPTR_H
18 #include <smartptr.h>
19 #endif
20 #ifndef METATAG_H
21 #include "MetaTag.h"
22 #endif
23 
24 class DBwriter;
25 class Feature;
26 
// Classification of a single feature-table line.
// NOTE(review): the enum's opening line (Importer.h:27) and source lines 38-39
// (the entries under "meta types") are not shown in this listing.
// The visible enumerators are distinct powers of two; presumably the meta types
// are bitwise combinations of them -- confirm against lines 38-39.
28  FL_START = 1, // start of feature (e.g. 'CDS 352120..353193'). starts at offset 5
29  // all types below start at offset 21 (or higher):
30  FL_QUALIFIER = 2, // start of qualifier (e.g. '/codon_start=1')
31  FL_QUALIFIER_NODATA = 4, // start of qualifier w/o data (e.g. '/pseudo')
32  FL_QUALIFIER_QUOTED = 8, // start of qualifier with quoted data (e.g. '/product="phosphate"')
33  FL_QUALIFIER_QUOTE_OPENED = 16, // start of qualifier with quoted data whose closing quote is not on this line (e.g. '/product="phosphate')
34  FL_CONTINUED_QUOTE_CLOSED = 32, // something terminated by a quote ('"')
35  FL_CONTINUED = 64, // other
36 
37  // meta types:
40 };
41 
// One line of a feature table, built from the raw line text and split into a
// name part and a rest part (see the member comments below).
// NOTE(review): this listing omits source lines 48 and 51. The cross-reference
// index names 'FeatureLineType type' (Importer.h:48) and a member
// 'bool reinterpret_as_continued_line()' (defined in Importer.cxx:105);
// presumably those are the missing declarations -- confirm against the real header.
42 class FeatureLine {
43  void interpret_as_continued_line(); // private helper; defined outside this header
44 public:
45  string name; // feature or qualifier name (only valid for FL_START, FL_QUALIFIER...)
46  string rest; // rest of line (behind '=' for FL_QUALIFIER..., not for FL_QUALIFIER_NODATA)
47  string orgLine; // presumably the unmodified input line -- TODO confirm in Importer.cxx
49 
50  FeatureLine(const string& line); // constructs from one line of text (implementation in Importer.cxx)
52 };
53 
// Sequence of feature-line smart pointers (the index lists FeatureLinePtr as SmartPtr<FeatureLine>, Importer.h:55).
56 typedef std::vector<FeatureLinePtr> FeatureLines;
57 
// Abstract base class of the flatfile importers.
//
// Derived classes must implement readFeatureTableLine() and import_section()
// (both pure virtual). The class inherits virtually (and, being a 'class',
// privately) from Noncopyable.
//
// NOTE(review): this listing omits source lines 60-62 and 64. According to the
// cross-reference index they declare 'DBwriter& db_writer' (60),
// 'LineReader& flatfile' (61), 'MetaTagTranslator tagTranslator' (62) and
// 'stringVector warnings' (64). The inline members below use 'flatfile' and
// 'tagTranslator'.
58 class Importer : virtual Noncopyable {
59 protected:
63  FeatureLines pushedFeatureLines; // pushed back feature lines
65  long expectedSeqLength; // length read from LOCUS or ID line ( = 0 -> no length info found)
66 
// Reads the next line from 'flatfile' into 'line'. If getLine() reports failure,
// throws the value of flatfile.lineError("Unexpected EOF") (a string, per the
// index entry for lineError).
67  void expectLine(string& line) { if (!flatfile.getLine(line)) throw flatfile.lineError("Unexpected EOF"); }
// Forwards the lookup of 'tag' to tagTranslator.get(); the result for an unknown
// tag is decided there (not visible in this header).
68  const MetaTag *findTag(const string& tag) { return tagTranslator.get(tag); }
69 
// Format-specific hook, implemented by GenebankImporter and EmblImporter.
// presumably returns false when no further feature-table line exists -- TODO confirm.
70  virtual bool readFeatureTableLine(string& line) = 0;
71 
72  FeatureLinePtr getFeatureTableLine();
// Appends 'fline' to pushedFeatureLines ("pushed back feature lines").
// presumably getFeatureTableLine() hands such lines out again -- verify in Importer.cxx.
73  void backFeatureTableLine(FeatureLinePtr& fline) { pushedFeatureLines.push_back(fline); }
74 
75  FeatureLinePtr getUnwrappedFeatureTableLine();
76 
77  FeaturePtr parseFeature();
78  void parseFeatureTable();
79 
// Format-specific hook, implemented by GenebankImporter and EmblImporter.
80  virtual void import_section() = 0;
81 
82  void show_warnings(const string& import_of_what);
83 
84 public:
// Takes the line source, the database writer and a MetaTag description
// (implementation in Importer.cxx).
85  Importer(LineReader& Flatfile, DBwriter& DB_writer, const MetaTag *meta_description);
// Virtual, so derived importers are destroyed correctly through a base pointer.
86  virtual ~Importer() {}
87 
88  void import();
89  void warning(const char *msg); // add a warning
90 };
91 
92 
// Concrete Importer; supplies the two hooks the base class leaves pure virtual.
// presumably handles GenBank-format flatfiles (judging by the class name) -- TODO confirm.
// All members except the constructor and destructor are private.
93 class GenebankImporter : public Importer {
94  void import_section() OVERRIDE; // implements Importer::import_section
95  bool readFeatureTableLine(string& line) OVERRIDE; // implements Importer::readFeatureTableLine
96  void parseSequence(const string& tag, const string& headerline); // takes a tag in addition to the header line (EmblImporter's variant takes only the header line)
97 
98 public:
// No meta description is passed in here, unlike the Importer constructor.
99  GenebankImporter(LineReader& Flatfile, DBwriter& DB_writer);
100  ~GenebankImporter() OVERRIDE {} // empty body
101 
102 };
103 
104 
// Concrete Importer; supplies the two hooks the base class leaves pure virtual.
// presumably handles EMBL-format flatfiles (judging by the class name) -- TODO confirm.
// All members except the constructor and destructor are private.
105 class EmblImporter : public Importer {
106  void import_section() OVERRIDE; // implements Importer::import_section
107  bool readFeatureTableLine(string& line) OVERRIDE; // implements Importer::readFeatureTableLine
108  void parseSequence(const string& headerline); // takes only the header line (GenebankImporter's variant also takes a tag)
109 
110 public:
// No meta description is passed in here, unlike the Importer constructor.
111  EmblImporter(LineReader& Flatfile, DBwriter& DB_writer);
112  ~EmblImporter() OVERRIDE {} // empty body
113 };
114 
115 
116 #else
117 #error Importer.h included twice
118 #endif // IMPORTER_H
119 
void backFeatureTableLine(FeatureLinePtr &fline)
Definition: Importer.h:73
void show_warnings(const string &import_of_what)
Definition: Importer.cxx:222
FeatureLine(const string &line)
Definition: Importer.cxx:33
bool getLine(string &line)
std::vector< FeatureLinePtr > FeatureLines
Definition: Importer.h:56
void expectLine(string &line)
Definition: Importer.h:67
virtual bool readFeatureTableLine(string &line)=0
std::vector< std::string > stringVector
Definition: types.h:30
GenebankImporter(LineReader &Flatfile, DBwriter &DB_writer)
Definition: Importer.cxx:337
Importer(LineReader &Flatfile, DBwriter &DB_writer, const MetaTag *meta_description)
Definition: Importer.cxx:121
~GenebankImporter() OVERRIDE
Definition: Importer.h:100
void warning(const char *msg)
Definition: Importer.cxx:128
~EmblImporter() OVERRIDE
Definition: Importer.h:112
FeatureLineType type
Definition: Importer.h:48
FeatureLinePtr getFeatureTableLine()
Definition: Importer.cxx:132
virtual void import_section()=0
SmartPtr< FeatureLine > FeatureLinePtr
Definition: Importer.h:55
Generic smart pointer.
Definition: smartptr.h:149
LineReader & flatfile
Definition: Importer.h:61
FeatureLineType
Definition: Importer.h:27
string lineError(const string &msg) const
stringVector warnings
Definition: Importer.h:64
virtual ~Importer()
Definition: Importer.h:86
EmblImporter(LineReader &Flatfile, DBwriter &DB_writer)
Definition: Importer.cxx:488
FeatureLines pushedFeatureLines
Definition: Importer.h:63
FeatureLinePtr getUnwrappedFeatureTableLine()
Definition: Importer.cxx:146
const MetaTag * findTag(const string &tag)
Definition: Importer.h:68
long expectedSeqLength
Definition: Importer.h:65
const MetaTag * get(const std::string &tag) const
Definition: MetaTag.h:51
string rest
Definition: Importer.h:46
MetaTagTranslator tagTranslator
Definition: Importer.h:62
string name
Definition: Importer.h:45
SmartPtr< Feature > FeaturePtr
Definition: Importer.h:54
FeaturePtr parseFeature()
Definition: Importer.cxx:191
void parseFeatureTable()
Definition: Importer.cxx:211
#define OVERRIDE
Definition: cxxforward.h:93
static int line
Definition: arb_a2ps.c:296
string orgLine
Definition: Importer.h:47
DBwriter & db_writer
Definition: Importer.h:60
bool reinterpret_as_continued_line()
Definition: Importer.cxx:105