ARB
Feature.cxx
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : Feature.cxx //
4 // Purpose : //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ================================================================ //
11 
12 #include "Feature.h"
13 #include "types.h"
14 #include <cctype>
15 
16 
17 using namespace std;
18 
19 
20 Feature::Feature(const string& Type, const string& locationString) :
21  type(Type),
22  location(parseLocation(locationString))
23 {}
24 
25 inline void setOrAppendQualifiedEntry(stringMap& qualifiers, const string& qualifier, const string& value) {
26  stringMapIter existing = qualifiers.find(qualifier);
27  if (existing != qualifiers.end()) { // existing qualifier
28  existing->second.append(1, '\n'); // append separated by LF
29  existing->second.append(value);
30  }
31  else {
32  qualifiers[qualifier] = value;
33  }
34 }
35 
36 void Feature::addQualifiedEntry(const string& qualifier, const string& value) {
37  // search for quotes
38  size_t vlen = value.length();
39 
40  gi_assert(vlen>0);
41 
42  stringCIter start = value.begin();
43  stringCIter end = start+vlen-1;
44 
45  if (*start == '"') {
46  if (vlen == 1 || *end != '"') {
47  throw GBS_global_string("Unclosed quotes at qualifier '%s'", qualifier.c_str());
48  }
49  // skip quotes :
50  ++start;
51  // end points to '"'
52  }
53  else {
54  ++end; // point behind last character
55  }
56 
57  setOrAppendQualifiedEntry(qualifiers, qualifier, string(start, end));
58 }
59 
// Append an identifier fragment derived from 'data' to 'id'.
//
// The fragment consists of a leading '_' followed by the alphanumeric
// characters found at the start of 'data'. Spaces and '-' are skipped and
// start a new word; the first character of each word is converted to upper
// case. Scanning stops at the first character that is neither alphanumeric,
// whitespace nor '-', at the end of 'data', or when 'maxAppend' characters
// (including the '_') have been appended.
//
// Nothing is appended if 'maxAppend' is smaller than 2. If the scanned part
// of 'data' did not contain any letter, 'id' is restored to its former
// content.
static void appendData(std::string& id, const std::string& data, int maxAppend) {
    if (maxAppend < 2) return; // no room for '_' plus at least one character

    const std::string::size_type old_id_len = id.length();

    id.append(1, '_');
    --maxAppend;

    bool insideWord = false;
    bool seenAlpha  = false;

    for (std::string::const_iterator i = data.begin(); maxAppend>0 && i != data.end(); ++i) {
        // <cctype> functions require a value representable as unsigned char
        // (or EOF); passing a negative plain char is undefined behavior.
        const unsigned char uc = static_cast<unsigned char>(*i);

        if (std::isalnum(uc)) {
            const char c = insideWord ? static_cast<char>(uc) : static_cast<char>(std::toupper(uc));
            id.append(1, c);
            --maxAppend;
            insideWord = true;
            if (std::isalpha(uc)) seenAlpha = true;
        }
        else if (std::isspace(uc) || uc == '-') { // ignore space and '-'
            insideWord = false;
        }
        else {
            break; // anything else -> abort
        }
    }

    if (!seenAlpha) {          // only digits (or nothing) have been scanned
        id.resize(old_id_len); // undo changes
    }
}
96 
97 string Feature::createGeneName() const {
98  stringMapCIter not_found = qualifiers.end();
99  stringMapCIter product = qualifiers.find("product");
100  stringMapCIter gene = qualifiers.find("gene");
101 
102  const size_t maxidlen = 30; // just an approx. limit
103  string id = type; // use gene type
104 
105  id.reserve(maxidlen+10);
106  if (gene != not_found) { // append gene name
107  appendData(id, gene->second, maxidlen-id.length());
108  }
109 
110  if (product != not_found) {
111  appendData(id, product->second, maxidlen-id.length());
112  }
113 
114  // now ensure that id doesn't end with digit
115  // (if it would, creating unique gene names gets too complicated)
116  if (isdigit(id[id.length()-1])) {
117  if (id.length() == maxidlen) id.resize(maxidlen-1);
118  id.append(1, 'X');
119  }
120 
121  return id;
122 }
123 
124 void Feature::expectLocationInSequence(long seqLength) const {
125  // test whether feature location is inside sequence
126  // throw error otherwise
127 
128  if (!location->isInRange(1, seqLength)) {
129  throw GBS_global_string("Illegal feature location (outside sequence 1..%li)", seqLength);
130  }
131 }
132 
134  // some qualifiers in feature table may be empty
135 
136  stringMapIter e = qualifiers.end();
137  for (stringMapIter i = qualifiers.begin(); i != e; ++i) {
138  if (i->second.empty()) { // with all qualifiers, that have no content, do..
139  if (i->first == "replace") {
140  // ARB cannot store empty strings!
141  // Since '/replace=""' means 'delete location', we need to store this
142  // this information differently.
143  i->second = "<empty>"; //
144  }
145  }
146  }
147 }
void setOrAppendQualifiedEntry(stringMap &qualifiers, const string &qualifier, const string &value)
Definition: Feature.cxx:25
GB_TYPES type
const char * id
Definition: AliAdmin.cxx:17
#define gi_assert(cond)
Definition: defs.h:26
virtual bool isInRange(long pos1, long pos2) const =0
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
STL namespace.
LocationPtr parseLocation(const string &source)
Definition: Location.cxx:243
static HelixNrInfo * start
Feature(const std::string &Type, const std::string &locationString)
Definition: Feature.cxx:20
std::string createGeneName() const
Definition: Feature.cxx:97
std::map< std::string, std::string > stringMap
Definition: types.h:29
void addQualifiedEntry(const std::string &qualifier, const std::string &value)
Definition: Feature.cxx:36
void expectLocationInSequence(long seqLength) const
Definition: Feature.cxx:124
void fixEmptyQualifiers()
Definition: Feature.cxx:133
static void appendData(string &id, const string &data, int maxAppend)
Definition: Feature.cxx:60
size_t length