ARB
SEC_read.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : SEC_read.cxx //
4 // Purpose : read structure from declaration string //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 
12 #include <sstream>
13 #include "SEC_root.hxx"
14 #include "SEC_iter.hxx"
15 #include <arb_msg.h>
16 
17 #define BUFFER_SIZE 1000
18 
19 using namespace std;
20 
21 static const char * sec_read_line(istream & in) {
22  static char string_buffer[BUFFER_SIZE];
23  in.getline(string_buffer, BUFFER_SIZE);
24 
25  // clean input-stream of whitespaces
26  int j=0;
27  for (int i=0; i<(BUFFER_SIZE-1); i++) {
28  if (!(isspace(string_buffer[i]))) {
29  string_buffer[j] = string_buffer[i];
30  j++;
31  if (string_buffer[i] == '\0') break;
32  }
33  }
34  return string_buffer;
35 }
36 
37 static GB_ERROR sec_scan_ints(const char * string_buffer, int *number_1, int *number_2) {
39  char *scanend; // this is a 'const char *'
40 
41  sec_assert(number_1);
42  *number_1 = (int)strtol(string_buffer, &scanend, 10);
43  if (number_2) {
44  if (scanend[0] != ':') {
45  error = "expected ':' after integer number";
46  }
47  else {
48  *number_2 = (int)strtol(scanend+1, &scanend, 10);
49  }
50  }
51  if (!error && scanend[0] != 0) { // not at string end
52  error = number_2 ? "unexpected content after '=NUMBER:NUMBER'" : "unexpected content after '=NUMBER'";
53  }
54  return error;
55 }
56 
57 static GB_ERROR sec_scan_doubles(const char * string_buffer, double *number_1, double *number_2) {
59  char *scanend; // this is a 'const char *'
60 
61  sec_assert(number_1);
62  *number_1 = strtod(string_buffer, &scanend);
63  if (number_2) {
64  if (scanend[0] != ':') {
65  error = "expected ':' after floating-point number";
66  }
67  else {
68  *number_2 = strtod(scanend+1, &scanend);
69  }
70  }
71  if (!error && scanend[0] != 0) { // not at string end
72  error = number_2 ? "unexpected content after '=NUMBER:NUMBER'" : "unexpected content after '=NUMBER'";
73  }
74  return error;
75 }
76 
77 static GB_ERROR sec_expect_keyword_and_ints(const char *string_buffer, const char *keyword, size_t keywordlen, int *number_1, int *number_2) {
78  // scans 'KEYWORD = NUM:NUM' or 'KEYWORD=NUM'
79  // 1 or 2 numbers are returned via number_1 and number_2 (depending on whether NULp is passed for number_X or not)
80 
81  sec_assert(strlen(keyword) == keywordlen);
82 
84  if (strncmp(string_buffer, keyword, keywordlen) != 0 || string_buffer[keywordlen] != '=') {
85  error = GBS_global_string("Expected '%s='", keyword);
86  }
87  else {
88  error = sec_scan_ints(string_buffer+keywordlen+1, number_1, number_2);
89  }
90  if (error) error = GBS_global_string("%s (while parsing '%s')", error, string_buffer);
91  return error;
92 }
93 
94 static GB_ERROR sec_expect_keyword_and_doubles(const char *string_buffer, const char *keyword, size_t keywordlen, double *number_1, double *number_2) {
95  // scans 'KEYWORD = NUM:NUM' or 'KEYWORD=NUM'
96  // 1 or 2 numbers are returned via number_1 and number_2
97 
98  sec_assert(strlen(keyword) == keywordlen);
99 
100  GB_ERROR error = NULp;
101  if (strncmp(string_buffer, keyword, keywordlen) != 0 || string_buffer[keywordlen] != '=') {
102  error = GBS_global_string("Expected '%s='", keyword);
103  }
104  else {
105  error = sec_scan_doubles(string_buffer+keywordlen+1, number_1, number_2);
106  }
107  if (error) error = GBS_global_string("%s (while parsing '%s')", error, string_buffer);
108  return error;
109 }
110 
111 static GB_ERROR sec_expect_constraints(const char *string_buffer, const char *keyword, size_t keywordlen, SEC_constrainted *elem) {
112  double min = 0, max = 0;
113  GB_ERROR error = sec_expect_keyword_and_doubles(string_buffer, keyword, keywordlen, &min, &max);
114  if (!error) elem->setConstraints(min, max);
115  return error;
116 }
117 
119  const char *string_buffer = sec_read_line(in);
120 
121  if (strcmp(string_buffer, "}") == 0) return NULp;
122  return GBS_global_string("Expected '}' instead of '%s'", string_buffer);
123 }
124 
125 GB_ERROR SEC_region::read(istream & in, SEC_root *root, int /* version */) {
126  int seq_start = 0, seq_end = 0;
127  const char *string_buffer = sec_read_line(in);
128  GB_ERROR error = sec_expect_keyword_and_ints(string_buffer, "SEQ", 3, &seq_start, &seq_end);
129 
130  if (!error) {
131  sec_assert(root->get_db()->canDisplay());
132  if (root->get_db()->canDisplay()) {
133  const XString& x_string = root->get_xString();
134  int x_count = x_string.getXcount();
135 
136  if (seq_start >= x_count) error = GBS_global_string("Region start (%i) out of bounds [0..%i]", seq_start, x_count-1);
137  else if (seq_end >= x_count) error = GBS_global_string("Region end (%i) out of bounds [0..%i]", seq_end, x_count-1);
138  else set_sequence_portion(x_string.getAbsPos(seq_start), x_string.getAbsPos(seq_end));
139  }
140  else {
141  set_sequence_portion(seq_start, seq_end);
142  }
143  }
144  return error;
145 }
146 
147 GB_ERROR SEC_segment::read(SEC_loop *loop_, istream & in, int version) {
148  loop = loop_;
149  GB_ERROR error = get_region()->read(in, get_root(), version);
150  if (!error) error = sec_expect_closing_bracket(in);
151  return error;
152 }
153 
154 GB_ERROR SEC_helix::read(istream & in, int version, double& old_angle_in) {
155  const char *string_buffer = sec_read_line(in);
156  GB_ERROR error = NULp;
157 
158  old_angle_in = NAN; // illegal for version >= 3
159 
160  if (version >= 3) {
161  double angle = 0;
162 
163  error = sec_expect_keyword_and_doubles(string_buffer, "REL", 3, &angle, NULp);
164 
165  if (!error) {
166  string_buffer = sec_read_line(in);
167  set_rel_angle(angle);
168  }
169  }
170  else { // version 2 or lower
171  double angle = 0;
172 
173  error = sec_expect_keyword_and_doubles(string_buffer, "DELTA", 5, &angle, NULp);
174  if (!error) {
175  string_buffer = sec_read_line(in);
176  set_abs_angle(angle);
177 
178  if (version == 2) {
179  error = sec_expect_keyword_and_doubles(string_buffer, "DELTA_IN", 8, &angle, NULp);
180 
181  if (!error) {
182  string_buffer = sec_read_line(in);
183  old_angle_in = angle+M_PI; // rotate! (DELTA_IN pointed from outside-loop to strand)
184  }
185  }
186  else {
187  old_angle_in = angle; // use strands angle
188  }
189  }
190  }
191 
192  if (!error) error = sec_expect_constraints(string_buffer, "LENGTH", 6, this);
193 
194  return error;
195 }
196 
197 
198 GB_ERROR SEC_helix_strand::read(SEC_loop *loop_, istream & in, int version) {
199  // this points away from root-loop, strand2 points towards root
200 
201  SEC_helix_strand *strand2 = new SEC_helix_strand;
202 
203  origin_loop = NULp;
204 
205  SEC_root *root = loop_->get_root();
206  GB_ERROR error = get_region()->read(in, root, version);
207 
208  double next_loop_angle = NAN;
209 
210  if (!error) {
211  helix_info = new SEC_helix(root, strand2, this);
212  origin_loop = loop_; // needed by read()
213  error = helix_info->read(in, version, next_loop_angle);
214  origin_loop = NULp;
215 
216  if (error) {
217  delete helix_info;
218  helix_info = NULp;
219  strand2->helix_info = NULp;
220  }
221  }
222 
223  if (!error) {
224  const char *string_buffer = sec_read_line(in);
225  if (strncmp(string_buffer, "LOOP={", 6) != 0) {
226  error = GBS_global_string("Strand must be followed by 'LOOP={' (not by '%s')", string_buffer);
227  }
228  else {
229  SEC_loop *new_loop = new SEC_loop(root);
230  strand2->origin_loop = new_loop;
231 
232  error = new_loop->read(strand2, in, version, next_loop_angle);
233 
234  if (!error) {
235  error = strand2->get_region()->read(in, root, version); // Loop is complete, now trailing SEQ information must be read
236  sec_read_line(in); // remove closing } from input-stream
237  }
238 
239  if (error) {
240  strand2->origin_loop = NULp;
241  delete new_loop;
242  }
243  }
244  }
245 
246  // Note: don't delete strand2 in case of error -- it's done by caller via deleting 'this'
247 
248  sec_assert(!origin_loop);
249  if (!error) origin_loop = loop_;
250 
251  return error;
252 }
253 
254 
255 
256 GB_ERROR SEC_loop::read(SEC_helix_strand *rootside_strand, istream & in, int version, double loop_angle) {
257  // loop_angle is only used by old versions<3
258 
259  const char *string_buffer = sec_read_line(in);
260  GB_ERROR error = sec_expect_constraints(string_buffer, "RADIUS", 6, this);
261 
262  if (!error) {
263  set_fixpoint_strand(rootside_strand);
264 
265  if (version == 3) {
266  string_buffer = sec_read_line(in);
267 
268  double angle = 0;
269  error = sec_expect_keyword_and_doubles(string_buffer, "REL", 3, &angle, NULp);
270  if (!error) set_rel_angle(angle);
271  }
272  else {
273  set_abs_angle(loop_angle);
274  sec_assert(get_rel_angle().valid());
275  }
276  }
277 
278  if (!error) {
279  enum { EXPECT_SEGMENT, EXPECT_STRAND } expect = (!rootside_strand && version >= 3) ? EXPECT_STRAND : EXPECT_SEGMENT;
280 
281  SEC_segment *first_new_segment = NULp;
282  SEC_helix_strand *first_new_strand = NULp;
283  SEC_segment *last_segment = NULp;
284  SEC_helix_strand *last_outside_strand = rootside_strand;
285 
286  bool done = false;
287  while (!done && !error) {
288  string_buffer = sec_read_line(in);
289 
290  if (strncmp(string_buffer, "}", 1) == 0) {
291  done = true;
292  }
293  else if (expect == EXPECT_SEGMENT) {
294  if (strncmp(string_buffer, "SEGMENT={", 9) == 0) {
295  SEC_segment *new_seg = new SEC_segment;
296 
297  error = new_seg->read(this, in, version);
298  if (!error) {
299  if (last_outside_strand) last_outside_strand->set_next_segment(new_seg);
300  last_outside_strand = NULp;
301  last_segment = new_seg;
302 
303  if (!first_new_segment) first_new_segment = new_seg;
304 
305  expect = EXPECT_STRAND;
306  }
307  else delete new_seg;
308  }
309  else error = GBS_global_string("Expected SEGMENT (in '%s')", string_buffer);
310  }
311  else {
312  sec_assert(expect == EXPECT_STRAND);
313  if (strncmp(string_buffer, "STRAND={", 8) == 0) {
314  SEC_helix_strand *new_strand = new SEC_helix_strand;
315 
316  error = new_strand->read(this, in, version);
317  if (!error) {
318  if (last_segment) last_segment->set_next_strand(new_strand);
319  last_outside_strand = new_strand;
320  last_segment = NULp;
321 
322  if (!first_new_strand) {
323  first_new_strand = new_strand;
324  if (!rootside_strand) set_fixpoint_strand(first_new_strand);
325  }
326 
327  expect = EXPECT_SEGMENT;
328  }
329  else delete new_strand;
330  }
331  else error = GBS_global_string("Expected STRAND (in '%s')", string_buffer);
332  }
333  }
334 
335  if (!error && !first_new_segment) error = "Expected at least one SEGMENT in LOOP{}";
336  if (!error && !first_new_strand && !rootside_strand) error = "Expected at least one STRAND in root-LOOP{}";
337 
338  if (!error) {
339  if (last_segment) {
340  sec_assert(!last_segment->get_next_strand());
341  if (rootside_strand) {
342  last_segment->set_next_strand(rootside_strand);
343  }
344  else { // root loop (since version 3)
345  last_segment->set_next_strand(first_new_strand);
346  }
347  }
348  else {
349  sec_assert(last_outside_strand);
350  sec_assert(version<3); // version 3 loops always end with segment
351 
352  sec_assert(!rootside_strand); // only occurs in root-loop
353  last_outside_strand->set_next_segment(first_new_segment);
354 
355  }
356  }
357  else {
358  if (rootside_strand) rootside_strand->set_next_segment(NULp); // unlink me from parent
359  }
360  }
361 
362  if (error) set_fixpoint_strand(NULp);
363 
364  return error;
365 }
366 
367 GB_ERROR SEC_root::read_data(const char *input_string, const char *x_string_in) {
368  istringstream in(input_string);
369 
370  delete xString;
371  xString = NULp;
372 
373  GB_ERROR error = NULp;
374  int version = -1; // version of structure string
375 
376  sec_assert(db->canDisplay());
377 
378  if (!error) {
379  const char *string_buffer = sec_read_line(in);
380  double firstLoopAngle = 0;
381 
382  error = sec_expect_keyword_and_ints(string_buffer, "VERSION", 7, &version, NULp); // version 3++
383  if (error) { // version < 3!
384  version = 2; // or less
385 
386  int ignoreMaxIndex;
387  error = sec_expect_keyword_and_ints(string_buffer, "MAX_INDEX", 9, &ignoreMaxIndex, NULp); // version 1+2
388  if (!error) {
389  string_buffer = sec_read_line(in);
390  error = sec_expect_keyword_and_doubles(string_buffer, "ROOT_ANGLE", 10, &firstLoopAngle, NULp); // version 2 only
391  if (error) {
392  firstLoopAngle += M_PI;
393  version = 1; // version 1 had no ROOT_ANGLE entry
394  error = NULp;
395  }
396  else {
397  firstLoopAngle += M_PI;
398  string_buffer = sec_read_line(in);
399  }
400  }
401  }
402  else {
403  if (version>DATA_VERSION) {
404  error = GBS_global_string("Structure has version %i, your ARB can only handle versions up to %i", version, DATA_VERSION);
405  }
406  else {
407  string_buffer = sec_read_line(in);
408  }
409  }
410 
411  if (!error) { // && db->canDisplay()
412  sec_assert(!xString);
413 
414  size_t len = strlen(x_string_in);
415  size_t exp_len = db->length();
416 
417  if (len != exp_len && len != (exp_len+1)) {
418  error = GBS_global_string("Wrong xstring-length (found=%zu, expected=%zu-%zu)",
419  len, exp_len, exp_len+1);
420  }
421  else {
422  xString = new XString(x_string_in, len, db->length());
423 #if defined(DEBUG)
424  size_t xlen = xString->getLength(); // internally one position longer than alignment length
425  printf("x_string_in len=%zu\nxlen=%zu\nali_length=%zu\n", strlen(x_string_in), xlen, db->length());
426 #endif // DEBUG
427  }
428  }
429 
430  delete_root_loop();
431 
432  if (!error) {
433 #if defined(DEBUG)
434  printf("Reading structure format (version %i)\n", version);
435 #endif // DEBUG
436 
437  if (strncmp(string_buffer, "LOOP={", 6) == 0) {
438  SEC_loop *rootLoop = new SEC_loop(this);
439 
440  set_root_loop(rootLoop);
441  set_under_construction(true);
442  error = rootLoop->read(NULp, in, version, firstLoopAngle);
443 
444  if (!error) {
445  set_under_construction(false); // mark as "constructed"
446  }
447  else {
448  delete_root_loop();
449  }
450  }
451  else {
452  error = GBS_global_string("Expected root loop ('LOOP={'), not '%s'", string_buffer);
453  }
454  }
455  }
456 
457  if (!error) {
458  if (version<DATA_VERSION) {
459  printf("Converting secondary structure from version %i to %i\n", version, DATA_VERSION);
460  }
461  if (version<3) fixStructureBugs(version);
462 #if defined(CHECK_INTEGRITY)
463  check_integrity(CHECK_STRUCTURE);
464 #endif // CHECK_INTEGRITY
465  if (version<3) {
466  relayout();
467  root_loop->fixAngleBugs(version);
468  }
469  }
470 
471  return error;
472 }
473 
474 void SEC_helix::fixAngleBugs(int version) {
475  if (version<3) {
476  outsideLoop()->fixAngleBugs(version); // first correct substructure
477 
478  // versions<3 silently mirrored angles between strand and origin-loop
479  // to ensure that strand always points away from loop (and not inside).
480  // This correction was done during refresh and therefore not saved to the DB.
481  // Since version 3 no angles will be mirrored automatically, so we need to correct them here.
482 
483  Vector loop2strand(rootsideLoop()->get_center(), get_fixpoint());
484  if (scalarProduct(loop2strand, get_abs_angle().normal()) < 0) { // < 0 means angle is an acute angle
485  printf("* Autofix acute angle (loop2strand=%.2f, abs=%.2f) of helix nr '%s'\n",
486  Angle(loop2strand).radian(),
487  get_abs_angle().radian(),
488  get_root()->helixNrAt(strandToOutside()->startAttachAbspos()));
489  set_rel_angle(Angle(get_rel_angle()).rotate180deg());
490  }
491  }
492 }
493 
494 void SEC_loop::fixAngleBugs(int version) {
495  if (version<3) {
496  for (SEC_strand_iterator strand(this); strand; ++strand) {
497  if (strand->pointsToOutside()) {
498  strand->get_helix()->fixAngleBugs(version);
499  }
500  }
501  }
502 }
503 
504 void SEC_root::fixStructureBugs(int version) {
505  if (version < 3) {
506  // old versions produced data structure with non-adjacent regions
507  SEC_base_part *start_part = root_loop->get_fixpoint_strand();
508  SEC_base_part *part = start_part;
509  SEC_region *reg = part->get_region();
510 
511  do {
512  SEC_base_part *next_part = part->next();
513  SEC_region *next_reg = next_part->get_region();
514 
515  if (!are_adjacent_regions(reg, next_reg)) {
516  printf("* Fixing non-adjacent regions: %i..%i and %i..%i\n",
517  reg->get_sequence_start(), reg->get_sequence_end(),
518  next_reg->get_sequence_start(), next_reg->get_sequence_end());
519 
520  // known bug occurs at segment->strand transition..
521  sec_assert(part->parent()->getType() == SEC_LOOP);
522 
523  // .. and segment region ends one position too early
524  sec_assert(!get_helixDef()->helixNr(reg->get_sequence_end()));
525  sec_assert(get_helixDef()->helixNr(next_reg->get_sequence_start()));
526 
527  // make them adjacent
529  }
530 
531  part = next_part;
532  reg = next_reg;
533  }
534  while (part != start_part);
535  }
536 }
const char * GB_ERROR
Definition: arb_core.h:25
static GB_ERROR sec_expect_keyword_and_doubles(const char *string_buffer, const char *keyword, size_t keywordlen, double *number_1, double *number_2)
Definition: SEC_read.cxx:94
int get_sequence_start() const
Definition: SEC_root.hxx:107
const SEC_db_interface * get_db() const
Definition: SEC_root.hxx:747
int getXcount() const
Definition: SEC_abspos.hxx:53
int get_sequence_end() const
Definition: SEC_root.hxx:108
bool are_adjacent_regions(const SEC_region *reg1, const SEC_region *reg2)
Definition: SEC_root.hxx:996
SEC_base * parent()
Definition: SEC_root.hxx:295
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
STL namespace.
#define DATA_VERSION
Definition: SEC_root.hxx:43
virtual SEC_BASE_TYPE getType() const =0
static GB_ERROR sec_scan_doubles(const char *string_buffer, double *number_1, double *number_2)
Definition: SEC_read.cxx:57
#define M_PI
static GB_ERROR sec_expect_constraints(const char *string_buffer, const char *keyword, size_t keywordlen, SEC_constrainted *elem)
Definition: SEC_read.cxx:111
const XString & get_xString() const
Definition: SEC_root.hxx:764
#define BUFFER_SIZE
Definition: SEC_read.cxx:17
void set_sequence_portion(int start, int end)
Definition: SEC_root.hxx:119
GB_ERROR read_data(const char *input_string, const char *x_string_in)
Definition: SEC_read.cxx:367
size_t getAbsPos(int x) const
Definition: SEC_abspos.cxx:106
static GB_ERROR sec_expect_closing_bracket(istream &in)
Definition: SEC_read.cxx:118
const SEC_region * get_region() const
Definition: SEC_root.hxx:304
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR read(std::istream &in, SEC_root *root, int version)
Definition: SEC_read.cxx:125
static const char * sec_read_line(istream &in)
Definition: SEC_read.cxx:21
size_t getLength() const
Definition: SEC_abspos.hxx:57
CONSTEXPR_INLINE bool valid(SpeciesCreationMode m)
Definition: ed4_class.hxx:2247
double scalarProduct(const Vector &v1, const Vector &v2)
static GB_ERROR sec_expect_keyword_and_ints(const char *string_buffer, const char *keyword, size_t keywordlen, int *number_1, int *number_2)
Definition: SEC_read.cxx:77
static GB_ERROR sec_scan_ints(const char *string_buffer, int *number_1, int *number_2)
Definition: SEC_read.cxx:37
SEC_base_part * next()
Definition: SEC_root.hxx:301
bool canDisplay() const
Definition: SEC_db.hxx:140
void setConstraints(double low, double high)
Definition: SEC_root.hxx:193
#define NULp
Definition: cxxforward.h:116
#define min(a, b)
Definition: f2c.h:153
#define sec_assert(cond)
Definition: SEC_defs.hxx:19
#define max(a, b)
Definition: f2c.h:154