ARB
arb_help2xml.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : arb_help2xml.cxx //
4 // Purpose : Converts old ARB help format to XML //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in October 2001 //
7 // Copyright Department of Microbiology (Technical University Munich) //
8 // //
9 // Visit our web site at: http://www.arb-home.de/ //
10 // //
11 // ==================================================================== //
12 
13 #include <xml.hxx>
14 #include <arb_defs.h>
15 #include <arb_diff.h>
16 #include <static_assert.h>
17 
18 #include <list>
19 #include <set>
20 #include <iostream>
21 #include <fstream>
22 
23 #include <cstdlib>
24 #include <cstdarg>
25 #include <cstring>
26 #include <climits>
27 
28 #include <unistd.h>
29 #include <sys/stat.h>
30 
31 using namespace std;
32 
33 #define h2x_assert(bed) arb_assert(bed)
34 
35 // Limit the length of the TITLE/SUBTITLE of helppages.
36 // - TITLE has to fit into UP/SUB subwindows of arb internal help window
37 // - SUBTITLE has to fit into default help-textsubwindow width
38 #define MAX_TITLE_CHARS 42
39 #define MAX_SUBTITLE_CHARS 75
40 
41 #if defined(DEBUG)
42 #define WARN_FORMATTING_PROBLEMS
43 #define WARN_MISSING_HELP
44 // #define DUMP_PARAGRAPHS
45 // #define PROTECT_HELP_VS_CHANGES
46 #endif // DEBUG
47 
48 
49 #if defined(WARN_FORMATTING_PROBLEMS)
50 
51 #define WARN_FIXED_LAYOUT_LIST_ELEMENTS
52 #define WARN_LONESOME_ENUM_ELEMENTS
53 
54 // warnings below are useless for production and shall be disabled in SVN
55 // #define WARN_LONESOME_LIST_ELEMENTS
56 // #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION
57 // #define WARN_IGNORED_ALPHA_ENUMS
58 
59 #endif
60 
61 
62 #define MAX_LINE_LENGTH 200 // maximum length of lines in input stream
63 #define TABSIZE 8
64 
// Keywords that open a section of a help page.
//
// The position of each keyword is significant: readHelp() looks a keyword up
// in this table and converts the index of the match into a SectionType via
// SectionType(idx). New entries therefore have to be added in the same order
// as the corresponding SectionType values.
static const char *knownSections[] = {
    "OCCURRENCE", "DESCRIPTION", "NOTES",
    "EXAMPLES",   "WARNINGS",    "BUGS",
    "SECTION",
};
74 
83 
87 };
88 
90 
91 __ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) {
92  static size_t buf_size = 256;
93  static char *buffer = new char[buf_size];
94 
95  size_t length;
96  while (1) {
97  if (!buffer) {
98  h2x_assert(buffer); // to stop when debugging
99  throw string("out of memory");
100  }
101 
102  length = vsnprintf(buffer, buf_size, format, argPtr);
103  if (length < buf_size) break; // string fits into current buffer
104 
105  // otherwise resize buffer :
106  buf_size += buf_size/2;
107  delete [] buffer;
108  buffer = new char[buf_size];
109  }
110 
111  return string(buffer, length);
112 }
113 
114 __ATTR__FORMAT(1) static string strf(const char *format, ...) {
115  va_list argPtr;
116  va_start(argPtr, format);
117  string result = vstrf(format, argPtr);
118  va_end(argPtr);
119 
120  return result;
121 }
122 
123 // -----------------------------
124 // warnings and errors
125 
127  string message;
128  size_t lineno;
129 
130 public:
131  LineAttachedMessage(const string& message_, size_t lineno_) :
132  message(message_),
133  lineno(lineno_)
134  {}
135 
136  const string& Message() const { return message; }
137  size_t Lineno() const { return lineno; }
138 };
139 
140 const size_t NO_LINENUMBER_INFO = -1U;
141 
142 LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); }
143 
144 
145 static list<LineAttachedMessage> warnings;
146 inline void add_warning(const LineAttachedMessage& laMsg) {
147  warnings.push_back(laMsg);
148 }
149 inline void add_warning(const string& warning, size_t lineno) {
150  add_warning(LineAttachedMessage(warning, lineno));
151 }
152 
154  virtual ~MessageAttachable() {}
155 
156  virtual string location_description() const = 0; // may return empty string
157  virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO
158 
160  string where = location_description();
161  if (where.empty()) return LineAttachedMessage(message, line_number());
162  return LineAttachedMessage(message+" ["+where+"]", line_number());
163  }
164  void attach_warning(const string& message) const {
165  add_warning(attached_message(message));
166  }
167 };
168 
169 
170 // ----------------------
171 // class Reader
172 
173 class Reader : public MessageAttachable {
174 private:
175  istream& in;
176  char lineBuffer[MAX_LINE_LENGTH];
177  char lineBuffer2[MAX_LINE_LENGTH];
178  bool readAgain;
179  bool eof;
180  int lineNo;
181 
182  string location_description() const OVERRIDE { return ""; }
183  size_t line_number() const OVERRIDE { return lineNo; }
184 
185  void getline() {
186  if (!eof) {
187  if (in.eof()) eof = true;
188  else {
189  h2x_assert(in.good());
190 
191  in.getline(lineBuffer, MAX_LINE_LENGTH);
192  lineNo++;
193 
194  if (in.eof()) eof = true;
195  else if (in.fail()) throw "line too long";
196 
197  if (strchr(lineBuffer, '\t')) {
198  int o2 = 0;
199 
200  for (int o = 0; lineBuffer[o]; ++o) {
201  if (lineBuffer[o] == '\t') {
202  int spaces = TABSIZE - (o2 % TABSIZE);
203  while (spaces--) lineBuffer2[o2++] = ' ';
204  }
205  else {
206  lineBuffer2[o2++] = lineBuffer[o];
207  }
208  }
209  lineBuffer2[o2] = 0;
210  strcpy(lineBuffer, lineBuffer2);
211  }
212 
213  char *eol = strchr(lineBuffer, 0)-1;
214  while (eol >= lineBuffer && isspace(eol[0])) {
215  eol[0] = 0; // trim trailing whitespace
216  eol--;
217  }
218  if (eol > lineBuffer) {
219  // now eol points to last character
220  if (eol[0] == '-' && isalnum(eol[-1])) {
221  attach_warning("manual hyphenation detected");
222  }
223  }
224  }
225  }
226  }
227 
228 public:
229  Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); }
230  virtual ~Reader() {}
231 
232  const char *getNext() {
233  if (readAgain) readAgain = false;
234  else getline();
235  return eof ? NULp : lineBuffer;
236  }
237 
238  void back() {
239  h2x_assert(!readAgain);
240  readAgain = true;
241  }
242 
243  int getLineNo() const { return lineNo; }
244 };
245 
250 };
256 };
257 
258 class Ostring : public MessageAttachable {
259  string content;
260  size_t lineNo; // where string came from
262 
263  unsigned preformatted_width; // 0 = use default width, otherwise contains custom width from control comment.
264 
265  // only valid for type == ENUMERATED:
266  EnumerationType etype;
267  unsigned number;
268 
269  static unsigned current_preformatted_width; // same meaning as 'preformatted_width'
270  static unsigned current_preformatted_blocks; // automatically set 'current_preformatted_width' to zero (after this number of blocks were generated).
271 
272  void check_auto_unpreformat() {
273  if (current_preformatted_blocks && !--current_preformatted_blocks) { // count down to zero..
274  current_preformatted_width = 0; // .. then switch off preformatted section
275  }
276  }
277 
278 public:
279 
280  Ostring(const string& s, size_t line_no, ParagraphType type_)
281  : content(s),
282  lineNo(line_no),
283  type(type_),
284  preformatted_width(current_preformatted_width),
285  etype(NONE)
286  {
287  h2x_assert(type != ENUMERATED);
288  check_auto_unpreformat();
289  }
290  Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
291  : content(s),
292  lineNo(line_no),
293  type(type_),
294  preformatted_width(current_preformatted_width),
295  etype(etype_),
296  number(num)
297  {
298  h2x_assert(type == ENUMERATED);
299  h2x_assert(etype == DIGITS || etype == ALPHA_UPPER || etype == ALPHA_LOWER);
300  h2x_assert(num>0);
301  check_auto_unpreformat();
302  }
303 
304  static void set_current_preformatted_width(unsigned allowed) {
305  current_preformatted_width = allowed;
306  }
307  static void set_preformatted_blocks_wanted(unsigned preformatted_blocks) {
308  h2x_assert(!current_preformatted_blocks);
309  current_preformatted_blocks = preformatted_blocks;
310  }
311 
312  // MessageAttachable interface:
313  string location_description() const OVERRIDE { return ""; }
314  size_t line_number() const OVERRIDE { return get_lineno(); }
315 
316  operator const string&() const { return content; }
317  operator string&() { return content; }
318 
319  const string& as_string() const { return content; }
320  string& as_string() { return content; }
321 
322  size_t get_lineno() const { return lineNo; } // @@@ replace by line_number()?
323 
324  const ParagraphType& get_type() const { return type; }
326  h2x_assert(type == ENUMERATED);
327  return etype;
328  }
329  unsigned get_number() const {
330  h2x_assert(type == ENUMERATED);
331  return number;
332  }
333  unsigned get_preformatted_width() const {
334  // returns > 0 for text inside PREFORMATTED control comments.
335  return preformatted_width;
336  }
337 
338  // wrapper to make Ostring act like char*
339  const char *c_str() const { return content.c_str(); }
340 };
341 unsigned Ostring::current_preformatted_width = 0;
342 unsigned Ostring::current_preformatted_blocks = 0;
343 
344 typedef list<Ostring> Ostrings;
345 
346 #if defined(WARN_MISSING_HELP)
347 static void check_TODO(const char *line, const Reader& reader) {
348  if (strstr(line, "@@@") || strstr(line, "TODO")) {
349  reader.attach_warning(strf("TODO: %s", line));
350  }
351 }
352 #else
353 inline void check_TODO(const char *, const Reader&) { }
354 #endif // WARN_MISSING_HELP
355 
356 // ----------------------------
357 // class Section
358 
359 class Section FINAL_TYPE : public MessageAttachable {
361  string name;
362  Ostrings content;
363  size_t lineno;
364 
365  string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; }
366 
367 public:
368  Section(string name_, SectionType type_, size_t lineno_)
369  : type(type_),
370  name(name_),
371  lineno(lineno_)
372  {}
373  virtual ~Section() {}
374 
375  const Ostrings& Content() const { return content; }
376  Ostrings& Content() { return content; }
377  SectionType get_type() const { return type; }
378  size_t line_number() const OVERRIDE { return lineno; }
379  const string& getName() const { return name; }
380  void setName(const string& name_) { name = name_; }
381 
382  void set_line_number(size_t lineNumber) { lineno = lineNumber; }
383 };
384 
385 typedef list<Section> SectionList;
386 
387 // --------------------
388 // class Link
389 
// A link to another help page, together with the line of the
// source file where the link was found.
class Link {
    string target;
    size_t source_lineno;

public:
    Link(const string& target_, size_t source_lineno_)
        : target(target_),
          source_lineno(source_lineno_)
    {}

    // name of the page linked to
    const string& Target() const { return target; }

    // line number passed to the constructor
    size_t SourceLineno() const { return source_lineno; }
};
403 
404 typedef list<Link> Links;
405 
406 // ------------------------
407 // class Helpfile
408 
409 class Helpfile {
410  Links uplinks;
411  Links references;
412  Links auto_references;
413  Section title;
414  SectionList sections;
415  string inputfile;
416 
417  void check_self_ref(const string& link) {
418  size_t slash = inputfile.find('/');
419  if (slash != string::npos) {
420  if (inputfile.substr(slash+1) == link) {
421  throw string("Invalid link to self");
422  }
423  }
424  }
425 
426 public:
427  Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {}
428  virtual ~Helpfile() {}
429 
430  void readHelp(istream& in, const string& filename);
431  void writeXML(FILE *out, const string& page_name);
432  void extractInternalLinks();
433 
434  const Section& get_title() const { return title; }
435 };
436 
// true only for the blank character (tabs were already expanded by the Reader)
inline bool isSpace(char c) {
    return ' ' == c;
}
// true for blank and linefeed
inline bool isWhitespace(char c) {
    switch (c) {
        case ' ':
        case '\n':
            return true;
        default:
            return false;
    }
}
439 
// A line is a comment if its very first character is '#'.
inline bool isComment(const char *s) {
    return *s == '#';
}
// true if 's' is a comment line (starts with '#') or
// consists of nothing but blanks (including the empty string)
inline bool isEmptyOrComment(const char *s) {
    if (s[0] == '#') return true;

    const char *p = s;
    while (*p == ' ') ++p; // skip leading blanks

    return *p == 0;        // reached the end -> line was blank
}
452 
453 static void checkControlComment(const char *line) {
454  const int DEFAULT_WIDTH = 91; // has to match value in to_help.xsl@preformatted-default-width
455 
456  if (isComment(line)) {
457  // Note: currently all valid control comments start with the same keyword
458  // Please document control comments in help.readme@Control
459  const char *KEYWORD = "PREFORMATTED ";
460  const char *found = strstr(line, KEYWORD);
461  if (found) {
462  const char *rest = found + strlen(KEYWORD);
463  if (strcmp(rest, "RESET") == 0) {
465  }
466 #define WLEN 6
467  else if (strncmp(rest, "WIDTH ", WLEN) == 0) {
468  const char *rest2 = rest + WLEN;
469  int width = atoi(rest2);
470  if (width<1) {
471  if (strncmp(rest2, "DEFAULT", 7) == 0) {
472  width = DEFAULT_WIDTH;
473  }
474  else {
475  throw strf("invalid width %i in control comment '%s'", width, line);
476  }
477  }
479  }
480 #undef WLEN
481  else if (strcmp(rest, "1") == 0) { // @@@ allow higher numbers for more lines?
484  }
485  else {
486  throw strf("invalid control comment '%s' (while parsing at '%s')", line, rest);
487  }
488  }
489  }
490 }
491 
492 inline const char *extractKeyword(const char *line, string& keyword) {
493  // returns NULp if no keyword was found
494  // otherwise returns position behind keyword and sets value of 'keyword'
495 
496  const char *space = strchr(line, ' ');
497  if (space && space>line) {
498  keyword = string(line, 0, space-line);
499  return space;
500  }
501  else if (!space) { // test for keyword w/o content behind
502  if (line[0]) { // not empty
503  keyword = line;
504  return strchr(line, 0);
505  }
506  }
507  return NULp;
508 }
509 
inline const char *eatSpace(const char *line) {
    // returns the position of the first non-blank character of 'line'
    // (the end of the string if there is none)
    for (; *line == ' '; ++line) {}
    return line;
}
inline const char *eatWhitespace(const char *paragraph) {
    // returns the position behind all leading blanks and linefeeds of 'paragraph'
    for (;;) {
        const char c = *paragraph;
        if (c != ' ' && c != '\n') break;
        ++paragraph;
    }
    return paragraph;
}
520 
521 inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) {
522  if (paragraph.length()) {
523  if (type == ENUMERATED) {
524  sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num));
525  }
526  else {
527  sec.Content().push_back(Ostring(paragraph, lineNo, type));
528  }
529 
530  type = PLAIN_TEXT;
531  etype = NONE;
532  paragraph = "";
533  }
534 }
535 
inline const char *firstChar(const char *s) {
    // returns the position of the first character of 's' that is not a blank
    size_t blanks = 0;
    while (s[blanks] == ' ') ++blanks;
    return s+blanks;
}
540 
inline bool is_startof_itemlist_element(const char *contentStart) {
    // Tests whether 'contentStart' (= first non-blank of a line) starts an
    // element of an item list, i.e. looks like "- text" or "* text":
    //   - a bullet ('-' or '*'),
    //   - followed by exactly one whitespace,
    //   - followed by something that is neither whitespace nor '-'.
    //
    // The characters are converted to unsigned char before they are passed
    // to isspace(): help texts may contain non-ASCII bytes, which are
    // negative if 'char' is signed and must not be passed to <cctype>
    // functions as such.

    const char bullet = contentStart[0];
    if (bullet != '-' && bullet != '*') return false;

    const unsigned char second = static_cast<unsigned char>(contentStart[1]);
    if (!isspace(second)) return false; // also stops here at the end of the string

    const unsigned char third = static_cast<unsigned char>(contentStart[2]);
    return !(isspace(third) || third == '-');
}
551 
552 #define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums
553 
554 static EnumerationType startsWithLetter(string& s, unsigned& number) {
555  // tests if first line starts with 'letter.'
556  // if true then 'letter.' is removed from the string
557  // the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..)
558 
559  size_t off = s.find_first_not_of(" \n");
560  if (off == string::npos) return NONE;
561  if (!isalpha(s[off])) return NONE;
562 
563  size_t astart = off;
564  EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER;
565 
566  number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1;
567  ++off;
568 
569  h2x_assert(number>0 && number<MAX_ALLOWED_ENUM);
570 
571  if (s[off] != '.' && s[off] != ')') return NONE;
572  if (s[off+1] != ' ') return NONE;
573 
574  // remove 'letter.' from string :
575  ++off;
576  while (s[off+1] == ' ') ++off;
577  s.erase(astart, off-astart+1);
578 
579  return etype;
580 }
581 
582 static bool startsWithNumber(string& s, unsigned& number) {
583  // tests if first line starts with 'number.'
584  // if true then 'number.' is removed from the string
585 
586  size_t off = s.find_first_not_of(" \n");
587  if (off == string::npos) return false;
588  if (!isdigit(s[off])) return false;
589 
590  size_t num_start = off;
591  number = 0;
592 
593  for (; isdigit(s[off]); ++off) {
594  number = number*10 + (s[off]-'0');
595  }
596  if (number>MAX_ALLOWED_ENUM) return false;
597 
598  if (s[off] != '.' && s[off] != ')') return false;
599  if (s[off+1] != ' ') return false;
600 
601  // remove 'number.' from string :
602  ++off;
603  while (s[off+1] == ' ') ++off;
604  s.erase(num_start, off-num_start+1);
605 
606  return true;
607 }
608 
609 static EnumerationType detectLineEnumType(string& line, unsigned& number) {
610  if (startsWithNumber(line, number)) return DIGITS;
611  return startsWithLetter(line, number);
612 }
613 
614 static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
615  string paragraph = line;
616  size_t para_start_lineno = reader.getLineNo();
617 
618  if (sec.line_number() == NO_LINENUMBER_INFO) { // linenumber is not known yet
619  // assume section just started (this happens with TITLE)
620  sec.set_line_number(para_start_lineno);
621  }
622 
624  EnumerationType etype = NONE;
625  unsigned num = 0;
626 
627  unsigned last_alpha_num = -1;
628 
629  h2x_assert(sec.Content().empty());
630 
631  while (1) {
632  line = reader.getNext();
633  if (!line) break;
634 
635  if (isEmptyOrComment(line)) {
636  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
637  checkControlComment(line);
638  check_TODO(line, reader);
639  }
640  else {
641  string keyword;
642  const char *rest = extractKeyword(line, keyword);
643 
644  if (rest) { // a new keyword
645  reader.back();
646  break;
647  }
648 
649  check_TODO(line, reader);
650 
651  string Line = line;
652 
653  if (sec.get_type() == SEC_OCCURRENCE) {
654  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
655  }
656  else {
657  const char *firstNonWhite = firstChar(line);
658  if (is_startof_itemlist_element(firstNonWhite)) {
659  h2x_assert(firstNonWhite != line);
660 
661  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
662 
663  Line[firstNonWhite-line] = ' ';
664  type = ITEM; // is reset in call to pushParagraph
665  }
666  else {
667  unsigned foundNum;
668  EnumerationType foundEtype = detectLineEnumType(Line, foundNum);
669 
670  if (foundEtype == ALPHA_UPPER || foundEtype == ALPHA_LOWER) {
671  if (foundNum == (last_alpha_num+1) || foundNum == 1) {
672  last_alpha_num = foundNum;
673  }
674  else {
675 #if defined(WARN_IGNORED_ALPHA_ENUMS)
676  add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum"));
677 #endif
678  foundEtype = NONE;
679 
680  reader.back();
681  Line = reader.getNext();
682  last_alpha_num = -1;
683  }
684  }
685 
686  if (foundEtype != NONE) {
687  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
688 
689  type = ENUMERATED;
690  num = foundNum;
691  etype = foundEtype;
692 
693  if (!num) {
694  h2x_assert(etype == DIGITS);
695  throw "Enumerations starting with zero are not supported";
696  }
697  }
698  }
699  }
700 
701  if (paragraph.length()) {
702  paragraph = paragraph+"\n"+Line;
703  }
704  else {
705  paragraph = string("\n")+Line;
706  para_start_lineno = reader.getLineNo();
707  }
708  }
709  }
710 
711  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
712 
713  if (sec.Content().size()>0 && indentation>0) {
714  string spaces;
715  spaces.reserve(indentation);
716  spaces.append(indentation, ' ');
717 
718  string& ostr = sec.Content().front();
719  ostr = string("\n") + spaces + ostr;
720  }
721 }
722 
723 inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) {
724  for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) {
725  if (ex->Target() == link) {
726  if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno());
727  throw strf("Link to '%s' duplicated here.", link.c_str());
728  }
729  }
730 }
731 inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) {
732  check_specific_duplicates(link, uplinks, add_warnings);
733  check_specific_duplicates(link, references, add_warnings);
734 }
735 
736 static void warnAboutDuplicate(SectionList& sections) {
737  set<string> seen;
738  SectionList::iterator end = sections.end();
739  for (SectionList::iterator s = sections.begin(); s != end; ++s) {
740  const string& sname = s->getName();
741  if (sname == "NOTES") continue; // do not warn about multiple NOTES sections
742 
743  SectionList::iterator o = s; ++o;
744  for (; o != end; ++o) {
745  if (sname == o->getName()) {
746  o->attach_warning("duplicated SECTION name");
747  if (seen.find(sname) == seen.end()) {
748  s->attach_warning("name was first used");
749  seen.insert(sname);
750  }
751  }
752  }
753  }
754 }
755 
756 void Helpfile::readHelp(istream& in, const string& filename) {
757  if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str()));
758 
759  Reader read(in);
760 
761  inputfile = filename; // remember file read (for comment)
762 
763  const char *line;
764  const char *name_only = strrchr(filename.c_str(), '/');
765 
766  h2x_assert(name_only);
767  ++name_only;
768 
769  try {
770  while (1) {
771  line = read.getNext();
772  if (!line) break;
773 
774  if (isEmptyOrComment(line)) {
775  checkControlComment(line);
776  check_TODO(line, read);
777  continue;
778  }
779 
780  check_TODO(line, read);
781 
782  string keyword;
783  const char *rest = extractKeyword(line, keyword);
784 
785  if (rest) { // found a keyword
786  if (keyword == "UP") {
787  rest = eatSpace(rest);
788  if (strlen(rest)) {
789  check_duplicates(rest, uplinks, references, true);
790  if (strcmp(name_only, rest) == 0) throw "UP link to self";
791 
792  uplinks.push_back(Link(rest, read.getLineNo()));
793  }
794  }
795  else if (keyword == "SUB") {
796  rest = eatSpace(rest);
797  if (strlen(rest)) {
798  check_duplicates(rest, uplinks, references, true);
799  if (strcmp(name_only, rest) == 0) throw "SUB link to self";
800 
801  references.push_back(Link(rest, read.getLineNo()));
802  }
803  }
804  else if (keyword == "TITLE") {
805  rest = eatSpace(rest);
806  parseSection(title, rest, 0, read);
807 
808  if (title.Content().empty()) throw "empty TITLE not allowed";
809 
810  const string& t = title.Content().front();
811  if (t.find("Standard help file form") != string::npos) {
812  throw strf("Illegal title for help file: '%s'", t.c_str());
813  }
814 
815  const size_t len = t.length();
816  if (len>MAX_TITLE_CHARS) {
817  // ignore non-alphanumeric characters at end of string:
818  size_t last_alnum_pos = len-1;
819  while (!isalnum(t[last_alnum_pos])) {
820  --last_alnum_pos;
821  }
822  ++last_alnum_pos;
823  arb_assert(last_alnum_pos<=len);
824 
825  const size_t ignored = len-last_alnum_pos;
826  if ((len-ignored)>MAX_TITLE_CHARS) {
827  title.attach_warning(strf("TITLE too verbose (max. %i chars allowed; found %zu%s)",
829  len,
830  ignored ? strf("; acceptable trailing chars: %zu", ignored).c_str() : ""
831  ));
832  }
833  }
834  }
835  else {
836  if (keyword == "NOTE") keyword = "NOTES";
837  if (keyword == "EXAMPLE") keyword = "EXAMPLES";
838  if (keyword == "WARNING") keyword = "WARNINGS";
839 
840  SectionType stype = SEC_NONE;
841  int idx;
842  for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) {
843  if (knownSections[idx] == keyword) {
844  stype = SectionType(idx);
845  break;
846  }
847  }
848 
849  size_t lineno = read.getLineNo();
850 
851  if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str());
852 
853  if (stype == SEC_SECTION) {
854  string section_name = eatSpace(rest);
855  Section sec(section_name, stype, lineno);
856  parseSection(sec, "", 0, read);
857  sections.push_back(sec);
858  }
859  else {
860  Section sec(keyword, stype, lineno);
861  rest = eatSpace(rest);
862  parseSection(sec, rest, rest-line, read);
863  sections.push_back(sec);
864  }
865  }
866  }
867  else {
868  throw strf("Unhandled line");
869  }
870  }
871 
872  warnAboutDuplicate(sections);
873  }
874  catch (string& err) { throw read.attached_message(err); }
875  catch (const char *err) { throw read.attached_message(err); }
876 }
877 
878 static bool shouldReflow(const string& s, int& foundIndentation) {
879  // foundIndentation is only valid if shouldReflow() returns true
880  enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START;
881  bool equal_indent = true;
882  int lastIndent = -1;
883  int thisIndent = 0;
884 
885  for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) {
886  if (*c == '\n') {
887  state = START;
888  thisIndent = 0;
889  }
890  else if (isSpace(*c)) {
891  if (state == DOT || state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed
892  else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces
893  else if (state == CHAR) state = SPACE; // now seen 1 space
894  }
895  else {
896  if (state == MULTIPLE) return false; // character after multiple spaces
897  if (state == START) {
898  if (lastIndent == -1) lastIndent = thisIndent;
899  else if (lastIndent != thisIndent) equal_indent = false;
900  }
901  state = (*c == '.' || *c == ',') ? DOT : CHAR;
902  }
903  }
904 
905  if (lastIndent<0) {
906  equal_indent = false;
907  }
908 
909  if (equal_indent) {
910  foundIndentation = lastIndent-1;
911  h2x_assert(foundIndentation >= 0);
912  }
913  return equal_indent;
914 }
915 
916 static string correctSpaces(const string& text, int change) {
917  h2x_assert(text.find('\n') == string::npos);
918 
919  if (!change) return text;
920 
921  size_t first = text.find_first_not_of(' ');
922  if (first == string::npos) return ""; // empty line
923 
924  if (change<0) {
925  int remove = -change;
926  h2x_assert(remove <= int(first));
927  return text.substr(remove);
928  }
929 
930  h2x_assert(change>0); // add spaces
931  return string(change, ' ')+text;
932 }
933 
934 static string correctIndentation(const string& text, int change) {
935  // removes 'remove' spaces from every line
936 
937  size_t this_lineend = text.find('\n');
938  string result;
939 
940  if (this_lineend == string::npos) {
941  result = correctSpaces(text, change);
942  }
943  else {
944  result = correctSpaces(text.substr(0, this_lineend), change);
945 
946  while (this_lineend != string::npos) {
947  size_t next_lineend = text.find('\n', this_lineend+1);
948  if (next_lineend == string::npos) { // last line
949  result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change);
950  }
951  else {
952  result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change);
953  }
954  this_lineend = next_lineend;
955  }
956  }
957  return result;
958 }
959 
inline size_t countSpaces(const string& text) {
    // returns the number of leading blanks of 'text'
    // or INT_MAX if 'text' contains nothing but blanks (= empty line)
    const size_t first = text.find_first_not_of(' ');
    return first == string::npos ? size_t(INT_MAX) : first;
}
965 
static size_t scanMinIndentation(const string& text) {
    // Returns the smallest indentation (= number of leading blanks) found
    // in any line of 'text'. Lines that are empty or consist of blanks only
    // are ignored. Returns 0 if there is no other line.
    //
    // All lines are examined, including the one in front of the first
    // linefeed. correctIndentation() modifies that line as well, so it has
    // to take part in the minimum.

    size_t min_indent = INT_MAX;
    size_t line_start = 0;

    while (1) {
        const size_t line_end = text.find('\n', line_start);
        const size_t first    = text.find_first_not_of(' ', line_start);

        // the line is blank if there is no non-blank character in front of its end
        const bool blank = first == string::npos || (line_end != string::npos && first >= line_end);
        if (!blank) min_indent = min(min_indent, first-line_start);

        if (line_end == string::npos) break; // that was the last line
        line_start = line_end+1;
    }

    if (min_indent == INT_MAX) min_indent = 0; // only empty lines
    return min_indent;
}
989 
990 // -----------------------------
991 // class ParagraphTree
992 
993 class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable {
994  ParagraphTree *brother; // has same indentation as this
995  ParagraphTree *son; // indentation + 1
996 
997  Ostring otext; // text of the Section (containing linefeeds)
998 
999  bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text)
1000  int indentation; // the real indentation of the blank (behind removed enumeration)
1001 
1002  string location_description() const OVERRIDE { return "in paragraph starting here"; }
1003  size_t line_number() const OVERRIDE { return otext.get_lineno(); }
1004 
1005  ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end)
1006  : son(NULp),
1007  otext(*begin),
1008  indentation(0)
1009  {
1010  h2x_assert(begin != end);
1011 
1012  string& text = otext;
1013  reflow = otext.get_preformatted_width()==0 && shouldReflow(text, indentation);
1014  if (!reflow) {
1015  size_t reststart = text.find('\n', 1);
1016 
1017  if (reststart == 0) {
1018  attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail");
1019  }
1020 
1021  if (reststart != string::npos) {
1022  int rest_indent = -1;
1023  string rest = text.substr(reststart);
1024  bool rest_reflow = shouldReflow(rest, rest_indent);
1025 
1026  if (rest_reflow) {
1027  int first_indent = countSpaces(text.substr(1));
1028  if (get_type() == PLAIN_TEXT) {
1029  size_t last = text.find_last_not_of(' ', reststart-1);
1030  bool is_header = last != string::npos && text[last] == ':';
1031 
1032  if (!is_header && rest_indent == (first_indent+8)) {
1033 #if defined(DEBUG)
1034  size_t textstart = text.find_first_not_of(" \n");
1035  h2x_assert(textstart != string::npos);
1036 #endif // DEBUG
1037 
1038  text = text.substr(0, reststart)+correctIndentation(rest, -8);
1039  reflow = shouldReflow(text, indentation);
1040  }
1041  }
1042  else {
1043  int diff = rest_indent-first_indent;
1044  if (diff>0) {
1045  text = text.substr(0, reststart)+correctIndentation(rest, -diff);
1046  reflow = shouldReflow(text, indentation);
1047  }
1048  else if (diff<0) {
1049  // paragraph with more indent on first line (occurs?)
1050  attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff));
1051  }
1052  }
1053  }
1054  }
1055  }
1056 
1057  if (!reflow) {
1058  indentation = scanMinIndentation(text);
1059  }
1060  text = correctIndentation(text, -indentation);
1061  if (get_type() == ITEM) {
1062  h2x_assert(indentation >= 2);
1063  indentation -= 2;
1064  }
1065 
1066  brother = buildParagraphTree(++begin, end);
1067  }
1068 
1069  void brothers_to_sons(ParagraphTree *new_brother);
1070 
1071  unsigned get_preformatted_width() const {
1072  return otext.get_preformatted_width();
1073  }
1074 
1075 public:
1076  virtual ~ParagraphTree() {
1077  delete brother;
1078  delete son;
1079  }
1080 
1081  ParagraphType get_type() const { return otext.get_type(); }
1082 
1083  bool is_itemlist_member() const { return get_type() == ITEM; }
1084  unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; }
1085  EnumerationType get_enum_type() const { return otext.get_enum_type(); }
1086 
1087  const char *readable_type() const {
1088  const char *res = NULp;
1089  switch (get_type()) {
1090  case PLAIN_TEXT: res = "PLAIN_TEXT"; break;
1091  case ITEM: res = "ITEM"; break;
1092  case ENUMERATED: res = "ENUMERATED"; break;
1093  }
1094  return res;
1095  }
1096 
1097  size_t countTextNodes() {
1098  size_t nodes = 1; // this
1099  if (son) nodes += son->countTextNodes();
1100  if (brother) nodes += brother->countTextNodes();
1101  return nodes;
1102  }
1103 
1104 #if defined(DUMP_PARAGRAPHS)
1105  void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; }
1106  char *masknl(const char *text) {
1107  char *result = ARB_strdup(text);
1108  for (int i = 0; result[i]; ++i) {
1109  if (result[i] == '\n') result[i] = '|';
1110  }
1111  return result;
1112  }
1113  void dump(ostream& out, int indent = 0) {
1114  print_indent(out, indent+1);
1115  {
1116  char *mtext = masknl(otext.as_string().c_str());
1117  out << "text='" << mtext << "'\n";
1118  free(mtext);
1119  }
1120 
1121  print_indent(out, indent+1);
1122  out << "type='" << readable_type() << "' ";
1123  if (get_type() == ENUMERATED) {
1124  out << "enumeration='" << otext.get_number() << "' ";
1125  }
1126  out << "reflow='" << reflow << "' ";
1127  out << "indentation='" << indentation << "'\n";
1128 
1129  if (son) {
1130  print_indent(out, indent+2); cout << "son:\n";
1131  son->dump(out, indent+2);
1132  cout << "\n";
1133  }
1134  if (brother) {
1135  print_indent(out, indent); cout << "brother:\n";
1136  brother->dump(out, indent);
1137  }
1138  }
1139 #endif // DUMP_PARAGRAPHS
1140 
1141 private:
1142  static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) {
1143  if (begin == end) return NULp;
1144  return new ParagraphTree(begin, end);
1145  }
1146 public:
1147  static ParagraphTree* buildParagraphTree(const Section& sec) {
1148  const Ostrings& txt = sec.Content();
1149  if (txt.empty()) throw "attempt to build an empty ParagraphTree";
1150  return buildParagraphTree(txt.begin(), txt.end());
1151  }
1152 
1153  bool contains(ParagraphTree *that) {
1154  return
1155  this == that ||
1156  (son && son->contains(that)) ||
1157  (brother && brother->contains(that));
1158  }
1159 
1160  ParagraphTree *predecessor(ParagraphTree *before_this) {
1161  if (brother == before_this) return this;
1162  if (!brother) return NULp;
1163  return brother->predecessor(before_this);
1164  }
1165 
1166  void append(ParagraphTree *new_brother) {
1167  if (!brother) brother = new_brother;
1168  else brother->append(new_brother);
1169  }
1170 
1171  bool is_some_brother(const ParagraphTree *other) const {
1172  return (other == brother) || (brother && brother->is_some_brother(other));
1173  }
1174 
1175  ParagraphTree* takeAllInFrontOf(ParagraphTree *after) {
1176  ParagraphTree *removed = this;
1177  ParagraphTree *after_pred = this;
1178 
1179  h2x_assert(is_some_brother(after));
1180 
1181  while (1) {
1182  h2x_assert(after_pred);
1183  h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after'
1184 
1185  if (after_pred->brother == after) { // found after
1186  after_pred->brother = NULp; // unlink
1187  break;
1188  }
1189  after_pred = after_pred->brother;
1190  }
1191 
1192  return removed;
1193  }
1194 
1195  ParagraphTree *firstListMember() {
1196  switch (get_type()) {
1197  case PLAIN_TEXT: break;
1198  case ITEM: return this;
1199  case ENUMERATED: {
1200  if (get_enumeration() == 1) return this;
1201  break;
1202  }
1203  }
1204  if (brother) return brother->firstListMember();
1205  return NULp;
1206  }
1207 
1208  ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) {
1209  if (indentation<previous.indentation) return NULp;
1210  if (indentation == previous.indentation && get_type() == previous.get_type()) {
1211  if (get_type() != ENUMERATED) return this;
1212  if (get_enumeration() > previous.get_enumeration()) return this;
1213  return NULp;
1214  }
1215  if (!brother) return NULp;
1216  return brother->nextListMemberAfter(previous);
1217  }
1218  ParagraphTree *nextListMember() const {
1219  return brother ? brother->nextListMemberAfter(*this) : NULp;
1220  }
1221 
1222  ParagraphTree* firstWithLessIndentThan(int wanted_indentation) {
1223  if (indentation < wanted_indentation) return this;
1224  if (!brother) return NULp;
1225  return brother->firstWithLessIndentThan(wanted_indentation);
1226  }
1227 
1228  void format_indentations();
1229  void format_lists();
1230 
1231 private:
1232  static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) {
1233  Ostrings S;
1234  S.push_back(Ostring(Text, beginLineNo, type));
1235  return new ParagraphTree(S.begin(), S.end());
1236  }
1237  ParagraphTree *xml_write_list_contents();
1238  ParagraphTree *xml_write_enum_contents();
1239  void xml_write_textblock();
1240 
1241 public:
1242  void xml_write();
1243 };
1244 
1245 #if defined(DUMP_PARAGRAPHS)
1246 static void dump_paragraph(ParagraphTree *para) {
1247  // helper function for use in gdb
1248  para->dump(cout, 0);
1249 }
1250 #endif
1251 
1252 void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) {
1258  if (new_brother) {
1259  h2x_assert(is_some_brother(new_brother));
1260 
1261  if (brother != new_brother) {
1262 #if defined(DEBUG)
1263  if (son) {
1264  son->attach_warning("Found unexpected son (in brothers_to_sons)");
1265  brother->attach_warning("while trying to transform paragraphs from here ..");
1266  new_brother->attach_warning(".. to here ..");
1267  attach_warning(".. into sons of this paragraph.");
1268  return;
1269  }
1270 #endif
1271 
1272  h2x_assert(!son);
1273  h2x_assert(brother);
1274 
1275  if (!new_brother) { // all brothers -> sons
1276  son = brother;
1277  brother = NULp;
1278  }
1279  else {
1280  son = brother->takeAllInFrontOf(new_brother);
1281  brother = new_brother;
1282  }
1283  }
1284  }
1285  else {
1286  h2x_assert(!son);
1287  son = brother;
1288  brother = NULp;
1289  }
1290 }
1291 void ParagraphTree::format_lists() {
1292  // reformats tree such that all items/enumerations are brothers
1293  ParagraphTree *member = firstListMember();
1294  if (member) {
1295  for (ParagraphTree *curr = this; curr != member; curr = curr->brother) {
1296  h2x_assert(curr);
1297  if (curr->son) curr->son->format_lists();
1298  }
1299 
1300  for (ParagraphTree *next = member->nextListMember();
1301  next;
1302  member = next, next = member->nextListMember())
1303  {
1304  member->brothers_to_sons(next);
1305  h2x_assert(member->brother == next);
1306 
1307  if (member->son) member->son->format_lists();
1308  }
1309 
1310  h2x_assert(!member->son); // member is the last item
1311 
1312  if (member->brother) {
1313  ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1);
1314  member->brothers_to_sons(non_member);
1315  }
1316 
1317  if (member->son) member->son->format_lists();
1318  if (member->brother) member->brother->format_lists();
1319  }
1320  else {
1321  for (ParagraphTree *curr = this; curr; curr = curr->brother) {
1322  h2x_assert(curr);
1323  if (curr->son) curr->son->format_lists();
1324  }
1325  }
1326 }
1327 
1328 void ParagraphTree::format_indentations() {
1329  if (brother) {
1330  ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1);
1331 #if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION)
1332  if (same_indent && indentation != same_indent->indentation) {
1333  same_indent->attach_warning("indentation is assumed to be same as ..");
1334  attach_warning(".. here");
1335  }
1336 #endif
1337  brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs
1338  if (brother) brother->format_indentations();
1339  }
1340 
1341  if (son) son->format_indentations();
1342 }
1343 
1344 // -----------------
1345 // LinkType
1346 
// Link types are distinct bits, so several types can be tested at once using a mask.
// The bit position selects the matching entry of the link_id table
// (LT_UNKNOWN -> entry 0, LT_HTTP -> entry 1, LT_HTTPS -> entry 2, ...).
enum LinkType {
    LT_UNKNOWN = 0,
    LT_HTTP    = 1,
    LT_HTTPS   = 2,
    LT_FTP     = 4,
    LT_FILE    = 8,
    LT_EMAIL   = 16,
    LT_HLP     = 32,
    LT_PS      = 64,
    LT_PDF     = 128,
    LT_TICKET  = 256,
};
1359 
1360 static const char *link_id[] = {
1361  "unknown",
1362  "www", // "http:"
1363  "www", // "https:"
1364  "www", // "ftp:"
1365  "www", // "file:"
1366  "email",
1367  "hlp",
1368  "ps",
1369  "pdf",
1370  "ticket",
1371 };
1372 
1373 static string LinkType2id(LinkType type) {
1374  size_t idx = 0;
1375  while (type >= 1) {
1376  idx++;
1377  type = LinkType(type>>1);
1378  }
1379  arb_assert(idx<ARRAY_ELEMS(link_id));
1380  return link_id[idx];
1381 }
1382 
1383 inline const char *getExtension(const string& name) {
1384  size_t last_dot = name.find_last_of('.');
1385  if (last_dot == string::npos) {
1386  return NULp;
1387  }
1388  return name.c_str()+last_dot+1;
1389 }
1390 
1391 static LinkType detectLinkType(const string& link_target) {
1392  LinkType type = LT_UNKNOWN;
1393  const char *ext = getExtension(link_target);
1394 
1395  if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP;
1396  else if (link_target.find("http://") == 0) type = LT_HTTP;
1397  else if (link_target.find("https://") == 0) type = LT_HTTPS;
1398  else if (link_target.find("ftp://") == 0) type = LT_FTP;
1399  else if (link_target.find("file://") == 0) type = LT_FILE;
1400  else if (link_target.find('@') != string::npos) type = LT_EMAIL;
1401  else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS;
1402  else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF;
1403  else if (link_target[0] == '#') type = LT_TICKET;
1404 
1405  return type;
1406 }
1407 
1408 // --------------------------------------------------------------------------------
1409 
1410 
1411 
static string locate_helpfile(const string& helpname) {
    // search for 'helpname' in various helpfile locations
    // (returns the first existing 'directory/helpname' or "" if none exists)

    static const char *const search_dir[] = { "source/", "genhelp/" };
    const size_t dirs = sizeof(search_dir)/sizeof(search_dir[0]);

    for (size_t d = 0; d<dirs; ++d) {
        const string candidate = string(search_dir[d])+helpname;
        struct stat  info;
        if (stat(candidate.c_str(), &info) == 0) {
            return candidate;
        }
    }
    return string();
}
1428 
1429 static string locate_document(const string& docname) {
1430  // search for 'docname' or 'docname.gz' in various helpfile locations
1431 
1432  string located = locate_helpfile(docname);
1433  if (located.empty()) {
1434  located = locate_helpfile(docname+".gz");
1435  }
1436  return located;
1437 }
1438 
1439 static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) {
1440  if (type == LT_UNKNOWN) {
1441  string msg = string("Unknown link type (dest='")+dest+"')";
1442  throw LineAttachedMessage(msg, source_line);
1443  }
1444 
1445  link.add_attribute("dest", dest);
1446  link.add_attribute("type", LinkType2id(type));
1447  link.add_attribute("source_line", source_line);
1448 
1449  if (type&(LT_HLP|LT_PDF|LT_PS)) { // other links (www, email) cannot be checked for existence here
1450  string fullhelp = ((type&LT_HLP) ? locate_helpfile : locate_document)(dest);
1451  if (fullhelp.empty()) {
1452  link.add_attribute("missing", "1");
1453  string deadlink = strf("Dead link to '%s'", dest.c_str());
1454 #if 1
1455  throw LineAttachedMessage(deadlink, source_line);
1456 #else
1457  add_warning(deadlink, source_line);
1458 #endif
1459  }
1460  }
1461 }
1462 
1463 static void print_XML_Text_expanding_links(const string& text, size_t lineNo) {
1464  size_t found = text.find("LINK{", 0);
1465  if (found != string::npos) {
1466  size_t inside_link = found+5;
1467  size_t close = text.find('}', inside_link);
1468 
1469  if (close == string::npos) throw "unclosed 'LINK{}'";
1470 
1471  string link_target = text.substr(inside_link, close-inside_link);
1472  LinkType type = detectLinkType(link_target);
1473  string dest = link_target;
1474 
1475  XML_Text(text.substr(0, found));
1476 
1477  {
1478  XML_Tag link("LINK");
1479  link.set_on_extra_line(false);
1480  add_link_attributes(link, type, dest, lineNo);
1481  }
1482 
1483  print_XML_Text_expanding_links(text.substr(close+1), lineNo);
1484  }
1485  else {
1486  XML_Text t(text);
1487  }
1488 }
1489 
static string autolink_ticket_references(const string& text) {
    // Returns a copy of 'text' where each ticket reference ('#' directly followed by
    // one or more digits) is wrapped into 'LINK{...}', e.g. "see #123" -> "see LINK{#123}".
    //
    // - a '#' not followed by a digit is copied unchanged
    // - text inside an already existing 'LINK{...}' is copied unchanged
    //   (wrapping e.g. the '#12' of 'LINK{http://host/page#12}' would destroy that link)
    //
    // Works iteratively: each character of 'text' is copied once.

    const size_t len = text.length();
    string       result;
    result.reserve(len);

    size_t pos     = 0;                  // start of the not yet copied part of 'text'
    size_t linkpos = text.find("LINK{"); // next explicit link at or behind 'pos' (npos = none)

    while (pos<len) {
        const size_t hashpos = text.find('#', pos);
        if (hashpos == string::npos) break;

        if (linkpos<hashpos) { // explicit link starts first -> copy it untouched
            const size_t close   = text.find('}', linkpos);
            const size_t skip_to = close == string::npos ? len : close+1;

            result.append(text, pos, skip_to-pos);
            pos     = skip_to;
            linkpos = text.find("LINK{", pos);
            continue;
        }

        size_t digits_end = hashpos+1;
        // isdigit is only defined for values representable as unsigned char (and EOF)
        while (digits_end<len && isdigit(static_cast<unsigned char>(text[digits_end]))) ++digits_end;

        if (digits_end == hashpos+1) { // char after '#' is no digit => not a ticketref
            result.append(text, pos, digits_end-pos); // copy up to and including the '#'
        }
        else {
            result.append(text, pos, hashpos-pos);
            result += "LINK{";
            result.append(text, hashpos, digits_end-hashpos);
            result += '}';
        }
        pos = digits_end;
    }

    result.append(text, pos, string::npos);
    return result;
}
1513 
1514 inline void print_XML_Text(const string& text, size_t lineNo) {
1515  string autolinkedText = autolink_ticket_references(text);
1516  print_XML_Text_expanding_links(autolinkedText, lineNo);
1517 }
1518 
1519 void ParagraphTree::xml_write_textblock() {
1520  XML_Tag textblock("T");
1521  textblock.add_attribute("reflow", reflow ? "1" : "0");
1522  if (!reflow) {
1523  unsigned width = get_preformatted_width();
1524  if (width>0) {
1525  textblock.add_attribute("width", strf("%i", width));
1526  }
1527  }
1528 
1529  {
1530  string usedText;
1531  const string& text = otext;
1532  if (reflow) {
1533  usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level);
1534  }
1535  else {
1536  usedText = text;
1537  }
1538  print_XML_Text(usedText, otext.get_lineno());
1539  }
1540 }
1541 
1542 ParagraphTree *ParagraphTree::xml_write_list_contents() {
1543  h2x_assert(is_itemlist_member());
1544 #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1545  if (!reflow) attach_warning("ITEM not reflown (check output)");
1546 #endif
1547  {
1548  XML_Tag entry("ENTRY");
1549  entry.add_attribute("item", "1");
1550  xml_write_textblock();
1551  if (son) son->xml_write();
1552  }
1553  if (brother && brother->is_itemlist_member()) {
1554  return brother->xml_write_list_contents();
1555  }
1556  return brother;
1557 }
1558 ParagraphTree *ParagraphTree::xml_write_enum_contents() {
1559  h2x_assert(get_enumeration());
1560 #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1561  if (!reflow) attach_warning("ENUMERATED not reflown (check output)");
1562 #endif
1563  {
1564  XML_Tag entry("ENTRY");
1565  switch (get_enum_type()) {
1566  case DIGITS:
1567  entry.add_attribute("enumerated", strf("%i", get_enumeration()));
1568  break;
1569  case ALPHA_UPPER:
1570  entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration()));
1571  break;
1572  case ALPHA_LOWER:
1573  entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration()));
1574  break;
1575  default:
1576  h2x_assert(0);
1577  break;
1578  }
1579  xml_write_textblock();
1580  if (son) son->xml_write();
1581  }
1582  if (brother && brother->get_enumeration()) {
1583  int diff = brother->get_enumeration()-get_enumeration();
1584  if (diff != 1) {
1585  attach_warning("Non-consecutive enumeration detected between here..");
1586  brother->attach_warning(".. and here");
1587  }
1588  return brother->xml_write_enum_contents();
1589  }
1590  return brother;
1591 }
1592 
1593 void ParagraphTree::xml_write() {
1594  try {
1595  ParagraphTree *next = NULp;
1596  if (get_enumeration()) {
1597  XML_Tag enu("ENUM");
1598  if (get_enumeration() != 1) {
1599  attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration()));
1600  }
1601  next = xml_write_enum_contents();
1602 #if defined(WARN_LONESOME_ENUM_ELEMENTS)
1603  if (next == brother) attach_warning("Suspicious single-element-ENUM");
1604 #endif
1605  }
1606  else if (is_itemlist_member()) {
1607  XML_Tag list("LIST");
1608  next = xml_write_list_contents();
1609 #if defined(WARN_LONESOME_LIST_ELEMENTS)
1610  if (next == brother) attach_warning("Suspicious single-element-LIST");
1611 #endif
1612  }
1613  else {
1614  {
1615  XML_Tag para("P");
1616  xml_write_textblock();
1617  if (son) son->xml_write();
1618  }
1619  next = brother;
1620  }
1621  if (next) next->xml_write();
1622  }
1623  catch (string& err) { throw attached_message(err); }
1624  catch (const char *err) { throw attached_message(err); }
1625 }
1626 
1627 static void create_top_links(const Links& links, const char *tag) {
1628  for (Links::const_iterator s = links.begin(); s != links.end(); ++s) {
1629  XML_Tag link(tag);
1630  add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno());
1631  }
1632 }
1633 
1634 inline string remove_LF_and_indentation(string paragraph) {
1635  // remove linefeeds + spaces behind linefeed (=indentation)
1636  size_t pos = 0;
1637  while (1) {
1638  size_t lf = paragraph.find('\n', pos);
1639  if (lf == string::npos) break; // all LFs handled
1640 
1641  // eliminate spaces before the LF:
1642  if (lf>0 && paragraph[lf-1] == ' ') { // LF is preceeded by space(s)
1643  size_t sp = lf-1;
1644  while (sp>=1 && paragraph[sp-1] == ' ') --sp; // position to 1st space
1645  arb_assert(sp<lf);
1646  paragraph.erase(sp, lf-sp);
1647  lf = sp;
1648  }
1649  arb_assert(paragraph[lf] == '\n');
1650 
1651  size_t ns = paragraph.find(' ', lf); // next space
1652  if (ns != lf+1) { // no space after LF
1653  paragraph[lf] = ' '; // -> replace LF by single space
1654  pos = lf+1;
1655  }
1656  else {
1657  size_t as = paragraph.find_first_not_of(' ', ns); // pos after consecutive space(s)
1658  size_t ls = as == string::npos ? ns : as-1; // last consecutive space
1659  paragraph.erase(lf, ls-lf); // keep one space (between concatenated line contents)
1660  }
1661  }
1662  // remove trailing spaces:
1663  size_t ls = paragraph.find_last_not_of(' ');
1664  if (ls == string::npos) { // only spaces
1665  paragraph.clear();
1666  }
1667  else {
1668  ++ls;
1669  paragraph.erase(ls, paragraph.length()-ls);
1670  }
1671  return paragraph;
1672 }
1673 
1674 void Helpfile::writeXML(FILE *out, const string& page_name) {
1675  XML_Document xml("PAGE", "arb_help.dtd", out);
1676 
1677  xml.skip_empty_tags = true;
1678  xml.indentation_per_level = 2;
1679 
1680  xml.getRoot().add_attribute("name", page_name);
1681 #if defined(DEBUG)
1682  xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version
1683 #else
1684  xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version
1685 #endif // DEBUG
1686 
1687  xml.getRoot().add_attribute("source", inputfile.c_str());
1688 
1689  {
1690  XML_Comment(string("automatically generated from ../")+inputfile+' ');
1691  }
1692 
1693  create_top_links(uplinks, "UP");
1694  create_top_links(references, "SUB");
1695  create_top_links(auto_references, "SUB");
1696 
1697  try {
1698  string titleText, subtitleText;
1699 
1700  const Ostrings& T = title.Content();
1701  Ostrings::const_iterator s = T.begin();
1702 
1703  if (s != T.end()) titleText = *s++;
1704 
1705  bool subtitleAdded = false; // @@@ not needed! (use !subtitleText.empty())
1706  for (; s != T.end(); ++s) {
1707  if (s->get_type() != PLAIN_TEXT) {
1708  throw s->attached_message("wrong paragraph type (plain text expected)");
1709  }
1710  string text = s->as_string();
1711  if (!text.empty()) { // ignore empty lines
1712  text = eatWhitespace(text.c_str());
1713  if (!text.empty()) {
1714  if (subtitleAdded) throw s->attached_message("only one subtitle accepted");
1715 
1716  text = remove_LF_and_indentation(text);
1717 
1718  if (text.length()>MAX_SUBTITLE_CHARS) {
1719  s->attach_warning(strf("subtitle too verbose (max. %i chars allowed; found %zu)", MAX_SUBTITLE_CHARS, text.length()));
1720  }
1721  subtitleText = text;
1722  subtitleAdded = true; // accept only one line
1723  }
1724  }
1725  }
1726 
1727  {
1728  XML_Tag title_tag("TITLE"); { XML_Text text(titleText); }
1729  }
1730  if (!subtitleText.empty()) {
1731  XML_Tag title_tag("SUBTITLE"); { XML_Text text(subtitleText); }
1732  }
1733 
1734  }
1735  catch (string& err) { throw title.attached_message(err); }
1736  catch (const char *err) { throw title.attached_message(err); }
1737 
1738  for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1739  try {
1740  XML_Tag section_tag("SECTION");
1741  section_tag.add_attribute("name", sec->getName());
1742 
1743  ParagraphTree *ptree = ParagraphTree::buildParagraphTree(*sec);
1744 
1745 #if defined(DEBUG)
1746  size_t textnodes = ptree->countTextNodes();
1747 #endif
1748 #if defined(DUMP_PARAGRAPHS)
1749  cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n";
1750  ptree->dump(cout);
1751  cout << "----------------------------------------\n";
1752 #endif
1753 
1754  ptree->format_lists();
1755 
1756 #if defined(DUMP_PARAGRAPHS)
1757  cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n";
1758  ptree->dump(cout);
1759  cout << "----------------------------------------\n";
1760 #endif
1761 #if defined(DEBUG)
1762  size_t textnodes2 = ptree->countTextNodes();
1763  h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error
1764 #endif
1765 
1766  ptree->format_indentations();
1767 
1768 #if defined(DUMP_PARAGRAPHS)
1769  cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n";
1770  ptree->dump(cout);
1771  cout << "----------------------------------------\n";
1772 #endif
1773 #if defined(DEBUG)
1774  size_t textnodes3 = ptree->countTextNodes();
1775  h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error
1776 #endif
1777 
1778  ptree->xml_write();
1779 
1780  delete ptree;
1781  }
1782  catch (string& err) { throw sec->attached_message(err); }
1783  catch (const char *err) { throw sec->attached_message(err); }
1784  }
1785 }
1786 
1788  for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1789  try {
1790  const Ostrings& s = sec->Content();
1791 
1792  for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) {
1793  const string& line = *li;
1794  size_t start = 0;
1795 
1796  while (1) {
1797  size_t found = line.find("LINK{", start);
1798  if (found == string::npos) break;
1799  found += 5;
1800  size_t close = line.find('}', found);
1801  if (close == string::npos) break;
1802 
1803  string link_target = line.substr(found, close-found);
1804 
1805  if (link_target.find("http://") == string::npos &&
1806  link_target.find("https://")== string::npos &&
1807  link_target.find("ftp://") == string::npos &&
1808  link_target.find("file://") == string::npos &&
1809  link_target.find('@') == string::npos)
1810  {
1811  check_self_ref(link_target);
1812 
1813  try {
1814  check_specific_duplicates(link_target, references, false); // check only sublinks here
1815  check_specific_duplicates(link_target, uplinks, false); // check only uplinks here
1816  check_specific_duplicates(link_target, auto_references, false); // check only sublinks here
1817 
1818  // only auto-add inline reference if none of the above checks has thrown
1819  auto_references.push_back(Link(link_target, li->line_number()));
1820  }
1821  catch (string& err) {
1822  ; // silently ignore inlined
1823  }
1824  }
1825  start = close+1;
1826  }
1827  }
1828  }
1829  catch (string& err) {
1830  throw sec->attached_message("'"+err+"' while scanning LINK{}");
1831  }
1832  }
1833 }
1834 
1835 static void show_err(const string& err, size_t lineno, const string& helpfile) {
1836  if (err.find(helpfile+':') != string::npos) {
1837  cerr << err;
1838  }
1839  else if (lineno == NO_LINENUMBER_INFO) {
1840  cerr << helpfile << ":1: [in unknown line] " << err;
1841  }
1842  else {
1843  cerr << helpfile << ":" << lineno << ": " << err;
1844  }
1845  cerr << '\n';
1846 }
1847 inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) {
1848  show_err(line_err.Message(), line_err.Lineno(), helpfile);
1849 }
1850 inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) {
1851  show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile);
1852 }
1853 inline void show_warnings(const string& helpfile) {
1854  for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) {
1855  show_warning(*wi, helpfile);
1856  }
1857 }
1858 static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) {
1859  show_err(error, helpfile);
1860  show_warnings(helpfile);
1861 }
1862 
1863 int ARB_main(int argc, char *argv[]) {
1864  if (argc != 3) {
1865  cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n";
1866  return EXIT_FAILURE;
1867  }
1868 
1869  Helpfile help;
1870  string arb_help;
1871 
1872  try {
1873  try {
1874  arb_help = argv[1];
1875  string xml_output = argv[2];
1876 
1877  {
1878  ifstream in(arb_help.c_str());
1879  help.readHelp(in, arb_help);
1880  }
1881 
1882  help.extractInternalLinks();
1883 
1884  {
1885  FILE *out = std::fopen(xml_output.c_str(), "wt");
1886  if (!out) throw string("Can't open '")+xml_output+'\'';
1887 
1888  try {
1889  // arb_help contains 'source/name.hlp'
1890  size_t slash = arb_help.find('/');
1891  size_t dot = arb_help.find_last_of('.');
1892 
1893  if (slash == string::npos || dot == string::npos) {
1894  throw string("parameter <ARB helpfile> has to be in format 'source/name.hlp' (not '"+arb_help+"')");
1895  }
1896 
1897  string page_name(arb_help, slash+1, dot-slash-1);
1898  help.writeXML(out, page_name);
1899  fclose(out);
1900  }
1901  catch (...) {
1902  fclose(out);
1903  remove(xml_output.c_str());
1904  throw;
1905  }
1906  }
1907 
1908  show_warnings(arb_help);
1909 
1910  return EXIT_SUCCESS;
1911  }
1912  catch (string& err) { throw unattached_message(err); }
1913  catch (const char * err) { throw unattached_message(err); }
1914  catch (LineAttachedMessage& err) { throw; }
1915  catch (...) { throw unattached_message("unknown exception in arb_help2xml"); }
1916  }
1917  catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); }
1918  catch (...) { h2x_assert(0); }
1919 
1920  return EXIT_FAILURE;
1921 }
1922 
1923 // --------------------------------------------------------------------------------
1924 
1925 #ifdef UNIT_TESTS
1926 #include <test_unit.h>
1927 #include <arb_msg.h>
1928 #include <arb_file.h>
1929 
1930 // Hint: you may set ONLY_DO_UNITTEST = 1 to speed up code/test-cycle
1931 // see ./Makefile@ONLY_DO_UNITTEST
1932 
1933 #define TEST_REMOVE_LF_AND_INDENTATION(i,want) TEST_EXPECT_EQUAL(remove_LF_and_indentation(i).c_str(), want)
1934 #define TEST_REMOVE_LF_AND_INDENTATION__BROKEN(i,want,got) TEST_EXPECT_EQUAL__BROKEN(remove_LF_and_indentation(i).c_str(), want, got)
1935 
1936 void TEST_remove_LF_and_indentation() {
1937  TEST_REMOVE_LF_AND_INDENTATION("",
1938  "");
1939 
1940  TEST_REMOVE_LF_AND_INDENTATION(" \n \n \n ",
1941  "");
1942  TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline",
1943  "hello Newline");
1944  TEST_REMOVE_LF_AND_INDENTATION("hello\nNewline\n 1\n2 \n 3 \n4\n5\n 6 \n 7 \n 8\n",
1945  "hello Newline 1 2 3 4 5 6 7 8");
1946 
1947  TEST_REMOVE_LF_AND_INDENTATION("Visualization of Three-dimensional\n structure of small subunit (16S) rRNA",
1948  "Visualization of Three-dimensional structure of small subunit (16S) rRNA");
1949 }
1950 
1951 static arb_test::match_expectation help_file_compiles(const char *helpfile, const char *expected_title, const char *expected_error_part) {
1952  using namespace arb_test;
1953  expectation_group expected;
1954 
1955  ifstream in(helpfile);
1956 
1958 
1959  Helpfile help;
1960  try {
1961  help.readHelp(in, helpfile);
1962  help.extractInternalLinks();
1963 
1964  FILE *devnul = fopen("/dev/null", "wt");
1965  if (!devnul) throw unattached_message("can't write to null device");
1966  help.writeXML(devnul, "dummy");
1967  fclose(devnul);
1968  }
1969  catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); }
1970  catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); }
1971 
1972  if (expected_error_part) {
1973  expected.add(that(error).does_differ_from_NULL());
1974  if (error) expected.add(that(error->Message()).does_contain(expected_error_part));
1975  }
1976  else {
1977  expected.add(that(error).is_equal_to_NULL());
1978  if (!error) {
1979  Section title = help.get_title();
1980  const Ostrings& title_strings = title.Content();
1981 
1982  expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title));
1983  expected.add(that(title_strings.size()).is_equal_to(1));
1984  }
1985  else {
1986  show_error_and_warnings(*error, helpfile);
1987  }
1988  }
1989 
1990  delete error;
1991 
1992  return all().ofgroup(expected);
1993 }
1994 
1995 #define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULp))
1996 #define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULp,expError))
1997 
1998 void TEST_hlp2xml_conversion() {
1999  TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE"));
2000 
2001  HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp
2002 
2003  HELP_FILE_COMPILES("source/markbyref.hlp", "Mark by reference"); // source/markbyref.hlp
2004  HELP_FILE_COMPILES("source/ad_align.hlp", "Alignment Administration"); // source/ad_align.hlp
2005  HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights and licenses"); // genhelp/copyright.hlp
2006 
2007  // @@@ add test for helpfile with subtitle
2008 
2009  HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file
2010 }
2011 TEST_PUBLISH(TEST_hlp2xml_conversion);
2012 
2013 
2014 // #define TEST_AUTO_UPDATE // uncomment to update expected xml
2015 
2016 void TEST_hlp2xml_output() {
2017  string tested_helpfile[] = {
2018  "unittest"
2019  };
2020 
2021  string HELP_SOURCE = "../../HELP_SOURCE/";
2022  string LIB = "../../lib/";
2023  string EXPECTED = "help/";
2024 
2025  for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) {
2026  string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml";
2027  string html = LIB + "help_html/" + tested_helpfile[i] + ".html";
2028  string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp";
2029 
2030  string xml_expected = EXPECTED + tested_helpfile[i] + ".xml";
2031  string html_expected = EXPECTED + tested_helpfile[i] + ".html";
2032  string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp";
2033 
2034 
2035 #if defined(TEST_AUTO_UPDATE)
2036 # if defined(NDEBUG)
2037 # error please use auto-update only in DEBUG mode
2038 # endif
2039  TEST_COPY_FILE(xml.c_str(), xml_expected.c_str());
2040  TEST_COPY_FILE(html.c_str(), html_expected.c_str());
2041  TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str());
2042 
2043 #else // !defined(TEST_AUTO_UPDATE)
2044 
2045 # if defined(DEBUG)
2046  int expected_xml_difflines = 0;
2047  int expected_hlp_difflines = 0;
2048 # else // !defined(DEBUG)
2049  int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning
2050  int expected_hlp_difflines = 2; // resulting warning in helpfile
2051 # endif
2052  TEST_EXPECT_TEXTFILE_DIFFLINES(xml.c_str(), xml_expected.c_str(), expected_xml_difflines);
2053  TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html.c_str(), html_expected.c_str(), 0); // html contains the update-date
2054  TEST_EXPECT_TEXTFILE_DIFFLINES(hlp.c_str(), hlp_expected.c_str(), expected_hlp_difflines);
2055 #endif
2056  }
2057 }
2058 
2059 
2060 #if defined(PROTECT_HELP_VS_CHANGES)
2061 void TEST_protect_help_vs_changes() { // should normally be disabled
2062  // fails if help changes compared to another checkout
2063  // or just updates the diff w/o failing (if you comment out the last line)
2064  //
2065  // if the patch is hugo and you load it into xemacs
2066  // you might want to (turn-on-lazy-shot)
2067  //
2068  // patch-pointer: ../UNIT_TESTER/run/help_changes.patch
2069 
2070  bool do_help = true;
2071  bool do_html = true;
2072 
2073  const char *ref_WC = "ARB.help.ref";
2074 
2075  // ---------------------------------------- config above
2076 
2077  string this_base = "../..";
2078  string ref_base = this_base+"/../"+ref_WC;
2079  string to_help = "/lib/help";
2080  string to_html = "/lib/help_html";
2081  string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help;
2082  string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html;
2083 
2084  string update_cmd;
2085 
2086  if (do_help) {
2087  if (do_html) update_cmd = string("(")+diff_help+";"+diff_html+")";
2088  else update_cmd = diff_help;
2089  }
2090  else if (do_html) update_cmd = diff_html;
2091 
2092  string patch = "help_changes.patch";
2093  update_cmd += " >"+patch+" ||true";
2094 
2095  string fail_on_change_cmd = "test \"`cat "+patch+" | grep -v '^Common subdirectories' | wc -l`\" = \"0\" || ( echo \"Error: Help changed\"; false)";
2096 
2097  TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str()));
2098  TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit
2099 }
2100 #endif
2101 
2102 #endif // UNIT_TESTS
static LinkType detectLinkType(const string &link_target)
GB_ERROR GBK_system(const char *system_command)
Definition: arb_msg.cxx:571
#define arb_assert(cond)
Definition: arb_assert.h:245
#define MAX_LINE_LENGTH
Definition: reader.h:21
string result
GB_TYPES type
EnumerationType
Ostrings & Content()
group_matcher all()
Definition: test_unit.h:1011
static void show_err(const string &err, size_t lineno, const string &helpfile)
const char * c_str() const
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
Definition: insdel.cxx:615
int ARB_main(int argc, char *argv[])
return string(buffer, length)
void space()
Definition: test_unit.h:414
static void dot(double **i, double **j, double **k)
Definition: trnsprob.cxx:59
void show_warnings(const string &helpfile)
EnumerationType get_enum_type() const
bool is_some_brother(const ParagraphTree *other) const
static bool shouldReflow(const string &s, int &foundIndentation)
#define MAX_TITLE_CHARS
static string correctIndentation(const string &text, int change)
bool is_itemlist_member() const
void writeXML(FILE *out, const string &page_name)
void attach_warning(const string &message) const
static void help()
ParagraphTree * predecessor(ParagraphTree *before_this)
LineAttachedMessage attached_message(const string &message) const
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void readHelp(istream &in, const string &filename)
static size_t scanMinIndentation(const string &text)
const char * extractKeyword(const char *line, string &keyword)
size_t line_number() const OVERRIDE
void warning(int warning_num, const char *warning_message)
Definition: util.cxx:61
const char * firstChar(const char *s)
const char * title
Definition: readseq.c:22
static string locate_document(const string &docname)
char * strf(const char *format,...) __ATTR__FORMAT(1)
Definition: util.cxx:27
Ostring(const string &s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
STL namespace.
ParagraphType
void add_warning(const LineAttachedMessage &laMsg)
list< Section > SectionList
void print_XML_Text(const string &text, size_t lineNo)
void show_warning(const LineAttachedMessage &line_err, const string &helpfile)
static EnumerationType detectLineEnumType(string &line, unsigned &number)
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
const string & getName() const
unsigned get_enumeration() const
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
virtual ~MessageAttachable()
static void print_XML_Text_expanding_links(const string &text, size_t lineNo)
string remove_LF_and_indentation(string paragraph)
static char * buffer
static void checkControlComment(const char *line)
void check_duplicates(const string &link, const Links &uplinks, const Links &references, bool add_warnings)
virtual ~ParagraphTree()
static HelixNrInfo * start
void check_specific_duplicates(const string &link, const Links &existing, bool add_warnings)
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
LineAttachedMessage unattached_message(const string &message)
LineAttachedMessage(const string &message_, size_t lineno_)
#define PATHS
AliDataPtr after(AliDataPtr data, size_t pos)
Definition: insdel.cxx:593
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
const Ostrings & Content() const
LinkType
const char * eatWhitespace(const char *paragraph)
#define is_equal_to_NULL()
Definition: test_unit.h:1028
static EnumerationType startsWithLetter(string &s, unsigned &number)
void extractInternalLinks()
__ATTR__VFORMAT(1) static string vstrf(const char *format
size_t countTextNodes()
static const char * link_id[]
#define true
Definition: ureadseq.h:14
const char * eatSpace(const char *line)
static void set_preformatted_blocks_wanted(unsigned preformatted_blocks)
#define false
Definition: ureadseq.h:13
void setName(const string &name_)
string location_description() const OVERRIDE
va_list static argPtr size_t buf_size
XML_Document * the_XML_Document
Definition: xml.cxx:23
void message(char *errortext)
#define WLEN
static void error(const char *msg)
Definition: mkptypes.cxx:96
ParagraphTree * nextListMemberAfter(const ParagraphTree &previous)
static const char * knownSections[]
void back()
expectation_group & add(const expectation &e)
Definition: test_unit.h:812
#define MAX_SUBTITLE_CHARS
static void show_error_and_warnings(const LineAttachedMessage &error, const string &helpfile)
#define that(thing)
Definition: test_unit.h:1043
static string autolink_ticket_references(const string &text)
static void set_current_preformatted_width(unsigned allowed)
bool isComment(const char *s)
const Section & get_title() const
static void add_link_attributes(XML_Tag &link, LinkType type, const string &dest, size_t source_line)
ParagraphType get_type() const
#define TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(fgot, fwant, diff)
Definition: test_unit.h:1419
virtual ~Reader()
#define does_differ_from_NULL()
Definition: test_unit.h:1029
ParagraphTree * nextListMember() const
#define TABSIZE
size_t Lineno() const
va_end(argPtr)
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
AW_selection_list * links
Definition: AW_help.cxx:75
STATIC_ASSERT(ARRAY_ELEMS(knownSections)==KNOWN_SECTION_TYPES)
#define is_equal_to(val)
Definition: test_unit.h:1025
#define h2x_assert(bed)
bool is_startof_itemlist_element(const char *contentStart)
#define TEST_EXPECT_ZERO(cond)
Definition: test_unit.h:1085
bool isWhitespace(char c)
static void warnAboutDuplicate(SectionList &sections)
unsigned get_preformatted_width() const
void set_line_number(size_t lineNumber)
const string & Message() const
TYPE get_type() const
Definition: probe_tree.h:64
a xml text node
Definition: xml.hxx:122
#define does_contain(val)
Definition: test_unit.h:1040
const string & as_string() const
ParagraphTree * takeAllInFrontOf(ParagraphTree *after)
Ostring(const string &s, size_t line_no, ParagraphType type_)
static ParagraphTree * buildParagraphTree(const Section &sec)
size_t get_lineno() const
SectionType
static list< LineAttachedMessage > warnings
const size_t NO_LINENUMBER_INFO
xml element
static bool startsWithNumber(string &s, unsigned &number)
ParagraphTree * firstWithLessIndentThan(int wanted_indentation)
const char CHAR
aisc_com * link
bool contains(ParagraphTree *that)
Definition: output.h:122
#define OVERRIDE
Definition: cxxforward.h:112
static string LinkType2id(LinkType type)
const char * name_only(const char *fullpath)
Definition: AWTI_import.cxx:46
static string correctSpaces(const string &text, int change)
bool isSpace(char c)
static char eol[3]
const EnumerationType & get_enum_type() const
va_start(argPtr, format)
__ATTR__FORMAT(1) static string strf(const char *format
Section(string name_, SectionType type_, size_t lineno_)
unsigned get_number() const
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
const char * getNext()
bool isEmptyOrComment(const char *s)
const char * getExtension(const string &name)
void append(ParagraphTree *new_brother)
static int line
Definition: arb_a2ps.c:296
#define MAX_ALLOWED_ENUM
static void parseSection(Section &sec, const char *line, int indentation, Reader &reader)
#define NULp
Definition: cxxforward.h:116
static char * inputfile
Definition: readseq.c:181
virtual ~Helpfile()
const char * readable_type() const
void check_TODO(const char *, const Reader &)
list< Ostring > Ostrings
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
Definition: test_unit.h:1416
string & as_string()
void print_indent(int indent)
Definition: test_unit.h:417
Definition: trnsprob.h:20
ParagraphTree * firstListMember()
const ParagraphType & get_type() const
int getLineNo() const
Reader(istream &in_)
static void create_top_links(const Links &links, const char *tag)
size_t length
static string locate_helpfile(const string &helpname)
list< Link > Links
size_t countSpaces(const string &text)
#define min(a, b)
Definition: f2c.h:153
size_t line_number() const OVERRIDE
virtual ~Section()
AW_selection_list * uplinks
Definition: AW_help.cxx:74
li
Definition: AW_awar.cxx:152
static int line_number
Definition: arb_a2ps.c:297
void pushParagraph(Section &sec, string &paragraph, size_t lineNo, ParagraphType &type, EnumerationType &etype, unsigned num)
SectionType get_type() const
static Score ** U
Definition: align.cxx:67
GB_write_int const char s
Definition: AW_awar.cxx:154