ARB
arb_help2xml.cxx
Go to the documentation of this file.
1 // ==================================================================== //
2 // //
3 // File : arb_help2xml.cxx //
4 // Purpose : Converts old ARB help format to XML //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in October 2001 //
7 // Copyright Department of Microbiology (Technical University Munich) //
8 // //
9 // Visit our web site at: http://www.arb-home.de/ //
10 // //
11 // ==================================================================== //
12 
13 #include <xml.hxx>
14 #include <arb_defs.h>
15 #include <arb_diff.h>
16 #include <static_assert.h>
17 
18 #include <list>
19 #include <set>
20 #include <iostream>
21 #include <fstream>
22 
23 #include <cstdlib>
24 #include <cstdarg>
25 #include <cstring>
26 #include <climits>
27 
28 #include <unistd.h>
29 #include <sys/stat.h>
30 
31 using namespace std;
32 
33 #define h2x_assert(bed) arb_assert(bed)
34 
35 #if defined(DEBUG)
36 #define WARN_FORMATTING_PROBLEMS
37 #define WARN_MISSING_HELP
38 // #define DUMP_PARAGRAPHS
39 // #define PROTECT_HELP_VS_CHANGES
40 #endif // DEBUG
41 
42 
43 #if defined(WARN_FORMATTING_PROBLEMS)
44 
45 #define WARN_FIXED_LAYOUT_LIST_ELEMENTS
46 #define WARN_LONESOME_ENUM_ELEMENTS
47 
48 // warnings below are useless for production and shall be disabled in SVN
49 // #define WARN_LONESOME_LIST_ELEMENTS
50 // #define WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION
51 // #define WARN_IGNORED_ALPHA_ENUMS
52 
53 #endif
54 
55 
56 #define MAX_LINE_LENGTH 200 // maximum length of lines in input stream
57 #define TABSIZE 8
58 
// Keywords that start a regular section of a help file.
// readHelp() converts the index of the matching entry directly into a
// SectionType (SectionType(idx)), so the order of the entries here has to
// stay in sync with the declaration order of that enum.
static const char *knownSections[] = {
    "OCCURRENCE",
    "DESCRIPTION",
    "NOTES",
    "EXAMPLES",
    "WARNINGS",
    "BUGS",
    "SECTION",
};
68 
77 
81 };
82 
84 
85 __ATTR__VFORMAT(1) static string vstrf(const char *format, va_list argPtr) {
86  static size_t buf_size = 256;
87  static char *buffer = new char[buf_size];
88 
89  size_t length;
90  while (1) {
91  if (!buffer) {
92  h2x_assert(buffer); // to stop when debugging
93  throw string("out of memory");
94  }
95 
96  length = vsnprintf(buffer, buf_size, format, argPtr);
97  if (length < buf_size) break; // string fits into current buffer
98 
99  // otherwise resize buffer :
100  buf_size += buf_size/2;
101  delete [] buffer;
102  buffer = new char[buf_size];
103  }
104 
105  return string(buffer, length);
106 }
107 
108 __ATTR__FORMAT(1) static string strf(const char *format, ...) {
109  va_list argPtr;
110  va_start(argPtr, format);
111  string result = vstrf(format, argPtr);
112  va_end(argPtr);
113 
114  return result;
115 }
116 
117 // -----------------------------
118 // warnings and errors
119 
121  string message;
122  size_t lineno;
123 
124 public:
125  LineAttachedMessage(const string& message_, size_t lineno_) :
126  message(message_),
127  lineno(lineno_)
128  {}
129 
130  const string& Message() const { return message; }
131  size_t Lineno() const { return lineno; }
132 };
133 
// Marker used as line number of messages that do not refer to a specific input line.
// NOTE(review): '-1U' is an unsigned int converted to size_t; where size_t is wider
// than unsigned this is UINT_MAX, not the largest size_t - only compare against this constant.
const size_t NO_LINENUMBER_INFO = -1U;

// Wraps 'message' into a LineAttachedMessage carrying NO_LINENUMBER_INFO as line number.
LineAttachedMessage unattached_message(const string& message) { return LineAttachedMessage(message, NO_LINENUMBER_INFO); }
137 
138 
139 static list<LineAttachedMessage> warnings;
140 inline void add_warning(const LineAttachedMessage& laMsg) {
141  warnings.push_back(laMsg);
142 }
143 inline void add_warning(const string& warning, size_t lineno) {
144  add_warning(LineAttachedMessage(warning, lineno));
145 }
146 
148  virtual ~MessageAttachable() {}
149 
150  virtual string location_description() const = 0; // may return empty string
151  virtual size_t line_number() const = 0; // if unknown -> should return NO_LINENUMBER_INFO
152 
154  string where = location_description();
155  if (where.empty()) return LineAttachedMessage(message, line_number());
156  return LineAttachedMessage(message+" ["+where+"]", line_number());
157  }
158  void attach_warning(const string& message) const {
159  add_warning(attached_message(message));
160  }
161 };
162 
163 
164 // ----------------------
165 // class Reader
166 
167 class Reader : public MessageAttachable {
168 private:
169  istream& in;
170  char lineBuffer[MAX_LINE_LENGTH];
171  char lineBuffer2[MAX_LINE_LENGTH];
172  bool readAgain;
173  bool eof;
174  int lineNo;
175 
176  string location_description() const OVERRIDE { return ""; }
177  size_t line_number() const OVERRIDE { return lineNo; }
178 
179  void getline() {
180  if (!eof) {
181  if (in.eof()) eof = true;
182  else {
183  h2x_assert(in.good());
184 
185  in.getline(lineBuffer, MAX_LINE_LENGTH);
186  lineNo++;
187 
188  if (in.eof()) eof = true;
189  else if (in.fail()) throw "line too long";
190 
191  if (strchr(lineBuffer, '\t')) {
192  int o2 = 0;
193 
194  for (int o = 0; lineBuffer[o]; ++o) {
195  if (lineBuffer[o] == '\t') {
196  int spaces = TABSIZE - (o2 % TABSIZE);
197  while (spaces--) lineBuffer2[o2++] = ' ';
198  }
199  else {
200  lineBuffer2[o2++] = lineBuffer[o];
201  }
202  }
203  lineBuffer2[o2] = 0;
204  strcpy(lineBuffer, lineBuffer2);
205  }
206 
207  char *eol = strchr(lineBuffer, 0)-1;
208  while (eol >= lineBuffer && isspace(eol[0])) {
209  eol[0] = 0; // trim trailing whitespace
210  eol--;
211  }
212  if (eol > lineBuffer) {
213  // now eol points to last character
214  if (eol[0] == '-' && isalnum(eol[-1])) {
215  attach_warning("manual hyphenation detected");
216  }
217  }
218  }
219  }
220  }
221 
222 public:
223  Reader(istream& in_) : in(in_), readAgain(true), eof(false), lineNo(0) { getline(); }
224  virtual ~Reader() {}
225 
226  const char *getNext() {
227  if (readAgain) readAgain = false;
228  else getline();
229  return eof ? NULp : lineBuffer;
230  }
231 
232  void back() {
233  h2x_assert(!readAgain);
234  readAgain = true;
235  }
236 
237  int getLineNo() const { return lineNo; }
238 };
239 
244 };
250 };
251 
252 class Ostring {
253  string content;
254  size_t lineNo; // where string came from
256 
257  // only valid for type==ENUMERATED:
258  EnumerationType etype;
259  unsigned number;
260 
261 public:
262 
263  Ostring(const string& s, size_t line_no, ParagraphType type_)
264  : content(s),
265  lineNo(line_no),
266  type(type_),
267  etype(NONE)
268  {
269  h2x_assert(type != ENUMERATED);
270  }
271  Ostring(const string& s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
272  : content(s),
273  lineNo(line_no),
274  type(type_),
275  etype(etype_),
276  number(num)
277  {
278  h2x_assert(type == ENUMERATED);
279  h2x_assert(etype == DIGITS || etype == ALPHA_UPPER || etype == ALPHA_LOWER);
280  h2x_assert(num>0);
281  }
282 
283 
284  operator const string&() const { return content; }
285  operator string&() { return content; }
286 
287  const string& as_string() const { return content; }
288  string& as_string() { return content; }
289 
290  size_t get_lineno() const { return lineNo; }
291 
292  const ParagraphType& get_type() const { return type; }
294  h2x_assert(type == ENUMERATED);
295  return etype;
296  }
297  unsigned get_number() const {
298  h2x_assert(type == ENUMERATED);
299  return number;
300  }
301 
302  // some wrapper to make Ostring act like string
303  const char *c_str() const { return content.c_str(); }
304 };
305 
306 typedef list<Ostring> Ostrings;
307 
308 #if defined(WARN_MISSING_HELP)
309 static void check_TODO(const char *line, const Reader& reader) {
310  if (strstr(line, "@@@") || strstr(line, "TODO")) {
311  reader.attach_warning(strf("TODO: %s", line));
312  }
313 }
314 #else
315 inline void check_TODO(const char *, const Reader&) { }
316 #endif // WARN_MISSING_HELP
317 
318 // ----------------------------
319 // class Section
320 
321 class Section FINAL_TYPE : public MessageAttachable {
323  string name;
324  Ostrings content;
325  size_t lineno;
326 
327  string location_description() const OVERRIDE { return string("in SECTION '")+name+"'"; }
328 
329 public:
330  Section(string name_, SectionType type_, size_t lineno_)
331  : type(type_),
332  name(name_),
333  lineno(lineno_)
334  {}
335  virtual ~Section() {}
336 
337  const Ostrings& Content() const { return content; }
338  Ostrings& Content() { return content; }
339  SectionType get_type() const { return type; }
340  size_t line_number() const OVERRIDE { return lineno; }
341  const string& getName() const { return name; }
342  void setName(const string& name_) { name = name_; }
343 };
344 
345 typedef list<Section> SectionList;
346 
347 // --------------------
348 // class Link
349 
// A link to another help file together with the input line on which it was found.
class Link {
    string target;        // name of the linked file
    size_t source_lineno; // line of the input where the link was found

public:
    Link(const string& target_, size_t source_lineno_)
        : target(target_), source_lineno(source_lineno_)
    {
    }

    const string& Target() const {
        return target;
    }
    size_t SourceLineno() const {
        return source_lineno;
    }
};
363 
364 typedef list<Link> Links;
365 
366 // ------------------------
367 // class Helpfile
368 
369 class Helpfile {
370 private:
371  Links uplinks;
372  Links references;
373  Links auto_references;
374  Section title;
375  SectionList sections;
376  string inputfile;
377 
378  void check_self_ref(const string& link) {
379  size_t slash = inputfile.find('/');
380  if (slash != string::npos) {
381  if (inputfile.substr(slash+1) == link) {
382  throw string("Invalid link to self");
383  }
384  }
385  }
386 
387 public:
388  Helpfile() : title("TITLE", SEC_FAKE, NO_LINENUMBER_INFO) {}
389  virtual ~Helpfile() {}
390 
391  void readHelp(istream& in, const string& filename);
392  void writeXML(FILE *out, const string& page_name);
393  void extractInternalLinks();
394 
395  const Section& get_title() const { return title; }
396 };
397 
// Only the blank counts as whitespace here
// (the Reader already expanded tabs and removed the line ends).
inline bool isWhite(char c) {
    return ' ' == c;
}
399 
400 inline bool isEmptyOrComment(const char *s) {
401  if (s[0] == '#') return true;
402  for (int off = 0; ; ++off) {
403  if (s[off] == 0) return true;
404  if (!isWhite(s[off])) break;
405  }
406 
407  return false;
408 }
409 
410 inline const char *extractKeyword(const char *line, string& keyword) {
411  // returns NULp if no keyword was found
412  // otherwise returns position behind keyword and sets value of 'keyword'
413 
414  const char *space = strchr(line, ' ');
415  if (space && space>line) {
416  keyword = string(line, 0, space-line);
417  return space;
418  }
419  else if (!space) { // test for keyword w/o content behind
420  if (line[0]) { // not empty
421  keyword = line;
422  return strchr(line, 0);
423  }
424  }
425  return NULp;
426 }
427 
428 inline const char *eatWhite(const char *line) {
429  // skips whitespace
430  while (isWhite(*line)) ++line;
431  return line;
432 }
433 
434 inline void pushParagraph(Section& sec, string& paragraph, size_t lineNo, ParagraphType& type, EnumerationType& etype, unsigned num) {
435  if (paragraph.length()) {
436  if (type == ENUMERATED) {
437  sec.Content().push_back(Ostring(paragraph, lineNo, type, etype, num));
438  }
439  else {
440  sec.Content().push_back(Ostring(paragraph, lineNo, type));
441  }
442 
443  type = PLAIN_TEXT;
444  etype = NONE;
445  paragraph = "";
446  }
447 }
448 
449 inline const char *firstChar(const char *s) {
450  while (isWhite(s[0])) ++s;
451  return s;
452 }
453 
// Tests whether 'contentStart' (the first non-blank position of a line)
// starts an element of an itemlist, i.e. looks like "- text" or "* text":
// a '-' or '*', followed by exactly one whitespace, followed by something
// that is not a '-'.
inline bool is_startof_itemlist_element(const char *contentStart) {
    const char bullet = contentStart[0];
    if (bullet != '-' && bullet != '*') return false;

    if (!isspace(contentStart[1])) return false;

    // contentStart[1] was whitespace, so contentStart[2] exists (may be the terminating zero)
    const char behind = contentStart[2];
    return !isspace(behind) && behind != '-';
}
464 
465 #define MAX_ALLOWED_ENUM 99 // otherwise it starts interpreting years as enums
466 
467 static EnumerationType startsWithLetter(string& s, unsigned& number) {
468  // tests if first line starts with 'letter.'
469  // if true then 'letter.' is removed from the string
470  // the letter is converted and returned in 'number' ('a'->1, 'b'->2, ..)
471 
472  size_t off = s.find_first_not_of(" \n");
473  if (off == string::npos) return NONE;
474  if (!isalpha(s[off])) return NONE;
475 
476  size_t astart = off;
477  EnumerationType etype = isupper(s[off]) ? ALPHA_UPPER : ALPHA_LOWER;
478 
479  number = s[off]-(etype == ALPHA_UPPER ? 'A' : 'a')+1;
480  ++off;
481 
482  h2x_assert(number>0 && number<MAX_ALLOWED_ENUM);
483 
484  if (s[off] != '.' && s[off] != ')') return NONE;
485  if (s[off+1] != ' ') return NONE;
486 
487  // remove 'letter.' from string :
488  ++off;
489  while (s[off+1] == ' ') ++off;
490  s.erase(astart, off-astart+1);
491 
492  return etype;
493 }
494 
495 static bool startsWithNumber(string& s, unsigned& number) {
496  // tests if first line starts with 'number.'
497  // if true then 'number.' is removed from the string
498 
499  size_t off = s.find_first_not_of(" \n");
500  if (off == string::npos) return false;
501  if (!isdigit(s[off])) return false;
502 
503  size_t num_start = off;
504  number = 0;
505 
506  for (; isdigit(s[off]); ++off) {
507  number = number*10 + (s[off]-'0');
508  }
509  if (number>MAX_ALLOWED_ENUM) return false;
510 
511  if (s[off] != '.' && s[off] != ')') return false;
512  if (s[off+1] != ' ') return false;
513 
514  // remove 'number.' from string :
515  ++off;
516  while (s[off+1] == ' ') ++off;
517  s.erase(num_start, off-num_start+1);
518 
519  return true;
520 }
521 
522 static EnumerationType detectLineEnumType(string& line, unsigned& number) {
523  if (startsWithNumber(line, number)) return DIGITS;
524  return startsWithLetter(line, number);
525 }
526 
527 static void parseSection(Section& sec, const char *line, int indentation, Reader& reader) {
528  string paragraph = line;
529  size_t para_start_lineno = reader.getLineNo();
530 
532  EnumerationType etype = NONE;
533  unsigned num = 0;
534 
535  unsigned last_alpha_num = -1;
536 
537  h2x_assert(sec.Content().empty());
538 
539  while (1) {
540  line = reader.getNext();
541  if (!line) break;
542 
543  if (isEmptyOrComment(line)) {
544  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
545  check_TODO(line, reader);
546  }
547  else {
548  string keyword;
549  const char *rest = extractKeyword(line, keyword);
550 
551  if (rest) { // a new keyword
552  reader.back();
553  break;
554  }
555 
556  check_TODO(line, reader);
557 
558  string Line = line;
559 
560  if (sec.get_type() == SEC_OCCURRENCE) {
561  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
562  }
563  else {
564  const char *firstNonWhite = firstChar(line);
565  if (is_startof_itemlist_element(firstNonWhite)) {
566  h2x_assert(firstNonWhite != line);
567 
568  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
569 
570  Line[firstNonWhite-line] = ' ';
571  type = ITEM; // is reset in call to pushParagraph
572  }
573  else {
574  unsigned foundNum;
575  EnumerationType foundEtype = detectLineEnumType(Line, foundNum);
576 
577  if (foundEtype == ALPHA_UPPER || foundEtype == ALPHA_LOWER) {
578  if (foundNum == (last_alpha_num+1) || foundNum == 1) {
579  last_alpha_num = foundNum;
580  }
581  else {
582 #if defined(WARN_IGNORED_ALPHA_ENUMS)
583  add_warning(reader.attached_message("Ignoring non-consecutive alpha-enum"));
584 #endif
585  foundEtype = NONE;
586 
587  reader.back();
588  Line = reader.getNext();
589  last_alpha_num = -1;
590  }
591  }
592 
593  if (foundEtype != NONE) {
594  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
595 
596  type = ENUMERATED;
597  num = foundNum;
598  etype = foundEtype;
599 
600  if (!num) {
601  h2x_assert(etype == DIGITS);
602  throw "Enumerations starting with zero are not supported";
603  }
604  }
605  }
606  }
607 
608  if (paragraph.length()) {
609  paragraph = paragraph+"\n"+Line;
610  }
611  else {
612  paragraph = string("\n")+Line;
613  para_start_lineno = reader.getLineNo();
614  }
615  }
616  }
617 
618  pushParagraph(sec, paragraph, para_start_lineno, type, etype, num);
619 
620  if (sec.Content().size()>0 && indentation>0) {
621  string spaces;
622  spaces.reserve(indentation);
623  spaces.append(indentation, ' ');
624 
625  string& ostr = sec.Content().front();
626  ostr = string("\n") + spaces + ostr;
627  }
628 }
629 
630 inline void check_specific_duplicates(const string& link, const Links& existing, bool add_warnings) {
631  for (Links::const_iterator ex = existing.begin(); ex != existing.end(); ++ex) {
632  if (ex->Target() == link) {
633  if (add_warnings) add_warning(strf("First Link to '%s' was found here.", ex->Target().c_str()), ex->SourceLineno());
634  throw strf("Link to '%s' duplicated here.", link.c_str());
635  }
636  }
637 }
638 inline void check_duplicates(const string& link, const Links& uplinks, const Links& references, bool add_warnings) {
639  check_specific_duplicates(link, uplinks, add_warnings);
640  check_specific_duplicates(link, references, add_warnings);
641 }
642 
643 static void warnAboutDuplicate(SectionList& sections) {
644  set<string> seen;
645  SectionList::iterator end = sections.end();
646  for (SectionList::iterator s = sections.begin(); s != end; ++s) {
647  const string& sname = s->getName();
648  if (sname == "NOTES") continue; // do not warn about multiple NOTES sections
649 
650  SectionList::iterator o = s; ++o;
651  for (; o != end; ++o) {
652  if (sname == o->getName()) {
653  o->attach_warning("duplicated SECTION name");
654  if (seen.find(sname) == seen.end()) {
655  s->attach_warning("name was first used");
656  seen.insert(sname);
657  }
658  }
659  }
660  }
661 }
662 
663 void Helpfile::readHelp(istream& in, const string& filename) {
664  if (!in.good()) throw unattached_message(strf("Can't read from '%s'", filename.c_str()));
665 
666  Reader read(in);
667 
668  inputfile = filename; // remember file read (for comment)
669 
670  const char *line;
671  const char *name_only = strrchr(filename.c_str(), '/');
672 
673  h2x_assert(name_only);
674  ++name_only;
675 
676  try {
677  while (1) {
678  line = read.getNext();
679  if (!line) break;
680 
681  if (isEmptyOrComment(line)) {
682  check_TODO(line, read);
683  continue;
684  }
685 
686  check_TODO(line, read);
687 
688  string keyword;
689  const char *rest = extractKeyword(line, keyword);
690 
691  if (rest) { // found a keyword
692  if (keyword == "UP") {
693  rest = eatWhite(rest);
694  if (strlen(rest)) {
695  check_duplicates(rest, uplinks, references, true);
696  if (strcmp(name_only, rest) == 0) throw "UP link to self";
697 
698  uplinks.push_back(Link(rest, read.getLineNo()));
699  }
700  }
701  else if (keyword == "SUB") {
702  rest = eatWhite(rest);
703  if (strlen(rest)) {
704  check_duplicates(rest, uplinks, references, true);
705  if (strcmp(name_only, rest) == 0) throw "SUB link to self";
706 
707  references.push_back(Link(rest, read.getLineNo()));
708  }
709  }
710  else if (keyword == "TITLE") {
711  rest = eatWhite(rest);
712  parseSection(title, rest, 0, read);
713 
714  if (title.Content().empty()) throw "empty TITLE not allowed";
715 
716  const char *t = title.Content().front().c_str();
717 
718  if (strstr(t, "Standard help file form")) {
719  throw strf("Illegal title for help file: '%s'", t);
720  }
721  }
722  else {
723  if (keyword == "NOTE") keyword = "NOTES";
724  if (keyword == "EXAMPLE") keyword = "EXAMPLES";
725  if (keyword == "WARNING") keyword = "WARNINGS";
726 
727  SectionType stype = SEC_NONE;
728  int idx;
729  for (idx = 0; idx<KNOWN_SECTION_TYPES; ++idx) {
730  if (knownSections[idx] == keyword) {
731  stype = SectionType(idx);
732  break;
733  }
734  }
735 
736  size_t lineno = read.getLineNo();
737 
738  if (idx >= KNOWN_SECTION_TYPES) throw strf("unknown keyword '%s'", keyword.c_str());
739 
740  if (stype == SEC_SECTION) {
741  string section_name = eatWhite(rest);
742  Section sec(section_name, stype, lineno);
743  parseSection(sec, "", 0, read);
744  sections.push_back(sec);
745  }
746  else {
747  Section sec(keyword, stype, lineno);
748  rest = eatWhite(rest);
749  parseSection(sec, rest, rest-line, read);
750  sections.push_back(sec);
751  }
752  }
753  }
754  else {
755  throw strf("Unhandled line");
756  }
757  }
758 
759  warnAboutDuplicate(sections);
760  }
761  catch (string& err) { throw read.attached_message(err); }
762  catch (const char *err) { throw read.attached_message(err); }
763 }
764 
765 static bool shouldReflow(const string& s, int& foundIndentation) {
766  // foundIndentation is only valid if shouldReflow() returns true
767  enum { START, CHAR, SPACE, MULTIPLE, DOT, DOTSPACE } state = START;
768  bool equal_indent = true;
769  int lastIndent = -1;
770  int thisIndent = 0;
771 
772  for (string::const_iterator c = s.begin(); c != s.end(); ++c, ++thisIndent) {
773  if (*c == '\n') {
774  state = START;
775  thisIndent = 0;
776  }
777  else if (isWhite(*c)) {
778  if (state == DOT || state == DOTSPACE) state = DOTSPACE; // multiple spaces after DOT are allowed
779  else if (state == SPACE) state = MULTIPLE; // now seen multiple spaces
780  else if (state == CHAR) state = SPACE; // now seen 1 space
781  }
782  else {
783  if (state == MULTIPLE) return false; // character after multiple spaces
784  if (state == START) {
785  if (lastIndent == -1) lastIndent = thisIndent;
786  else if (lastIndent != thisIndent) equal_indent = false;
787  }
788  state = (*c == '.' || *c == ',') ? DOT : CHAR;
789  }
790  }
791 
792  if (lastIndent<0) {
793  equal_indent = false;
794  }
795 
796  if (equal_indent) {
797  foundIndentation = lastIndent-1;
798  h2x_assert(foundIndentation >= 0);
799  }
800  return equal_indent;
801 }
802 
803 static string correctSpaces(const string& text, int change) {
804  h2x_assert(text.find('\n') == string::npos);
805 
806  if (!change) return text;
807 
808  size_t first = text.find_first_not_of(' ');
809  if (first == string::npos) return ""; // empty line
810 
811  if (change<0) {
812  int remove = -change;
813  h2x_assert(remove <= int(first));
814  return text.substr(remove);
815  }
816 
817  h2x_assert(change>0); // add spaces
818  return string(change, ' ')+text;
819 }
820 
821 static string correctIndentation(const string& text, int change) {
822  // removes 'remove' spaces from every line
823 
824  size_t this_lineend = text.find('\n');
825  string result;
826 
827  if (this_lineend == string::npos) {
828  result = correctSpaces(text, change);
829  }
830  else {
831  result = correctSpaces(text.substr(0, this_lineend), change);
832 
833  while (this_lineend != string::npos) {
834  size_t next_lineend = text.find('\n', this_lineend+1);
835  if (next_lineend == string::npos) { // last line
836  result = result+"\n"+correctSpaces(text.substr(this_lineend+1), change);
837  }
838  else {
839  result = result+"\n"+correctSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1), change);
840  }
841  this_lineend = next_lineend;
842  }
843  }
844  return result;
845 }
846 
// Returns the number of leading blanks of 'text'.
// If 'text' contains nothing but blanks (or is empty), INT_MAX is returned.
inline size_t countSpaces(const string& text) {
    const size_t len    = text.size();
    size_t       blanks = 0;

    while (blanks<len && text[blanks] == ' ') ++blanks;

    if (blanks == len) return INT_MAX; // empty line
    return blanks;
}
852 
853 static size_t scanMinIndentation(const string& text) {
854  size_t this_lineend = text.find('\n');
855  size_t min_indent = INT_MAX;
856 
857  if (this_lineend == string::npos) {
858  min_indent = countSpaces(text);
859  }
860  else {
861  while (this_lineend != string::npos) {
862  size_t next_lineend = text.find('\n', this_lineend+1);
863  if (next_lineend == string::npos) {
864  min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1)));
865  }
866  else {
867  min_indent = min(min_indent, countSpaces(text.substr(this_lineend+1, next_lineend-this_lineend-1)));
868  }
869  this_lineend = next_lineend;
870  }
871  }
872 
873  if (min_indent == INT_MAX) min_indent = 0; // only empty lines
874  return min_indent;
875 }
876 
877 // -----------------------------
878 // class ParagraphTree
879 
880 class ParagraphTree FINAL_TYPE : public MessageAttachable, virtual Noncopyable {
881  ParagraphTree *brother; // has same indentation as this
882  ParagraphTree *son; // indentation + 1
883 
884  Ostring otext; // text of the Section (containing linefeeds)
885 
886  bool reflow; // should the paragraph be reflown ? (true if indentation is equal for all lines of text)
887  int indentation; // the real indentation of the blank (behind removed enumeration)
888 
889 
890  string location_description() const OVERRIDE { return "in paragraph starting here"; }
891  size_t line_number() const OVERRIDE { return otext.get_lineno(); }
892 
893  ParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end)
894  : son(NULp),
895  otext(*begin),
896  indentation(0)
897  {
898  h2x_assert(begin != end);
899 
900  string& text = otext;
901 
902  reflow = shouldReflow(text, indentation);
903  if (!reflow) {
904  size_t reststart = text.find('\n', 1);
905 
906  if (reststart == 0) {
907  attach_warning("[internal] Paragraph starts with LF -> reflow calculation will probably fail");
908  }
909 
910  if (reststart != string::npos) {
911  int rest_indent = -1;
912  string rest = text.substr(reststart);
913  bool rest_reflow = shouldReflow(rest, rest_indent);
914 
915  if (rest_reflow) {
916  int first_indent = countSpaces(text.substr(1));
917  if (get_type() == PLAIN_TEXT) {
918  size_t last = text.find_last_not_of(' ', reststart-1);
919  bool is_header = last != string::npos && text[last] == ':';
920 
921  if (!is_header && rest_indent == (first_indent+8)) {
922 #if defined(DEBUG)
923  size_t textstart = text.find_first_not_of(" \n");
924  h2x_assert(textstart != string::npos);
925 #endif // DEBUG
926 
927  text = text.substr(0, reststart)+correctIndentation(rest, -8);
928  reflow = shouldReflow(text, indentation);
929  }
930  }
931  else {
932  int diff = rest_indent-first_indent;
933  if (diff>0) {
934  text = text.substr(0, reststart)+correctIndentation(rest, -diff);
935  reflow = shouldReflow(text, indentation);
936  }
937  else if (diff<0) {
938  // paragraph with more indent on first line (occurs?)
939  attach_warning(strf("[internal] unhandled: more indentation on the 1st line (diff=%i)", diff));
940  }
941  }
942  }
943  }
944  }
945 
946  if (!reflow) {
947  indentation = scanMinIndentation(text);
948  }
949  text = correctIndentation(text, -indentation);
950  if (get_type() == ITEM) {
951  h2x_assert(indentation >= 2);
952  indentation -= 2;
953  }
954 
955  brother = buildParagraphTree(++begin, end);
956  }
957 
958  void brothers_to_sons(ParagraphTree *new_brother);
959 
960 public:
961  virtual ~ParagraphTree() {
962  delete brother;
963  delete son;
964  }
965 
966  ParagraphType get_type() const { return otext.get_type(); }
967 
968  bool is_itemlist_member() const { return get_type() == ITEM; }
969  unsigned get_enumeration() const { return get_type() == ENUMERATED ? otext.get_number() : 0; }
970  EnumerationType get_enum_type() const { return otext.get_enum_type(); }
971 
972  const char *readable_type() const {
973  const char *res = NULp;
974  switch (get_type()) {
975  case PLAIN_TEXT: res = "PLAIN_TEXT"; break;
976  case ITEM: res = "ITEM"; break;
977  case ENUMERATED: res = "ENUMERATED"; break;
978  }
979  return res;
980  }
981 
982  size_t countTextNodes() {
983  size_t nodes = 1; // this
984  if (son) nodes += son->countTextNodes();
985  if (brother) nodes += brother->countTextNodes();
986  return nodes;
987  }
988 
989 #if defined(DUMP_PARAGRAPHS)
990  void print_indent(ostream& out, int indent) { while (indent-->0) out << ' '; }
991  char *masknl(const char *text) {
992  char *result = ARB_strdup(text);
993  for (int i = 0; result[i]; ++i) {
994  if (result[i] == '\n') result[i] = '|';
995  }
996  return result;
997  }
998  void dump(ostream& out, int indent = 0) {
999  print_indent(out, indent+1);
1000  {
1001  char *mtext = masknl(otext.as_string().c_str());
1002  out << "text='" << mtext << "'\n";
1003  free(mtext);
1004  }
1005 
1006  print_indent(out, indent+1);
1007  out << "type='" << readable_type() << "' ";
1008  if (get_type() == ENUMERATED) {
1009  out << "enumeration='" << otext.get_number() << "' ";
1010  }
1011  out << "reflow='" << reflow << "' ";
1012  out << "indentation='" << indentation << "'\n";
1013 
1014  if (son) {
1015  print_indent(out, indent+2); cout << "son:\n";
1016  son->dump(out, indent+2);
1017  cout << "\n";
1018  }
1019  if (brother) {
1020  print_indent(out, indent); cout << "brother:\n";
1021  brother->dump(out, indent);
1022  }
1023  }
1024 #endif // DUMP_PARAGRAPHS
1025 
1026 private:
1027  static ParagraphTree* buildParagraphTree(Ostrings::const_iterator begin, const Ostrings::const_iterator end) {
1028  if (begin == end) return NULp;
1029  return new ParagraphTree(begin, end);
1030  }
1031 public:
1032  static ParagraphTree* buildParagraphTree(const Section& sec) {
1033  const Ostrings& txt = sec.Content();
1034  if (txt.empty()) throw "attempt to build an empty ParagraphTree";
1035  return buildParagraphTree(txt.begin(), txt.end());
1036  }
1037 
1038  bool contains(ParagraphTree *that) {
1039  return
1040  this == that ||
1041  (son && son->contains(that)) ||
1042  (brother && brother->contains(that));
1043  }
1044 
1045  ParagraphTree *predecessor(ParagraphTree *before_this) {
1046  if (brother == before_this) return this;
1047  if (!brother) return NULp;
1048  return brother->predecessor(before_this);
1049  }
1050 
1051  void append(ParagraphTree *new_brother) {
1052  if (!brother) brother = new_brother;
1053  else brother->append(new_brother);
1054  }
1055 
1056  bool is_some_brother(const ParagraphTree *other) const {
1057  return (other == brother) || (brother && brother->is_some_brother(other));
1058  }
1059 
1060  ParagraphTree* takeAllInFrontOf(ParagraphTree *after) {
1061  ParagraphTree *removed = this;
1062  ParagraphTree *after_pred = this;
1063 
1064  h2x_assert(is_some_brother(after));
1065 
1066  while (1) {
1067  h2x_assert(after_pred);
1068  h2x_assert(after_pred->brother); // takeAllInFrontOf called with non-existing 'after'
1069 
1070  if (after_pred->brother == after) { // found after
1071  after_pred->brother = NULp; // unlink
1072  break;
1073  }
1074  after_pred = after_pred->brother;
1075  }
1076 
1077  return removed;
1078  }
1079 
1080  ParagraphTree *firstListMember() {
1081  switch (get_type()) {
1082  case PLAIN_TEXT: break;
1083  case ITEM: return this;
1084  case ENUMERATED: {
1085  if (get_enumeration() == 1) return this;
1086  break;
1087  }
1088  }
1089  if (brother) return brother->firstListMember();
1090  return NULp;
1091  }
1092 
1093  ParagraphTree *nextListMemberAfter(const ParagraphTree& previous) {
1094  if (indentation<previous.indentation) return NULp;
1095  if (indentation == previous.indentation && get_type() == previous.get_type()) {
1096  if (get_type() != ENUMERATED) return this;
1097  if (get_enumeration() > previous.get_enumeration()) return this;
1098  return NULp;
1099  }
1100  if (!brother) return NULp;
1101  return brother->nextListMemberAfter(previous);
1102  }
1103  ParagraphTree *nextListMember() const {
1104  return brother ? brother->nextListMemberAfter(*this) : NULp;
1105  }
1106 
1107  ParagraphTree* firstWithLessIndentThan(int wanted_indentation) {
1108  if (indentation < wanted_indentation) return this;
1109  if (!brother) return NULp;
1110  return brother->firstWithLessIndentThan(wanted_indentation);
1111  }
1112 
1113  void format_indentations();
1114  void format_lists();
1115 
1116 private:
1117  static ParagraphTree* buildNewParagraph(const string& Text, size_t beginLineNo, ParagraphType type) {
1118  Ostrings S;
1119  S.push_back(Ostring(Text, beginLineNo, type));
1120  return new ParagraphTree(S.begin(), S.end());
1121  }
1122  ParagraphTree *xml_write_list_contents();
1123  ParagraphTree *xml_write_enum_contents();
1124  void xml_write_textblock();
1125 
1126 public:
1127  void xml_write();
1128 };
1129 
#if defined(DUMP_PARAGRAPHS)
// Dumps 'para' (including everything reachable from it) to stdout.
static void dump_paragraph(ParagraphTree *para) {
    // helper function for use in gdb
    para->dump(cout, 0);
}
#endif
1136 
1137 void ParagraphTree::brothers_to_sons(ParagraphTree *new_brother) {
1143  if (new_brother) {
1144  h2x_assert(is_some_brother(new_brother));
1145 
1146  if (brother != new_brother) {
1147 #if defined(DEBUG)
1148  if (son) {
1149  son->attach_warning("Found unexpected son (in brothers_to_sons)");
1150  brother->attach_warning("while trying to transform paragraphs from here ..");
1151  new_brother->attach_warning(".. to here ..");
1152  attach_warning(".. into sons of this paragraph.");
1153  return;
1154  }
1155 #endif
1156 
1157  h2x_assert(!son);
1158  h2x_assert(brother);
1159 
1160  if (!new_brother) { // all brothers -> sons
1161  son = brother;
1162  brother = NULp;
1163  }
1164  else {
1165  son = brother->takeAllInFrontOf(new_brother);
1166  brother = new_brother;
1167  }
1168  }
1169  }
1170  else {
1171  h2x_assert(!son);
1172  son = brother;
1173  brother = NULp;
1174  }
1175 }
1176 void ParagraphTree::format_lists() {
1177  // reformats tree such that all items/enumerations are brothers
1178  ParagraphTree *member = firstListMember();
1179  if (member) {
1180  for (ParagraphTree *curr = this; curr != member; curr = curr->brother) {
1181  h2x_assert(curr);
1182  if (curr->son) curr->son->format_lists();
1183  }
1184 
1185  for (ParagraphTree *next = member->nextListMember();
1186  next;
1187  member = next, next = member->nextListMember())
1188  {
1189  member->brothers_to_sons(next);
1190  h2x_assert(member->brother == next);
1191 
1192  if (member->son) member->son->format_lists();
1193  }
1194 
1195  h2x_assert(!member->son); // member is the last item
1196 
1197  if (member->brother) {
1198  ParagraphTree *non_member = member->brother->firstWithLessIndentThan(member->indentation+1);
1199  member->brothers_to_sons(non_member);
1200  }
1201 
1202  if (member->son) member->son->format_lists();
1203  if (member->brother) member->brother->format_lists();
1204  }
1205  else {
1206  for (ParagraphTree *curr = this; curr; curr = curr->brother) {
1207  h2x_assert(curr);
1208  if (curr->son) curr->son->format_lists();
1209  }
1210  }
1211 }
1212 
1213 void ParagraphTree::format_indentations() {
1214  if (brother) {
1215  ParagraphTree *same_indent = brother->firstWithLessIndentThan(indentation+1);
1216 #if defined(WARN_POSSIBLY_WRONG_INDENTATION_CORRECTION)
1217  if (same_indent && indentation != same_indent->indentation) {
1218  same_indent->attach_warning("indentation is assumed to be same as ..");
1219  attach_warning(".. here");
1220  }
1221 #endif
1222  brothers_to_sons(same_indent); // if same_indent is NULp -> make all brothers childs
1223  if (brother) brother->format_indentations();
1224  }
1225 
1226  if (son) son->format_indentations();
1227 }
1228 
1229 // -----------------
1230 // LinkType
1231 
// Kinds of link targets. Each known kind is a separate bit, which allows to
// test for several kinds at once (e.g. 'type&(LT_HLP|LT_PDF|LT_PS)').
enum LinkType {
    LT_UNKNOWN = 0,
    LT_HTTP    = 1,
    LT_HTTPS   = 2,
    LT_FTP     = 4,
    LT_FILE    = 8,
    LT_EMAIL   = 16,
    LT_HLP     = 32,
    LT_PS      = 64,
    LT_PDF     = 128
};

// Ids written into the 'type' attribute of links.
// Index 0 is LT_UNKNOWN; index N (N>0) belongs to the LinkType with bit N-1 set.
// Has to be kept in sync with enum LinkType.
static const char *link_id[] = {
    "unknown",
    "www",   // "http:"
    "www",   // "https:"
    "www",   // "ftp:"
    "www",   // "file:"
    "email",
    "hlp",
    "ps",
    "pdf",
};

static string LinkType2id(LinkType type) {
    // Translates 'type' into its id string (see link_id).
    // The index is the position of the highest set bit, counted from 1.
    // Values not covered by link_id result in "unknown" (instead of reading
    // behind the end of the table).
    size_t idx = 0;
    for (unsigned bits = unsigned(type); bits; bits >>= 1) ++idx;

    if (idx >= sizeof(link_id)/sizeof(link_id[0])) idx = 0;
    return link_id[idx];
}
1264 
1265 inline const char *getExtension(const string& name) {
1266  size_t last_dot = name.find_last_of('.');
1267  if (last_dot == string::npos) {
1268  return NULp;
1269  }
1270  return name.c_str()+last_dot+1;
1271 }
1272 
1273 static LinkType detectLinkType(const string& link_target) {
1274  LinkType type = LT_UNKNOWN;
1275  const char *ext = getExtension(link_target);
1276 
1277  if (ext && strcasecmp(ext, "hlp") == 0) type = LT_HLP;
1278  else if (link_target.find("http://") == 0) type = LT_HTTP;
1279  else if (link_target.find("https://") == 0) type = LT_HTTPS;
1280  else if (link_target.find("ftp://") == 0) type = LT_FTP;
1281  else if (link_target.find("file://") == 0) type = LT_FILE;
1282  else if (link_target.find('@') != string::npos) type = LT_EMAIL;
1283  else if (ext && strcasecmp(ext, "ps") == 0) type = LT_PS;
1284  else if (ext && strcasecmp(ext, "pdf") == 0) type = LT_PDF;
1285 
1286  return type;
1287 }
1288 
1289 // --------------------------------------------------------------------------------
1290 
1291 
1292 
static string locate_helpfile(const string& helpname) {
    // search for 'helpname' in various helpfile locations
    //
    // Returns the first existing "<location>/<helpname>" (tested via stat())
    // or an empty string if 'helpname' exists in none of the locations.
    // The locations are relative to the current working directory.

    static const string search_dir[] = { "oldhelp/", "genhelp/" };
    const size_t        dir_count    = sizeof(search_dir)/sizeof(search_dir[0]);

    for (size_t d = 0; d < dir_count; ++d) {
        const string candidate = search_dir[d]+helpname;
        struct stat  info;

        if (stat(candidate.c_str(), &info) == 0) return candidate;
    }
    return "";
}
1309 
1310 static string locate_document(const string& docname) {
1311  // search for 'docname' or 'docname.gz' in various helpfile locations
1312 
1313  string located = locate_helpfile(docname);
1314  if (located.empty()) {
1315  located = locate_helpfile(docname+".gz");
1316  }
1317  return located;
1318 }
1319 
1320 static void add_link_attributes(XML_Tag& link, LinkType type, const string& dest, size_t source_line) {
1321  if (type == LT_UNKNOWN) {
1322  string msg = string("Invalid link (dest='")+dest+"')";
1323  throw LineAttachedMessage(msg, source_line);
1324  }
1325 
1326  link.add_attribute("dest", dest);
1327  link.add_attribute("type", LinkType2id(type));
1328  link.add_attribute("source_line", source_line);
1329 
1330  if (type&(LT_HLP|LT_PDF|LT_PS)) { // other links (www, email) cannot be checked for existence here
1331  string fullhelp = ((type&LT_HLP) ? locate_helpfile : locate_document)(dest);
1332  if (fullhelp.empty()) {
1333  link.add_attribute("missing", "1");
1334  string deadlink = strf("Dead link to '%s'", dest.c_str());
1335 #if defined(DEVEL_RELEASE)
1336  throw LineAttachedMessage(deadlink, source_line);
1337 #else // !defined(DEVEL_RELEASE)
1338  add_warning(deadlink, source_line);
1339 #endif
1340  }
1341  }
1342 }
1343 
1344 static void print_XML_Text_expanding_links(const string& text, size_t lineNo) {
1345  size_t found = text.find("LINK{", 0);
1346  if (found != string::npos) {
1347  size_t inside_link = found+5;
1348  size_t close = text.find('}', inside_link);
1349 
1350  if (close == string::npos) throw "unclosed 'LINK{}'";
1351 
1352  string link_target = text.substr(inside_link, close-inside_link);
1353  LinkType type = detectLinkType(link_target);
1354  string dest = link_target;
1355 
1356  XML_Text(text.substr(0, found));
1357 
1358  {
1359  XML_Tag link("LINK");
1360  link.set_on_extra_line(false);
1361  add_link_attributes(link, type, dest, lineNo);
1362  }
1363 
1364  print_XML_Text_expanding_links(text.substr(close+1), lineNo);
1365  }
1366  else {
1367  XML_Text t(text);
1368  }
1369 }
1370 
1371 void ParagraphTree::xml_write_textblock() {
1372  XML_Tag textblock("T");
1373  textblock.add_attribute("reflow", reflow ? "1" : "0");
1374 
1375  {
1376  string usedText;
1377  const string& text = otext;
1378  if (reflow) {
1379  usedText = correctIndentation(text, (textblock.Indent()+1) * the_XML_Document->indentation_per_level);
1380  }
1381  else {
1382  usedText = text;
1383  }
1384  print_XML_Text_expanding_links(usedText, otext.get_lineno());
1385  }
1386 }
1387 
1388 ParagraphTree *ParagraphTree::xml_write_list_contents() {
1389  h2x_assert(is_itemlist_member());
1390 #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1391  if (!reflow) attach_warning("ITEM not reflown (check output)");
1392 #endif
1393  {
1394  XML_Tag entry("ENTRY");
1395  entry.add_attribute("item", "1");
1396  xml_write_textblock();
1397  if (son) son->xml_write();
1398  }
1399  if (brother && brother->is_itemlist_member()) {
1400  return brother->xml_write_list_contents();
1401  }
1402  return brother;
1403 }
1404 ParagraphTree *ParagraphTree::xml_write_enum_contents() {
1405  h2x_assert(get_enumeration());
1406 #if defined(WARN_FIXED_LAYOUT_LIST_ELEMENTS)
1407  if (!reflow) attach_warning("ENUMERATED not reflown (check output)");
1408 #endif
1409  {
1410  XML_Tag entry("ENTRY");
1411  switch (get_enum_type()) {
1412  case DIGITS:
1413  entry.add_attribute("enumerated", strf("%i", get_enumeration()));
1414  break;
1415  case ALPHA_UPPER:
1416  entry.add_attribute("enumerated", strf("%c", 'A'-1+get_enumeration()));
1417  break;
1418  case ALPHA_LOWER:
1419  entry.add_attribute("enumerated", strf("%c", 'a'-1+get_enumeration()));
1420  break;
1421  default:
1422  h2x_assert(0);
1423  break;
1424  }
1425  xml_write_textblock();
1426  if (son) son->xml_write();
1427  }
1428  if (brother && brother->get_enumeration()) {
1429  int diff = brother->get_enumeration()-get_enumeration();
1430  if (diff != 1) {
1431  attach_warning("Non-consecutive enumeration detected between here..");
1432  brother->attach_warning(".. and here");
1433  }
1434  return brother->xml_write_enum_contents();
1435  }
1436  return brother;
1437 }
1438 
1439 void ParagraphTree::xml_write() {
1440  try {
1441  ParagraphTree *next = NULp;
1442  if (get_enumeration()) {
1443  XML_Tag enu("ENUM");
1444  if (get_enumeration() != 1) {
1445  attach_warning(strf("First enum starts with '%u.' (maybe previous enum was not detected)", get_enumeration()));
1446  }
1447  next = xml_write_enum_contents();
1448 #if defined(WARN_LONESOME_ENUM_ELEMENTS)
1449  if (next == brother) attach_warning("Suspicious single-element-ENUM");
1450 #endif
1451  }
1452  else if (is_itemlist_member()) {
1453  XML_Tag list("LIST");
1454  next = xml_write_list_contents();
1455 #if defined(WARN_LONESOME_LIST_ELEMENTS)
1456  if (next == brother) attach_warning("Suspicious single-element-LIST");
1457 #endif
1458  }
1459  else {
1460  {
1461  XML_Tag para("P");
1462  xml_write_textblock();
1463  if (son) son->xml_write();
1464  }
1465  next = brother;
1466  }
1467  if (next) next->xml_write();
1468  }
1469  catch (string& err) { throw attached_message(err); }
1470  catch (const char *err) { throw attached_message(err); }
1471 }
1472 
1473 static void create_top_links(const Links& links, const char *tag) {
1474  for (Links::const_iterator s = links.begin(); s != links.end(); ++s) {
1475  XML_Tag link(tag);
1476  add_link_attributes(link, detectLinkType(s->Target()), s->Target(), s->SourceLineno());
1477  }
1478 }
1479 
1480 void Helpfile::writeXML(FILE *out, const string& page_name) {
1481  XML_Document xml("PAGE", "arb_help.dtd", out);
1482 
1483  xml.skip_empty_tags = true;
1484  xml.indentation_per_level = 2;
1485 
1486  xml.getRoot().add_attribute("name", page_name);
1487 #if defined(DEBUG)
1488  xml.getRoot().add_attribute("edit_warning", "devel"); // inserts a edit warning into development version
1489 #else
1490  xml.getRoot().add_attribute("edit_warning", "release"); // inserts a different edit warning into release version
1491 #endif // DEBUG
1492 
1493  xml.getRoot().add_attribute("source", inputfile.c_str());
1494 
1495  {
1496  XML_Comment(string("automatically generated from ../")+inputfile+' ');
1497  }
1498 
1499  create_top_links(uplinks, "UP");
1500  create_top_links(references, "SUB");
1501  create_top_links(auto_references, "SUB");
1502 
1503  {
1504  XML_Tag title_tag("TITLE");
1505  const Ostrings& T = title.Content();
1506  for (Ostrings::const_iterator s = T.begin(); s != T.end(); ++s) {
1507  if (s != T.begin()) { XML_Text text("\n"); }
1508  XML_Text text(*s);
1509  }
1510  }
1511 
1512  for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1513  try {
1514  XML_Tag section_tag("SECTION");
1515  section_tag.add_attribute("name", sec->getName());
1516 
1517  ParagraphTree *ptree = ParagraphTree::buildParagraphTree(*sec);
1518 
1519 #if defined(DEBUG)
1520  size_t textnodes = ptree->countTextNodes();
1521 #endif
1522 #if defined(DUMP_PARAGRAPHS)
1523  cout << "Dump of section '" << sec->getName() << "' (before format_lists):\n";
1524  ptree->dump(cout);
1525  cout << "----------------------------------------\n";
1526 #endif
1527 
1528  ptree->format_lists();
1529 
1530 #if defined(DUMP_PARAGRAPHS)
1531  cout << "Dump of section '" << sec->getName() << "' (after format_lists):\n";
1532  ptree->dump(cout);
1533  cout << "----------------------------------------\n";
1534 #endif
1535 #if defined(DEBUG)
1536  size_t textnodes2 = ptree->countTextNodes();
1537  h2x_assert(textnodes2 == textnodes); // if this occurs format_lists has an error
1538 #endif
1539 
1540  ptree->format_indentations();
1541 
1542 #if defined(DUMP_PARAGRAPHS)
1543  cout << "Dump of section '" << sec->getName() << "' (after format_indentations):\n";
1544  ptree->dump(cout);
1545  cout << "----------------------------------------\n";
1546 #endif
1547 #if defined(DEBUG)
1548  size_t textnodes3 = ptree->countTextNodes();
1549  h2x_assert(textnodes3 == textnodes2); // if this occurs format_indentations has an error
1550 #endif
1551 
1552  ptree->xml_write();
1553 
1554  delete ptree;
1555  }
1556  catch (string& err) { throw sec->attached_message(err); }
1557  catch (const char *err) { throw sec->attached_message(err); }
1558  }
1559 }
1560 
1562  for (SectionList::const_iterator sec = sections.begin(); sec != sections.end(); ++sec) {
1563  try {
1564  const Ostrings& s = sec->Content();
1565 
1566  for (Ostrings::const_iterator li = s.begin(); li != s.end(); ++li) {
1567  const string& line = *li;
1568  size_t start = 0;
1569 
1570  while (1) {
1571  size_t found = line.find("LINK{", start);
1572  if (found == string::npos) break;
1573  found += 5;
1574  size_t close = line.find('}', found);
1575  if (close == string::npos) break;
1576 
1577  string link_target = line.substr(found, close-found);
1578 
1579  if (link_target.find("http://") == string::npos &&
1580  link_target.find("https://")== string::npos &&
1581  link_target.find("ftp://") == string::npos &&
1582  link_target.find("file://") == string::npos &&
1583  link_target.find('@') == string::npos)
1584  {
1585  check_self_ref(link_target);
1586 
1587  try {
1588  check_specific_duplicates(link_target, references, false); // check only sublinks here
1589  check_specific_duplicates(link_target, uplinks, false); // check only uplinks here
1590  check_specific_duplicates(link_target, auto_references, false); // check only sublinks here
1591 
1592  // only auto-add inline reference if none of the above checks has thrown
1593  auto_references.push_back(Link(link_target, sec->line_number()));
1594  }
1595  catch (string& err) {
1596  ; // silently ignore inlined
1597  }
1598  }
1599  start = close+1;
1600  }
1601  }
1602  }
1603  catch (string& err) {
1604  throw sec->attached_message("'"+err+"' while scanning LINK{}");
1605  }
1606  }
1607 }
1608 
1609 static void show_err(const string& err, size_t lineno, const string& helpfile) {
1610  if (err.find(helpfile+':') != string::npos) {
1611  cerr << err;
1612  }
1613  else if (lineno == NO_LINENUMBER_INFO) {
1614  cerr << helpfile << ":1: [in unknown line] " << err;
1615  }
1616  else {
1617  cerr << helpfile << ":" << lineno << ": " << err;
1618  }
1619  cerr << '\n';
1620 }
1621 inline void show_err(const LineAttachedMessage& line_err, const string& helpfile) {
1622  show_err(line_err.Message(), line_err.Lineno(), helpfile);
1623 }
1624 inline void show_warning(const LineAttachedMessage& line_err, const string& helpfile) {
1625  show_err(string("Warning: ")+line_err.Message(), line_err.Lineno(), helpfile);
1626 }
1627 inline void show_warnings(const string& helpfile) {
1628  for (list<LineAttachedMessage>::const_iterator wi = warnings.begin(); wi != warnings.end(); ++wi) {
1629  show_warning(*wi, helpfile);
1630  }
1631 }
1632 static void show_error_and_warnings(const LineAttachedMessage& error, const string& helpfile) {
1633  show_err(error, helpfile);
1634  show_warnings(helpfile);
1635 }
1636 
1637 int ARB_main(int argc, char *argv[]) {
1638  if (argc != 3) {
1639  cerr << "Usage: arb_help2xml <ARB helpfile> <XML output>\n";
1640  return EXIT_FAILURE;
1641  }
1642 
1643  Helpfile help;
1644  string arb_help;
1645 
1646  try {
1647  try {
1648  arb_help = argv[1];
1649  string xml_output = argv[2];
1650 
1651  {
1652  ifstream in(arb_help.c_str());
1653  help.readHelp(in, arb_help);
1654  }
1655 
1656  help.extractInternalLinks();
1657 
1658  {
1659  FILE *out = std::fopen(xml_output.c_str(), "wt");
1660  if (!out) throw string("Can't open '")+xml_output+'\'';
1661 
1662  try {
1663  // arb_help contains 'oldhelp/name.hlp'
1664  size_t slash = arb_help.find('/');
1665  size_t dot = arb_help.find_last_of('.');
1666 
1667  if (slash == string::npos || dot == string::npos) {
1668  throw string("parameter <ARB helpfile> has to be in format 'oldhelp/name.hlp' (not '"+arb_help+"')");
1669  }
1670 
1671  string page_name(arb_help, slash+1, dot-slash-1);
1672  help.writeXML(out, page_name);
1673  fclose(out);
1674  }
1675  catch (...) {
1676  fclose(out);
1677  remove(xml_output.c_str());
1678  throw;
1679  }
1680  }
1681 
1682  show_warnings(arb_help);
1683 
1684  return EXIT_SUCCESS;
1685  }
1686  catch (string& err) { throw unattached_message(err); }
1687  catch (const char * err) { throw unattached_message(err); }
1688  catch (LineAttachedMessage& err) { throw; }
1689  catch (...) { throw unattached_message("unknown exception in arb_help2xml"); }
1690  }
1691  catch (LineAttachedMessage& err) { show_error_and_warnings(err, arb_help); }
1692  catch (...) { h2x_assert(0); }
1693 
1694  return EXIT_FAILURE;
1695 }
1696 
1697 // --------------------------------------------------------------------------------
1698 
1699 #ifdef UNIT_TESTS
1700 #include <test_unit.h>
1701 #include <arb_msg.h>
1702 
1703 static arb_test::match_expectation help_file_compiles(const char *helpfile, const char *expected_title, const char *expected_error_part) {
1704  using namespace arb_test;
1705  expectation_group expected;
1706 
1707  ifstream in(helpfile);
1708 
1710 
1711  Helpfile help;
1712  try {
1713  help.readHelp(in, helpfile);
1714  help.extractInternalLinks();
1715 
1716  FILE *devnul = fopen("/dev/null", "wt");
1717  if (!devnul) throw unattached_message("can't write to null device");
1718  help.writeXML(devnul, "dummy");
1719  fclose(devnul);
1720  }
1721  catch (LineAttachedMessage& err) { error = new LineAttachedMessage(err); }
1722  catch (...) { error = new LineAttachedMessage(unattached_message("unknown exception")); }
1723 
1724  if (expected_error_part) {
1725  expected.add(that(error).does_differ_from_NULL());
1726  if (error) expected.add(that(error->Message()).does_contain(expected_error_part));
1727  }
1728  else {
1729  expected.add(that(error).is_equal_to_NULL());
1730  if (!error) {
1731  Section title = help.get_title();
1732  const Ostrings& title_strings = title.Content();
1733 
1734  expected.add(that(title_strings.front().as_string()).is_equal_to(expected_title));
1735  expected.add(that(title_strings.size()).is_equal_to(1));
1736  }
1737  else {
1738  show_error_and_warnings(*error, helpfile);
1739  }
1740  }
1741 
1742  delete error;
1743 
1744  return all().ofgroup(expected);
1745 }
1746 
1747 #define HELP_FILE_COMPILES(name,expTitle) TEST_EXPECTATION(help_file_compiles(name,expTitle,NULp))
1748 #define HELP_FILE_COMPILE_ERROR(name,expError) TEST_EXPECTATION(help_file_compiles(name,NULp,expError))
1749 
1750 void TEST_hlp2xml_conversion() {
1751  TEST_EXPECT_ZERO(chdir("../../HELP_SOURCE"));
1752 
1753  HELP_FILE_COMPILES("genhelp/agde_treepuzzle.hlp", "treepuzzle"); // genhelp/agde_treepuzzle.hlp
1754 
1755  HELP_FILE_COMPILES("oldhelp/markbyref.hlp", "Mark by reference"); // oldhelp/markbyref.hlp
1756  HELP_FILE_COMPILES("oldhelp/ad_align.hlp", "Alignment Administration"); // oldhelp/ad_align.hlp
1757  HELP_FILE_COMPILES("genhelp/copyright.hlp", "Copyrights"); // genhelp/copyright.hlp
1758 
1759  HELP_FILE_COMPILE_ERROR("akjsdlkad.hlp", "Can't read from"); // no such file
1760 }
1761 TEST_PUBLISH(TEST_hlp2xml_conversion);
1762 
1763 
1764 // #define TEST_AUTO_UPDATE // uncomment to update expected xml // @@@ comment-out!
1765 
1766 void TEST_hlp2xml_output() {
1767  string tested_helpfile[] = {
1768  "unittest"
1769  };
1770 
1771  string HELP_SOURCE = "../../HELP_SOURCE/";
1772  string LIB = "../../lib/";
1773  string EXPECTED = "help/";
1774 
1775  for (size_t i = 0; i<ARRAY_ELEMS(tested_helpfile); ++i) {
1776  string xml = HELP_SOURCE + "Xml/" + tested_helpfile[i] + ".xml";
1777  string html = LIB + "help_html/" + tested_helpfile[i] + ".html";
1778  string hlp = LIB + "help/" + tested_helpfile[i] + ".hlp";
1779 
1780  string xml_expected = EXPECTED + tested_helpfile[i] + ".xml";
1781  string html_expected = EXPECTED + tested_helpfile[i] + ".html";
1782  string hlp_expected = EXPECTED + tested_helpfile[i] + ".hlp";
1783 
1784 
1785 #if defined(TEST_AUTO_UPDATE)
1786 # if defined(NDEBUG)
1787 # error please use auto-update only in DEBUG mode
1788 # endif
1789  TEST_COPY_FILE(xml.c_str(), xml_expected.c_str());
1790  TEST_COPY_FILE(html.c_str(), html_expected.c_str());
1791  TEST_COPY_FILE(hlp.c_str(), hlp_expected.c_str());
1792 
1793 #else // !defined(TEST_AUTO_UPDATE)
1794 
1795 # if defined(DEBUG)
1796  int expected_xml_difflines = 0;
1797  int expected_hlp_difflines = 0;
1798 # else // !defined(DEBUG)
1799  int expected_xml_difflines = 1; // value of "edit_warning" differs - see .@edit_warning
1800  int expected_hlp_difflines = 1; // resulting warning in helpfile
1801 # endif
1802  TEST_EXPECT_TEXTFILE_DIFFLINES(xml_expected.c_str(), xml.c_str(), expected_xml_difflines);
1803  TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(html_expected.c_str(), html.c_str(), 0); // html contains the update-date
1804  TEST_EXPECT_TEXTFILE_DIFFLINES(hlp_expected.c_str(), hlp.c_str(), expected_hlp_difflines);
1805 #endif
1806  }
1807 }
1808 
1809 
#if defined(PROTECT_HELP_VS_CHANGES)
void TEST_protect_help_vs_changes() { // should normally be disabled
    // fails if help changes compared to another checkout
    // or just updates the diff w/o failing (if you comment out the last line)
    //
    // if the patch is huge and you load it into xemacs
    // you might want to (turn-on-lazy-shot)
    //
    // patch-pointer: ../UNIT_TESTER/run/help_changes.patch

    const bool do_help = true;
    const bool do_html = true;

    const char *ref_WC = "ARB.help.ref"; // name of the reference checkout

    // ---------------------------------------- config above

    const string this_base = "../..";
    const string ref_base  = this_base+"/../"+ref_WC;
    const string to_help   = "/lib/help";
    const string to_html   = "/lib/help_html";
    const string diff_help = "diff -u "+ref_base+to_help+" "+this_base+to_help;
    const string diff_html = "diff -u "+ref_base+to_html+" "+this_base+to_html;

    // build the command that creates the patch
    string update_cmd;

    if (do_help && do_html) update_cmd = string("(")+diff_help+";"+diff_html+")";
    else if (do_help)       update_cmd = diff_help;
    else if (do_html)       update_cmd = diff_html;

    const string patch = "help_changes.patch";
    update_cmd += " >"+patch+" ||true";

    // command failing if the patch contains any change
    const string fail_on_change_cmd = "test \"`cat "+patch+" | grep -v '^Common subdirectories' | wc -l`\" = \"0\" || ( echo \"Error: Help changed\"; false)";

    TEST_EXPECT_NO_ERROR(GBK_system(update_cmd.c_str()));
    TEST_EXPECT_NO_ERROR(GBK_system(fail_on_change_cmd.c_str())); // @@@ uncomment before commit
}
#endif
1851 
1852 #endif // UNIT_TESTS
static LinkType detectLinkType(const string &link_target)
GB_ERROR GBK_system(const char *system_command)
Definition: arb_msg.cxx:519
#define MAX_LINE_LENGTH
Definition: reader.h:21
string result
GB_TYPES type
EnumerationType
const char * eatWhite(const char *line)
Ostrings & Content()
group_matcher all()
Definition: test_unit.h:1000
static void show_err(const string &err, size_t lineno, const string &helpfile)
const char * c_str() const
AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR &error)
Definition: insdel.cxx:615
int ARB_main(int argc, char *argv[])
return string(buffer, length)
void space()
Definition: test_unit.h:403
static void dot(double **i, double **j, double **k)
Definition: trnsprob.cxx:59
void show_warnings(const string &helpfile)
EnumerationType get_enum_type() const
bool is_some_brother(const ParagraphTree *other) const
static bool shouldReflow(const string &s, int &foundIndentation)
static string correctIndentation(const string &text, int change)
bool is_itemlist_member() const
void writeXML(FILE *out, const string &page_name)
void attach_warning(const string &message) const
static void help()
ParagraphTree * predecessor(ParagraphTree *before_this)
LineAttachedMessage attached_message(const string &message) const
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void readHelp(istream &in, const string &filename)
static size_t scanMinIndentation(const string &text)
const char * extractKeyword(const char *line, string &keyword)
size_t line_number() const OVERRIDE
void warning(int warning_num, const char *warning_message)
Definition: util.cxx:61
const char * firstChar(const char *s)
const char * title
Definition: readseq.c:22
static string locate_document(const string &docname)
char * strf(const char *format,...) __ATTR__FORMAT(1)
Definition: util.cxx:27
Ostring(const string &s, size_t line_no, ParagraphType type_, EnumerationType etype_, unsigned num)
STL namespace.
ParagraphType
void add_warning(const LineAttachedMessage &laMsg)
list< Section > SectionList
void show_warning(const LineAttachedMessage &line_err, const string &helpfile)
static EnumerationType detectLineEnumType(string &line, unsigned &number)
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
const string & getName() const
unsigned get_enumeration() const
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
virtual ~MessageAttachable()
static void print_XML_Text_expanding_links(const string &text, size_t lineNo)
static char * buffer
void check_duplicates(const string &link, const Links &uplinks, const Links &references, bool add_warnings)
virtual ~ParagraphTree()
static HelixNrInfo * start
void check_specific_duplicates(const string &link, const Links &existing, bool add_warnings)
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1485
LineAttachedMessage unattached_message(const string &message)
LineAttachedMessage(const string &message_, size_t lineno_)
#define PATHS
AliDataPtr after(AliDataPtr data, size_t pos)
Definition: insdel.cxx:593
static int diff(int v1, int v2, int v3, int v4, int st, int en)
Definition: ClustalV.cxx:534
const Ostrings & Content() const
LinkType
#define is_equal_to_NULL()
Definition: test_unit.h:1017
static EnumerationType startsWithLetter(string &s, unsigned &number)
void extractInternalLinks()
__ATTR__VFORMAT(1) static string vstrf(const char *format
size_t countTextNodes()
static const char * link_id[]
#define true
Definition: ureadseq.h:14
#define false
Definition: ureadseq.h:13
void setName(const string &name_)
va_list static argPtr size_t buf_size
XML_Document * the_XML_Document
Definition: xml.cxx:23
void message(char *errortext)
static void error(const char *msg)
Definition: mkptypes.cxx:96
ParagraphTree * nextListMemberAfter(const ParagraphTree &previous)
static const char * knownSections[]
void back()
expectation_group & add(const expectation &e)
Definition: test_unit.h:801
static void show_error_and_warnings(const LineAttachedMessage &error, const string &helpfile)
#define that(thing)
Definition: test_unit.h:1032
const Section & get_title() const
static void add_link_attributes(XML_Tag &link, LinkType type, const string &dest, size_t source_line)
ParagraphType get_type() const
#define TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(fgot, fwant, diff)
Definition: test_unit.h:1391
virtual ~Reader()
#define does_differ_from_NULL()
Definition: test_unit.h:1018
ParagraphTree * nextListMember() const
#define TABSIZE
size_t Lineno() const
va_end(argPtr)
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
AW_selection_list * links
Definition: AW_help.cxx:57
STATIC_ASSERT(ARRAY_ELEMS(knownSections)==KNOWN_SECTION_TYPES)
#define is_equal_to(val)
Definition: test_unit.h:1014
#define h2x_assert(bed)
bool is_startof_itemlist_element(const char *contentStart)
#define TEST_EXPECT_ZERO(cond)
Definition: test_unit.h:1074
static void warnAboutDuplicate(SectionList &sections)
const string & Message() const
TYPE get_type() const
Definition: probe_tree.h:64
a xml text node
Definition: xml.hxx:122
#define does_contain(val)
Definition: test_unit.h:1029
const string & as_string() const
ParagraphTree * takeAllInFrontOf(ParagraphTree *after)
bool isWhite(char c)
Ostring(const string &s, size_t line_no, ParagraphType type_)
static ParagraphTree * buildParagraphTree(const Section &sec)
size_t get_lineno() const
SectionType
static list< LineAttachedMessage > warnings
const size_t NO_LINENUMBER_INFO
xml element
static bool startsWithNumber(string &s, unsigned &number)
ParagraphTree * firstWithLessIndentThan(int wanted_indentation)
const char CHAR
aisc_com * link
bool contains(ParagraphTree *that)
Definition: output.h:122
#define OVERRIDE
Definition: cxxforward.h:93
static string LinkType2id(LinkType type)
const char * name_only(const char *fullpath)
Definition: AWTI_import.cxx:46
static string correctSpaces(const string &text, int change)
static char eol[3]
const EnumerationType & get_enum_type() const
va_start(argPtr, format)
__ATTR__FORMAT(1) static string strf(const char *format
Section(string name_, SectionType type_, size_t lineno_)
unsigned get_number() const
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
const char * getNext()
bool isEmptyOrComment(const char *s)
const char * getExtension(const string &name)
void append(ParagraphTree *new_brother)
static int line
Definition: arb_a2ps.c:296
#define MAX_ALLOWED_ENUM
static void parseSection(Section &sec, const char *line, int indentation, Reader &reader)
#define NULp
Definition: cxxforward.h:97
static char * inputfile
Definition: readseq.c:182
virtual ~Helpfile()
const char * readable_type() const
void check_TODO(const char *, const Reader &)
list< Ostring > Ostrings
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
Definition: test_unit.h:1388
string & as_string()
void print_indent(int indent)
Definition: test_unit.h:406
Definition: trnsprob.h:20
ParagraphTree * firstListMember()
const ParagraphType & get_type() const
int getLineNo() const
Reader(istream &in_)
static void create_top_links(const Links &links, const char *tag)
size_t length
static string locate_helpfile(const string &helpname)
list< Link > Links
size_t countSpaces(const string &text)
#define min(a, b)
Definition: f2c.h:153
virtual ~Section()
AW_selection_list * uplinks
Definition: AW_help.cxx:56
li
Definition: AW_awar.cxx:154
static int line_number
Definition: arb_a2ps.c:297
void pushParagraph(Section &sec, string &paragraph, size_t lineNo, ParagraphType &type, EnumerationType &etype, unsigned num)
SectionType get_type() const
static Score ** U
Definition: align.cxx:67
GB_write_int const char s
Definition: AW_awar.cxx:156