ARB
adstring.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adstring.cxx //
4 // Purpose : various string functions //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arb_backtrace.h>
12 #include <arb_strbuf.h>
13 #include <arb_defs.h>
14 #include <arb_str.h>
15 
16 #include "gb_key.h"
17 #include "gb_aci.h"
18 
19 #include <SigHandler.h>
20 
21 #include <execinfo.h>
22 
23 #include <cstdarg>
24 #include <cctype>
25 #include <cerrno>
26 #include <ctime>
27 #include <setjmp.h>
28 
29 #include <valgrind.h>
30 
31 static char *GBS_string_2_key_with_exclusions(const char *str, const char *additional) {
32  // converts any string to a valid key (all chars in 'additional' are additionally allowed)
33  char buf[GB_KEY_LEN_MAX+1];
34  int i;
35  int c;
36  for (i=0; i<GB_KEY_LEN_MAX;) {
37  c = *(str++);
38  if (!c) break;
39 
40  if (c==' ' || c == '_') {
41  buf[i++] = '_';
42  }
43  else if (isalnum(c) || strchr(additional, c)) {
44  buf[i++] = c;
45  }
46  }
47  for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48  buf[i] = 0;
49  return ARB_strdup(buf);
50 }
51 
52 char *GBS_string_2_key(const char *str) { // converts any string to a valid key
53  return GBS_string_2_key_with_exclusions(str, "");
54 }
55 
56 char *GB_memdup(const char *source, size_t len) {
57  char *dest = ARB_alloc<char>(len);
58  memcpy(dest, source, len);
59  return dest;
60 }
61 
62 static const char *EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
63 
64 inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) {
65  // test if 'key' is a valid non-hierarchical database key.
66  // i.e. contains only letters, numbers and '_' and
67  // is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX.
68 
69  if (len < GB_KEY_LEN_MIN) {
70  if (!len) return EMPTY_KEY_NOT_ALLOWED;
71  return GBS_global_string("Invalid key '%s': too short", key);
72  }
73  if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
74 
75  for (int i = 0; i<len; ++i) {
76  char c = key[i];
77  bool validChar = isalnum(c) || c == '_';
78  if (!validChar) {
79  return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80  }
81  }
82 
83  return NULp;
84 }
85 GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
86  // test if 'key' is a valid non-hierarchical database key
87  // (i.e. a valid name for a container or field).
88 
89  return check_key(key, key ? strlen(key) : 0);
90 }
91 
92 GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
93  // test whether 'key' is a hierarchical key,
94  // i.e. consists of subkeys (accepted by GB_check_key), separated by '/'.
95 
96  GB_ERROR err = NULp;
97 
98  if (key && key[0] == '/') ++key; // accept + remove leading '/'
99  if (!key || !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash)
100 
101  while (!err && key[0]) {
102  int nonSlashPart = strcspn(key, "/");
103 
104  err = check_key(key, nonSlashPart);
105  if (!err) {
106  key += nonSlashPart;
107  if (key[0] == '/') {
108  ++key;
109  if (key[0] == 0) { // nothing after slash
110  err = EMPTY_KEY_NOT_ALLOWED;
111  }
112  }
113  else {
114  gb_assert(key[0] == 0);
115  }
116  }
117  }
118  return err;
119 }
120 
121 // ----------------------------------------------
122 // escape/unescape characters in strings
123 
124 char *GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char) {
143  int len = strlen(str);
144  char *buffer = ARB_alloc<char>(2*len+1);
145  int j = 0;
146  int i;
147 
148  gb_assert(strlen(chars_to_escape) <= 26);
149  gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
150 
151  for (i = 0; str[i]; ++i) {
152  if (str[i] == escape_char) {
153  buffer[j++] = escape_char;
154  buffer[j++] = escape_char;
155  }
156  else {
157  const char *found = strchr(chars_to_escape, str[i]);
158  if (found) {
159  buffer[j++] = escape_char;
160  buffer[j++] = (found-chars_to_escape+'A');
161 
162  gb_assert(found[0]<'A' || found[0]>'Z'); // illegal character in chars_to_escape
163  }
164  else {
165 
166  buffer[j++] = str[i];
167  }
168  }
169  }
170  buffer[j] = 0;
171 
172  return buffer;
173 }
174 
175 char *GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char) {
177 
178  int len = strlen(str);
179  char *buffer = ARB_alloc<char>(len+1);
180  int j = 0;
181  int i;
182 
183 #if defined(ASSERTION_USED)
184  int escaped_chars_len = strlen(escaped_chars);
185 #endif // ASSERTION_USED
186 
187  gb_assert(strlen(escaped_chars) <= 26);
188  gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
189 
190  for (i = 0; str[i]; ++i) {
191  if (str[i] == escape_char) {
192  if (str[i+1] == escape_char) {
193  buffer[j++] = escape_char;
194  }
195  else {
196  int idx = str[i+1]-'A';
197 
198  gb_assert(idx >= 0 && idx<escaped_chars_len);
199  buffer[j++] = escaped_chars[idx];
200  }
201  ++i;
202  }
203  else {
204  buffer[j++] = str[i];
205  }
206  }
207  buffer[j] = 0;
208 
209  return buffer;
210 }
211 
213  GB_ERROR error = NULp;
214  GB_CSTR ka;
215  GBS_strstruct *out = GBS_stropen(1000);
216 
217  while ((ka = GBS_find_string(p, "$(", 0))) {
218  GB_CSTR kz = strchr(ka, ')');
219  if (!kz) {
220  error = GBS_global_string("missing ')' for envvar '%s'", p);
221  break;
222  }
223  else {
224  char *envvar = ARB_strpartdup(ka+2, kz-1);
225  int len = ka-p;
226 
227  if (len) GBS_strncat(out, p, len);
228 
229  GB_CSTR genv = GB_getenv(envvar);
230  if (genv) GBS_strcat(out, genv);
231 
232  p = kz+1;
233  free(envvar);
234  }
235  }
236 
237  if (error) {
238  GB_export_error(error);
239  GBS_strforget(out);
240  return NULp;
241  }
242 
243  GBS_strcat(out, p); // copy rest
244  return GBS_strclose(out);
245 }
246 
long GBS_gcgchecksum(const char *seq) {
    // GCG checksum:
    // sums up (weight * uppercased character) over 'seq', where the weight
    // cycles through 1..57; the result is the sum modulo 10000.
    long        checksum = 0;
    long        weight   = 0;
    const char *end      = seq + strlen(seq);

    for (const char *p = seq; p < end; ++p) {
        ++weight;
        checksum += weight * toupper(*p);
        if (weight == 57) weight = 0; // restart weight cycle
    }

    return checksum % 10000;
}
263 
264 // Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
265 uint32_t crctab[] = {
266  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
267  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
268  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
269  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
270  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
271  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
272  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
273  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
274  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
275  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
276  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
277  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
278  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
279  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
280  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
281  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
282  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
283  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
284  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
285  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
286  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
287  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
288  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
289  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
290  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
291  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
292  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
293  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
294  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
295  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
296  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
297  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
298  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
299  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
300  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
301  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
302  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
303  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
304  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
305  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
306  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
307  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
308  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
309  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
310  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
311  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
312  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
313  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
314  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
315  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
316  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
317  0x2d02ef8dL
318 };
319 
320 uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) {
321  /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
322  * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
323  */
324 
325  unsigned long c = 0xffffffffL;
326  long n = length;
327  int i;
328  int tab[256]; // @@@ avoid recalc for each call
329 
330  for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
331  tab[i] = ignore_case ? toupper(i) : i;
332  }
333 
334  if (exclude) {
335  while (1) {
336  int k = *(unsigned char *)exclude++;
337  if (!k) break;
338  tab[k] = 0;
339  if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
340  }
341  }
342 
343  while (n--) {
344  i = tab[*(const unsigned char *)seq++];
345  if (i) {
346  c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
347  }
348  }
349  c = c ^ 0xffffffffL;
350  return c;
351 }
352 
353 uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) {
354  // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
355  return GB_checksum(seq, strlen(seq), ignore_case, exclude);
356 }
357 
size_t GBS_shorten_repeated_data(char *data) {
    // Shortens runs of repeated characters in 'data'.
    // This function modifies 'data' in place!!
    //
    // Runs of 5 or more identical characters are replaced by "<char>{<count>}",
    // e.g. "..............................ACGT....................TGCA"
    //   -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG
    char *const  start   = data;
    char        *write   = data;
    const char  *read    = data;
    char         runChar = *read++;
    size_t       runLen  = 1;

    while (runChar) {
        char next = *read++;
        if (next == runChar) {
            ++runLen;
            continue;
        }

        // run has ended -> write it (the write position never passes the read position)
        if (runLen < 5) {
            size_t todo = runLen;
            while (todo--) *write++ = runChar; // insert plain
        }
        else {
            write += sprintf(write, "%c{%zu}", runChar, runLen); // insert repeat count
        }

        runChar = next;
        runLen  = 1;
    }

    *write = 0;

#if defined(DEBUG)
    gb_assert(strlen(start) <= orgLen);
#endif // DEBUG
    return write-start;
}
398 
399 
400 // ------------------------------------------
401 // helper classes for tagged fields
402 
403 class TextRef {
404  const char *data; // has no terminal zero-byte!
405  int length;
406 
407 public:
408  TextRef() : data(NULp), length(-1) {}
409  TextRef(const char *data_, int length_) : data(data_), length(length_) {}
410  explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
411 
412  bool defined() const { return data && length>0; }
413  const char *get_data() const { return data; }
414  int get_length() const { return length; }
415 
416  const char *get_following() const { return data ? data+length : NULp; }
417 
418  int compare(const char *str) const {
419  gb_assert(defined());
420  int cmp = strncmp(get_data(), str, get_length());
421  if (!cmp) {
422  if (str[get_length()]) {
423  cmp = -1; // right side contains more content
424  }
425  }
426  return cmp;
427  }
428  int icompare(const char *str) const {
429  gb_assert(defined());
430  int cmp = strncasecmp(get_data(), str, get_length());
431  if (!cmp) {
432  if (str[get_length()]) {
433  cmp = -1; // right side contains more content
434  }
435  }
436  return cmp;
437  }
438  char *copy() const { return ARB_strndup(get_data(), get_length()); }
439 
440  char head() const { return defined() ? data[0] : 0; }
441  char tail() const { return defined() ? data[length-1] : 0; }
442 
444  if (defined()) {
445  for (int s = 0; s<length; ++s) {
446  if (!isspace(data[s])) {
447  return TextRef(data+s, length-s);
448  }
449  }
450  }
451  return TextRef();
452  }
454  if (defined()) {
455  for (int s = length-1; s>=0; --s) {
456  if (!isspace(data[s])) {
457  return TextRef(data, s+1);
458  }
459  }
460  }
461  return TextRef();
462  }
463 
464  TextRef trimmed() const {
465  return headTrimmed().tailTrimmed();
466  }
467 
468  inline TextRef partBefore(const TextRef& subref) const;
469  inline TextRef partBehind(const TextRef& subref) const;
470 
471  bool is_part_of(const TextRef& other) const {
472  gb_assert(defined() && other.defined());
473  return get_data()>=other.get_data() && get_following()<=other.get_following();
474  }
475 
476  const char *find(char c) const { return reinterpret_cast<const char*>(memchr(get_data(), c, get_length())); }
477 };
478 
479 static TextRef textBetween(const TextRef& t1, const TextRef& t2) {
480  const char *behind_d1 = t1.get_following();
481  const char *d2 = t2.get_data();
482 
483  if (behind_d1 && d2 && behind_d1<d2) {
484  return TextRef(behind_d1, d2-behind_d1);
485  }
486  return TextRef();
487 }
488 
489 inline TextRef TextRef::partBefore(const TextRef& subref) const {
490  gb_assert(subref.is_part_of(*this));
491  return textBetween(TextRef(get_data(), 0), subref);
492 }
493 inline TextRef TextRef::partBehind(const TextRef& subref) const {
494  gb_assert(subref.is_part_of(*this));
495  return TextRef(subref.get_following(), get_following()-subref.get_following());
496 }
497 
499  TextRef wholeInput;
500  TextRef tag, content; // current position
501  TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
502  TextRef nextBrackets; // next "[..]" part (behind current tag)
503 
504  void findBrackets(const char *in) {
505  nextBrackets = TextRef();
506  const char *tag_start = strchr(in, '[');
507  if (tag_start) {
508  const char *tag_end = strchr(tag_start, ']');
509  if (tag_end) {
510  if (tag_end == tag_start+1) { // empty tag -> use as content
511  findBrackets(tag_end+1);
512  }
513  else {
514  const char *unwanted_bracket = reinterpret_cast<const char*>(memchr(tag_start+1, '[', tag_end-tag_start-1));
515  if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
516  findBrackets(unwanted_bracket);
517  }
518  else {
519  TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
520  if (name.defined()) { // not only whitespace inside brackets
521  nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
522  }
523  else {
524  findBrackets(tag_end+1);
525  }
526  }
527  }
528  }
529  }
530  }
531 
532  void parse_next_multi_tag() {
533  gb_assert(restTags.defined());
534  TextRef comma(restTags.find(','), 1);
535  if (comma.defined()) {
536  tag = restTags.partBefore(comma).tailTrimmed();
537  restTags = restTags.partBehind(comma).headTrimmed();
538  }
539  else {
540  tag = restTags;
541  restTags = TextRef();
542  }
543  }
544  void parse_next() {
545  if (restTags.defined()) {
546  parse_next_multi_tag();
547  }
548  else if (nextBrackets.defined()) {
549  TextRef brackets = nextBrackets;
550  findBrackets(brackets.get_following());
551 
552  content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
553 
554  gb_assert(brackets.head() == '[' && brackets.tail() == ']');
555 
556  TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
557  gb_assert(tags.defined());
558 
559  restTags = tags;
560  parse_next_multi_tag();
561  }
562  else {
563  tag = content = TextRef();
564  gb_assert(!has_part());
565  }
566  }
567  void parse_first() {
568  gb_assert(!has_part());
569  findBrackets(wholeInput.get_data());
570  content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
571  if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
572  }
573 
574 public:
575  TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
576 
577  bool has_tag() const { return tag.defined(); }
578  bool has_content() const { return content.defined(); }
579 
580  void next() { parse_next(); }
581  bool has_part() const { return has_tag() || has_content(); } // false -> parser has finished
582 
583  const TextRef& get_tag() const { return tag; }
584  const TextRef& get_content() const { return content; }
585 };
586 
587 
588 // -------------------------------------------
589 // helper function for tagged fields
590 
591 static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value) {
592  if (!value[0]) return; // ignore empty values
593 
594  {
595  char *p;
596  p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
597  p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
598  }
599 
600  GB_HASH *sh = (GB_HASH *)GBS_read_hash(hash, value);
601  if (!sh) {
602  sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
603  GBS_write_hash(hash, value, (long)sh);
604  }
605  GBS_write_hash(sh, tag, 1);
606 }
607 
608 static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del) {
609  TaggedContentParser parser(s);
610  while (parser.has_part()) {
611  if (parser.has_content()) {
612  char *content = parser.get_content().copy();
613  if (parser.has_tag()) {
614  char *tag = parser.get_tag().copy();
615  if (!del || ARB_stricmp(tag, del) != 0) {
616  g_bs_add_value_tag_to_hash(hash, tag, content);
617  }
618  free(tag);
619  }
620  else {
621  g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
622  }
623  free(content);
624  }
625  parser.next();
626  }
627 }
628 
629 static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env& env) {
630  GB_ERROR error = NULp;
631 
632  TaggedContentParser parser(s);
633  while (parser.has_part() && !error) {
634  if (parser.has_content()) {
635  char *value = parser.get_content().copy();
636  char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
637 
638  if (rtag && ARB_stricmp(tag, rtag) == 0) {
639  freeset(value, GB_command_interpreter_in_env(value, aci, env));
640  if (!value) error = GB_await_error();
641  }
642 
643  if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
644 
645  free(tag);
646  free(value);
647  }
648  parser.next();
649  }
650 
651  return error;
652 }
653 
654 static void g_bs_merge_tags(const char *tag, long /*val*/, void *cd_sub_result) {
655  GBS_strstruct *sub_result = (GBS_strstruct*)cd_sub_result;
656 
657  GBS_strcat(sub_result, tag);
658  GBS_strcat(sub_result, ",");
659 }
660 
661 static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash) {
662  static int counter = 0;
663 
664  GBS_strstruct *sub_result = GBS_stropen(100);
666  GBS_intcat(sub_result, counter++); // create a unique number
667 
668  char *str = ARB_strupper(GBS_strclose(sub_result));
669 
670  GB_HASH *g_bs_collect_tags_hash = (GB_HASH*)cd_g_bs_collect_tags_hash;
671  GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
672 
673  free(str);
674 }
675 
676 static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result) {
677  GBS_strstruct *merge_result = (GBS_strstruct*)cd_merge_result;
678 
679  char *lk = const_cast<char*>(strrchr(tag, ','));
680  if (lk) { // remove number at end
681  *lk = 0;
682 
683  if (!merge_result->empty()) merge_result->put(' '); // skip trailing space
684  merge_result->put('[');
685  merge_result->cat(tag);
686  merge_result->put(']');
687  merge_result->put(' ');
688  }
689  merge_result->cat((char*)value);
690 }
691 
692 static char *g_bs_get_string_of_tag_hash(GB_HASH *tag_hash) {
693  GBS_strstruct *merge_result = GBS_stropen(256);
695 
696  GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
698 
699  GBS_free_hash(collect_tags_hash);
700  return GBS_strclose(merge_result);
701 }
702 
703 static long g_bs_free_hash_of_hashes_elem(const char */*key*/, long val, void *) {
704  GB_HASH *hash = (GB_HASH*)val;
705  if (hash) GBS_free_hash(hash);
706  return 0;
707 }
710  GBS_free_hash(hash);
711 }
712 
713 char *GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2) {
714  /* Create a tagged string from two tagged strings:
715  * a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
716  *
717  * if 's2' is not empty, then delete tag 'replace1' in 's1'
718  * if 's1' is not empty, then delete tag 'replace2' in 's2'
719  *
720  * (result should never be NULp)
721  */
722 
723  char *str1 = ARB_strdup(s1);
724  char *str2 = ARB_strdup(s2);
725  char *t1 = GBS_string_2_key(tag1);
726  char *t2 = GBS_string_2_key(tag2);
728 
729  if (!s1[0]) replace2 = NULp;
730  if (!s2[0]) replace1 = NULp;
731 
732  if (replace1 && !replace1[0]) replace1 = NULp;
733  if (replace2 && !replace2[0]) replace2 = NULp;
734 
735  g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
736  g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
737 
738  char *result = g_bs_get_string_of_tag_hash(hash);
739 
741 
742  free(t2);
743  free(t1);
744  free(str2);
745  free(str1);
746 
747  return result;
748 }
749 
750 char *GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env& env) {
751  /* if 's' is untagged, tag it with default tag 'dt'.
752  * if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
753  *
754  * if result is NULp, an error has been exported.
755  */
756 
757  char *str = ARB_strdup(s);
758  char *default_tag = GBS_string_2_key(dt);
760  char *result = NULp;
761 
762  GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
763 
764  if (!error) {
765  result = g_bs_get_string_of_tag_hash(hash);
766  }
767  else {
768  GB_export_error(error);
769  }
770 
772 
773  free(default_tag);
774  free(str);
775 
776  return result;
777 }
778 
779 char *GB_read_as_tagged_string(GBDATA *gbd, const char *tagi) {
780  char *buf = GB_read_as_string(gbd);
781  if (buf && tagi && tagi[0]) {
782  TaggedContentParser parser(buf);
783 
784  char *wantedTag = GBS_string_2_key(tagi);
785  char *contentFound = NULp;
786 
787  while (parser.has_part() && !contentFound) {
788  if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
789  contentFound = parser.get_content().copy();
790  }
791  parser.next();
792  }
793  free(wantedTag);
794  free(buf);
795 
796  return contentFound;
797  }
798  return buf;
799 }
800 
801 
/* Be CAREFUL: this function is used to save ARB ASCII databases (i.e. properties)
 * and is used as well to save perl macros.
 *
 * When changing GBS_fwrite_string, GBS_fread_string needs to be adapted as well.
 *
 * Always keep in mind that many users have databases/macros written with older
 * versions of this function. They MUST still load properly!!!
 */
void GBS_fwrite_string(const char *strngi, FILE *out) {
    /* Writes 'strngi' to 'out' as double-quoted string.
     * Special characters are written as backslash sequences:
     *   newline and tab            -> backslash followed by 'n' or 't'
     *   double quote and backslash -> backslash followed by the character itself
     *   other characters 1..24     -> backslash followed by 'A'..'X' (offset '@')
     *   characters 25..31          -> backslash followed by '0'..'6'
     * All other characters are written unchanged.
     */
    const unsigned char *p = (const unsigned char *)strngi;

    putc('"', out);

    for (int c = *p++; c; c = *p++) {
        switch (c) {
            case '\n':
                putc('\\', out);
                putc('n', out);
                break;
            case '\t':
                putc('\\', out);
                putc('t', out);
                break;
            case '"':
                putc('\\', out);
                putc('"', out);
                break;
            case '\\':
                putc('\\', out);
                putc('\\', out);
                break;
            default:
                if (c >= 32) {
                    putc(c, out);
                }
                else {
                    putc('\\', out);
                    if (c<25) putc(c+'@', out);      // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
                    else      putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
                }
                break;
        }
    }

    putc('"', out);
}
844 
/* Reads a string from a file written by GBS_fwrite_string.
 * Skips everything in front of the first '"'.
 *
 * WARNING: changing this function affects perl-macro execution (read warnings for GBS_fwrite_string);
 * any changes should be done in GBS_fconvert_string too.
 */
851 
852 static char *GBS_fread_string(FILE *in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
853  GBS_strstruct *strstr = GBS_stropen(1024);
854  int x;
855 
856  while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
857 
858  if (x != EOF) {
859  while ((x = getc(in)) != '"') {
860  if (x == EOF) break;
861  if (x == '\\') {
862  x = getc(in); if (x==EOF) break;
863  if (x == 'n') {
864  GBS_chrcat(strstr, '\n');
865  continue;
866  }
867  if (x == 't') {
868  GBS_chrcat(strstr, '\t');
869  continue;
870  }
871  if (x>='@' && x <= '@' + 25) {
872  GBS_chrcat(strstr, x-'@');
873  continue;
874  }
875  if (x>='0' && x <= '9') {
876  GBS_chrcat(strstr, x-('0'-25));
877  continue;
878  }
879  // all other backslashes are simply skipped
880  }
881  GBS_chrcat(strstr, x);
882  }
883  }
884  return GBS_strclose(strstr);
885 }
886 
887 /* does similar decoding as GBS_fread_string but works directly on an existing buffer
888  * (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
889  *
890  * inserts \0 behind decoded string (removes the closing '"')
891  * returns a pointer behind the end (") of the _encoded_ string
892  * returns NULp if a 0-character is found
893  */
895  char *t = buffer;
896  char *f = buffer;
897  int x;
898 
899  gb_assert(f[-1] == '"');
900  // the opening " has already been read
901 
902  while ((x = *f++) != '"') {
903  if (!x) break;
904 
905  if (x == '\\') {
906  x = *f++;
907  if (!x) break;
908 
909  if (x == 'n') {
910  *t++ = '\n';
911  continue;
912  }
913  if (x == 't') {
914  *t++ = '\t';
915  continue;
916  }
917  if (x>='@' && x <= '@' + 25) {
918  *t++ = x-'@';
919  continue;
920  }
921  if (x>='0' && x <= '9') {
922  *t++ = x-('0'-25);
923  continue;
924  }
925  // all other backslashes are simply skipped
926  }
927  *t++ = x;
928  }
929 
930  if (!x) return NULp; // error (string should not contain 0-character)
931  gb_assert(x == '"');
932 
933  t[0] = 0;
934  return f;
935 }
936 
937 char *GBS_replace_tabs_by_spaces(const char *text) {
938  int tlen = strlen(text);
939  GBS_strstruct *mfile = GBS_stropen(tlen * 3/2 + 1);
940  int tabpos = 0;
941  int c;
942 
943  while ((c=*(text++))) {
944  if (c == '\t') {
945  int ntab = (tabpos + 8) & 0xfffff8;
946  while (tabpos < ntab) {
947  GBS_chrcat(mfile, ' ');
948  tabpos++;
949  }
950  continue;
951  }
952  tabpos ++;
953  if (c == '\n') {
954  tabpos = 0;
955  }
956  GBS_chrcat(mfile, c);
957  }
958  return GBS_strclose(mfile);
959 }
960 
961 char *GBS_trim(const char *str) {
962  // trim whitespace at beginning and end of 'str'
963  const char *whitespace = " \t\n";
964  while (str[0] && strchr(whitespace, str[0])) str++;
965 
966  const char *end = strchr(str, 0)-1;
967  while (end >= str && strchr(whitespace, end[0])) end--;
968 
969  return ARB_strpartdup(str, end);
970 }
971 
972 static char *dated_info(const char *info) {
973  char *dated_info = NULp;
974  time_t date;
975 
976  if (time(&date) != -1) {
977  char *dstr = ctime(&date);
978  char *nl = strchr(dstr, '\n');
979 
980  if (nl) nl[0] = 0; // cut off LF
981 
982  dated_info = GBS_global_string_copy("%s: %s", dstr, info);
983  }
984  else {
985  dated_info = ARB_strdup(info);
986  }
987  return dated_info;
988 }
989 
990 char *GBS_log_action_to(const char *comment, const char *action, bool stamp) {
998  size_t clen = comment ? strlen(comment) : 0;
999  size_t alen = strlen(action);
1000 
1001  GBS_strstruct *new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2*\n + \0 + space for stamp
1002 
1003  if (comment) {
1004  GBS_strcat(new_comment, comment);
1005  if (clen == 0 || comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
1006  }
1007 
1008  if (stamp) {
1009  char *dated_action = dated_info(action);
1010  GBS_strcat(new_comment, dated_action);
1011  free(dated_action);
1012  }
1013  else {
1014  GBS_strcat(new_comment, action);
1015  }
1016  if (alen == 0 || action[alen-1] != '\n') GBS_chrcat(new_comment, '\n');
1017 
1018  return GBS_strclose(new_comment);
1019 }
1020 
1021 const char *GBS_funptr2readable(void *funptr, bool stripARBHOME) {
1022  // only returns module and offset for static functions :-(
1023  char **funNames = backtrace_symbols(&funptr, 1);
1024  const char *readable_fun = funNames[0];
1025 
1026  if (stripARBHOME) {
1027  const char *ARBHOME = GB_getenvARBHOME();
1028  if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1029  readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1030  }
1031  }
1032  return readable_fun;
1033 }
1034 
1035 // --------------------------------------------------------------------------------
1036 
1037 #ifdef UNIT_TESTS
1038 
1039 #include <test_unit.h>
1040 
1041 // #define TEST_TEST_MACROS
1042 
1043 #ifdef ENABLE_CRASH_TESTS
1044 static void provokesegv() { raise(SIGSEGV); }
1045 static void dont_provokesegv() {}
1046 # if defined(ASSERTION_USED)
1047 static void failassertion() { gb_assert(0); }
1048 # if defined(TEST_TEST_MACROS)
1049 static void dont_failassertion() {}
1050 # endif
1051 static void provokesegv_does_not_fail_assertion() {
1052  // provokesegv does not raise assertion
1053  // -> the following assertion fails
1054  TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
1055 }
1056 # endif
1057 #endif
1058 
void TEST_signal_tests__crashtest() {
    // Self-test of the crash-test macros:
    // passes callbacks which raise SIGSEGV, fail an assertion or do nothing
    // to the TEST_EXPECT_..SEGFAULT / TEST_EXPECT_CODE_ASSERTION_FAILS macros.

    // check whether we can test that no SEGV or assertion failure happened
    TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);

    // check whether we can test for SEGV and assertion failures
    TEST_EXPECT_SEGFAULT(provokesegv);
    TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);

    // tests whether signal suppression works multiple times (by repeating tests)
    TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
    TEST_EXPECT_SEGFAULT(provokesegv);

    // test whether SEGV can be distinguished from assertion
    TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);

    // The following section is disabled, because it will
    // provoke test warnings (to test these warnings).
    // (enable it when changing any of these TEST_..-macros used here)
#if defined(TEST_TEST_MACROS)
    TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);

    TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
    TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
#if defined(ASSERTION_USED)
    TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
#endif

    TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
    TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
#endif
}
1091 
// Runs GBS_shorten_repeated_data() on a heap copy of 'Long' and
// expects the modified copy to equal 'Short'. The copy is freed afterwards.
#define TEST_SHORTENED_EQUALS(Long,Short) do {              \
        char *shortened = ARB_strdup(Long);                 \
        GBS_shorten_repeated_data(shortened);               \
        TEST_EXPECT_EQUAL(shortened, Short);                \
        free(shortened);                                    \
    } while(0)
1098 
void TEST_GBS_shorten_repeated_data() {
    // Checks GBS_shorten_repeated_data() against expected results:
    // the expectations below abbreviate runs of 5 or more identical characters
    // as 'c{count}' and leave shorter runs untouched.

    TEST_SHORTENED_EQUALS("12345", "12345"); // no repetition -> unchanged
    TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
    TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
    TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
    TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc"); // shortest run that gets abbreviated
    TEST_SHORTENED_EQUALS("aaaabc", "aaaabc"); // runs of 4 or less stay as they are
    TEST_SHORTENED_EQUALS("aaabc", "aaabc");
    TEST_SHORTENED_EQUALS("aabc", "aabc");
    TEST_SHORTENED_EQUALS("", ""); // empty input
}
1114 
1115 static const char *hkey_format[] = {
1116  "/%s/bbb/ccc",
1117  "/aaa/%s/ccc",
1118  "/aaa/bbb/%s",
1119 };
1120 
1121 inline const char *useInHkey(const char *fragment, size_t pos) {
1122  return GBS_global_string(hkey_format[pos], fragment);
1123 }
1124 
// Inserts 'use' into every template of hkey_format (via useInHkey) and
// expects GB_check_hkey() to report no error for each resulting key.
// The tested key is set as test annotation; the annotation is cleared afterwards.
#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do {                           \
        const size_t format_count = ARRAY_ELEMS(hkey_format);                  \
        for (size_t fmt = 0; fmt<format_count; ++fmt) {                         \
            const char *tested_hkey = useInHkey(use, fmt);                      \
            TEST_ANNOTATE(tested_hkey);                                         \
            TEST_EXPECT_NO_ERROR(GB_check_hkey(tested_hkey));                   \
        }                                                                       \
        TEST_ANNOTATE(NULp);                                                    \
    } while(0)
1133 
// Inserts 'use' into every template of hkey_format (via useInHkey) and
// expects GB_check_hkey() to report an error containing 'contains' for each resulting key.
// The tested key is set as test annotation; the annotation is cleared afterwards.
#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do {            \
        const size_t format_count = ARRAY_ELEMS(hkey_format);                  \
        for (size_t fmt = 0; fmt<format_count; ++fmt) {                         \
            const char *tested_hkey = useInHkey(use, fmt);                      \
            TEST_ANNOTATE(tested_hkey);                                         \
            TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(tested_hkey), contains);   \
        }                                                                       \
        TEST_ANNOTATE(NULp);                                                    \
    } while(0)
1142 
1143 
void TEST_DB_key_checks() {
    // Checks which keys are rejected by GB_check_key() (plain keys) and
    // which are accepted/rejected by GB_check_hkey() (hierarchical keys),
    // including the error message reported for each rejected key.

    // plain keys
    const char *shortest = "ab";
    const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
    const char *too_short = shortest+1; // 'shortest' without its first character
    const char *longest = too_long+1;   // 'too_long' without its first character

    const char *empty = "";
    const char *slash = "sub/key";
    const char *dslash = "sub//key";
    const char *comma = "no,key";
    const char *minus = "no-key";

    // obsolete GB_LINK syntax:
    const char *link = "link->syntax";
    const char *nowhere = "link->";
    const char *fromNw = "->syntax";


    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");

    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");

    // hierarchical keys
    // (each fragment is tested at first, middle and last position of a path)
    TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
    TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);

    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");

    TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash); // a slash is a valid separator inside a hierarchical key
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");

    // test NULp keys:
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");

    // some edge cases for hierarchical keys:
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
}
1206 
// Converts 'str' using GBS_string_2_key(), expects the result to equal 'expected'
// and to be accepted by GB_check_key(). The generated key is freed afterwards.
#define TEST_STRING2KEY(str,expected) do {                      \
        char *generated_key = GBS_string_2_key(str);            \
        TEST_EXPECT_EQUAL(generated_key, expected);             \
        TEST_EXPECT_NO_ERROR(GB_check_key(generated_key));      \
        free(generated_key);                                    \
    } while(0)
1213 
void TEST_DB_key_generation() {
    // Checks the keys generated by GBS_string_2_key()
    // (each generated key also has to pass GB_check_key(), see TEST_STRING2KEY).

    TEST_STRING2KEY("abc", "abc");
    TEST_STRING2KEY("a b c", "a_b_c"); // spaces are converted into '_'

    // invalid chars
    // (are dropped from the generated key)
    TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
                    "string_containing_doublequotes_quotes_and_othershit");

    // length tests
    TEST_STRING2KEY("a", "a_"); // too short
    TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
                    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
}
1227 
void TEST_TaggedContentParser() {
    // Tests the helper class TextRef and the class TaggedContentParser:
    // the parser is stepped through several inputs using next() and
    // tag and content of each reported part are compared with the expectation.

    // test helper class TextRef:
    TEST_REJECT(TextRef().defined()); // default to undefined
    {
        TextRef bla("blakjahd", 3);
        TEST_EXPECT(bla.defined());
        TEST_EXPECT_EQUAL(bla.get_length(), 3);

        TEST_EXPECT(bla.compare("bl") > 0);
        TEST_EXPECT(bla.compare("bla") == 0);
        TEST_EXPECT(bla.compare("blase") < 0);

        TextRef spaced(" spaced "+1, 10);
        TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
        TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
        TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
    }

    const char *text = " untagged [tag] tagged [empty] ";

    // NOTE(review): the following four TextRefs are not used anywhere below
    TextRef cr_untagged(strstr(text, "untagged"), 8);
    TextRef cr_tagged (strstr(text, "tagged"), 6);
    TextRef tr_tag (strstr(text, "tag"), 3);
    TextRef tr_empty (strstr(text, "empty"), 5);

    // test TaggedContentParser:
    {
        TaggedContentParser parser(text);

        // 1st part: content in front of the first tag
        TEST_EXPECT(parser.has_part());
        TEST_REJECT(parser.has_tag());
        TEST_EXPECT(parser.get_content().compare("untagged") == 0);

        parser.next();

        // 2nd part: tag with content
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
        TEST_EXPECT(parser.get_content().compare("tagged") == 0);

        parser.next();

        // 3rd part: tag without content
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("empty") == 0);
        TEST_REJECT(parser.has_content());

        parser.next();

        TEST_REJECT(parser.has_part());
    }
    { // parse untagged input
        TaggedContentParser parser("hi");
        TEST_EXPECT(parser.has_part());
        TEST_REJECT(parser.has_tag());
        TEST_EXPECT(parser.get_content().compare("hi") == 0);
        parser.next();
        TEST_REJECT(parser.has_part());
    }
    { // parse empty input
        TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
        TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
    }
    { // parse single tag w/o content
        TaggedContentParser parser(" [hello] ");
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("hello") == 0);
        TEST_REJECT(parser.has_content());
        parser.next();
        TEST_REJECT(parser.has_part());
    }
    { // parse multi-tags
        // (the same content is reported once for each tag of the list)
        TaggedContentParser parser(" [ t1 , t2 ] t");
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("t1") == 0);
        TEST_EXPECT(parser.get_content().compare("t") == 0);
        parser.next();
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("t2") == 0);
        TEST_EXPECT(parser.get_content().compare("t") == 0);
        parser.next();
        TEST_REJECT(parser.has_part());
    }
}
1310 
// Calls GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2) and expects the
// returned string to equal 'expected'. The returned string is freed afterwards.
// Note the parameter order of this macro: tags first, then the tags to replace, then the strings.
#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do {                          \
        char *merged = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2);            \
        TEST_EXPECT_EQUAL(merged, expected);                                        \
        free(merged);                                                               \
    } while(0)
1316 
// Variant of TEST_MERGE_TAGGED for known-broken behavior:
// passes the string returned by GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2)
// together with 'expected' and 'got' to TEST_EXPECT_EQUAL__BROKEN, then frees it.
#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do {              \
        char *merged = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2);            \
        TEST_EXPECT_EQUAL__BROKEN(merged, expected, got);                           \
        free(merged);                                                               \
    } while(0)
1322 
void TEST_merge_tagged_strings() {
    // Checks the results of GBS_merge_tagged_strings().
    //
    // Arguments of TEST_MERGE_TAGGED are
    //     (tag1, tag2, replace1, replace2, string1, string2, expected result)
    // where 'tagN' is the default tag passed together with 'stringN'.

    // merge two fields:
    const char *_0 = NULp; // shortcut for "no replace tag"

    TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
    TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
    TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
    TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");

    TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
    TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags

    // update fields:
    TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
    TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
    TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
    TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
    TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");

    // append (opposed to update this keeps old entries with same tag; useless?)
    TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
    TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
    TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");

    // merge three fields:
    TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
    TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
    TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
    TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
    TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");

    // same tests as in section above, but vv:
    TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
    TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
    TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
    TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
    TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");

    // test real-merges (content existing in both strings):
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
    TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags

    // merge two tagged string with deleting
#define DSTSRC1 "[DST] dest1 [SRC] src1"
#define DSTSRC2 "[DST] dest2 [SRC] src2"
#define DSTSRC2LOW "[dst] dest2 [src] src2"

    TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
    TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
    TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
    TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
    TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
    TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
}
1392 
1393 __ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
1394  GB_shell shell;
1395  GBDATA *gb_main = GB_open("new.arb", "c");
1396  {
1397  GB_transaction ta(gb_main);
1398 
1399  {
1400  GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1401  TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1402  TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1403 
1404  GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1405  GB_UINT4 ints[] = { 1, 2 };
1406  TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1407  TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1408  }
1409 
1410 #define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1411 #define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1412 #define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1413 
1414  GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1415  const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1416  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1417 
1418  TEST_EXPECT_FULL_CONTENT(NULp);
1419  TEST_EXPECT_FULL_CONTENT("");
1420  TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1421 
1422  TEST_EXPECT_TAG_CONTENT("T1", "t12");
1423  TEST_EXPECT_TAG_CONTENT("T2", "t12");
1424  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1425  TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1426 
1427  TEST_EXPECT_TAG_CONTENT("AA", "aa");
1428  TEST_EXPECT_TAG_CONTENT("BB", "bb");
1429  TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1430  TEST_EXPECT_TAG_CONTENT("XX", "x1");
1431  TEST_EXPECT_TAG_CONTENT("YY", "yy");
1432  TEST_EXPECT_TAG_CONTENT("yy", "yy");
1433 
1434  TEST_REJECT_TAG_CONTENT("Y");
1435  // TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1436 
1437  TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1438  TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1439  TEST_EXPECT_TAG_CONTENT("fake", "fake");
1440 
1441  TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1442  TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1443  TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1444  TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1445  TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1446  TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1447  TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1448  TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1449 
1450  TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1451  TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1452  TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1453  TEST_EXPECT_TAG_CONTENT("LAST", "last");
1454 
1455  // test incomplete tags
1456  tagged_string = "bla [WHATEVER hello";
1457  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1458  TEST_REJECT_TAG_CONTENT("WHATEVER");
1459 
1460  tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1461  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1462  TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1463  TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1464  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1465 
1466  // test pathological tags
1467  tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1468  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1469  TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1470  TEST_EXPECT_FULL_CONTENT("");
1471  TEST_REJECT_TAG_CONTENT(" ");
1472  TEST_REJECT_TAG_CONTENT(" ");
1473  TEST_REJECT_TAG_CONTENT(",");
1474  TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1475  TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1476  TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1477  TEST_EXPECT_TAG_CONTENT(" ", "us");
1478  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1479  }
1480  GB_close(gb_main);
1481 }
1482 
// Expects GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv)
// to return a string equal to 'expected' without exporting an error.
// Requires a variable named 'callEnv' in the scope where the macro is used.
#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
        TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
            GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
            expected); \
    }while(0)
1488 
// Expects GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv)
// to return no result and to export an error containing 'expectedErrorPart'.
// Requires a variable named 'callEnv' in the scope where the macro is used.
#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
        TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
            GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
            expectedErrorPart); \
    }while(0)
1494 
1495 __ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
1496  GB_shell shell;
1497  GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1498  {
1499  GB_transaction ta(gb_main);
1500  GBL_env env(gb_main, "tree_missing");
1501 
1502  {
1503  GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1504  TEST_REJECT_NULL(gb_species);
1505  GBL_call_env callEnv(gb_species, env);
1506 
1507  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1508  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1509  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1510  TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1511  TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1512 
1513  // empty tags:
1514  TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1515  TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1516  TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1517  TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1518  TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1519 
1520 #define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1521 #define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1522 
1523  // dont eval:
1524  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1525  // eval SRT:
1526  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top");
1527  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top");
1528  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top");
1529  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top");
1530  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1531  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":*=*-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1532  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1533  // eval ACI:
1534  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1535  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1536 
1537  // test SRT/ACI errors:
1538  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1539  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1540  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1541  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1542 
1543  // no error raised, if expression not applied:
1544  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1545 
1546  // incomplete tags
1547  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":*=<*>", "[DEF] <{no tag>");
1548  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,{no>");
1549  TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag,{no>");
1550  TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* *=<*2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1551  TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag{},{no>");
1552  TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* *=<*2,*1>", "[DEF] <{tag{} xx,{no>");
1553  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,no>");
1554  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1555  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1556  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1557  TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1558  TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":*=<*>", "[DEF] <kept {trunk>");
1559  TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":*=<*>", "[DEF] <kept>");
1560  }
1561 
1562  {
1563  GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1564  TEST_REJECT_NULL(gb_species);
1565  GBL_call_env callEnv(gb_species, env);
1566 
1567  // run scripts using context:
1568  TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1569  TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(|sequence|len)", "[T1] seqlen=$l [T2] seqlen=165");
1570  TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1571 
1572  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1573  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1574 
1575  // content before 1st tag:
1576  TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1577  TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1578 
1579  // test elimination of leading/trailing whitespace:
1580  TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1581  TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1582  TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1583 
1584 #define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1585 
1586  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1587  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1588  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1589  }
1590  }
1591  GB_close(gb_main);
1592 }
1593 
1594 void TEST_log_action() {
1595  for (int stamped = 0; stamped<=1; ++stamped) {
1596  TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1597  {
1598  char *logged = GBS_log_action_to("comment", "action", stamped);
1599  if (stamped) {
1600  TEST_EXPECT_CONTAINS(logged, "comment\n");
1601  TEST_EXPECT_CONTAINS(logged, "action\n");
1602  }
1603  else {
1604  TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1605  }
1606  free(logged);
1607  }
1608  {
1609  char *logged = GBS_log_action_to("comment\n", "action", stamped);
1610  if (stamped) {
1611  TEST_EXPECT_CONTAINS(logged, "comment\n");
1612  TEST_EXPECT_CONTAINS(logged, "action\n");
1613  }
1614  else {
1615  TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1616  }
1617  free(logged);
1618  }
1619  {
1620  char *logged = GBS_log_action_to("", "action", stamped);
1621  if (stamped) {
1622  TEST_EXPECT_EQUAL(logged[0], '\n');
1623  TEST_EXPECT_CONTAINS(logged, "action\n");
1624  }
1625  else {
1626  TEST_EXPECT_EQUAL(logged, "\naction\n");
1627  }
1628  free(logged);
1629  }
1630  {
1631  char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1632  if (stamped) {
1633  TEST_EXPECT_DIFFERENT(logged[0], '\n');
1634  TEST_EXPECT_CONTAINS(logged, "action\n");
1635  }
1636  else {
1637  TEST_EXPECT_EQUAL(logged, "action\n");
1638  }
1639  free(logged);
1640  }
1641  }
1642 }
1643 TEST_PUBLISH(TEST_log_action);
1644 
1645 #endif // UNIT_TESTS
1646 
static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash)
Definition: adstring.cxx:661
static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result)
Definition: adstring.cxx:676
void GBS_hash_do_const_sorted_loop(const GB_HASH *hs, gb_hash_const_loop_type func, gbs_hash_compare_function sorter, void *client_data)
Definition: adhash.cxx:644
TextRef partBehind(const TextRef &subref) const
Definition: adstring.cxx:493
const char * GB_ERROR
Definition: arb_core.h:25
string result
bool defined() const
Definition: adstring.cxx:412
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
TextRef(const char *data_, int length_)
Definition: adstring.cxx:409
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
Definition: adlang1.cxx:913
static TextRef textBetween(const TextRef &t1, const TextRef &t2)
Definition: adstring.cxx:479
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del)
Definition: adstring.cxx:608
size_t GBS_shorten_repeated_data(char *data)
Definition: adstring.cxx:358
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1385
static const char * EMPTY_KEY_NOT_ALLOWED
Definition: adstring.cxx:62
static void g_bs_merge_tags(const char *tag, long, void *cd_sub_result)
Definition: adstring.cxx:654
int orgLen
Definition: rns.c:12
int ARB_stricmp(const char *s1, const char *s2)
Definition: arb_str.h:28
const char * get_data() const
Definition: adstring.cxx:413
void GBS_intcat(GBS_strstruct *strstr, long val)
Definition: arb_strbuf.cxx:127
static char * g_bs_get_string_of_tag_hash(GB_HASH *tag_hash)
Definition: adstring.cxx:692
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1054
TextRef trimmed() const
Definition: adstring.cxx:464
#define TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(cb)
Definition: test_unit.h:1242
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
GB_ERROR GB_check_key(const char *key)
Definition: adstring.cxx:85
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
void GBS_strncat(GBS_strstruct *strstr, const char *ptr, size_t len)
Definition: arb_strbuf.cxx:101
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
TextRef headTrimmed() const
Definition: adstring.cxx:443
void cat(const char *from)
Definition: arb_strbuf.h:158
char * ARB_strpartdup(const char *start, const char *end)
Definition: arb_string.h:51
bool is_part_of(const TextRef &other) const
Definition: adstring.cxx:471
void GBS_fwrite_string(const char *strngi, FILE *out)
Definition: adstring.cxx:810
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
FILE * seq
Definition: rns.c:46
GB_CSTR GB_getenvARBHOME(void)
Definition: adsocket.cxx:565
GB_CSTR GBS_find_string(GB_CSTR cont, GB_CSTR substr, int match_mode)
Definition: admatch.cxx:103
TaggedContentParser(const char *input_)
Definition: adstring.cxx:575
#define TEST_EXPECT_SEGFAULT__UNWANTED(cb)
Definition: test_unit.h:1246
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1485
#define TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(cb)
Definition: test_unit.h:1243
unsigned int GB_UINT4
Definition: arbdb_base.h:37
int compare(const char *str) const
Definition: adstring.cxx:418
#define TEST_EXPECT_CONTAINS(str, part)
Definition: test_unit.h:1301
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:259
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
#define TEST_EXPECT(cond)
Definition: test_unit.h:1313
TextRef tailTrimmed() const
Definition: adstring.cxx:453
__ATTR__USERESULT GB_ERROR check_key(const char *key, int len)
Definition: adstring.cxx:64
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
Definition: arbdb.cxx:1779
const char * find(char c) const
Definition: adstring.cxx:476
char * GBS_replace_tabs_by_spaces(const char *text)
Definition: adstring.cxx:937
const char * get_following() const
Definition: adstring.cxx:416
static long g_bs_free_hash_of_hashes_elem(const char *, long val, void *)
Definition: adstring.cxx:703
char * GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char)
Definition: adstring.cxx:175
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
Definition: adstring.cxx:320
#define TEST_REJECT(cond)
Definition: test_unit.h:1315
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1310
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
Definition: arb_strbuf.cxx:108
static void error(const char *msg)
Definition: mkptypes.cxx:96
char * GB_memdup(const char *source, size_t len)
Definition: adstring.cxx:56
#define GB_KEY_LEN_MAX
Definition: gb_key.h:25
#define GB_KEY_LEN_MIN
Definition: gb_key.h:26
const TextRef & get_tag() const
Definition: adstring.cxx:583
const TextRef & get_content() const
Definition: adstring.cxx:584
TextRef partBefore(const TextRef &subref) const
Definition: adstring.cxx:489
bool has_content() const
Definition: adstring.cxx:578
#define cmp(h1, h2)
Definition: admap.cxx:50
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
static char * GBS_string_2_key_with_exclusions(const char *str, const char *additional)
Definition: adstring.cxx:31
char * GBS_string_2_key(const char *str)
Definition: adstring.cxx:52
char * GBS_fconvert_string(char *buffer)
Definition: adstring.cxx:894
uint32_t crctab[]
Definition: adstring.cxx:265
int get_length() const
Definition: adstring.cxx:414
void GBS_strforget(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:76
const char * GBS_funptr2readable(void *funptr, bool stripARBHOME)
Definition: adstring.cxx:1021
char * ARB_strupper(char *s)
Definition: arb_str.h:63
void nl()
Definition: test_unit.h:404
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1244
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
Definition: adhash.cxx:548
#define __ATTR__REDUCED_OPTIMIZE
Definition: test_unit.h:83
char tail() const
Definition: adstring.cxx:441
GB_CSTR GB_getenv(const char *env)
Definition: adsocket.cxx:677
void GBS_chrcat(GBS_strstruct *strstr, char ch)
Definition: arb_strbuf.cxx:119
Definition: arbdb.h:72
void spaced(const char *word)
Definition: test_unit.h:408
static void g_bs_free_hash_of_hashes(GB_HASH *hash)
Definition: adstring.cxx:708
char * GB_read_as_tagged_string(GBDATA *gbd, const char *tagi)
Definition: adstring.cxx:779
#define TEST_EXPECT_NO_SEGFAULT__WANTED(cb)
Definition: test_unit.h:1240
#define gb_assert(cond)
Definition: arbdbt.h:11
static char * dated_info(const char *info)
Definition: adstring.cxx:972
long GBS_gcgchecksum(const char *seq)
Definition: adstring.cxx:247
char * ARB_strndup(const char *start, int len)
Definition: arb_string.h:83
#define TEST_EXPECT_CODE_ASSERTION_FAILS(cb)
Definition: test_unit.h:1241
aisc_com * link
int icompare(const char *str) const
Definition: adstring.cxx:428
static char * GBS_fread_string(FILE *in)
Definition: adstring.cxx:852
char * GBS_trim(const char *str)
Definition: adstring.cxx:961
GB_ERROR GB_write_ints(GBDATA *gbd, const GB_UINT4 *i, long size)
Definition: arbdb.cxx:1437
#define TEST_EXPECT_NO_SEGFAULT(cb)
Definition: test_unit.h:1239
#define __ATTR__USERESULT
Definition: attributes.h:58
bool empty() const
Definition: arb_strbuf.h:68
char * copy() const
Definition: adstring.cxx:438
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
char head() const
Definition: adstring.cxx:440
TextRef(const char *zeroTerminated)
Definition: adstring.cxx:410
#define TEST_EXPECT_SEGFAULT__WANTED(cb)
Definition: test_unit.h:1245
#define TEST_EXPECT_SEGFAULT(cb)
Definition: test_unit.h:1244
bool white(int ch)
int GBS_HCF_sortedByKey(const char *k0, long dummy_1x, const char *k1, long dummy_2x)
Definition: adhash.cxx:656
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:353
static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value)
Definition: adstring.cxx:591
#define NULp
Definition: cxxforward.h:97
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
Definition: test_unit.h:1103
#define TEST_EXPECT_DIFFERENT(expr, want)
Definition: test_unit.h:1290
bool ARB_strBeginsWith(const char *str, const char *with)
Definition: arb_str.h:42
char * GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2)
Definition: adstring.cxx:713
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
Definition: gb_aci.cxx:361
static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env &env)
Definition: adstring.cxx:629
char * GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char)
Definition: adstring.cxx:124
bool has_tag() const
Definition: adstring.cxx:577
bool has_part() const
Definition: adstring.cxx:581
GB_transaction ta(gb_var)
char * GBS_log_action_to(const char *comment, const char *action, bool stamp)
Definition: adstring.cxx:990
GBDATA * gb_main
Definition: adname.cxx:33
size_t length
char * GBS_eval_env(GB_CSTR p)
Definition: adstring.cxx:212
char * GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env &env)
Definition: adstring.cxx:750
static int info[maxsites+1]
GB_ERROR GB_check_hkey(const char *key)
Definition: adstring.cxx:92
const char * GB_CSTR
Definition: arbdb_base.h:25
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:649
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
void put(char c)
Definition: arb_strbuf.h:138
Definition: arbdb.h:66
GB_write_int const char s
Definition: AW_awar.cxx:156