ARB
adstring.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adstring.cxx //
4 // Purpose : various string functions //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arb_backtrace.h>
12 #include <arb_strbuf.h>
13 #include <arb_defs.h>
14 #include <arb_str.h>
15 
16 #include "gb_key.h"
17 #include "gb_aci.h"
18 
19 #include <SigHandler.h>
20 
21 #include <execinfo.h>
22 
23 #include <cstdarg>
24 #include <cctype>
25 #include <cerrno>
26 #include <ctime>
27 #include <setjmp.h>
28 
29 #include <valgrind.h>
30 
31 static char *GBS_string_2_key_with_exclusions(const char *str, const char *additional) {
32  // converts any string to a valid key (all chars in 'additional' are additionally allowed)
33  char buf[GB_KEY_LEN_MAX+1];
34  int i;
35  int c;
36  for (i=0; i<GB_KEY_LEN_MAX;) {
37  c = *(str++);
38  if (!c) break;
39 
40  if (c==' ' || c == '_') {
41  buf[i++] = '_';
42  }
43  else if (isalnum(c) || strchr(additional, c)) {
44  buf[i++] = c;
45  }
46  }
47  for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48  buf[i] = 0;
49  return ARB_strdup(buf);
50 }
51 
52 char *GBS_string_2_key(const char *str) { // converts any string to a valid key
53  return GBS_string_2_key_with_exclusions(str, "");
54 }
55 
56 char *GB_memdup(const char *source, size_t len) {
57  char *dest = ARB_alloc<char>(len);
58  memcpy(dest, source, len);
59  return dest;
60 }
61 
62 static const char *EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
63 
64 inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) {
65  // test if 'key' is a valid non-hierarchical database key.
66  // i.e. contains only letters, numbers and '_' and
67  // is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX.
68 
69  if (len < GB_KEY_LEN_MIN) {
70  if (!len) return EMPTY_KEY_NOT_ALLOWED;
71  return GBS_global_string("Invalid key '%s': too short", key);
72  }
73  if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
74 
75  for (int i = 0; i<len; ++i) {
76  char c = key[i];
77  bool validChar = isalnum(c) || c == '_';
78  if (!validChar) {
79  return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80  }
81  }
82 
83  return NULp;
84 }
85 GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
86  // test if 'key' is a valid non-hierarchical database key
87  // (i.e. a valid name for a container or field).
88 
89  return check_key(key, key ? strlen(key) : 0);
90 }
91 
92 GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
93  // test whether 'key' is a hierarchical key,
94  // i.e. consists of subkeys (accepted by GB_check_key), separated by '/'.
95 
96  GB_ERROR err = NULp;
97 
98  if (key && key[0] == '/') ++key; // accept + remove leading '/'
99  if (!key || !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash)
100 
101  while (!err && key[0]) {
102  int nonSlashPart = strcspn(key, "/");
103 
104  err = check_key(key, nonSlashPart);
105  if (!err) {
106  key += nonSlashPart;
107  if (key[0] == '/') {
108  ++key;
109  if (key[0] == 0) { // nothing after slash
110  err = EMPTY_KEY_NOT_ALLOWED;
111  }
112  }
113  else {
114  gb_assert(key[0] == 0);
115  }
116  }
117  }
118  return err;
119 }
120 
121 // ----------------------------------------------
122 // escape/unescape characters in strings
123 
124 char *GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char) {
143  int len = strlen(str);
144  char *buffer = ARB_alloc<char>(2*len+1);
145  int j = 0;
146  int i;
147 
148  gb_assert(strlen(chars_to_escape) <= 26);
149  gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
150 
151  for (i = 0; str[i]; ++i) {
152  if (str[i] == escape_char) {
153  buffer[j++] = escape_char;
154  buffer[j++] = escape_char;
155  }
156  else {
157  const char *found = strchr(chars_to_escape, str[i]);
158  if (found) {
159  buffer[j++] = escape_char;
160  buffer[j++] = (found-chars_to_escape+'A');
161 
162  gb_assert(found[0]<'A' || found[0]>'Z'); // illegal character in chars_to_escape
163  }
164  else {
165 
166  buffer[j++] = str[i];
167  }
168  }
169  }
170  buffer[j] = 0;
171 
172  return buffer;
173 }
174 
175 char *GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char) {
177 
178  int len = strlen(str);
179  char *buffer = ARB_alloc<char>(len+1);
180  int j = 0;
181  int i;
182 
183 #if defined(ASSERTION_USED)
184  int escaped_chars_len = strlen(escaped_chars);
185 #endif // ASSERTION_USED
186 
187  gb_assert(strlen(escaped_chars) <= 26);
188  gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
189 
190  for (i = 0; str[i]; ++i) {
191  if (str[i] == escape_char) {
192  if (str[i+1] == escape_char) {
193  buffer[j++] = escape_char;
194  }
195  else {
196  int idx = str[i+1]-'A';
197 
198  gb_assert(idx >= 0 && idx<escaped_chars_len);
199  buffer[j++] = escaped_chars[idx];
200  }
201  ++i;
202  }
203  else {
204  buffer[j++] = str[i];
205  }
206  }
207  buffer[j] = 0;
208 
209  return buffer;
210 }
211 
    // NOTE(review): the line carrying this function's signature is absent from this listing
    // (presumably 'char *GBS_eval_env(GB_CSTR p) {' - confirm against the original file).
    //
    // Expands environment references in 'p':
    // each "$(NAME)" is replaced by the value GB_getenv() reports for NAME
    // (nothing is inserted if GB_getenv() returns NULp).
    // If a "$(" has no closing ')', the error is exported and NULp is returned.
    GB_ERROR error = NULp;
    GB_CSTR ka;
    GBS_strstruct out(1000);

    while ((ka = GBS_find_string(p, "$(", 0))) {
        GB_CSTR kz = strchr(ka, ')');
        if (!kz) {
            error = GBS_global_string("missing ')' for envvar '%s'", p);
            break;
        }
        else {
            char *envvar = ARB_strpartdup(ka+2, kz-1); // name between "$(" and ")"
            int len = ka-p;                            // length of text in front of "$("

            if (len) out.ncat(p, len);

            GB_CSTR genv = GB_getenv(envvar);
            if (genv) out.cat(genv);

            p = kz+1; // continue behind ')'
            free(envvar);
        }
    }

    if (error) {
        GB_export_error(error);
        return NULp;
    }

    out.cat(p); // copy rest
    return out.release_memfriendly();
}
245 
long GBS_gcgchecksum(const char *seq) {
    // GCG checksum:
    // sums up (weight * uppercased character) over all characters of 'seq'
    // and returns that sum modulo 10000.
    // The weight of the first character is 1, it increases up to 57 and then restarts with 1.
    //
    // NOTE(review): 'seq[pos]' reaches toupper() as plain 'char' (negative for non-ASCII bytes
    // where 'char' is signed). Left as is, because a cast would change the checksum for such input.

    const long seqlen = strlen(seq);
    long       sum    = 0;

    for (long pos = 0; pos < seqlen; ++pos) {
        const long weight = pos % 57 + 1;
        sum += weight * toupper(seq[pos]);
    }

    return sum % 10000;
}
262 
// Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
// (GB_checksum indexes this table with values 0..255)
uint32_t crctab[] = {
    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
    0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
    0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
    0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
    0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
    0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
    0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
    0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
    0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
    0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
    0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
    0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
    0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
    0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
    0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
    0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
    0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
    0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
    0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
    0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
    0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
    0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
    0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
    0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
    0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
    0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
    0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
    0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
    0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
    0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
    0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
    0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
    0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
    0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
    0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
    0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
    0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
    0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
    0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
    0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
    0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
    0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
    0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
    0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
    0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
    0x2d02ef8dL
};
318 
319 uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) {
320  /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
321  * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
322  */
323 
324  unsigned long c = 0xffffffffL;
325  long n = length;
326  int i;
327  int tab[256]; // @@@ avoid recalc for each call
328 
329  for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
330  tab[i] = ignore_case ? toupper(i) : i;
331  }
332 
333  if (exclude) {
334  while (1) {
335  int k = *(unsigned char *)exclude++;
336  if (!k) break;
337  tab[k] = 0;
338  if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
339  }
340  }
341 
342  while (n--) {
343  i = tab[*(const unsigned char *)seq++];
344  if (i) {
345  c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
346  }
347  }
348  c = c ^ 0xffffffffL;
349  return c;
350 }
351 
352 uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) {
353  // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
354  return GB_checksum(seq, strlen(seq), ignore_case, exclude);
355 }
356 
size_t GBS_shorten_repeated_data(char *data) {
    // Shortens repeats in 'data' (the buffer is modified in place!):
    // each run of 5 or more identical characters is replaced by "<char>{<count>}",
    // shorter runs are kept unchanged.
    // e.g. "..............................ACGT....................TGCA"
    // -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG
    char *const  dataStart = data;
    char        *out       = data;   // write position (never overtakes the read position)
    const char  *in        = data;   // read position
    char         runChar   = *in++;
    size_t       runLen    = 1;

    while (runChar) {
        const char next = *in++;
        if (next == runChar) {
            ++runLen;
            continue;
        }

        // run has ended -> write it
        if (runLen < 5) {
            size_t left = runLen;
            while (left--) *out++ = runChar; // insert plain
        }
        else {
            out += sprintf(out, "%c{%zu}", runChar, runLen); // insert repeat count
        }

        runChar = next;
        runLen  = 1;
    }

    *out = 0;

#if defined(DEBUG)

    gb_assert(strlen(dataStart) <= orgLen);
#endif // DEBUG
    return out-dataStart;
}
397 
398 
399 // ------------------------------------------
400 // helper classes for tagged fields
401 
402 class TextRef {
403  const char *data; // has no terminal zero-byte!
404  int length;
405 
406 public:
407  TextRef() : data(NULp), length(-1) {}
408  TextRef(const char *data_, int length_) : data(data_), length(length_) {}
409  explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
410 
411  bool defined() const { return data && length>0; }
412  const char *get_data() const { return data; }
413  int get_length() const { return length; }
414 
415  const char *get_following() const { return data ? data+length : NULp; }
416 
417  int compare(const char *str) const {
418  gb_assert(defined());
419  int cmp = strncmp(get_data(), str, get_length());
420  if (!cmp) {
421  if (str[get_length()]) {
422  cmp = -1; // right side contains more content
423  }
424  }
425  return cmp;
426  }
427  int icompare(const char *str) const {
428  gb_assert(defined());
429  int cmp = strncasecmp(get_data(), str, get_length());
430  if (!cmp) {
431  if (str[get_length()]) {
432  cmp = -1; // right side contains more content
433  }
434  }
435  return cmp;
436  }
437  char *copy() const { return ARB_strndup(get_data(), get_length()); }
438 
439  char head() const { return defined() ? data[0] : 0; }
440  char tail() const { return defined() ? data[length-1] : 0; }
441 
443  if (defined()) {
444  for (int s = 0; s<length; ++s) {
445  if (!isspace(data[s])) {
446  return TextRef(data+s, length-s);
447  }
448  }
449  }
450  return TextRef();
451  }
453  if (defined()) {
454  for (int s = length-1; s>=0; --s) {
455  if (!isspace(data[s])) {
456  return TextRef(data, s+1);
457  }
458  }
459  }
460  return TextRef();
461  }
462 
463  TextRef trimmed() const {
464  return headTrimmed().tailTrimmed();
465  }
466 
467  inline TextRef partBefore(const TextRef& subref) const;
468  inline TextRef partBehind(const TextRef& subref) const;
469 
470  bool is_part_of(const TextRef& other) const {
471  gb_assert(defined() && other.defined());
472  return get_data()>=other.get_data() && get_following()<=other.get_following();
473  }
474 
475  const char *find(char c) const { return reinterpret_cast<const char*>(memchr(get_data(), c, get_length())); }
476 };
477 
478 static TextRef textBetween(const TextRef& t1, const TextRef& t2) {
479  const char *behind_d1 = t1.get_following();
480  const char *d2 = t2.get_data();
481 
482  if (behind_d1 && d2 && behind_d1<d2) {
483  return TextRef(behind_d1, d2-behind_d1);
484  }
485  return TextRef();
486 }
487 
488 inline TextRef TextRef::partBefore(const TextRef& subref) const {
489  gb_assert(subref.is_part_of(*this));
490  return textBetween(TextRef(get_data(), 0), subref);
491 }
492 inline TextRef TextRef::partBehind(const TextRef& subref) const {
493  gb_assert(subref.is_part_of(*this));
494  return TextRef(subref.get_following(), get_following()-subref.get_following());
495 }
496 
498  TextRef wholeInput;
499  TextRef tag, content; // current position
500  TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
501  TextRef nextBrackets; // next "[..]" part (behind current tag)
502 
503  void findBrackets(const char *in) {
504  nextBrackets = TextRef();
505  const char *tag_start = strchr(in, '[');
506  if (tag_start) {
507  const char *tag_end = strchr(tag_start, ']');
508  if (tag_end) {
509  if (tag_end == tag_start+1) { // empty tag -> use as content
510  findBrackets(tag_end+1);
511  }
512  else {
513  const char *unwanted_bracket = reinterpret_cast<const char*>(memchr(tag_start+1, '[', tag_end-tag_start-1));
514  if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
515  findBrackets(unwanted_bracket);
516  }
517  else {
518  TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
519  if (name.defined()) { // not only whitespace inside brackets
520  nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
521  }
522  else {
523  findBrackets(tag_end+1);
524  }
525  }
526  }
527  }
528  }
529  }
530 
531  void parse_next_multi_tag() {
532  gb_assert(restTags.defined());
533  TextRef comma(restTags.find(','), 1);
534  if (comma.defined()) {
535  tag = restTags.partBefore(comma).tailTrimmed();
536  restTags = restTags.partBehind(comma).headTrimmed();
537  }
538  else {
539  tag = restTags;
540  restTags = TextRef();
541  }
542  }
543  void parse_next() {
544  if (restTags.defined()) {
545  parse_next_multi_tag();
546  }
547  else if (nextBrackets.defined()) {
548  TextRef brackets = nextBrackets;
549  findBrackets(brackets.get_following());
550 
551  content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
552 
553  gb_assert(brackets.head() == '[' && brackets.tail() == ']');
554 
555  TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
556  gb_assert(tags.defined());
557 
558  restTags = tags;
559  parse_next_multi_tag();
560  }
561  else {
562  tag = content = TextRef();
563  gb_assert(!has_part());
564  }
565  }
566  void parse_first() {
567  gb_assert(!has_part());
568  findBrackets(wholeInput.get_data());
569  content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
570  if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
571  }
572 
573 public:
574  TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
575 
576  bool has_tag() const { return tag.defined(); }
577  bool has_content() const { return content.defined(); }
578 
579  void next() { parse_next(); }
580  bool has_part() const { return has_tag() || has_content(); } // false -> parser has finished
581 
582  const TextRef& get_tag() const { return tag; }
583  const TextRef& get_content() const { return content; }
584 };
585 
586 
587 // -------------------------------------------
588 // helper function for tagged fields
589 
590 static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value) {
591  if (!value[0]) return; // ignore empty values
592 
593  {
594  char *p;
595  p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
596  p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
597  }
598 
599  GB_HASH *sh = (GB_HASH *)GBS_read_hash(hash, value);
600  if (!sh) {
601  sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
602  GBS_write_hash(hash, value, (long)sh);
603  }
604  GBS_write_hash(sh, tag, 1);
605 }
606 
607 static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del) {
608  TaggedContentParser parser(s);
609  while (parser.has_part()) {
610  if (parser.has_content()) {
611  char *content = parser.get_content().copy();
612  if (parser.has_tag()) {
613  char *tag = parser.get_tag().copy();
614  if (!del || ARB_stricmp(tag, del) != 0) {
615  g_bs_add_value_tag_to_hash(hash, tag, content);
616  }
617  free(tag);
618  }
619  else {
620  g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
621  }
622  free(content);
623  }
624  parser.next();
625  }
626 }
627 
628 static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env& env) {
629  GB_ERROR error = NULp;
630 
631  TaggedContentParser parser(s);
632  while (parser.has_part() && !error) {
633  if (parser.has_content()) {
634  char *value = parser.get_content().copy();
635  char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
636 
637  if (rtag && ARB_stricmp(tag, rtag) == 0) {
638  freeset(value, GB_command_interpreter_in_env(value, aci, env));
639  if (!value) error = GB_await_error();
640  }
641 
642  if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
643 
644  free(tag);
645  free(value);
646  }
647  parser.next();
648  }
649 
650  return error;
651 }
652 
653 static void g_bs_merge_tags(const char *tag, long /*val*/, void *cd_sub_result) {
654  GBS_strstruct& sub_result = *(GBS_strstruct*)cd_sub_result;
655 
656  sub_result.cat(tag);
657  sub_result.put(',');
658 }
659 
static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash) {
    // hash-loop callback:
    // writes a copy of 'value' into the hash passed as client data,
    // using an uppercased key which ends with a unique number.
    static int counter = 0;

    GBS_strstruct sub_result(100);
    // NOTE(review): one line is absent from this listing at this position; presumably it fills
    // 'sub_result' from 'subhash' (which is otherwise unused here) - restore it from the original file.
    sub_result.putlong(counter++); // create a unique number

    char *str = ARB_strupper(sub_result.release());

    GB_HASH *g_bs_collect_tags_hash = (GB_HASH*)cd_g_bs_collect_tags_hash;
    GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting

    free(str);
}
674 
675 static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result) {
676  GBS_strstruct& merge_result = *(GBS_strstruct*)cd_merge_result;
677 
678  char *lk = const_cast<char*>(strrchr(tag, ','));
679  if (lk) { // remove number at end
680  *lk = 0;
681 
682  if (!merge_result.empty()) merge_result.put(' '); // skip trailing space
683  merge_result.cat_wrapped("[]", tag);
684  merge_result.put(' ');
685  }
686  merge_result.cat((char*)value);
687 }
688 
static char *g_bs_get_string_of_tag_hash(GB_HASH *tag_hash) {
    // Converts 'tag_hash' into a tagged string:
    // all entries are moved (in key order) into a second hash,
    // which is then written (again in key order) into the result.
    GBS_strstruct merge_result(256);
    // NOTE(review): one line is absent from this listing at this position; presumably it declares
    // and creates 'collect_tags_hash' (used and freed below) - restore it from the original file.

    GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
    GBS_hash_do_const_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, &merge_result);

    GBS_free_hash(collect_tags_hash);
    return merge_result.release_memfriendly();
}
699 
700 static long g_bs_free_hash_of_hashes_elem(const char */*key*/, long val, void *) {
701  GB_HASH *hash = (GB_HASH*)val;
702  if (hash) GBS_free_hash(hash);
703  return 0;
704 }
    // NOTE(review): the first lines of this function (its signature and probably the loop that
    // frees the sub-hashes) are absent from this listing - restore them from the original file.
    GBS_free_hash(hash);
}
709 
char *GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2) {
    /* Create a tagged string from two tagged strings:
     * a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
     *
     * 'tag1' and 'tag2' are converted into keys and used as default tags
     * for untagged content of 's1' and 's2'.
     *
     * if 's2' is not empty, then delete tag 'replace1' in 's1'
     * if 's1' is not empty, then delete tag 'replace2' in 's2'
     * (an empty or NULp 'replace1'/'replace2' deletes nothing)
     *
     * (result should never be NULp)
     */

    char *str1 = ARB_strdup(s1);
    char *str2 = ARB_strdup(s2);
    char *t1 = GBS_string_2_key(tag1);
    char *t2 = GBS_string_2_key(tag2);
    // NOTE(review): one line is absent from this listing at this position; presumably it declares
    // and creates 'hash' (used below) - restore it from the original file.

    if (!s1[0]) replace2 = NULp;
    if (!s2[0]) replace1 = NULp;

    if (replace1 && !replace1[0]) replace1 = NULp;
    if (replace2 && !replace2[0]) replace2 = NULp;

    g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
    g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);

    char *result = g_bs_get_string_of_tag_hash(hash);

    // NOTE(review): one line is absent from this listing at this position; presumably it frees
    // 'hash' (including its sub-hashes) - restore it from the original file.

    free(t2);
    free(t1);
    free(str2);
    free(str1);

    return result;
}
746 
char *GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env& env) {
    /* if 's' is untagged, tag it with default tag 'dt'.
     * if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
     *
     * if result is NULp, an error has been exported.
     */

    char *str = ARB_strdup(s);
    char *default_tag = GBS_string_2_key(dt);
    // NOTE(review): one line is absent from this listing at this position; presumably it declares
    // and creates 'hash' (used below) - restore it from the original file.
    char *result = NULp;

    GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);

    if (!error) {
        result = g_bs_get_string_of_tag_hash(hash);
    }
    else {
        GB_export_error(error);
    }

    // NOTE(review): one line is absent from this listing at this position; presumably it frees
    // 'hash' (including its sub-hashes) - restore it from the original file.

    free(default_tag);
    free(str);

    return result;
}
775 
776 char *GB_read_as_tagged_string(GBDATA *gbd, const char *tagi) {
777  char *buf = GB_read_as_string(gbd);
778  if (buf && tagi && tagi[0]) {
779  TaggedContentParser parser(buf);
780 
781  char *wantedTag = GBS_string_2_key(tagi);
782  char *contentFound = NULp;
783 
784  while (parser.has_part() && !contentFound) {
785  if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
786  contentFound = parser.get_content().copy();
787  }
788  parser.next();
789  }
790  free(wantedTag);
791  free(buf);
792 
793  return contentFound;
794  }
795  return buf;
796 }
797 
798 
/* be CAREFUL : this function is used to save ARB ASCII databases (i.e. properties)
 * and it is used as well to save perl macros.
 *
 * When changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well.
 *
 * Always keep in mind that many users have databases/macros written with older
 * versions of this function. They MUST still load properly!!!
 */
void GBS_fwrite_string(const char *strngi, FILE *out) {
    // Writes 'strngi' enclosed in double quotes to 'out'.
    //
    // Encoding:
    // - '\n' and '\t' are written as \n and \t
    // - other characters ASCII 0..24 are written as \@..\X
    // - characters ASCII 25..31 are written as \0..\6
    // - '"' and '\' are prefixed with a backslash
    // - all other characters are written unchanged

    const unsigned char *p = (const unsigned char *)strngi;

    putc('"', out);

    for (; *p; ++p) {
        const int c = *p;

        if (c >= 32) {
            if (c == '"' || c == '\\') putc('\\', out);
            putc(c, out);
            continue;
        }

        // control character
        putc('\\', out);
        switch (c) {
            case '\n': putc('n', out); break;
            case '\t': putc('t', out); break;
            default:
                if (c<25) putc(c+'@', out);      // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
                else      putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
                break;
        }
    }

    putc('"', out);
}
841 
/* Read a string from a file written by GBS_fwrite_string.
 * Searches for the first '"' and decodes everything up to the closing '"'.
 *
 * WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string);
 * any changes should be done in GBS_fconvert_string too.
 */
848 
849 static char *GBS_fread_string(FILE *in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
850  GBS_strstruct buf(1024);
851 
852  int x;
853  while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
854 
855  if (x != EOF) {
856  while ((x = getc(in)) != '"') {
857  if (x == EOF) break;
858  if (x == '\\') {
859  x = getc(in);
860  if (x==EOF) break;
861  if (x == 'n') { buf.put('\n'); continue; }
862  if (x == 't') { buf.put('\t'); continue; }
863  if (x>='@' && x <= '@' + 25) { buf.put(x-'@'); continue; }
864  if (x>='0' && x <= '9') { buf.put(x-('0'-25)); continue; }
865  // all other backslashes are simply skipped
866  }
867  buf.put(x);
868  }
869  }
870  return buf.release_memfriendly();
871 }
872 
873 /* does similar decoding as GBS_fread_string but works directly on an existing buffer
874  * (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
875  *
876  * inserts \0 behind decoded string (removes the closing '"')
877  * returns a pointer behind the end (") of the _encoded_ string
878  * returns NULp if a 0-character is found
879  */
881  char *t = buffer;
882  char *f = buffer;
883  int x;
884 
885  gb_assert(f[-1] == '"');
886  // the opening " has already been read
887 
888  while ((x = *f++) != '"') {
889  if (!x) break;
890 
891  if (x == '\\') {
892  x = *f++;
893  if (!x) break;
894 
895  if (x == 'n') {
896  *t++ = '\n';
897  continue;
898  }
899  if (x == 't') {
900  *t++ = '\t';
901  continue;
902  }
903  if (x>='@' && x <= '@' + 25) {
904  *t++ = x-'@';
905  continue;
906  }
907  if (x>='0' && x <= '9') {
908  *t++ = x-('0'-25);
909  continue;
910  }
911  // all other backslashes are simply skipped
912  }
913  *t++ = x;
914  }
915 
916  if (!x) return NULp; // error (string should not contain 0-character)
917  gb_assert(x == '"');
918 
919  t[0] = 0;
920  return f;
921 }
922 
923 char *GBS_replace_tabs_by_spaces(const char *text) {
924  int tlen = strlen(text);
925  GBS_strstruct mfile(tlen * 3/2 + 1);
926  int tabpos = 0;
927  int c;
928 
929  while ((c=*(text++))) {
930  if (c == '\t') {
931  int ntab = (tabpos + 8) & 0xfffff8;
932  while (tabpos < ntab) {
933  mfile.put(' ');
934  tabpos++;
935  }
936  continue;
937  }
938  tabpos ++;
939  if (c == '\n') {
940  tabpos = 0;
941  }
942  mfile.put(c);
943  }
944  return mfile.release_memfriendly();
945 }
946 
947 char *GBS_trim(const char *str) {
948  // trim whitespace at beginning and end of 'str'
949  const char *whitespace = " \t\n";
950  while (str[0] && strchr(whitespace, str[0])) str++;
951 
952  const char *end = strchr(str, 0)-1;
953  while (end >= str && strchr(whitespace, end[0])) end--;
954 
955  return ARB_strpartdup(str, end);
956 }
957 
958 static char *dated_info(const char *info) {
959  char *dated_info = NULp;
960  time_t date;
961 
962  if (time(&date) != -1) {
963  char *dstr = ctime(&date);
964  char *nl = strchr(dstr, '\n');
965 
966  if (nl) nl[0] = 0; // cut off LF
967 
968  dated_info = GBS_global_string_copy("%s: %s", dstr, info);
969  }
970  else {
971  dated_info = ARB_strdup(info);
972  }
973  return dated_info;
974 }
975 
976 char *GBS_log_action_to(const char *comment, const char *action, bool stamp) {
984  size_t clen = comment ? strlen(comment) : 0;
985  size_t alen = strlen(action);
986 
987  GBS_strstruct new_comment(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2*\n + \0 + space for stamp
988 
989  if (comment) {
990  new_comment.cat(comment);
991  if (clen == 0 || comment[clen-1] != '\n') new_comment.put('\n');
992  }
993 
994  if (stamp) {
995  char *dated_action = dated_info(action);
996  new_comment.cat(dated_action);
997  free(dated_action);
998  }
999  else {
1000  new_comment.cat(action);
1001  }
1002  if (alen == 0 || action[alen-1] != '\n') new_comment.put('\n');
1003 
1004  return new_comment.release_memfriendly();
1005 }
1006 
1007 const char *GBS_funptr2readable(void *funptr, bool stripARBHOME) {
1008  // only returns module and offset for static functions :-(
1009  char **funNames = backtrace_symbols(&funptr, 1);
1010  const char *readable_fun = funNames[0];
1011 
1012  if (stripARBHOME) {
1013  const char *ARBHOME = GB_getenvARBHOME();
1014  if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1015  readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1016  }
1017  }
1018  return readable_fun;
1019 }
1020 
1021 // --------------------------------------------------------------------------------
1022 
1023 #ifdef UNIT_TESTS
1024 
1025 #include <test_unit.h>
1026 
1027 // #define TEST_TEST_MACROS
1028 
#ifdef ENABLE_CRASH_TESTS
// helper functions passed to the TEST_EXPECT_..-macros in TEST_signal_tests__crashtest
static void provokesegv() { raise(SIGSEGV); } // raises SIGSEGV on purpose
static void dont_provokesegv() {}             // does nothing
# if defined(ASSERTION_USED)
static void failassertion() { gb_assert(0); } // fails an assertion on purpose
#  if defined(TEST_TEST_MACROS)
static void dont_failassertion() {}           // does nothing
#  endif
static void provokesegv_does_not_fail_assertion() {
    // provokesegv does not raise assertion
    // -> the following assertion fails
    TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
}
# endif
#endif
1044 
void TEST_signal_tests__crashtest() {
    // Tests the test-macros which detect SEGV and failing assertions.

    // check whether we can test that no SEGV or assertion failure happened
    TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);

    // check whether we can test for SEGV and assertion failures
    TEST_EXPECT_SEGFAULT(provokesegv);
    TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);

    // tests whether signal suppression works multiple times (by repeating tests)
    TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
    TEST_EXPECT_SEGFAULT(provokesegv);

    // test whether SEGV can be distinguished from assertion
    TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);

    // The following section is disabled, because it will
    // provoke test warnings (to test these warnings).
    // (enable it when changing any of these TEST_..-macros used here)
#if defined(TEST_TEST_MACROS)
    TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);

    TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
    TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
#if defined(ASSERTION_USED)
    TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
#endif

    TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
    // NOTE(review): one line is absent from this listing at this position - restore it from the original file.
    TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
#endif
}
1077 
// Copies 'Long' to the heap, lets GBS_shorten_repeated_data() modify that copy
// and expects the modified copy to equal 'Short'. The copy is freed afterwards.
1078 #define TEST_SHORTENED_EQUALS(Long,Short) do { \
1079  char *buf = ARB_strdup(Long); \
1080  GBS_shorten_repeated_data(buf); \
1081  TEST_EXPECT_EQUAL(buf, Short); \
1082  free(buf); \
1083  } while(0)
1084 
// Tests GBS_shorten_repeated_data():
// the cases below expect runs of 5 or more identical characters to be written
// as "<char>{<count>}", while shorter runs (and the empty string) stay unchanged.
1085 void TEST_GBS_shorten_repeated_data() {
1086  TEST_SHORTENED_EQUALS("12345", "12345");
1087  TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1088  TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1089  TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1090  TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1091  TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1092  TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1093  TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1094  TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc"); // shortest run that gets shortened
1095  TEST_SHORTENED_EQUALS("aaaabc", "aaaabc"); // runs of 4 or less are kept
1096  TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1097  TEST_SHORTENED_EQUALS("aabc", "aabc");
1098  TEST_SHORTENED_EQUALS("", "");
1099 }
1100 
1101 static const char *hkey_format[] = {
1102  "/%s/bbb/ccc",
1103  "/aaa/%s/ccc",
1104  "/aaa/bbb/%s",
1105 };
1106 
1107 inline const char *useInHkey(const char *fragment, size_t pos) {
1108  return GBS_global_string(hkey_format[pos], fragment);
1109 }
1110 
// Inserts 'use' into every template of hkey_format and expects GB_check_hkey()
// to accept each resulting hierarchical key.
// The currently tested key is annotated; the annotation is reset afterwards.
1111 #define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
1112  for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1113  const char *hkey = useInHkey(use, i); \
1114  TEST_ANNOTATE(hkey); \
1115  TEST_EXPECT_NO_ERROR(GB_check_hkey(hkey)); \
1116  } \
1117  TEST_ANNOTATE(NULp); \
1118  } while(0)
1119 
// Inserts 'use' into every template of hkey_format and expects GB_check_hkey()
// to reject each resulting hierarchical key with an error containing 'contains'.
// The currently tested key is annotated; the annotation is reset afterwards.
1120 #define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
1121  for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1122  const char *hkey = useInHkey(use, i); \
1123  TEST_ANNOTATE(hkey); \
1124  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(hkey), contains); \
1125  } \
1126  TEST_ANNOTATE(NULp); \
1127  } while(0)
1128 
1129 
1130 void TEST_DB_key_checks() {
1131  // plain keys
1132  const char *shortest = "ab";
1133  const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
1134  const char *too_short = shortest+1;
1135  const char *longest = too_long+1;
1136 
1137  const char *empty = "";
1138  const char *slash = "sub/key";
1139  const char *dslash = "sub//key";
1140  const char *comma = "no,key";
1141  const char *minus = "no-key";
1142 
1143  // obsolete GB_LINK syntax:
1144  const char *link = "link->syntax";
1145  const char *nowhere = "link->";
1146  const char *fromNw = "->syntax";
1147 
1150 
1151  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1152  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1153  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1154 
1155  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
1156  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
1157  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
1158  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
1159  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
1160  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
1161  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");
1162 
1163  // hierarchical keys
1164  TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1165  TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1166 
1167  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1168  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1169  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1170 
1171  TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash);
1172  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
1173  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1174  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
1175  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
1176  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
1177  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");
1178 
1179  // test NULp keys:
1180  TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
1181  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");
1182 
1183  // some edge cases for hierarchical keys:
1184  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
1185  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
1186  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
1187  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
1189  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
1190  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
1191 }
1192 
// Converts 'str' using GBS_string_2_key(), expects the result to equal 'expected'
// and to be accepted by GB_check_key(). The generated key is freed afterwards.
1193 #define TEST_STRING2KEY(str,expected) do { \
1194  char *as_key = GBS_string_2_key(str); \
1195  TEST_EXPECT_EQUAL(as_key, expected); \
1196  TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
1197  free(as_key); \
1198  } while(0)
1199 
// Tests generation of valid database keys from arbitrary strings
// (spaces become '_', other invalid characters are dropped,
// results are padded or truncated to a legal key length).
1200 void TEST_DB_key_generation() {
1201  TEST_STRING2KEY("abc", "abc");
1202  TEST_STRING2KEY("a b c", "a_b_c");
1203 
1204  // invalid chars
1205  TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1206  "string_containing_doublequotes_quotes_and_othershit");
1207 
1208  // length tests
1209  TEST_STRING2KEY("a", "a_"); // too short
1210  TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1211  "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1212 }
1213 
1214 void TEST_TaggedContentParser() {
1215  // test helper class TextRef:
1216  TEST_REJECT(TextRef().defined()); // default to undefined
1217  {
1218  TextRef bla("blakjahd", 3);
1219  TEST_EXPECT(bla.defined());
1220  TEST_EXPECT_EQUAL(bla.get_length(), 3);
1221 
1222  TEST_EXPECT(bla.compare("bl") > 0);
1223  TEST_EXPECT(bla.compare("bla") == 0);
1224  TEST_EXPECT(bla.compare("blase") < 0);
1225 
1226  TextRef spaced(" spaced "+1, 10);
1227  TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1228  TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1229  TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1230  }
1231 
1232  const char *text = " untagged [tag] tagged [empty] ";
1233 
1234  TextRef cr_untagged(strstr(text, "untagged"), 8);
1235  TextRef cr_tagged (strstr(text, "tagged"), 6);
1236  TextRef tr_tag (strstr(text, "tag"), 3);
1237  TextRef tr_empty (strstr(text, "empty"), 5);
1238 
1239  // test TaggedContentParser:
1240  {
1241  TaggedContentParser parser(text);
1242 
1243  TEST_EXPECT(parser.has_part());
1244  TEST_REJECT(parser.has_tag());
1245  TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1246 
1247  parser.next();
1248 
1249  TEST_EXPECT(parser.has_part());
1250  TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1251  TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1252 
1253  parser.next();
1254 
1255  TEST_EXPECT(parser.has_part());
1256  TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1257  TEST_REJECT(parser.has_content());
1258 
1259  parser.next();
1260 
1261  TEST_REJECT(parser.has_part());
1262  }
1263  { // parse untagged input
1264  TaggedContentParser parser("hi");
1265  TEST_EXPECT(parser.has_part());
1266  TEST_REJECT(parser.has_tag());
1267  TEST_EXPECT(parser.get_content().compare("hi") == 0);
1268  parser.next();
1269  TEST_REJECT(parser.has_part());
1270  }
1271  { // parse empty input
1272  TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1273  TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1274  }
1275  { // parse single tag w/o content
1276  TaggedContentParser parser(" [hello] ");
1277  TEST_EXPECT(parser.has_part());
1278  TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1279  TEST_REJECT(parser.has_content());
1280  parser.next();
1281  TEST_REJECT(parser.has_part());
1282  }
1283  { // parse multi-tags
1284  TaggedContentParser parser(" [ t1 , t2 ] t");
1285  TEST_EXPECT(parser.has_part());
1286  TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1287  TEST_EXPECT(parser.get_content().compare("t") == 0);
1288  parser.next();
1289  TEST_EXPECT(parser.has_part());
1290  TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1291  TEST_EXPECT(parser.get_content().compare("t") == 0);
1292  parser.next();
1293  TEST_REJECT(parser.has_part());
1294  }
1295 }
1296 
// Calls GBS_merge_tagged_strings() and expects its result to equal 'expected'.
// The result is freed afterwards.
// Note the argument order: the macro takes both tags (t1,t2) first, then r1,r2
// (presumably the tags to replace/delete - see "merge two tagged string with deleting"
// in the test below), then both strings (s1,s2), whereas the function takes
// them grouped per string (s1,t1,r1, s2,t2,r2).
1297 #define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
1298  char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1299  TEST_EXPECT_EQUAL(result, expected); \
1300  free(result); \
1301  } while(0)
1302 
// Same as TEST_MERGE_TAGGED, but compares via TEST_EXPECT_EQUAL__BROKEN, passing
// the wanted result ('expected') and 'got' (presumably the currently delivered,
// known-broken result - confirm in test_unit.h).
1303 #define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
1304  char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1305  TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
1306  free(result); \
1307  } while(0)
1308 
// Tests GBS_merge_tagged_strings() via TEST_MERGE_TAGGED.
// Arguments of each case are: tag1, tag2, r1, r2, string1, string2, expected result.
// The expected results show tags being reported in upper case and in sorted order,
// and identical contents being joined under a combined tag list.
1309 void TEST_merge_tagged_strings() {
1310  // merge two fields:
1311  const char *_0 = NULp;
1312 
1313  TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1314  TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
1315  TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
1316  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");
1317 
1318  TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1319  TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1320 
1321  // update fields:
1322  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1323  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1324  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1325  TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1326  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1327 
1328  // append (opposed to update this keeps old entries with same tag; useless?)
1329  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1330  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1331  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1332 
1333  // merge three fields:
1334  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1335  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1336  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1337  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1338  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1339 
1340  // same tests as in section above, but vv:
1341  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1342  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1343  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1344  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1345  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1346 
1347  // test real-merges (content existing in both strings):
1348  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1349  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1350  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1351  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1352  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1353  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1354  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1355  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1356  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1357 
1358  // merge two tagged string with deleting
1359 #define DSTSRC1 "[DST] dest1 [SRC] src1"
1360 #define DSTSRC2 "[DST] dest2 [SRC] src2"
1361 #define DSTSRC2LOW "[dst] dest2 [src] src2"
1362 
1363  TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1364  TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1365  TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1366  TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1367  TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1368  TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1369  TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1370  TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1371  TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1372  TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1373  TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1374  TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1375  TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1376  TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1377 }
1378 
1379 __ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
1380  GB_shell shell;
1381  GBDATA *gb_main = GB_open("new.arb", "c");
1382  {
1383  GB_transaction ta(gb_main);
1384 
1385  {
1386  GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1387  TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1388  TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1389 
1390  GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1391  GB_UINT4 ints[] = { 1, 2 };
1392  TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1393  TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1394  }
1395 
1396 #define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1397 #define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1398 #define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1399 
1400  GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1401  const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1402  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1403 
1404  TEST_EXPECT_FULL_CONTENT(NULp);
1405  TEST_EXPECT_FULL_CONTENT("");
1406  TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1407 
1408  TEST_EXPECT_TAG_CONTENT("T1", "t12");
1409  TEST_EXPECT_TAG_CONTENT("T2", "t12");
1410  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1411  TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1412 
1413  TEST_EXPECT_TAG_CONTENT("AA", "aa");
1414  TEST_EXPECT_TAG_CONTENT("BB", "bb");
1415  TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1416  TEST_EXPECT_TAG_CONTENT("XX", "x1");
1417  TEST_EXPECT_TAG_CONTENT("YY", "yy");
1418  TEST_EXPECT_TAG_CONTENT("yy", "yy");
1419 
1420  TEST_REJECT_TAG_CONTENT("Y");
1421  // TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1422 
1423  TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1424  TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1425  TEST_EXPECT_TAG_CONTENT("fake", "fake");
1426 
1427  TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1428  TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1429  TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1430  TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1431  TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1432  TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1433  TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1434  TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1435 
1436  TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1437  TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1438  TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1439  TEST_EXPECT_TAG_CONTENT("LAST", "last");
1440 
1441  // test incomplete tags
1442  tagged_string = "bla [WHATEVER hello";
1443  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1444  TEST_REJECT_TAG_CONTENT("WHATEVER");
1445 
1446  tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1447  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1448  TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1449  TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1450  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1451 
1452  // test pathological tags
1453  tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1454  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1455  TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1456  TEST_EXPECT_FULL_CONTENT("");
1457  TEST_REJECT_TAG_CONTENT(" ");
1458  TEST_REJECT_TAG_CONTENT(" ");
1459  TEST_REJECT_TAG_CONTENT(",");
1460  TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1461  TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1462  TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1463  TEST_EXPECT_TAG_CONTENT(" ", "us");
1464  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1465  }
1466  GB_close(gb_main);
1467 }
1468 
// Calls GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects
// a result equal to 'expected' without any error being exported.
// Relies on a variable named 'callEnv' being in scope at the place of use.
1469 #define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
1470  TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
1471  GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1472  expected); \
1473  }while(0)
1474 
// Calls GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects
// no result, but an exported error containing 'expectedErrorPart'.
// Relies on a variable named 'callEnv' being in scope at the place of use.
1475 #define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
1476  TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
1477  GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1478  expectedErrorPart); \
1479  }while(0)
1480 
1481 __ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
1482  GB_shell shell;
1483  GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1484  {
1485  GB_transaction ta(gb_main);
1486  GBL_env env(gb_main, "tree_missing");
1487 
1488  {
1489  GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1490  TEST_REJECT_NULL(gb_species);
1491  GBL_call_env callEnv(gb_species, env);
1492 
1493  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1494  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1495  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1496  TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1497  TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1498 
1499  // empty tags:
1500  TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1501  TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1502  TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1503  TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1504  TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1505 
1506 #define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1507 #define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1508 
1509  // dont eval:
1510  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1511  // eval SRT:
1512  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top");
1513  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top");
1514  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top");
1515  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top");
1516  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1517  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":*=*-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1518  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1519  // eval ACI:
1520  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1521  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1522 
1523  // test SRT/ACI errors:
1524  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1525  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1526  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1527  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1528 
1529  // no error raised, if expression not applied:
1530  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1531 
1532  // incomplete tags
1533  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":*=<*>", "[DEF] <{no tag>");
1534  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,{no>");
1535  TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag,{no>");
1536  TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* *=<*2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1537  TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag{},{no>");
1538  TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* *=<*2,*1>", "[DEF] <{tag{} xx,{no>");
1539  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,no>");
1540  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1541  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1542  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1543  TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1544  TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":*=<*>", "[DEF] <kept {trunk>");
1545  TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":*=<*>", "[DEF] <kept>");
1546  }
1547 
1548  {
1549  GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1550  TEST_REJECT_NULL(gb_species);
1551  GBL_call_env callEnv(gb_species, env);
1552 
1553  // run scripts using context:
1554  TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1555  TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(|sequence|len)", "[T1] seqlen=$l [T2] seqlen=165");
1556  TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1557 
1558  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1559  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1560 
1561  // content before 1st tag:
1562  TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1563  TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1564 
1565  // test elimination of leading/trailing whitespace:
1566  TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1567  TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1568  TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1569 
1570 #define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1571 
1572  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1573  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1574  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1575  }
1576  }
1577  GB_close(gb_main);
1578 }
1579 
1580 void TEST_log_action() {
1581  for (int stamped = 0; stamped<=1; ++stamped) {
1582  TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1583  {
1584  char *logged = GBS_log_action_to("comment", "action", stamped);
1585  if (stamped) {
1586  TEST_EXPECT_CONTAINS(logged, "comment\n");
1587  TEST_EXPECT_CONTAINS(logged, "action\n");
1588  }
1589  else {
1590  TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1591  }
1592  free(logged);
1593  }
1594  {
1595  char *logged = GBS_log_action_to("comment\n", "action", stamped);
1596  if (stamped) {
1597  TEST_EXPECT_CONTAINS(logged, "comment\n");
1598  TEST_EXPECT_CONTAINS(logged, "action\n");
1599  }
1600  else {
1601  TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1602  }
1603  free(logged);
1604  }
1605  {
1606  char *logged = GBS_log_action_to("", "action", stamped);
1607  if (stamped) {
1608  TEST_EXPECT_EQUAL(logged[0], '\n');
1609  TEST_EXPECT_CONTAINS(logged, "action\n");
1610  }
1611  else {
1612  TEST_EXPECT_EQUAL(logged, "\naction\n");
1613  }
1614  free(logged);
1615  }
1616  {
1617  char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1618  if (stamped) {
1619  TEST_EXPECT_DIFFERENT(logged[0], '\n');
1620  TEST_EXPECT_CONTAINS(logged, "action\n");
1621  }
1622  else {
1623  TEST_EXPECT_EQUAL(logged, "action\n");
1624  }
1625  free(logged);
1626  }
1627  }
1628 }
1629 TEST_PUBLISH(TEST_log_action);
1630 
1631 #endif // UNIT_TESTS
1632 
static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash)
Definition: adstring.cxx:660
static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result)
Definition: adstring.cxx:675
void GBS_hash_do_const_sorted_loop(const GB_HASH *hs, gb_hash_const_loop_type func, gbs_hash_compare_function sorter, void *client_data)
Definition: adhash.cxx:641
TextRef partBehind(const TextRef &subref) const
Definition: adstring.cxx:492
const char * GB_ERROR
Definition: arb_core.h:25
string result
bool defined() const
Definition: adstring.cxx:411
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
TextRef(const char *data_, int length_)
Definition: adstring.cxx:408
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
Definition: adlang1.cxx:914
static TextRef textBetween(const TextRef &t1, const TextRef &t2)
Definition: adstring.cxx:478
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:454
static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del)
Definition: adstring.cxx:607
size_t GBS_shorten_repeated_data(char *data)
Definition: adstring.cxx:357
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1387
static const char * EMPTY_KEY_NOT_ALLOWED
Definition: adstring.cxx:62
static void g_bs_merge_tags(const char *tag, long, void *cd_sub_result)
Definition: adstring.cxx:653
int orgLen
Definition: rns.c:12
int ARB_stricmp(const char *s1, const char *s2)
Definition: arb_str.h:28
const char * get_data() const
Definition: adstring.cxx:412
static char * g_bs_get_string_of_tag_hash(GB_HASH *tag_hash)
Definition: adstring.cxx:689
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1060
TextRef trimmed() const
Definition: adstring.cxx:463
#define TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(cb)
Definition: test_unit.h:1253
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
GB_ERROR GB_check_key(const char *key)
Definition: adstring.cxx:85
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
char * release()
Definition: arb_strbuf.h:129
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:538
TextRef headTrimmed() const
Definition: adstring.cxx:442
void cat(const char *from)
Definition: arb_strbuf.h:199
char * ARB_strpartdup(const char *start, const char *end)
Definition: arb_string.h:51
bool is_part_of(const TextRef &other) const
Definition: adstring.cxx:470
void GBS_fwrite_string(const char *strngi, FILE *out)
Definition: adstring.cxx:807
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
FILE * seq
Definition: rns.c:46
GB_CSTR GB_getenvARBHOME(void)
Definition: adsocket.cxx:579
GB_CSTR GBS_find_string(GB_CSTR cont, GB_CSTR substr, int match_mode)
Definition: admatch.cxx:103
TaggedContentParser(const char *input_)
Definition: adstring.cxx:574
#define TEST_EXPECT_SEGFAULT__UNWANTED(cb)
Definition: test_unit.h:1257
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
#define TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(cb)
Definition: test_unit.h:1254
unsigned int GB_UINT4
Definition: arbdb_base.h:37
int compare(const char *str) const
Definition: adstring.cxx:417
void putlong(long l)
Definition: arb_strbuf.h:240
#define TEST_EXPECT_CONTAINS(str, part)
Definition: test_unit.h:1316
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:257
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
TextRef tailTrimmed() const
Definition: adstring.cxx:452
__ATTR__USERESULT GB_ERROR check_key(const char *key, int len)
Definition: adstring.cxx:64
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
Definition: arbdb.cxx:1781
const char * find(char c) const
Definition: adstring.cxx:475
char * GBS_replace_tabs_by_spaces(const char *text)
Definition: adstring.cxx:923
const char * get_following() const
Definition: adstring.cxx:415
static long g_bs_free_hash_of_hashes_elem(const char *, long val, void *)
Definition: adstring.cxx:700
char * GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char)
Definition: adstring.cxx:175
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
Definition: adstring.cxx:319
#define TEST_REJECT(cond)
Definition: test_unit.h:1330
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1325
static void error(const char *msg)
Definition: mkptypes.cxx:96
char * GB_memdup(const char *source, size_t len)
Definition: adstring.cxx:56
#define GB_KEY_LEN_MAX
Definition: gb_key.h:25
#define GB_KEY_LEN_MIN
Definition: gb_key.h:26
const TextRef & get_tag() const
Definition: adstring.cxx:582
const TextRef & get_content() const
Definition: adstring.cxx:583
TextRef partBefore(const TextRef &subref) const
Definition: adstring.cxx:488
bool has_content() const
Definition: adstring.cxx:577
#define cmp(h1, h2)
Definition: admap.cxx:50
void cat_wrapped(const char *in, const char *from)
Definition: arb_strbuf.h:244
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
static char * GBS_string_2_key_with_exclusions(const char *str, const char *additional)
Definition: adstring.cxx:31
char * GBS_string_2_key(const char *str)
Definition: adstring.cxx:52
char * GBS_fconvert_string(char *buffer)
Definition: adstring.cxx:880
uint32_t crctab[]
Definition: adstring.cxx:264
int get_length() const
Definition: adstring.cxx:413
const char * GBS_funptr2readable(void *funptr, bool stripARBHOME)
Definition: adstring.cxx:1007
char * ARB_strupper(char *s)
Definition: arb_str.h:63
void nl()
Definition: test_unit.h:415
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1250
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
Definition: adhash.cxx:545
#define __ATTR__REDUCED_OPTIMIZE
Definition: test_unit.h:83
char tail() const
Definition: adstring.cxx:440
GB_CSTR GB_getenv(const char *env)
Definition: adsocket.cxx:709
Definition: arbdb.h:72
void spaced(const char *word)
Definition: test_unit.h:419
static void g_bs_free_hash_of_hashes(GB_HASH *hash)
Definition: adstring.cxx:705
void ncat(const char *from, size_t count)
Definition: arb_strbuf.h:189
char * GB_read_as_tagged_string(GBDATA *gbd, const char *tagi)
Definition: adstring.cxx:776
#define TEST_EXPECT_NO_SEGFAULT__WANTED(cb)
Definition: test_unit.h:1251
#define gb_assert(cond)
Definition: arbdbt.h:11
static char * dated_info(const char *info)
Definition: adstring.cxx:958
long GBS_gcgchecksum(const char *seq)
Definition: adstring.cxx:246
char * ARB_strndup(const char *start, int len)
Definition: arb_string.h:83
#define TEST_EXPECT_CODE_ASSERTION_FAILS(cb)
Definition: test_unit.h:1252
aisc_com * link
int icompare(const char *str) const
Definition: adstring.cxx:427
static char * GBS_fread_string(FILE *in)
Definition: adstring.cxx:849
char * GBS_trim(const char *str)
Definition: adstring.cxx:947
GB_ERROR GB_write_ints(GBDATA *gbd, const GB_UINT4 *i, long size)
Definition: arbdb.cxx:1439
#define TEST_EXPECT_NO_SEGFAULT(cb)
Definition: test_unit.h:1250
#define __ATTR__USERESULT
Definition: attributes.h:58
bool empty() const
Definition: arb_strbuf.h:118
char * copy() const
Definition: adstring.cxx:437
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
char head() const
Definition: adstring.cxx:439
TextRef(const char *zeroTerminated)
Definition: adstring.cxx:409
#define TEST_EXPECT_SEGFAULT__WANTED(cb)
Definition: test_unit.h:1256
#define TEST_EXPECT_SEGFAULT(cb)
Definition: test_unit.h:1255
bool white(int ch)
int GBS_HCF_sortedByKey(const char *k0, long dummy_1x, const char *k1, long dummy_2x)
Definition: adhash.cxx:653
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:352
static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value)
Definition: adstring.cxx:590
#define NULp
Definition: cxxforward.h:116
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
Definition: test_unit.h:1114
#define TEST_EXPECT_DIFFERENT(expr, want)
Definition: test_unit.h:1301
bool ARB_strBeginsWith(const char *str, const char *with)
Definition: arb_str.h:42
char * GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2)
Definition: adstring.cxx:710
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
Definition: gb_aci.cxx:361
static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env &env)
Definition: adstring.cxx:628
char * GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char)
Definition: adstring.cxx:124
bool has_tag() const
Definition: adstring.cxx:576
bool has_part() const
Definition: adstring.cxx:580
GB_transaction ta(gb_var)
char * GBS_log_action_to(const char *comment, const char *action, bool stamp)
Definition: adstring.cxx:976
GBDATA * gb_main
Definition: adname.cxx:32
size_t length
char * GBS_eval_env(GB_CSTR p)
Definition: adstring.cxx:212
char * GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env &env)
Definition: adstring.cxx:747
static int info[maxsites+1]
GB_ERROR GB_check_hkey(const char *key)
Definition: adstring.cxx:92
const char * GB_CSTR
Definition: arbdb_base.h:25
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
char * release_memfriendly()
Definition: arb_strbuf.h:133
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
void put(char c)
Definition: arb_strbuf.h:174
Definition: arbdb.h:66
GB_write_int const char s
Definition: AW_awar.cxx:154