ARB
adstring.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adstring.cxx //
4 // Purpose : various string functions //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arb_backtrace.h>
12 #include <arb_strbuf.h>
13 #include <arb_defs.h>
14 #include <arb_str.h>
15 
16 #include "gb_key.h"
17 #include "gb_aci.h"
18 
19 #include <SigHandler.h>
20 
21 #include <execinfo.h>
22 
23 #include <cstdarg>
24 #include <cctype>
25 #include <cerrno>
26 #include <ctime>
27 #include <setjmp.h>
28 
29 #include <valgrind.h>
30 
31 static char *GBS_string_2_key_with_exclusions(const char *str, const char *additional) {
32  // converts any string to a valid key (all chars in 'additional' are additionally allowed)
33  char buf[GB_KEY_LEN_MAX+1];
34  int i;
35  int c;
36  for (i=0; i<GB_KEY_LEN_MAX;) {
37  c = *(str++);
38  if (!c) break;
39 
40  if (c==' ' || c == '_') {
41  buf[i++] = '_';
42  }
43  else if (isalnum(c) || strchr(additional, c)) {
44  buf[i++] = c;
45  }
46  }
47  for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48  buf[i] = 0;
49  return ARB_strdup(buf);
50 }
51 
52 char *GBS_string_2_key(const char *str) { // converts any string to a valid key
53  return GBS_string_2_key_with_exclusions(str, "");
54 }
55 
56 char *GB_memdup(const char *source, size_t len) {
57  char *dest = ARB_alloc<char>(len);
58  memcpy(dest, source, len);
59  return dest;
60 }
61 
62 static const char *EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
63 
64 inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) {
65  // test if 'key' is a valid non-hierarchical database key.
66  // i.e. contains only letters, numbers and '_' and
67  // is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX.
68 
69  if (len < GB_KEY_LEN_MIN) {
70  if (!len) return EMPTY_KEY_NOT_ALLOWED;
71  return GBS_global_string("Invalid key '%s': too short", key);
72  }
73  if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
74 
75  for (int i = 0; i<len; ++i) {
76  char c = key[i];
77  bool validChar = isalnum(c) || c == '_';
78  if (!validChar) {
79  return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80  }
81  }
82 
83  return NULp;
84 }
85 GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
86  // test if 'key' is a valid non-hierarchical database key
87  // (i.e. a valid name for a container or field).
88 
89  return check_key(key, key ? strlen(key) : 0);
90 }
91 
92 GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
93  // test whether 'key' is a hierarchical key,
94  // i.e. consists of subkeys (accepted by GB_check_key), separated by '/'.
95 
96  GB_ERROR err = NULp;
97 
98  if (key && key[0] == '/') ++key; // accept + remove leading '/'
99  if (!key || !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash)
100 
101  while (!err && key[0]) {
102  int nonSlashPart = strcspn(key, "/");
103 
104  err = check_key(key, nonSlashPart);
105  if (!err) {
106  key += nonSlashPart;
107  if (key[0] == '/') {
108  ++key;
109  if (key[0] == 0) { // nothing after slash
110  err = EMPTY_KEY_NOT_ALLOWED;
111  }
112  }
113  else {
114  gb_assert(key[0] == 0);
115  }
116  }
117  }
118  return err;
119 }
120 
121 // ----------------------------------------------
122 // escape/unescape characters in strings
123 
124 char *GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char) {
143  int len = strlen(str);
144  char *buffer = ARB_alloc<char>(2*len+1);
145  int j = 0;
146  int i;
147 
148  gb_assert(strlen(chars_to_escape) <= 26);
149  gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
150 
151  for (i = 0; str[i]; ++i) {
152  if (str[i] == escape_char) {
153  buffer[j++] = escape_char;
154  buffer[j++] = escape_char;
155  }
156  else {
157  const char *found = strchr(chars_to_escape, str[i]);
158  if (found) {
159  buffer[j++] = escape_char;
160  buffer[j++] = (found-chars_to_escape+'A');
161 
162  gb_assert(found[0]<'A' || found[0]>'Z'); // illegal character in chars_to_escape
163  }
164  else {
165 
166  buffer[j++] = str[i];
167  }
168  }
169  }
170  buffer[j] = 0;
171 
172  return buffer;
173 }
174 
175 char *GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char) {
177 
178  int len = strlen(str);
179  char *buffer = ARB_alloc<char>(len+1);
180  int j = 0;
181  int i;
182 
183 #if defined(ASSERTION_USED)
184  int escaped_chars_len = strlen(escaped_chars);
185 #endif // ASSERTION_USED
186 
187  gb_assert(strlen(escaped_chars) <= 26);
188  gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
189 
190  for (i = 0; str[i]; ++i) {
191  if (str[i] == escape_char) {
192  if (str[i+1] == escape_char) {
193  buffer[j++] = escape_char;
194  }
195  else {
196  int idx = str[i+1]-'A';
197 
198  gb_assert(idx >= 0 && idx<escaped_chars_len);
199  buffer[j++] = escaped_chars[idx];
200  }
201  ++i;
202  }
203  else {
204  buffer[j++] = str[i];
205  }
206  }
207  buffer[j] = 0;
208 
209  return buffer;
210 }
211 
213  GB_ERROR error = NULp;
214  GB_CSTR ka;
215  GBS_strstruct *out = GBS_stropen(1000);
216 
217  while ((ka = GBS_find_string(p, "$(", 0))) {
218  GB_CSTR kz = strchr(ka, ')');
219  if (!kz) {
220  error = GBS_global_string("missing ')' for envvar '%s'", p);
221  break;
222  }
223  else {
224  char *envvar = ARB_strpartdup(ka+2, kz-1);
225  int len = ka-p;
226 
227  if (len) GBS_strncat(out, p, len);
228 
229  GB_CSTR genv = GB_getenv(envvar);
230  if (genv) GBS_strcat(out, genv);
231 
232  p = kz+1;
233  free(envvar);
234  }
235  }
236 
237  if (error) {
238  GB_export_error(error);
239  GBS_strforget(out);
240  return NULp;
241  }
242 
243  GBS_strcat(out, p); // copy rest
244  return GBS_strclose(out);
245 }
246 
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum
    //
    // Sums up the uppercased characters of 'seq', each multiplied by a weight
    // cycling through 1..57, and returns that sum modulo 10000.

    const long seqlen = strlen(seq);
    long       check  = 0;
    long       count  = 0;

    for (long i = 0; i < seqlen; i++) {
        count++;
        // cast needed: toupper() is undefined for negative values (plain char may be signed)
        check += count * toupper((unsigned char)seq[i]);
        if (count == 57) count = 0;
    }

    return check % 10000;
}
263 
264 // Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
265 uint32_t crctab[] = {
266  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
267  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
268  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
269  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
270  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
271  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
272  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
273  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
274  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
275  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
276  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
277  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
278  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
279  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
280  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
281  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
282  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
283  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
284  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
285  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
286  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
287  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
288  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
289  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
290  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
291  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
292  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
293  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
294  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
295  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
296  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
297  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
298  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
299  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
300  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
301  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
302  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
303  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
304  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
305  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
306  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
307  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
308  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
309  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
310  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
311  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
312  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
313  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
314  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
315  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
316  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
317  0x2d02ef8dL
318 };
319 
320 uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) {
321  /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
322  * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
323  */
324 
325  unsigned long c = 0xffffffffL;
326  long n = length;
327  int i;
328  int tab[256]; // @@@ avoid recalc for each call
329 
330  for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
331  tab[i] = ignore_case ? toupper(i) : i;
332  }
333 
334  if (exclude) {
335  while (1) {
336  int k = *(unsigned char *)exclude++;
337  if (!k) break;
338  tab[k] = 0;
339  if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
340  }
341  }
342 
343  while (n--) {
344  i = tab[*(const unsigned char *)seq++];
345  if (i) {
346  c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
347  }
348  }
349  c = c ^ 0xffffffffL;
350  return c;
351 }
352 
353 uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) {
354  // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
355  return GB_checksum(seq, strlen(seq), ignore_case, exclude);
356 }
357 
size_t GBS_shorten_repeated_data(char *data) {
    // Shortens repeats in 'data' (in place, i.e. 'data' gets modified!).
    // Runs of 5 or more identical characters are replaced by "<char>{<count>}",
    // e.g. "..............................ACGT....................TGCA"
    // -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    const size_t orgLen = strlen(data);
#endif // DEBUG
    char *const  start = data;
    const char  *in    = data;
    char        *out   = data;

    while (*in) {
        const char ch  = *in;
        size_t     run = 1;

        while (in[run] == ch) ++run; // stops at latest at the terminating zero-byte
        in += run;

        if (run >= 5) {
            // the replacement (incl. the zero-byte written by sprintf) never exceeds the run replaced
            out += sprintf(out, "%c{%zu}", ch, run); // insert repeat count
        }
        else {
            size_t r;
            for (r = 0; r<run; r++) *out++ = ch; // insert plain
        }
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(start) <= orgLen);
#endif // DEBUG
    return out-start;
}
398 
399 
400 // ------------------------------------------
401 // helper classes for tagged fields
402 
403 class TextRef {
404  const char *data; // has no terminal zero-byte!
405  int length;
406 
407 public:
408  TextRef() : data(NULp), length(-1) {}
409  TextRef(const char *data_, int length_) : data(data_), length(length_) {}
410  explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
411 
412  bool defined() const { return data && length>0; }
413  const char *get_data() const { return data; }
414  int get_length() const { return length; }
415 
416  const char *get_following() const { return data ? data+length : NULp; }
417 
418  int compare(const char *str) const {
419  gb_assert(defined());
420  int cmp = strncmp(get_data(), str, get_length());
421  if (!cmp) {
422  if (str[get_length()]) {
423  cmp = -1; // right side contains more content
424  }
425  }
426  return cmp;
427  }
428  int icompare(const char *str) const {
429  gb_assert(defined());
430  int cmp = strncasecmp(get_data(), str, get_length());
431  if (!cmp) {
432  if (str[get_length()]) {
433  cmp = -1; // right side contains more content
434  }
435  }
436  return cmp;
437  }
438  char *copy() const { return ARB_strndup(get_data(), get_length()); }
439 
440  char head() const { return defined() ? data[0] : 0; }
441  char tail() const { return defined() ? data[length-1] : 0; }
442 
444  if (defined()) {
445  for (int s = 0; s<length; ++s) {
446  if (!isspace(data[s])) {
447  return TextRef(data+s, length-s);
448  }
449  }
450  }
451  return TextRef();
452  }
454  if (defined()) {
455  for (int s = length-1; s>=0; --s) {
456  if (!isspace(data[s])) {
457  return TextRef(data, s+1);
458  }
459  }
460  }
461  return TextRef();
462  }
463 
464  TextRef trimmed() const {
465  return headTrimmed().tailTrimmed();
466  }
467 
468  inline TextRef partBefore(const TextRef& subref) const;
469  inline TextRef partBehind(const TextRef& subref) const;
470 
471  bool is_part_of(const TextRef& other) const {
472  gb_assert(defined() && other.defined());
473  return get_data()>=other.get_data() && get_following()<=other.get_following();
474  }
475 
476  const char *find(char c) const { return reinterpret_cast<const char*>(memchr(get_data(), c, get_length())); }
477 };
478 
479 static TextRef textBetween(const TextRef& t1, const TextRef& t2) {
480  const char *behind_d1 = t1.get_following();
481  const char *d2 = t2.get_data();
482 
483  if (behind_d1 && d2 && behind_d1<d2) {
484  return TextRef(behind_d1, d2-behind_d1);
485  }
486  return TextRef();
487 }
488 
489 inline TextRef TextRef::partBefore(const TextRef& subref) const {
490  gb_assert(subref.is_part_of(*this));
491  return textBetween(TextRef(get_data(), 0), subref);
492 }
493 inline TextRef TextRef::partBehind(const TextRef& subref) const {
494  gb_assert(subref.is_part_of(*this));
495  return TextRef(subref.get_following(), get_following()-subref.get_following());
496 }
497 
499  TextRef wholeInput;
500  TextRef tag, content; // current position
501  TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
502  TextRef nextBrackets; // next "[..]" part (behind current tag)
503 
504  void findBrackets(const char *in) {
505  nextBrackets = TextRef();
506  const char *tag_start = strchr(in, '[');
507  if (tag_start) {
508  const char *tag_end = strchr(tag_start, ']');
509  if (tag_end) {
510  if (tag_end == tag_start+1) { // empty tag -> use as content
511  findBrackets(tag_end+1);
512  }
513  else {
514  const char *unwanted_bracket = reinterpret_cast<const char*>(memchr(tag_start+1, '[', tag_end-tag_start-1));
515  if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
516  findBrackets(unwanted_bracket);
517  }
518  else {
519  TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
520  if (name.defined()) { // not only whitespace inside brackets
521  nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
522  }
523  else {
524  findBrackets(tag_end+1);
525  }
526  }
527  }
528  }
529  }
530  }
531 
532  void parse_next_multi_tag() {
533  gb_assert(restTags.defined());
534  TextRef comma(restTags.find(','), 1);
535  if (comma.defined()) {
536  tag = restTags.partBefore(comma).tailTrimmed();
537  restTags = restTags.partBehind(comma).headTrimmed();
538  }
539  else {
540  tag = restTags;
541  restTags = TextRef();
542  }
543  }
544  void parse_next() {
545  if (restTags.defined()) {
546  parse_next_multi_tag();
547  }
548  else if (nextBrackets.defined()) {
549  TextRef brackets = nextBrackets;
550  findBrackets(brackets.get_following());
551 
552  content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
553 
554  gb_assert(brackets.head() == '[' && brackets.tail() == ']');
555 
556  TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
557  gb_assert(tags.defined());
558 
559  restTags = tags;
560  parse_next_multi_tag();
561  }
562  else {
563  tag = content = TextRef();
564  gb_assert(!has_part());
565  }
566  }
567  void parse_first() {
568  gb_assert(!has_part());
569  findBrackets(wholeInput.get_data());
570  content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
571  if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
572  }
573 
574 public:
575  TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
576 
577  bool has_tag() const { return tag.defined(); }
578  bool has_content() const { return content.defined(); }
579 
580  void next() { parse_next(); }
581  bool has_part() const { return has_tag() || has_content(); } // false -> parser has finished
582 
583  const TextRef& get_tag() const { return tag; }
584  const TextRef& get_content() const { return content; }
585 };
586 
587 
588 // -------------------------------------------
589 // helper function for tagged fields
590 
591 static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value) {
592  if (!value[0]) return; // ignore empty values
593 
594  {
595  char *p;
596  p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
597  p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
598  }
599 
600  GB_HASH *sh = (GB_HASH *)GBS_read_hash(hash, value);
601  if (!sh) {
602  sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
603  GBS_write_hash(hash, value, (long)sh);
604  }
605  GBS_write_hash(sh, tag, 1);
606 }
607 
608 static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del) {
609  TaggedContentParser parser(s);
610  while (parser.has_part()) {
611  if (parser.has_content()) {
612  char *content = parser.get_content().copy();
613  if (parser.has_tag()) {
614  char *tag = parser.get_tag().copy();
615  if (!del || ARB_stricmp(tag, del) != 0) {
616  g_bs_add_value_tag_to_hash(hash, tag, content);
617  }
618  free(tag);
619  }
620  else {
621  g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
622  }
623  free(content);
624  }
625  parser.next();
626  }
627 }
628 
629 static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env& env) {
630  GB_ERROR error = NULp;
631 
632  TaggedContentParser parser(s);
633  while (parser.has_part() && !error) {
634  if (parser.has_content()) {
635  char *value = parser.get_content().copy();
636  char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
637 
638  if (rtag && ARB_stricmp(tag, rtag) == 0) {
639  freeset(value, GB_command_interpreter_in_env(value, aci, env));
640  if (!value) error = GB_await_error();
641  }
642 
643  if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
644 
645  free(tag);
646  free(value);
647  }
648  parser.next();
649  }
650 
651  return error;
652 }
653 
654 static void g_bs_merge_tags(const char *tag, long /*val*/, void *cd_sub_result) {
655  GBS_strstruct *sub_result = (GBS_strstruct*)cd_sub_result;
656 
657  GBS_strcat(sub_result, tag);
658  GBS_strcat(sub_result, ",");
659 }
660 
// Callback passed to GBS_hash_do_const_sorted_loop by g_bs_get_string_of_tag_hash.
// Builds an uppercased key ending in a running number and stores a heap copy of
// 'value' under that key in the hash passed via 'cd_g_bs_collect_tags_hash'.
// NOTE(review): 'subhash' is not used by the visible statements and the embedded
// line numbering skips one line behind GBS_stropen() - presumably the statement
// collecting the tags of 'subhash' into 'sub_result' is missing here; TODO confirm
// against the original file.
661 static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash) {
662  static int counter = 0;
663 
664  GBS_strstruct *sub_result = GBS_stropen(100);
666  GBS_intcat(sub_result, counter++); // create a unique number
667 
668  char *str = ARB_strupper(GBS_strclose(sub_result));
669 
670  GB_HASH *g_bs_collect_tags_hash = (GB_HASH*)cd_g_bs_collect_tags_hash;
671  GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
672 
673  free(str);
674 }
675 
676 static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result) {
677  GBS_strstruct *merge_result = (GBS_strstruct*)cd_merge_result;
678 
679  char *lk = const_cast<char*>(strrchr(tag, ','));
680  if (lk) { // remove number at end
681  *lk = 0;
682 
683  if (!merge_result->empty()) merge_result->put(' '); // skip trailing space
684  merge_result->put('[');
685  merge_result->cat(tag);
686  merge_result->put(']');
687  merge_result->put(' ');
688  }
689  merge_result->cat((char*)value);
690 }
691 
// Converts 'tag_hash' into one string and returns it (result of GBS_strclose).
// NOTE(review): 'collect_tags_hash' is not declared in the visible lines and
// nothing visible writes to 'merge_result'; the embedded line numbering skips
// two lines - presumably the creation of 'collect_tags_hash' and the loop
// feeding 'merge_result' are missing from this listing; TODO confirm.
692 static char *g_bs_get_string_of_tag_hash(GB_HASH *tag_hash) {
693  GBS_strstruct *merge_result = GBS_stropen(256);
695 
696  GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
698 
699  GBS_free_hash(collect_tags_hash);
700  return GBS_strclose(merge_result);
701 }
702 
703 static long g_bs_free_hash_of_hashes_elem(const char */*key*/, long val, void *) {
704  GB_HASH *hash = (GB_HASH*)val;
705  if (hash) GBS_free_hash(hash);
706  return 0;
707 }
710  GBS_free_hash(hash);
711 }
712 
// NOTE(review): 'hash' is not declared in the visible lines; the embedded line
// numbering skips one line behind the declaration of 't2' and one line behind
// the call of g_bs_get_string_of_tag_hash - presumably creation and cleanup of
// 'hash' are missing from this listing; TODO confirm.
713 char *GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2) {
714  /* Create a tagged string from two tagged strings:
715  * a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
716  *
717  * if 's2' is not empty, then delete tag 'replace1' in 's1'
718  * if 's1' is not empty, then delete tag 'replace2' in 's2'
719  *
720  * (result should never be NULp)
721  */
722 
723  char *str1 = ARB_strdup(s1);
724  char *str2 = ARB_strdup(s2);
 // 'tag1'/'tag2' are converted into valid keys; they serve as default tags for untagged content
725  char *t1 = GBS_string_2_key(tag1);
726  char *t2 = GBS_string_2_key(tag2);
728 
 // a tag is only deleted from one string if the other string is not empty
729  if (!s1[0]) replace2 = NULp;
730  if (!s2[0]) replace1 = NULp;
731 
 // empty replace-tags are handled like NULp (= delete nothing)
732  if (replace1 && !replace1[0]) replace1 = NULp;
733  if (replace2 && !replace2[0]) replace2 = NULp;
734 
735  g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
736  g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
737 
738  char *result = g_bs_get_string_of_tag_hash(hash);
739 
741 
742  free(t2);
743  free(t1);
744  free(str2);
745  free(str1);
746 
747  return result;
748 }
749 
// NOTE(review): 'hash' is not declared in the visible lines; the embedded line
// numbering skips one line behind the declaration of 'default_tag' and one line
// behind the if/else - presumably creation and cleanup of 'hash' are missing
// from this listing; TODO confirm.
750 char *GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env& env) {
751  /* if 's' is untagged, tag it with default tag 'dt'.
752  * if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
753  *
754  * if result is NULp, an error has been exported.
755  */
756 
757  char *str = ARB_strdup(s);
 // 'dt' is converted into a valid key before it is used as default tag
758  char *default_tag = GBS_string_2_key(dt);
760  char *result = NULp;
761 
762  GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
763 
764  if (!error) {
765  result = g_bs_get_string_of_tag_hash(hash);
766  }
767  else {
 // 'result' stays NULp
768  GB_export_error(error);
769  }
770 
772 
773  free(default_tag);
774  free(str);
775 
776  return result;
777 }
778 
779 char *GB_read_as_tagged_string(GBDATA *gbd, const char *tagi) {
780  char *buf = GB_read_as_string(gbd);
781  if (buf && tagi && tagi[0]) {
782  TaggedContentParser parser(buf);
783 
784  char *wantedTag = GBS_string_2_key(tagi);
785  char *contentFound = NULp;
786 
787  while (parser.has_part() && !contentFound) {
788  if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
789  contentFound = parser.get_content().copy();
790  }
791  parser.next();
792  }
793  free(wantedTag);
794  free(buf);
795 
796  return contentFound;
797  }
798  return buf;
799 }
800 
801 
802 /* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
803  * used as well to save perl macros
804  *
805  * when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
806  *
807  * always keep in mind, that many users have databases/macros written with older
808  * versions of this function. They MUST load proper!!!
809  */
void GBS_fwrite_string(const char *strngi, FILE *out) {
    // Writes 'strngi' enclosed in double quotes to 'out', using the following encoding:
    // - newline and tab are written as \n and \t
    // - other characters ASCII 1..24 are written as \A..\X
    // - characters ASCII 25..31 are written as \0..\6
    // - '"' and '\' are prefixed with a backslash
    // - all other characters are written unchanged

    const unsigned char *cur = (const unsigned char *)strngi;
    int                  c;

    putc('"', out);

    for (c = *cur++; c; c = *cur++) {
        switch (c) {
            case '\n':
                putc('\\', out);
                putc('n', out);
                break;
            case '\t':
                putc('\\', out);
                putc('t', out);
                break;
            case '"':
            case '\\':
                putc('\\', out);
                putc(c, out);
                break;
            default:
                if (c >= 32) {
                    putc(c, out);
                }
                else {
                    putc('\\', out);
                    if (c<25) putc(c+'@', out);      // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
                    else      putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
                }
                break;
        }
    }
    putc('"', out);
}
844 
845 /* Read a string from a file written by GBS_fwrite_string,
846  * Searches first '"'
847  *
848  * WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
849  * any changes should be done in GBS_fconvert_string too.
850  */
851 
852 static char *GBS_fread_string(FILE *in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
853  GBS_strstruct *strstr = GBS_stropen(1024);
854  int x;
855 
856  while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
857 
858  if (x != EOF) {
859  while ((x = getc(in)) != '"') {
860  if (x == EOF) break;
861  if (x == '\\') {
862  x = getc(in); if (x==EOF) break;
863  if (x == 'n') {
864  GBS_chrcat(strstr, '\n');
865  continue;
866  }
867  if (x == 't') {
868  GBS_chrcat(strstr, '\t');
869  continue;
870  }
871  if (x>='@' && x <= '@' + 25) {
872  GBS_chrcat(strstr, x-'@');
873  continue;
874  }
875  if (x>='0' && x <= '9') {
876  GBS_chrcat(strstr, x-('0'-25));
877  continue;
878  }
879  // all other backslashes are simply skipped
880  }
881  GBS_chrcat(strstr, x);
882  }
883  }
884  return GBS_strclose(strstr);
885 }
886 
887 /* does similar decoding as GBS_fread_string but works directly on an existing buffer
888  * (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
889  *
890  * inserts \0 behind decoded string (removes the closing '"')
891  * returns a pointer behind the end (") of the _encoded_ string
892  * returns NULp if a 0-character is found
893  */
895  char *t = buffer;
896  char *f = buffer;
897  int x;
898 
899  gb_assert(f[-1] == '"');
900  // the opening " has already been read
901 
902  while ((x = *f++) != '"') {
903  if (!x) break;
904 
905  if (x == '\\') {
906  x = *f++;
907  if (!x) break;
908 
909  if (x == 'n') {
910  *t++ = '\n';
911  continue;
912  }
913  if (x == 't') {
914  *t++ = '\t';
915  continue;
916  }
917  if (x>='@' && x <= '@' + 25) {
918  *t++ = x-'@';
919  continue;
920  }
921  if (x>='0' && x <= '9') {
922  *t++ = x-('0'-25);
923  continue;
924  }
925  // all other backslashes are simply skipped
926  }
927  *t++ = x;
928  }
929 
930  if (!x) return NULp; // error (string should not contain 0-character)
931  gb_assert(x == '"');
932 
933  t[0] = 0;
934  return f;
935 }
936 
937 char *GBS_replace_tabs_by_spaces(const char *text) {
938  int tlen = strlen(text);
939  GBS_strstruct *mfile = GBS_stropen(tlen * 3/2 + 1);
940  int tabpos = 0;
941  int c;
942 
943  while ((c=*(text++))) {
944  if (c == '\t') {
945  int ntab = (tabpos + 8) & 0xfffff8;
946  while (tabpos < ntab) {
947  GBS_chrcat(mfile, ' ');
948  tabpos++;
949  }
950  continue;
951  }
952  tabpos ++;
953  if (c == '\n') {
954  tabpos = 0;
955  }
956  GBS_chrcat(mfile, c);
957  }
958  return GBS_strclose(mfile);
959 }
960 
961 char *GBS_trim(const char *str) {
962  // trim whitespace at beginning and end of 'str'
963  const char *whitespace = " \t\n";
964  while (str[0] && strchr(whitespace, str[0])) str++;
965 
966  const char *end = strchr(str, 0)-1;
967  while (end >= str && strchr(whitespace, end[0])) end--;
968 
969  return ARB_strpartdup(str, end);
970 }
971 
972 static char *dated_info(const char *info) {
973  char *dated_info = NULp;
974  time_t date;
975 
976  if (time(&date) != -1) {
977  char *dstr = ctime(&date);
978  char *nl = strchr(dstr, '\n');
979 
980  if (nl) nl[0] = 0; // cut off LF
981 
982  dated_info = GBS_global_string_copy("%s: %s", dstr, info);
983  }
984  else {
985  dated_info = ARB_strdup(info);
986  }
987  return dated_info;
988 }
989 
990 char *GBS_log_action_to(const char *comment, const char *action, bool stamp) {
998  size_t clen = comment ? strlen(comment) : 0;
999  size_t alen = strlen(action);
1000 
1001  GBS_strstruct *new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2*\n + \0 + space for stamp
1002 
1003  if (comment) {
1004  GBS_strcat(new_comment, comment);
1005  if (clen == 0 || comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
1006  }
1007 
1008  if (stamp) {
1009  char *dated_action = dated_info(action);
1010  GBS_strcat(new_comment, dated_action);
1011  free(dated_action);
1012  }
1013  else {
1014  GBS_strcat(new_comment, action);
1015  }
1016  if (alen == 0 || action[alen-1] != '\n') GBS_chrcat(new_comment, '\n');
1017 
1018  return GBS_strclose(new_comment);
1019 }
1020 
1021 const char *GBS_funptr2readable(void *funptr, bool stripARBHOME) {
1022  // only returns module and offset for static functions :-(
1023  char **funNames = backtrace_symbols(&funptr, 1);
1024  const char *readable_fun = funNames[0];
1025 
1026  if (stripARBHOME) {
1027  const char *ARBHOME = GB_getenvARBHOME();
1028  if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1029  readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1030  }
1031  }
1032  return readable_fun;
1033 }
1034 
1035 // --------------------------------------------------------------------------------
1036 
1037 #ifdef UNIT_TESTS
1038 
1039 #include <test_unit.h>
1040 
1041 // #define TEST_TEST_MACROS
1042 
// Helper functions passed to the TEST_EXPECT_..-macros by TEST_signal_tests__crashtest
// (only compiled if ENABLE_CRASH_TESTS is defined).
1043 #ifdef ENABLE_CRASH_TESTS
// raises SIGSEGV on purpose
1044 static void provokesegv() { raise(SIGSEGV); }
// counterpart doing nothing
1045 static void dont_provokesegv() {}
1046 # if defined(ASSERTION_USED)
// fails an assertion on purpose
1047 static void failassertion() { gb_assert(0); }
1048 # if defined(TEST_TEST_MACROS)
// counterpart doing nothing
1049 static void dont_failassertion() {}
1050 # endif
1051 static void provokesegv_does_not_fail_assertion() {
1052  // provokesegv does not raise assertion
1053  // -> the following assertion fails
1054  TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
1055 }
1056 # endif
1057 #endif
1058 
// Tests the TEST_EXPECT_..-macros detecting segfaults and failing assertions,
// using the helper functions defined above.
// NOTE(review): the embedded line numbering skips one line inside the
// TEST_TEST_MACROS section - one statement may be missing from this listing; TODO confirm.
1059 void TEST_signal_tests__crashtest() {
1060  // check whether we can test that no SEGV or assertion failure happened
1061  TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);
1062 
1063  // check whether we can test for SEGV and assertion failures
1064  TEST_EXPECT_SEGFAULT(provokesegv);
1065  TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1066 
1067  // tests whether signal suppression works multiple times (by repeating tests)
1068  TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1069  TEST_EXPECT_SEGFAULT(provokesegv);
1070 
1071  // test whether SEGV can be distinguished from assertion
1072  TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1073 
1074  // The following section is disabled, because it will
1075  // provoke test warnings (to test these warnings).
1076  // (enable it when changing any of these TEST_..-macros used here)
1077 #if defined(TEST_TEST_MACROS)
1078  TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);
1079 
1080  TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
1081  TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
1082 #if defined(ASSERTION_USED)
1083  TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
1084 #endif
1085 
1086  TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
1088  TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
1089 #endif
1090 }
1091 
1092 #define EXPECT_CONTENT(content) TEST_EXPECT_EQUAL(GBS_mempntr(strstr), content)
1093 
1094 void TEST_GBS_strstruct() {
1095  {
1096  GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");
1097 
1098  GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb");
1099  GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17");
1100  GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_");
1101  GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000");
1102 
1103  TEST_EXPECT_EQUAL(GBS_memoffset(strstr), 14);
1104  GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b");
1105  GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter");
1106  GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly");
1107 
1108  GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
1109  EXPECT_CONTENT("butterfly flutters");
1110 
1111  GBS_strforget(strstr);
1112  }
1113  {
1114  // re-alloc smaller
1115  GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
1116  GBS_strforget(strstr);
1117  }
1118 
1119  // trigger downsize of oversized block
1120  for (int i = 0; i<12; ++i) {
1121  GBS_strstruct *strstr = GBS_stropen(10);
1122  GBS_strforget(strstr);
1123  }
1124 
1125  {
1126  GBS_strstruct *strstr = GBS_stropen(10);
1127  size_t oldbufsize = strstr->get_buffer_size();
1128  GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer
1129 
1130  TEST_EXPECT_DIFFERENT(oldbufsize, strstr->get_buffer_size()); // did we reallocate?
1131  EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
1132  GBS_strforget(strstr);
1133  }
1134 }
1135 
// Runs GBS_shorten_repeated_data on a heap copy of 'Long' and expects the copy to equal 'Short' afterwards.
#define TEST_SHORTENED_EQUALS(Long,Short) do {          \
        char *shortened = ARB_strdup(Long);             \
        GBS_shorten_repeated_data(shortened);           \
        TEST_EXPECT_EQUAL(shortened, Short);            \
        free(shortened);                                \
    } while(0)
1142 
1143 void TEST_GBS_shorten_repeated_data() {
1144  TEST_SHORTENED_EQUALS("12345", "12345");
1145  TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1146  TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1147  TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1148  TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1149  TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1150  TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1151  TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1152  TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1153  TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1154  TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1155  TEST_SHORTENED_EQUALS("aabc", "aabc");
1156  TEST_SHORTENED_EQUALS("", "");
1157 }
1158 
// printf-style templates: each embeds one key fragment (%s) at a different
// position of a three-part hierarchical key
static const char *hkey_format[] = {
    "/%s/bbb/ccc", // fragment is 1st part
    "/aaa/%s/ccc", // fragment is 2nd part
    "/aaa/bbb/%s", // fragment is last part
};
1164 
1165 inline const char *useInHkey(const char *fragment, size_t pos) {
1166  return GBS_global_string(hkey_format[pos], fragment);
1167 }
1168 
// Embeds 'use' at every position of a hierarchical key (see hkey_format)
// and expects GB_check_hkey to report no error.
// The tested key is annotated while it is checked.
#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do {                           \
        for (size_t pos = 0; pos<ARRAY_ELEMS(hkey_format); ++pos) {             \
            const char *hkey = useInHkey(use, pos);                             \
            TEST_ANNOTATE(hkey);                                                \
            TEST_EXPECT_NO_ERROR(GB_check_hkey(hkey));                          \
        }                                                                       \
        TEST_ANNOTATE(NULp);                                                    \
    } while(0)

// Same as above, but expects GB_check_hkey to report an error containing 'contains'.
#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do {            \
        for (size_t pos = 0; pos<ARRAY_ELEMS(hkey_format); ++pos) {             \
            const char *hkey = useInHkey(use, pos);                             \
            TEST_ANNOTATE(hkey);                                                \
            TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(hkey), contains);          \
        }                                                                       \
        TEST_ANNOTATE(NULp);                                                    \
    } while(0)
1186 
1187 
1188 void TEST_DB_key_checks() {
1189  // plain keys
1190  const char *shortest = "ab";
1191  const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
1192  const char *too_short = shortest+1;
1193  const char *longest = too_long+1;
1194 
1195  const char *empty = "";
1196  const char *slash = "sub/key";
1197  const char *dslash = "sub//key";
1198  const char *comma = "no,key";
1199  const char *minus = "no-key";
1200 
1201  // obsolete GB_LINK syntax:
1202  const char *link = "link->syntax";
1203  const char *nowhere = "link->";
1204  const char *fromNw = "->syntax";
1205 
1208 
1209  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1210  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1211  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1212 
1213  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
1214  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
1215  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
1216  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
1217  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
1218  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
1219  TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");
1220 
1221  // hierarchical keys
1222  TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1223  TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1224 
1225  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1226  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1227  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1228 
1229  TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash);
1230  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
1231  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1232  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
1233  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
1234  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
1235  TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");
1236 
1237  // test NULp keys:
1238  TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
1239  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");
1240 
1241  // some edge cases for hierarchical keys:
1242  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
1243  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
1244  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
1245  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
1247  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
1248  TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
1249 }
1250 
// Converts 'str' using GBS_string_2_key, expects the result to equal 'expected'
// and to be accepted by GB_check_key.
#define TEST_STRING2KEY(str,expected) do {                      \
        char *generated = GBS_string_2_key(str);                \
        TEST_EXPECT_EQUAL(generated, expected);                 \
        TEST_EXPECT_NO_ERROR(GB_check_key(generated));          \
        free(generated);                                        \
    } while(0)
1257 
1258 void TEST_DB_key_generation() {
1259  TEST_STRING2KEY("abc", "abc");
1260  TEST_STRING2KEY("a b c", "a_b_c");
1261 
1262  // invalid chars
1263  TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1264  "string_containing_doublequotes_quotes_and_othershit");
1265 
1266  // length tests
1267  TEST_STRING2KEY("a", "a_"); // too short
1268  TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1269  "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1270 }
1271 
1272 void TEST_TaggedContentParser() {
1273  // test helper class TextRef:
1274  TEST_REJECT(TextRef().defined()); // default to undefined
1275  {
1276  TextRef bla("blakjahd", 3);
1277  TEST_EXPECT(bla.defined());
1278  TEST_EXPECT_EQUAL(bla.get_length(), 3);
1279 
1280  TEST_EXPECT(bla.compare("bl") > 0);
1281  TEST_EXPECT(bla.compare("bla") == 0);
1282  TEST_EXPECT(bla.compare("blase") < 0);
1283 
1284  TextRef spaced(" spaced "+1, 10);
1285  TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1286  TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1287  TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1288  }
1289 
1290  const char *text = " untagged [tag] tagged [empty] ";
1291 
1292  TextRef cr_untagged(strstr(text, "untagged"), 8);
1293  TextRef cr_tagged (strstr(text, "tagged"), 6);
1294  TextRef tr_tag (strstr(text, "tag"), 3);
1295  TextRef tr_empty (strstr(text, "empty"), 5);
1296 
1297  // test TaggedContentParser:
1298  {
1299  TaggedContentParser parser(text);
1300 
1301  TEST_EXPECT(parser.has_part());
1302  TEST_REJECT(parser.has_tag());
1303  TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1304 
1305  parser.next();
1306 
1307  TEST_EXPECT(parser.has_part());
1308  TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1309  TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1310 
1311  parser.next();
1312 
1313  TEST_EXPECT(parser.has_part());
1314  TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1315  TEST_REJECT(parser.has_content());
1316 
1317  parser.next();
1318 
1319  TEST_REJECT(parser.has_part());
1320  }
1321  { // parse untagged input
1322  TaggedContentParser parser("hi");
1323  TEST_EXPECT(parser.has_part());
1324  TEST_REJECT(parser.has_tag());
1325  TEST_EXPECT(parser.get_content().compare("hi") == 0);
1326  parser.next();
1327  TEST_REJECT(parser.has_part());
1328  }
1329  { // parse empty input
1330  TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1331  TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1332  }
1333  { // parse single tag w/o content
1334  TaggedContentParser parser(" [hello] ");
1335  TEST_EXPECT(parser.has_part());
1336  TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1337  TEST_REJECT(parser.has_content());
1338  parser.next();
1339  TEST_REJECT(parser.has_part());
1340  }
1341  { // parse multi-tags
1342  TaggedContentParser parser(" [ t1 , t2 ] t");
1343  TEST_EXPECT(parser.has_part());
1344  TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1345  TEST_EXPECT(parser.get_content().compare("t") == 0);
1346  parser.next();
1347  TEST_EXPECT(parser.has_part());
1348  TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1349  TEST_EXPECT(parser.get_content().compare("t") == 0);
1350  parser.next();
1351  TEST_REJECT(parser.has_part());
1352  }
1353 }
1354 
// Calls GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2) and expects 'expected' as result.
// Note the argument order of the macro: tags first, then r1/r2, then the strings.
#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do {                      \
        char *merged = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2);        \
        TEST_EXPECT_EQUAL(merged, expected);                                    \
        free(merged);                                                           \
    } while(0)

// Variant for known-broken behavior: 'expected' is the wanted, 'got' the current result.
#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do {          \
        char *merged = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2);        \
        TEST_EXPECT_EQUAL__BROKEN(merged, expected, got);                       \
        free(merged);                                                           \
    } while(0)
1366 
1367 void TEST_merge_tagged_strings() {
1368  // merge two fields:
1369  const char *_0 = NULp;
1370 
1371  TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1372  TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
1373  TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
1374  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");
1375 
1376  TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1377  TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1378 
1379  // update fields:
1380  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1381  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1382  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1383  TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1384  TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1385 
1386  // append (opposed to update this keeps old entries with same tag; useless?)
1387  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1388  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1389  TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1390 
1391  // merge three fields:
1392  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1393  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1394  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1395  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1396  TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1397 
1398  // same tests as in section above, but vv:
1399  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1400  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1401  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1402  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1403  TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1404 
1405  // test real-merges (content existing in both strings):
1406  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1407  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1408  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1409  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1410  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1411  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1412  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1413  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1414  TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1415 
1416  // merge two tagged string with deleting
1417 #define DSTSRC1 "[DST] dest1 [SRC] src1"
1418 #define DSTSRC2 "[DST] dest2 [SRC] src2"
1419 #define DSTSRC2LOW "[dst] dest2 [src] src2"
1420 
1421  TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1422  TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1423  TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1424  TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1425  TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1426  TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1427  TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1428  TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1429  TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1430  TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1431  TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1432  TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1433  TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1434  TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1435 }
1436 
1437 __ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
1438  GB_shell shell;
1439  GBDATA *gb_main = GB_open("new.arb", "c");
1440  {
1441  GB_transaction ta(gb_main);
1442 
1443  {
1444  GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1445  TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1446  TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1447 
1448  GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1449  GB_UINT4 ints[] = { 1, 2 };
1450  TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1451  TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1452  }
1453 
1454 #define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1455 #define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1456 #define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1457 
1458  GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1459  const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1460  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1461 
1462  TEST_EXPECT_FULL_CONTENT(NULp);
1463  TEST_EXPECT_FULL_CONTENT("");
1464  TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1465 
1466  TEST_EXPECT_TAG_CONTENT("T1", "t12");
1467  TEST_EXPECT_TAG_CONTENT("T2", "t12");
1468  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1469  TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1470 
1471  TEST_EXPECT_TAG_CONTENT("AA", "aa");
1472  TEST_EXPECT_TAG_CONTENT("BB", "bb");
1473  TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1474  TEST_EXPECT_TAG_CONTENT("XX", "x1");
1475  TEST_EXPECT_TAG_CONTENT("YY", "yy");
1476  TEST_EXPECT_TAG_CONTENT("yy", "yy");
1477 
1478  TEST_REJECT_TAG_CONTENT("Y");
1479  // TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1480 
1481  TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1482  TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1483  TEST_EXPECT_TAG_CONTENT("fake", "fake");
1484 
1485  TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1486  TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1487  TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1488  TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1489  TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1490  TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1491  TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1492  TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1493 
1494  TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1495  TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1496  TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1497  TEST_EXPECT_TAG_CONTENT("LAST", "last");
1498 
1499  // test incomplete tags
1500  tagged_string = "bla [WHATEVER hello";
1501  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1502  TEST_REJECT_TAG_CONTENT("WHATEVER");
1503 
1504  tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1505  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1506  TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1507  TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1508  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1509 
1510  // test pathological tags
1511  tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1512  TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1513  TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1514  TEST_EXPECT_FULL_CONTENT("");
1515  TEST_REJECT_TAG_CONTENT(" ");
1516  TEST_REJECT_TAG_CONTENT(" ");
1517  TEST_REJECT_TAG_CONTENT(",");
1518  TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1519  TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1520  TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1521  TEST_EXPECT_TAG_CONTENT(" ", "us");
1522  TEST_EXPECT_TAG_CONTENT("T3", "t3");
1523  }
1524  GB_close(gb_main);
1525 }
1526 
// Both macros call GBS_modify_tagged_string_with_ACI using the local 'callEnv' of the calling test.

// expects result 'expected' (and no exported error)
#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do {                                  \
        TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(                                          \
            GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv),                     \
            expected);                                                                          \
    } while(0)

// expects no result and an exported error containing 'expectedErrorPart'
#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do {          \
        TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS(                                           \
            GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv),                     \
            expectedErrorPart);                                                                 \
    } while(0)
1538 
1539 __ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
1540  GB_shell shell;
1541  GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1542  {
1543  GB_transaction ta(gb_main);
1544  GBL_env env(gb_main, "tree_missing");
1545 
1546  {
1547  GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1548  TEST_REJECT_NULL(gb_species);
1549  GBL_call_env callEnv(gb_species, env);
1550 
1551  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1552  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1553  TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1554  TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1555  TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1556 
1557  // empty tags:
1558  TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1559  TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1560  TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1561  TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1562  TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1563 
1564 #define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1565 #define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1566 
1567  // dont eval:
1568  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1569  // eval SRT:
1570  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top");
1571  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top");
1572  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top");
1573  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top");
1574  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1575  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":*=*-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1576  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1577  // eval ACI:
1578  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1579  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1580 
1581  // test SRT/ACI errors:
1582  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1583  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1584  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1585  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1586 
1587  // no error raised, if expression not applied:
1588  TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1589 
1590  // incomplete tags
1591  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":*=<*>", "[DEF] <{no tag>");
1592  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,{no>");
1593  TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag,{no>");
1594  TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* *=<*2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1595  TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag{},{no>");
1596  TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* *=<*2,*1>", "[DEF] <{tag{} xx,{no>");
1597  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,no>");
1598  TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1599  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1600  TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1601  TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1602  TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":*=<*>", "[DEF] <kept {trunk>");
1603  TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":*=<*>", "[DEF] <kept>");
1604  }
1605 
1606  {
1607  GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1608  TEST_REJECT_NULL(gb_species);
1609  GBL_call_env callEnv(gb_species, env);
1610 
1611  // run scripts using context:
1612  TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1613  TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(|sequence|len)", "[T1] seqlen=$l [T2] seqlen=165");
1614  TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1615 
1616  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1617  TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1618 
1619  // content before 1st tag:
1620  TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1621  TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1622 
1623  // test elimination of leading/trailing whitespace:
1624  TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1625  TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1626  TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1627 
1628 #define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1629 
1630  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1631  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1632  TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1633  }
1634  }
1635  GB_close(gb_main);
1636 }
1637 
1638 void TEST_log_action() {
1639  for (int stamped = 0; stamped<=1; ++stamped) {
1640  TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1641  {
1642  char *logged = GBS_log_action_to("comment", "action", stamped);
1643  if (stamped) {
1644  TEST_EXPECT_CONTAINS(logged, "comment\n");
1645  TEST_EXPECT_CONTAINS(logged, "action\n");
1646  }
1647  else {
1648  TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1649  }
1650  free(logged);
1651  }
1652  {
1653  char *logged = GBS_log_action_to("comment\n", "action", stamped);
1654  if (stamped) {
1655  TEST_EXPECT_CONTAINS(logged, "comment\n");
1656  TEST_EXPECT_CONTAINS(logged, "action\n");
1657  }
1658  else {
1659  TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1660  }
1661  free(logged);
1662  }
1663  {
1664  char *logged = GBS_log_action_to("", "action", stamped);
1665  if (stamped) {
1666  TEST_EXPECT_EQUAL(logged[0], '\n');
1667  TEST_EXPECT_CONTAINS(logged, "action\n");
1668  }
1669  else {
1670  TEST_EXPECT_EQUAL(logged, "\naction\n");
1671  }
1672  free(logged);
1673  }
1674  {
1675  char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1676  if (stamped) {
1677  TEST_EXPECT_DIFFERENT(logged[0], '\n');
1678  TEST_EXPECT_CONTAINS(logged, "action\n");
1679  }
1680  else {
1681  TEST_EXPECT_EQUAL(logged, "action\n");
1682  }
1683  free(logged);
1684  }
1685  }
1686 }
1687 TEST_PUBLISH(TEST_log_action);
1688 
1689 #endif // UNIT_TESTS
1690 
static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash)
Definition: adstring.cxx:661
static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result)
Definition: adstring.cxx:676
void GBS_hash_do_const_sorted_loop(const GB_HASH *hs, gb_hash_const_loop_type func, gbs_hash_compare_function sorter, void *client_data)
Definition: adhash.cxx:644
TextRef partBehind(const TextRef &subref) const
Definition: adstring.cxx:493
const char * GB_ERROR
Definition: arb_core.h:25
string result
bool defined() const
Definition: adstring.cxx:412
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
TextRef(const char *data_, int length_)
Definition: adstring.cxx:409
static GB_ERROR tab(GBL_command_arguments *args, bool pretab)
Definition: adlang1.cxx:913
static TextRef textBetween(const TextRef &t1, const TextRef &t2)
Definition: adstring.cxx:479
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
Definition: adhash.cxx:457
static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del)
Definition: adstring.cxx:608
size_t GBS_shorten_repeated_data(char *data)
Definition: adstring.cxx:358
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1361
static const char * EMPTY_KEY_NOT_ALLOWED
Definition: adstring.cxx:62
void GBS_strnprintf(GBS_strstruct *strstr, long maxlen, const char *templat,...)
Definition: arb_strbuf.cxx:113
static void g_bs_merge_tags(const char *tag, long, void *cd_sub_result)
Definition: adstring.cxx:654
int orgLen
Definition: rns.c:12
int ARB_stricmp(const char *s1, const char *s2)
Definition: arb_str.h:28
const char * get_data() const
Definition: adstring.cxx:413
void GBS_intcat(GBS_strstruct *strstr, long val)
Definition: arb_strbuf.cxx:127
static char * g_bs_get_string_of_tag_hash(GB_HASH *tag_hash)
Definition: adstring.cxx:692
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
char * GB_read_as_string(GBDATA *gbd)
Definition: arbdb.cxx:1030
TextRef trimmed() const
Definition: adstring.cxx:464
#define TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(cb)
Definition: test_unit.h:1242
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
GB_ERROR GB_check_key(const char *key)
Definition: adstring.cxx:85
GB_HASH * GBS_create_dynaval_hash(long estimated_elements, GB_CASE case_sens, void(*freefun)(long))
Definition: adhash.cxx:271
void GBS_strncat(GBS_strstruct *strstr, const char *ptr, size_t len)
Definition: arb_strbuf.cxx:101
void GBS_free_hash(GB_HASH *hs)
Definition: adhash.cxx:541
TextRef headTrimmed() const
Definition: adstring.cxx:443
void cat(const char *from)
Definition: arb_strbuf.h:156
char * ARB_strpartdup(const char *start, const char *end)
Definition: arb_string.h:51
bool is_part_of(const TextRef &other) const
Definition: adstring.cxx:471
void GBS_fwrite_string(const char *strngi, FILE *out)
Definition: adstring.cxx:810
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
FILE * seq
Definition: rns.c:46
GB_CSTR GB_getenvARBHOME(void)
Definition: adsocket.cxx:549
GB_CSTR GBS_find_string(GB_CSTR cont, GB_CSTR substr, int match_mode)
Definition: admatch.cxx:103
TaggedContentParser(const char *input_)
Definition: adstring.cxx:575
#define TEST_EXPECT_SEGFAULT__UNWANTED(cb)
Definition: test_unit.h:1246
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1484
#define TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(cb)
Definition: test_unit.h:1243
unsigned int GB_UINT4
Definition: arbdb_base.h:37
int compare(const char *str) const
Definition: adstring.cxx:418
#define TEST_EXPECT_CONTAINS(str, part)
Definition: test_unit.h:1301
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:259
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
#define TEST_EXPECT(cond)
Definition: test_unit.h:1312
TextRef tailTrimmed() const
Definition: adstring.cxx:453
size_t get_buffer_size() const
Definition: arb_strbuf.h:64
__ATTR__USERESULT GB_ERROR check_key(const char *key, int len)
Definition: adstring.cxx:64
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
Definition: arbdb.cxx:1755
const char * find(char c) const
Definition: adstring.cxx:476
void GBS_chrncat(GBS_strstruct *strstr, char ch, size_t n)
Definition: arb_strbuf.cxx:123
char * GBS_replace_tabs_by_spaces(const char *text)
Definition: adstring.cxx:937
const char * get_following() const
Definition: adstring.cxx:416
static long g_bs_free_hash_of_hashes_elem(const char *, long val, void *)
Definition: adstring.cxx:703
char * GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char)
Definition: adstring.cxx:175
long GBS_memoffset(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:91
uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude)
Definition: adstring.cxx:320
#define TEST_REJECT(cond)
Definition: test_unit.h:1314
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1309
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
Definition: arb_strbuf.cxx:108
static void error(const char *msg)
Definition: mkptypes.cxx:96
char * GB_memdup(const char *source, size_t len)
Definition: adstring.cxx:56
#define GB_KEY_LEN_MAX
Definition: gb_key.h:25
#define GB_KEY_LEN_MIN
Definition: gb_key.h:26
const TextRef & get_tag() const
Definition: adstring.cxx:583
const TextRef & get_content() const
Definition: adstring.cxx:584
TextRef partBefore(const TextRef &subref) const
Definition: adstring.cxx:489
bool has_content() const
Definition: adstring.cxx:578
#define cmp(h1, h2)
Definition: admap.cxx:50
void GBS_str_cut_tail(GBS_strstruct *strstr, size_t byte_count)
Definition: arb_strbuf.cxx:96
void GBS_dynaval_free(long val)
Definition: adhash.cxx:278
static char * GBS_string_2_key_with_exclusions(const char *str, const char *additional)
Definition: adstring.cxx:31
char * GBS_string_2_key(const char *str)
Definition: adstring.cxx:52
char * GBS_fconvert_string(char *buffer)
Definition: adstring.cxx:894
uint32_t crctab[]
Definition: adstring.cxx:265
int get_length() const
Definition: adstring.cxx:414
void GBS_strforget(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:76
const char * GBS_funptr2readable(void *funptr, bool stripARBHOME)
Definition: adstring.cxx:1021
char * ARB_strupper(char *s)
Definition: arb_str.h:63
void nl()
Definition: test_unit.h:404
GB_ERROR GB_write_int(GBDATA *gbd, long i)
Definition: arbdb.cxx:1220
void GBS_hash_do_loop(GB_HASH *hs, gb_hash_loop_type func, void *client_data)
Definition: adhash.cxx:548
#define __ATTR__REDUCED_OPTIMIZE
Definition: test_unit.h:83
char tail() const
Definition: adstring.cxx:441
GB_CSTR GB_getenv(const char *env)
Definition: adsocket.cxx:656
void GBS_chrcat(GBS_strstruct *strstr, char ch)
Definition: arb_strbuf.cxx:119
Definition: arbdb.h:72
void spaced(const char *word)
Definition: test_unit.h:408
static void g_bs_free_hash_of_hashes(GB_HASH *hash)
Definition: adstring.cxx:708
char * GB_read_as_tagged_string(GBDATA *gbd, const char *tagi)
Definition: adstring.cxx:779
#define TEST_EXPECT_NO_SEGFAULT__WANTED(cb)
Definition: test_unit.h:1240
#define gb_assert(cond)
Definition: arbdbt.h:11
static char * dated_info(const char *info)
Definition: adstring.cxx:972
long GBS_gcgchecksum(const char *seq)
Definition: adstring.cxx:247
char * ARB_strndup(const char *start, int len)
Definition: arb_string.h:83
#define TEST_EXPECT_CODE_ASSERTION_FAILS(cb)
Definition: test_unit.h:1241
aisc_com * link
int icompare(const char *str) const
Definition: adstring.cxx:428
static char * GBS_fread_string(FILE *in)
Definition: adstring.cxx:852
char * GBS_trim(const char *str)
Definition: adstring.cxx:961
GB_ERROR GB_write_ints(GBDATA *gbd, const GB_UINT4 *i, long size)
Definition: arbdb.cxx:1413
#define TEST_EXPECT_NO_SEGFAULT(cb)
Definition: test_unit.h:1239
#define __ATTR__USERESULT
Definition: attributes.h:58
bool empty() const
Definition: arb_strbuf.h:68
char * copy() const
Definition: adstring.cxx:438
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
char head() const
Definition: adstring.cxx:440
TextRef(const char *zeroTerminated)
Definition: adstring.cxx:410
#define TEST_EXPECT_SEGFAULT__WANTED(cb)
Definition: test_unit.h:1245
#define TEST_EXPECT_SEGFAULT(cb)
Definition: test_unit.h:1244
bool white(int ch)
int GBS_HCF_sortedByKey(const char *k0, long dummy_1x, const char *k1, long dummy_2x)
Definition: adhash.cxx:656
uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude)
Definition: adstring.cxx:353
static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value)
Definition: adstring.cxx:591
#define NULp
Definition: cxxforward.h:97
void GBS_floatcat(GBS_strstruct *strstr, double val)
Definition: arb_strbuf.cxx:133
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:136
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
Definition: test_unit.h:1103
#define TEST_EXPECT_DIFFERENT(expr, want)
Definition: test_unit.h:1290
bool ARB_strBeginsWith(const char *str, const char *with)
Definition: arb_str.h:42
char * GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2)
Definition: adstring.cxx:713
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
Definition: gb_aci.cxx:361
static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env &env)
Definition: adstring.cxx:629
char * GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char)
Definition: adstring.cxx:124
bool has_tag() const
Definition: adstring.cxx:577
bool has_part() const
Definition: adstring.cxx:581
GB_transaction ta(gb_var)
char * GBS_log_action_to(const char *comment, const char *action, bool stamp)
Definition: adstring.cxx:990
GBDATA * gb_main
Definition: adname.cxx:33
size_t length
char * GBS_eval_env(GB_CSTR p)
Definition: adstring.cxx:212
char * GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env &env)
Definition: adstring.cxx:750
static int info[maxsites+1]
GB_ERROR GB_check_hkey(const char *key)
Definition: adstring.cxx:92
const char * GB_CSTR
Definition: arbdb_base.h:25
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:395
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:625
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
Definition: adhash.cxx:253
void put(char c)
Definition: arb_strbuf.h:138
Definition: arbdb.h:66
GB_write_int const char s
Definition: AW_awar.cxx:156