ARB
arb_strarray.cxx
Go to the documentation of this file.
1 // ============================================================ //
2 // //
3 // File : arb_strarray.cxx //
4 // Purpose : handle arrays of strings //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in July 2011 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ============================================================ //
11 
12 #include "arb_strarray.h"
13 
14 #include <arb_str.h>
15 #include <arb_strbuf.h>
16 #include <arb_sort.h>
17 
18 void CharPtrArray::sort(CharPtrArray_compare_fun compare, void *client_data) {
22  GB_sort((void**)str, 0, size(), compare, client_data);
23 }
24 
25 void CharPtrArray::uniq(CharPtrArray_compare_fun compare, void *client_data) {
29  for (int i = size()-2; i >= 0; --i) {
30  if (compare(str[i], str[i+1], client_data) == 0) {
31  remove(i+1);
32  }
33  }
34 }
35 
36 /* ----------------------------------------
37  * conversion between
38  *
39  * - char ** (heap-allocated array of heap-allocated char*'s)
40  * - one string containing several substrings separated by a separator
41  * (e.g. "name1,name2,name3")
42  */
43 
44 // @@@ search for code which is splitting strings and use GBT_split_string there
45 // @@@ rename to GBS_split_string and move to string functions
46 
47 void GBT_splitNdestroy_string(ConstStrArray& names, char*& namelist, const char *separator, SplitMode mode) {
68  names.set_memblock(namelist);
69 
70  char *sep = namelist;
71  while (sep) {
72  size_t nonsepcount = strcspn(sep, separator);
73  if (nonsepcount || mode == SPLIT_KEEPEMPTY) {
74  names.put(sep);
75  sep += nonsepcount;
76  }
77  size_t sepcount = strspn(sep, separator);
78  sep[0] = 0;
79  if (sepcount) {
80  if (mode == SPLIT_KEEPEMPTY) {
81  for (size_t s = 1; s<sepcount; ++s) names.put(sep);
82  }
83  sep += sepcount;
84  }
85  else {
86  sep = NULp;
87  }
88  }
89  namelist = NULp; // own it
90 }
91 
92 void GBT_splitNdestroy_string(ConstStrArray& dest, char*& namelist, char separator) {
93  char separator_string[] = "x";
94  separator_string[0] = separator;
95  GBT_splitNdestroy_string(dest, namelist, separator_string, SPLIT_KEEPEMPTY);
96 }
97 
98 char *GBT_join_strings(const CharPtrArray& strings, char separator) {
107  if (!strings[0] || (!strings[0][0] && !strings[1])) return ARB_strdup("");
108 
109  GBS_strstruct out(1000);
110  out.cat(strings[0]);
111  arb_assert(implicated(separator, !strchr(strings[0], separator))); // otherwise you'll never be able to GBT_split_string
112  for (int n = 1; strings[n]; ++n) {
113  if (separator) out.put(separator);
114  out.cat(strings[n]);
115  arb_assert(implicated(separator, !strchr(strings[n], separator))); // otherwise you'll never be able to GBT_split_string
116  }
117  return out.release();
118 }
119 
120 int CharPtrArray::index_of(const char *search_for) const {
121  // return index of 'search_for' or -1 if not found or given
122  int index = -1;
123  if (search_for && allocated) {
124  for (int i = 0; str[i]; ++i) {
125  if (strcmp(str[i], search_for) == 0) {
126  index = i;
127  break;
128  }
129  }
130  }
131  return index;
132 }
133 
134 void CharPtrArray::move(int oidx, int nidx) {
140  int siz = size();
141 
142  if (oidx == -1) oidx = siz-1;
143  if (nidx == -1) nidx = siz-1;
144  else if (nidx >= siz) nidx = 0;
145 
146  arb_assert(nidx<siz);
147 
148  if (oidx != nidx && oidx<siz) {
149  if (oidx>nidx) for (int i = oidx-1; i>= nidx; --i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
150  else for (int i = oidx; i< nidx; ++i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
151  }
152 }
153 
154 void CharPtrArray::remove(int i) {
155  arb_assert(ok());
157  free_elem(i);
158 
159  // move elems incl. sentinel:
160  while (size_t(i) < elems) {
161  str[i] = str[i+1];
162  ++i;
163  }
164  elems--;
165  arb_assert(ok());
166 }
167 
168 // --------------------------------------------------------------------------------
169 
170 #ifdef UNIT_TESTS
171 #include <test_unit.h>
172 
173 void TEST_StrArray() {
174  StrArray array;
175 
176  TEST_EXPECT(array.empty());
177  TEST_EXPECT_EQUAL(array.index_of("whatever"), -1);
178  TEST_EXPECT_EQUAL(array.size(), 0);
179  TEST_EXPECT_NULL(array[0]);
180 
181  array.put(ARB_strdup("first"));
182 
183  TEST_REJECT(array.empty());
184  TEST_EXPECT_EQUAL(array.size(), 1);
185  TEST_EXPECT_EQUAL(array[0], "first");
186  TEST_EXPECT_NULL(array[1]);
187 
188  array.put(ARB_strdup("second"));
189 
190  TEST_EXPECT_EQUAL(array.size(), 2);
191  TEST_EXPECT_EQUAL(array[0], "first");
192  TEST_EXPECT_EQUAL(array[1], "second");
193  TEST_EXPECT_NULL(array[2]);
194 
195  array.remove(0);
196 
197  TEST_EXPECT_EQUAL(array.size(), 1);
198  TEST_EXPECT_EQUAL(array[0], "second");
199  TEST_EXPECT_NULL(array[1]);
200 
201  array.remove(0);
202 
203  TEST_EXPECT(array.empty());
204  TEST_EXPECT_EQUAL(array.size(), 0);
205  TEST_EXPECT_NULL(array[0]);
206 }
207 
208 void TEST_StrArray_truncate() {
209  ConstStrArray parts;
210  GBT_split_string(parts, "test;word;bla", ';');
211 
212  TEST_EXPECT_EQUAL(parts.size(), 3);
213  parts.resize(1000); TEST_EXPECT_EQUAL(parts.size(), 3);
214  parts.resize(2); TEST_EXPECT_EQUAL(parts.size(), 2);
215  parts.resize(1); TEST_EXPECT_EQUAL(parts.size(), 1);
216  parts.resize(0); TEST_EXPECT(parts.empty());
217 }
218 
// Round-trip check: splitting 'str' at 'sep' and joining the resulting
// tokens with 'sep' again has to reproduce 'str'.
// (The expected value is the macro argument 'str'; no other identifier
// is available in the scope of this macro.)
#define TEST_SPLIT_JOIN(str,sep)                                \
    do {                                                        \
        ConstStrArray cnames;                                   \
        GBT_split_string(cnames, str, sep);                     \
        TEST_EXPECT_STRARRAY_CONTAINS(cnames, sep, str);        \
    } while(0)
225 
226 void TEST_GBT_split_join_names() {
227  { // simple split
229  GBT_split_string(names, "a*b*c", '*');
230  size_t count = names.size();
231 
232  TEST_EXPECT_EQUAL(count, 3U);
233  TEST_EXPECT_EQUAL(names[0], "a");
234  TEST_EXPECT_EQUAL(names[1], "b");
235  TEST_EXPECT_EQUAL(names[2], "c");
236  }
237  { // split string containing empty tokens
239  GBT_split_string(names, "**a**b*c*", '*');
240  size_t count = names.size();
241 
242  TEST_EXPECT_EQUAL(count, 7U);
243  TEST_EXPECT_EQUAL(names[0], "");
244  TEST_EXPECT_EQUAL(names[1], "");
245  TEST_EXPECT_EQUAL(names[2], "a");
246  TEST_EXPECT_EQUAL(names[3], "");
247  TEST_EXPECT_EQUAL(names[4], "b");
248  TEST_EXPECT_EQUAL(names[5], "c");
249  TEST_EXPECT_EQUAL(names[6], "");
250  TEST_EXPECT_NULL(names[7]);
251  }
252  { // split string containing empty tokens (with dropEmptyTokens==true)
254  GBT_split_string(names, "**a**b*c*", "*", SPLIT_DROPEMPTY);
255  size_t count = names.size();
256 
257  TEST_EXPECT_EQUAL(count, 3U);
258  TEST_EXPECT_EQUAL(names[0], "a");
259  TEST_EXPECT_EQUAL(names[1], "b");
260  TEST_EXPECT_EQUAL(names[2], "c");
261  TEST_EXPECT_NULL(names[3]);
262  }
263 
264  { // split empty string
266  GBT_split_string(names, "", '*');
267  size_t count = names.size();
268 
269  TEST_EXPECT_EQUAL(count, 1U);
270  TEST_EXPECT_EQUAL(names[0], "");
271  TEST_EXPECT_NULL(names[1]);
272  }
273 
274  { // split empty string (dropping empty)
276  GBT_split_string(names, "", "*", SPLIT_DROPEMPTY);
277  size_t count = names.size();
278 
279  TEST_EXPECT_EQUAL(count, 0U);
280  TEST_EXPECT_NULL(names[0]);
281  }
282 
283  { // split empty string (with dropEmptyTokens==true)
285  GBT_split_string(names, "", "*", SPLIT_DROPEMPTY);
286  size_t count = names.size();
287 
288  TEST_EXPECT_EQUAL(count, 0U);
289  TEST_EXPECT_NULL(names[0]);
290  }
291 
292  TEST_SPLIT_JOIN("a.b.c", '.');
293  TEST_SPLIT_JOIN("a.b.c", '*');
294 
295  TEST_SPLIT_JOIN("..a.b.c", '.');
296  TEST_SPLIT_JOIN("a.b.c..", '.');
297  TEST_SPLIT_JOIN("a..b..c", '.');
298  TEST_SPLIT_JOIN(".", '.');
299  TEST_SPLIT_JOIN("....", '.');
300  TEST_SPLIT_JOIN("", '.');
301 }
302 
303 void TEST_StrArray_index_of() {
305 
306  TEST_EXPECT_EQUAL(names.index_of("a"), -1);
307 
308  GBT_split_string(names, "**a**b*c*", '*');
309 
310  TEST_EXPECT_EQUAL(names.index_of("a"), 2);
311  TEST_EXPECT_EQUAL(names.index_of("b"), 4);
312  TEST_EXPECT_EQUAL(names.index_of("c"), 5);
313  TEST_EXPECT_EQUAL(names.index_of(""), 0);
314  TEST_EXPECT_EQUAL(names.index_of("no"), -1);
315 }
316 
// Alias for TEST_EXPECT_STRARRAY_CONTAINS (arguments are forwarded unchanged).
#define TEST_EXPECT_NAMES_JOIN_TO(names, sep, expected) \
    TEST_EXPECT_STRARRAY_CONTAINS(names, sep, expected)
318 
319 void TEST_join_empty_strings() {
321 
322  // empty array
323  TEST_EXPECT_STRARRAY_CONTAINS(names, '*', "");
324  TEST_EXPECT_STRARRAY_CONTAINS(names, 0, "");
325 
326  // array with one empty string:
327  names.put("");
328  TEST_EXPECT_STRARRAY_CONTAINS(names, '*', "");
329  TEST_EXPECT_STRARRAY_CONTAINS(names, 0, "");
330 
331  // array with more empty strings:
332  names.put("");
333  TEST_EXPECT_STRARRAY_CONTAINS(names, '*', "*");
334  TEST_EXPECT_STRARRAY_CONTAINS(names, 0, "");
335 
336  names.put("");
337  TEST_EXPECT_STRARRAY_CONTAINS(names, '*', "**");
338  TEST_EXPECT_STRARRAY_CONTAINS(names, 0, "");
339 
340  names.put_before(2, "hello");
341  TEST_EXPECT_STRARRAY_CONTAINS(names, '*', "**hello*");
342  TEST_EXPECT_STRARRAY_CONTAINS(names, 0, "hello");
343 }
344 
345 void TEST_StrArray_safe_remove() {
347  GBT_split_string(names, "a*b*c*d*e", '*');
348 
349  TEST_EXPECT_EQUAL(names.size(), 5U);
350  TEST_EXPECT_NAMES_JOIN_TO(names, 0, "abcde"); // test GBT_join_strings w/o separator
351 
352  names.safe_remove(0);
353  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d*e");
354 
355  names.safe_remove(3);
356  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
357 
358  names.safe_remove(3); // index out of range
359  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
360 
361  names.safe_remove(-1); // illegal index
362  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
363 
364  names.safe_remove(1);
365  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*d");
366 }
367 
368 void TEST_StrArray_move() {
370  GBT_split_string(names, "a*b*c*dee", '*');
371 
372  names.move(0, -1); // -1 means last
373  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*dee*a");
374  names.move(-1, 0);
375  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*dee");
376  names.move(2, 3);
377  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*dee*c");
378  names.move(2, 1);
379  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
380 
381  // test wrap arounds
382  names.move(0, -1);
383  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "dee*b*c*a");
384  names.move(-1, 99999);
385  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
386 }
387 
388 void TEST_StrArray_put_before() { // test after TEST_StrArray_move (cause put_before() depends on move())
390  GBT_split_string(names, "a", '*');
391 
392  names.put_before(-1, "b"); // append at end
393  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b");
394 
395  names.put_before(2, "c"); // append at end (using non-existing index)
396  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c");
397 
398  names.put_before(99, "d"); // append at end (using even bigger non-existing index)
399  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*d");
400 
401  names.put_before(2, "b2"); // insert inside
402  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*b2*c*d");
403 
404  names.put_before(0, "a0"); // insert at beginning
405  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d");
406 
407  names.put_before(5, "d0"); // insert before last
408  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d0*d");
409 }
410 TEST_PUBLISH(TEST_StrArray_put_before);
411 
412 #endif // UNIT_TESTS
413 
#define arb_assert(cond)
Definition: arb_assert.h:245
void put(const char *elem)
Definition: arb_strarray.h:188
size_t size() const
Definition: arb_strarray.h:85
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
void resize(int newsize)
Definition: arb_strarray.h:108
int(* CharPtrArray_compare_fun)(const void *p0, const void *p1, void *client_data)
Definition: arb_strarray.h:29
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
#define TEST_EXPECT_STRARRAY_CONTAINS(strings, separator, expected)
Definition: test_unit.h:1338
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void uniq(CharPtrArray_compare_fun compare, void *client_data)
char * release()
Definition: arb_strbuf.h:129
void cat(const char *from)
Definition: arb_strbuf.h:199
void set_memblock(char *block)
Definition: arb_strarray.h:177
bool empty() const
Definition: arb_strarray.h:86
static FullNameMap names
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
virtual void free_elem(int i)=0
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
void swap(int i1, int i2)
Definition: arb_strarray.h:94
int index_of(const char *search_for) const
size_t elems
Definition: arb_strarray.h:36
#define TEST_REJECT(cond)
Definition: test_unit.h:1330
void remove(int i)
void safe_remove(int i)
Definition: arb_strarray.h:103
void put_before(int insert_before, const char *elem)
Definition: arb_strarray.h:197
bool ok() const
Definition: arb_strarray.h:38
char * GBT_join_strings(const CharPtrArray &strings, char separator)
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1322
void move(int from, int to)
void sort(CharPtrArray_compare_fun compare, void *client_data)
#define NULp
Definition: cxxforward.h:116
void GBT_split_string(ConstStrArray &dest, const char *namelist, const char *separator, SplitMode mode)
Definition: arb_strarray.h:223
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, SplitMode mode)
bool elem_index(int i) const
Definition: arb_strarray.h:45
SplitMode
Definition: arb_strarray.h:218
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
static Score ** U
Definition: align.cxx:67
void put(char c)
Definition: arb_strbuf.h:174
GB_write_int const char s
Definition: AW_awar.cxx:154