ARB
arb_strarray.cxx
Go to the documentation of this file.
1 // ============================================================ //
2 // //
3 // File : arb_strarray.cxx //
4 // Purpose : handle arrays of strings //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in July 2011 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ============================================================ //
11 
12 #include "arb_strarray.h"
13 
14 #include <arb_str.h>
15 #include <arb_strbuf.h>
16 #include <arb_sort.h>
17 
18 void CharPtrArray::sort(CharPtrArray_compare_fun compare, void *client_data) {
22  GB_sort((void**)str, 0, size(), compare, client_data);
23 }
24 
25 void CharPtrArray::uniq(CharPtrArray_compare_fun compare, void *client_data) {
29  for (int i = size()-2; i >= 0; --i) {
30  if (compare(str[i], str[i+1], client_data) == 0) {
31  remove(i+1);
32  }
33  }
34 }
35 
36 /* ----------------------------------------
37  * conversion between
38  *
39  * - char ** (heap-allocated array of heap-allocated char*'s)
40  * - one string containing several substrings separated by a separator
41  * (e.g. "name1,name2,name3")
42  */
43 
44 // @@@ search for code which is splitting strings and use GBT_split_string there
45 // @@@ rename to GBS_split_string and move to string functions
46 
47 void GBT_splitNdestroy_string(ConstStrArray& names, char*& namelist, const char *separator, SplitMode mode) {
66  names.set_memblock(namelist);
67 
68  char *sep = namelist;
69  while (sep) {
70  size_t nonsepcount = strcspn(sep, separator);
71  if (nonsepcount || mode == SPLIT_KEEPEMPTY) {
72  names.put(sep);
73  sep += nonsepcount;
74  }
75  size_t sepcount = strspn(sep, separator);
76  sep[0] = 0;
77  if (sepcount) {
78  if (mode == SPLIT_KEEPEMPTY) {
79  for (size_t s = 1; s<sepcount; ++s) names.put(sep);
80  }
81  sep += sepcount;
82  }
83  else {
84  sep = NULp;
85  }
86  }
87  namelist = NULp; // own it
88 }
89 
90 void GBT_splitNdestroy_string(ConstStrArray& dest, char*& namelist, char separator) {
91  char separator_string[] = "x";
92  separator_string[0] = separator;
93  GBT_splitNdestroy_string(dest, namelist, separator_string, SPLIT_KEEPEMPTY);
94 }
95 
96 char *GBT_join_strings(const CharPtrArray& strings, char separator) {
105  if (!strings[0]) return ARB_strdup("");
106 
107  GBS_strstruct out(1000);
108  out.cat(strings[0]);
109  arb_assert(implicated(separator, !strchr(strings[0], separator))); // otherwise you'll never be able to GBT_split_string
110  for (int n = 1; strings[n]; ++n) {
111  if (separator) out.put(separator);
112  out.cat(strings[n]);
113  arb_assert(implicated(separator, !strchr(strings[n], separator))); // otherwise you'll never be able to GBT_split_string
114  }
115  return out.release();
116 }
117 
118 int CharPtrArray::index_of(const char *search_for) const {
119  // return index of 'search_for' or -1 if not found or given
120  int index = -1;
121  if (search_for && allocated) {
122  for (int i = 0; str[i]; ++i) {
123  if (strcmp(str[i], search_for) == 0) {
124  index = i;
125  break;
126  }
127  }
128  }
129  return index;
130 }
131 
132 void CharPtrArray::move(int oidx, int nidx) {
138  int siz = size();
139 
140  if (oidx == -1) oidx = siz-1;
141  if (nidx == -1) nidx = siz-1;
142  else if (nidx >= siz) nidx = 0;
143 
144  arb_assert(nidx<siz);
145 
146  if (oidx != nidx && oidx<siz) {
147  if (oidx>nidx) for (int i = oidx-1; i>= nidx; --i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
148  else for (int i = oidx; i< nidx; ++i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
149  }
150 }
151 
152 void CharPtrArray::remove(int i) {
153  arb_assert(ok());
155  free_elem(i);
156 
157  // move elems incl. sentinel:
158  while (size_t(i) < elems) {
159  str[i] = str[i+1];
160  ++i;
161  }
162  elems--;
163  arb_assert(ok());
164 }
165 
166 // --------------------------------------------------------------------------------
167 
168 #ifdef UNIT_TESTS
169 #include <test_unit.h>
170 
171 void TEST_StrArray() {
172  StrArray array;
173 
174  TEST_EXPECT(array.empty());
175  TEST_EXPECT_EQUAL(array.index_of("whatever"), -1);
176  TEST_EXPECT_EQUAL(array.size(), 0);
177  TEST_EXPECT_NULL(array[0]);
178 
179  array.put(ARB_strdup("first"));
180 
181  TEST_REJECT(array.empty());
182  TEST_EXPECT_EQUAL(array.size(), 1);
183  TEST_EXPECT_EQUAL(array[0], "first");
184  TEST_EXPECT_NULL(array[1]);
185 
186  array.put(ARB_strdup("second"));
187 
188  TEST_EXPECT_EQUAL(array.size(), 2);
189  TEST_EXPECT_EQUAL(array[0], "first");
190  TEST_EXPECT_EQUAL(array[1], "second");
191  TEST_EXPECT_NULL(array[2]);
192 
193  array.remove(0);
194 
195  TEST_EXPECT_EQUAL(array.size(), 1);
196  TEST_EXPECT_EQUAL(array[0], "second");
197  TEST_EXPECT_NULL(array[1]);
198 
199  array.remove(0);
200 
201  TEST_EXPECT(array.empty());
202  TEST_EXPECT_EQUAL(array.size(), 0);
203  TEST_EXPECT_NULL(array[0]);
204 }
205 
206 void TEST_StrArray_truncate() {
207  ConstStrArray parts;
208  GBT_split_string(parts, "test;word;bla", ';');
209 
210  TEST_EXPECT_EQUAL(parts.size(), 3);
211  parts.resize(1000); TEST_EXPECT_EQUAL(parts.size(), 3);
212  parts.resize(2); TEST_EXPECT_EQUAL(parts.size(), 2);
213  parts.resize(1); TEST_EXPECT_EQUAL(parts.size(), 1);
214  parts.resize(0); TEST_EXPECT(parts.empty());
215 }
216 
// Splits 'str' at 'sep' and expects that joining the resulting tokens
// (again using 'sep') reproduces 'str'.
//
// Note: the expected value has to be 'str'. No identifier 'joined' exists in the scope of this macro;
// passing it on would at best get captured by a local variable inside the called macro
// (and then compare the result with itself).
#define TEST_SPLIT_JOIN(str,sep)                            \
    do {                                                    \
        ConstStrArray cnames;                               \
        GBT_split_string(cnames, str, sep);                 \
        TEST_EXPECT_STRARRAY_CONTAINS(cnames, sep, str);    \
    } while(0)
223 
224 void TEST_GBT_split_join_names() {
225  { // simple split
227  GBT_split_string(names, "a*b*c", '*');
228  size_t count = names.size();
229 
230  TEST_EXPECT_EQUAL(count, 3U);
231  TEST_EXPECT_EQUAL(names[0], "a");
232  TEST_EXPECT_EQUAL(names[1], "b");
233  TEST_EXPECT_EQUAL(names[2], "c");
234  }
235  { // split string containing empty tokens
237  GBT_split_string(names, "**a**b*c*", '*');
238  size_t count = names.size();
239 
240  TEST_EXPECT_EQUAL(count, 7U);
241  TEST_EXPECT_EQUAL(names[0], "");
242  TEST_EXPECT_EQUAL(names[1], "");
243  TEST_EXPECT_EQUAL(names[2], "a");
244  TEST_EXPECT_EQUAL(names[3], "");
245  TEST_EXPECT_EQUAL(names[4], "b");
246  TEST_EXPECT_EQUAL(names[5], "c");
247  TEST_EXPECT_EQUAL(names[6], "");
248  TEST_EXPECT_NULL(names[7]);
249  }
250  { // split string containing empty tokens (with dropEmptyTokens==true)
252  GBT_split_string(names, "**a**b*c*", "*", SPLIT_DROPEMPTY);
253  size_t count = names.size();
254 
255  TEST_EXPECT_EQUAL(count, 3U);
256  TEST_EXPECT_EQUAL(names[0], "a");
257  TEST_EXPECT_EQUAL(names[1], "b");
258  TEST_EXPECT_EQUAL(names[2], "c");
259  TEST_EXPECT_NULL(names[3]);
260  }
261 
262  { // split empty string
264  GBT_split_string(names, "", '*');
265  size_t count = names.size();
266 
267  TEST_EXPECT_EQUAL(count, 1U);
268  TEST_EXPECT_EQUAL(names[0], "");
269  TEST_EXPECT_NULL(names[1]);
270  }
271 
272  { // split empty string (with dropEmptyTokens==true)
274  GBT_split_string(names, "", "*", SPLIT_DROPEMPTY);
275  size_t count = names.size();
276 
277  TEST_EXPECT_EQUAL(count, 0U);
278  TEST_EXPECT_NULL(names[0]);
279  }
280 
281  TEST_SPLIT_JOIN("a.b.c", '.');
282  TEST_SPLIT_JOIN("a.b.c", '*');
283 
284  TEST_SPLIT_JOIN("..a.b.c", '.');
285  TEST_SPLIT_JOIN("a.b.c..", '.');
286  TEST_SPLIT_JOIN("a..b..c", '.');
287  TEST_SPLIT_JOIN(".", '.');
288  TEST_SPLIT_JOIN("....", '.');
289  TEST_SPLIT_JOIN("", '.');
290 }
291 
292 void TEST_StrArray_index_of() {
294 
295  TEST_EXPECT_EQUAL(names.index_of("a"), -1);
296 
297  GBT_split_string(names, "**a**b*c*", '*');
298 
299  TEST_EXPECT_EQUAL(names.index_of("a"), 2);
300  TEST_EXPECT_EQUAL(names.index_of("b"), 4);
301  TEST_EXPECT_EQUAL(names.index_of("c"), 5);
302  TEST_EXPECT_EQUAL(names.index_of(""), 0);
303  TEST_EXPECT_EQUAL(names.index_of("no"), -1);
304 }
305 
// Expects that joining 'array' (using 'separator') results in 'wanted'.
#define TEST_EXPECT_NAMES_JOIN_TO(array,separator,wanted) TEST_EXPECT_STRARRAY_CONTAINS(array,separator,wanted)
307 
308 void TEST_StrArray_safe_remove() {
310  GBT_split_string(names, "a*b*c*d*e", '*');
311 
312  TEST_EXPECT_EQUAL(names.size(), 5U);
313  TEST_EXPECT_NAMES_JOIN_TO(names, 0, "abcde"); // test GBT_join_strings w/o separator
314 
315  names.safe_remove(0);
316  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d*e");
317 
318  names.safe_remove(3);
319  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
320 
321  names.safe_remove(3); // index out of range
322  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
323 
324  names.safe_remove(-1); // illegal index
325  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
326 
327  names.safe_remove(1);
328  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*d");
329 }
330 
331 void TEST_StrArray_move() {
333  GBT_split_string(names, "a*b*c*dee", '*');
334 
335  names.move(0, -1); // -1 means last
336  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*dee*a");
337  names.move(-1, 0);
338  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*dee");
339  names.move(2, 3);
340  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*dee*c");
341  names.move(2, 1);
342  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
343 
344  // test wrap arounds
345  names.move(0, -1);
346  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "dee*b*c*a");
347  names.move(-1, 99999);
348  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
349 }
350 
351 void TEST_StrArray_put_before() { // test after TEST_StrArray_move (cause put_before() depends on move())
353  GBT_split_string(names, "a", '*');
354 
355  names.put_before(-1, "b"); // append at end
356  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b");
357 
358  names.put_before(2, "c"); // append at end (using non-existing index)
359  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c");
360 
361  names.put_before(99, "d"); // append at end (using even bigger non-existing index)
362  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*d");
363 
364  names.put_before(2, "b2"); // insert inside
365  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*b2*c*d");
366 
367  names.put_before(0, "a0"); // insert at beginning
368  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d");
369 
370  names.put_before(5, "d0"); // insert before last
371  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d0*d");
372 }
373 TEST_PUBLISH(TEST_StrArray_put_before);
374 
375 #endif // UNIT_TESTS
376 
#define arb_assert(cond)
Definition: arb_assert.h:245
void put(const char *elem)
Definition: arb_strarray.h:188
size_t size() const
Definition: arb_strarray.h:85
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
void resize(int newsize)
Definition: arb_strarray.h:108
int(* CharPtrArray_compare_fun)(const void *p0, const void *p1, void *client_data)
Definition: arb_strarray.h:29
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void uniq(CharPtrArray_compare_fun compare, void *client_data)
char * release()
Definition: arb_strbuf.h:129
void cat(const char *from)
Definition: arb_strbuf.h:199
void set_memblock(char *block)
Definition: arb_strarray.h:177
bool empty() const
Definition: arb_strarray.h:86
static FullNameMap names
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1517
virtual void free_elem(int i)=0
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
void swap(int i1, int i2)
Definition: arb_strarray.h:94
int index_of(const char *search_for) const
size_t elems
Definition: arb_strarray.h:36
#define TEST_REJECT(cond)
Definition: test_unit.h:1330
void remove(int i)
void safe_remove(int i)
Definition: arb_strarray.h:103
void put_before(int insert_before, const char *elem)
Definition: arb_strarray.h:197
bool ok() const
Definition: arb_strarray.h:38
char * GBT_join_strings(const CharPtrArray &strings, char separator)
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1322
void move(int from, int to)
void sort(CharPtrArray_compare_fun compare, void *client_data)
#define NULp
Definition: cxxforward.h:116
void GBT_split_string(ConstStrArray &dest, const char *namelist, const char *separator, SplitMode mode)
Definition: arb_strarray.h:223
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, SplitMode mode)
bool elem_index(int i) const
Definition: arb_strarray.h:45
SplitMode
Definition: arb_strarray.h:218
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
static Score ** U
Definition: align.cxx:67
void put(char c)
Definition: arb_strbuf.h:174
GB_write_int const char s
Definition: AW_awar.cxx:154