ARB
arb_strarray.cxx
Go to the documentation of this file.
1 // ============================================================ //
2 // //
3 // File : arb_strarray.cxx //
4 // Purpose : handle arrays of strings //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in July 2011 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ============================================================ //
11 
12 #include "arb_strarray.h"
13 
14 #include <arb_str.h>
15 #include <arb_strbuf.h>
16 #include <arb_sort.h>
17 
18 void CharPtrArray::sort(CharPtrArray_compare_fun compare, void *client_data) {
22  GB_sort((void**)str, 0, size(), compare, client_data);
23 }
24 
25 void CharPtrArray::uniq(CharPtrArray_compare_fun compare, void *client_data) {
29  for (int i = size()-2; i >= 0; --i) {
30  if (compare(str[i], str[i+1], client_data) == 0) {
31  remove(i+1);
32  }
33  }
34 }
35 
36 /* ----------------------------------------
37  * conversion between
38  *
39  * - char ** (heap-allocated array of heap-allocated char*'s)
40  * - one string containing several substrings separated by a separator
41  * (e.g. "name1,name2,name3")
42  */
43 
44 #if defined(WARN_TODO)
45 #warning search for code which is splitting strings and use GBT_split_string there
46 #warning rename to GBS_split_string and move to string functions
47 #endif
48 
49 void GBT_splitNdestroy_string(ConstStrArray& names, char*& namelist, const char *separator, bool dropEmptyTokens) {
68  names.set_memblock(namelist);
69 
70  char *sep = namelist;
71  while (sep) {
72  size_t nonsepcount = strcspn(sep, separator);
73  if (nonsepcount || !dropEmptyTokens) {
74  names.put(sep);
75  sep += nonsepcount;
76  }
77  size_t sepcount = strspn(sep, separator);
78  sep[0] = 0;
79  if (sepcount) {
80  if (!dropEmptyTokens) {
81  for (size_t s = 1; s<sepcount; ++s) names.put(sep);
82  }
83  sep += sepcount;
84  }
85  else {
86  sep = NULp;
87  }
88  }
89  namelist = NULp; // own it
90 }
91 
92 void GBT_splitNdestroy_string(ConstStrArray& dest, char*& namelist, char separator) {
93  char separator_string[] = "x";
94  separator_string[0] = separator;
95  GBT_splitNdestroy_string(dest, namelist, separator_string, false);
96 }
97 
98 char *GBT_join_strings(const CharPtrArray& strings, char separator) {
107  if (!strings[0]) return ARB_strdup("");
108 
109  GBS_strstruct *out = GBS_stropen(1000);
110  GBS_strcat(out, strings[0]);
111  arb_assert(implicated(separator, !strchr(strings[0], separator))); // otherwise you'll never be able to GBT_split_string
112  for (int n = 1; strings[n]; ++n) {
113  if (separator) GBS_chrcat(out, separator);
114  GBS_strcat(out, strings[n]);
115  arb_assert(implicated(separator, !strchr(strings[n], separator))); // otherwise you'll never be able to GBT_split_string
116  }
117  return GBS_strclose(out);
118 }
119 
120 int CharPtrArray::index_of(const char *search_for) const {
121  // return index of 'search_for' or -1 if not found or given
122  int index = -1;
123  if (search_for && allocated) {
124  for (int i = 0; str[i]; ++i) {
125  if (strcmp(str[i], search_for) == 0) {
126  index = i;
127  break;
128  }
129  }
130  }
131  return index;
132 }
133 
134 void CharPtrArray::move(int oidx, int nidx) {
140  int siz = size();
141 
142  if (oidx == -1) oidx = siz-1;
143  if (nidx == -1) nidx = siz-1;
144  else if (nidx >= siz) nidx = 0;
145 
146  arb_assert(nidx<siz);
147 
148  if (oidx != nidx && oidx<siz) {
149  if (oidx>nidx) for (int i = oidx-1; i>= nidx; --i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
150  else for (int i = oidx; i< nidx; ++i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
151  }
152 }
153 
154 // --------------------------------------------------------------------------------
155 
156 #ifdef UNIT_TESTS
157 #include <test_unit.h>
158 
159 void TEST_StrArray() {
160  StrArray array;
161 
162  TEST_EXPECT(array.empty());
163  TEST_EXPECT_EQUAL(array.index_of("whatever"), -1);
164  TEST_EXPECT_EQUAL(array.size(), 0);
165  TEST_EXPECT_NULL(array[0]);
166 
167  array.put(ARB_strdup("first"));
168 
169  TEST_REJECT(array.empty());
170  TEST_EXPECT_EQUAL(array.size(), 1);
171  TEST_EXPECT_EQUAL(array[0], "first");
172  TEST_EXPECT_NULL(array[1]);
173 
174  array.put(ARB_strdup("second"));
175 
176  TEST_EXPECT_EQUAL(array.size(), 2);
177  TEST_EXPECT_EQUAL(array[0], "first");
178  TEST_EXPECT_EQUAL(array[1], "second");
179  TEST_EXPECT_NULL(array[2]);
180 
181  array.remove(0);
182 
183  TEST_EXPECT_EQUAL(array.size(), 1);
184  TEST_EXPECT_EQUAL(array[0], "second");
185  TEST_EXPECT_NULL(array[1]);
186 
187  array.remove(0);
188 
189  TEST_EXPECT(array.empty());
190  TEST_EXPECT_EQUAL(array.size(), 0);
191  TEST_EXPECT_NULL(array[0]);
192 }
193 
194 void TEST_StrArray_truncate() {
195  ConstStrArray parts;
196  GBT_split_string(parts, "test;word;bla", ';');
197 
198  TEST_EXPECT_EQUAL(parts.size(), 3);
199  parts.resize(1000); TEST_EXPECT_EQUAL(parts.size(), 3);
200  parts.resize(2); TEST_EXPECT_EQUAL(parts.size(), 2);
201  parts.resize(1); TEST_EXPECT_EQUAL(parts.size(), 1);
202  parts.resize(0); TEST_EXPECT(parts.empty());
203 }
204 
// Round-trip check: splitting 'str' at 'sep' and joining the
// parts again with 'sep' has to reproduce 'str'.
#define TEST_SPLIT_JOIN(str,sep)                                \
    do {                                                        \
        ConstStrArray  split_parts;                             \
        GBT_split_string(split_parts, str, sep);                \
        char          *rejoined = GBT_join_strings(split_parts, sep); \
        TEST_EXPECT_EQUAL(str, rejoined);                       \
        free(rejoined);                                         \
    } while(0)
213 
214 void TEST_GBT_split_join_names() {
215  { // simple split
217  GBT_split_string(names, "a*b*c", '*');
218  size_t count = names.size();
219 
220  TEST_EXPECT_EQUAL(count, 3U);
221  TEST_EXPECT_EQUAL(names[0], "a");
222  TEST_EXPECT_EQUAL(names[1], "b");
223  TEST_EXPECT_EQUAL(names[2], "c");
224  }
225  { // split string containing empty tokens
227  GBT_split_string(names, "**a**b*c*", '*');
228  size_t count = names.size();
229 
230  TEST_EXPECT_EQUAL(count, 7U);
231  TEST_EXPECT_EQUAL(names[0], "");
232  TEST_EXPECT_EQUAL(names[1], "");
233  TEST_EXPECT_EQUAL(names[2], "a");
234  TEST_EXPECT_EQUAL(names[3], "");
235  TEST_EXPECT_EQUAL(names[4], "b");
236  TEST_EXPECT_EQUAL(names[5], "c");
237  TEST_EXPECT_EQUAL(names[6], "");
238  TEST_EXPECT_NULL(names[7]);
239  }
240 
241  TEST_SPLIT_JOIN("a.b.c", '.');
242  TEST_SPLIT_JOIN("a.b.c", '*');
243 
244  TEST_SPLIT_JOIN("..a.b.c", '.');
245  TEST_SPLIT_JOIN("a.b.c..", '.');
246  TEST_SPLIT_JOIN("a..b..c", '.');
247  TEST_SPLIT_JOIN(".", '.');
248  TEST_SPLIT_JOIN("....", '.');
249 }
250 
251 void TEST_StrArray_index_of() {
253 
254  TEST_EXPECT_EQUAL(names.index_of("a"), -1);
255 
256  GBT_split_string(names, "**a**b*c*", '*');
257 
258  TEST_EXPECT_EQUAL(names.index_of("a"), 2);
259  TEST_EXPECT_EQUAL(names.index_of("b"), 4);
260  TEST_EXPECT_EQUAL(names.index_of("c"), 5);
261  TEST_EXPECT_EQUAL(names.index_of(""), 0);
262  TEST_EXPECT_EQUAL(names.index_of("no"), -1);
263 }
264 
// Joins 'names' using 'sep' (0 = no separator) and expects the result to equal 'expected'.
// Note: no line continuation after 'while(0)' - a trailing backslash there
// would silently pull the following source line into the macro.
#define TEST_EXPECT_NAMES_JOIN_TO(names, sep, expected) \
    do { \
        char *joined = GBT_join_strings(names, sep); \
        TEST_EXPECT_EQUAL(joined, expected); \
        free(joined); \
    } while(0)
271 
272 void TEST_StrArray_safe_remove() {
274  GBT_split_string(names, "a*b*c*d*e", '*');
275 
276  TEST_EXPECT_EQUAL(names.size(), 5U);
277  TEST_EXPECT_NAMES_JOIN_TO(names, 0, "abcde"); // test GBT_join_strings w/o separator
278 
279  names.safe_remove(0);
280  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d*e");
281 
282  names.safe_remove(3);
283  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
284 
285  names.safe_remove(3); // index out of range
286  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
287 
288  names.safe_remove(-1); // illegal index
289  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
290 
291  names.safe_remove(1);
292  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*d");
293 }
294 
295 void TEST_StrArray_move() {
297  GBT_split_string(names, "a*b*c*dee", '*');
298 
299  names.move(0, -1); // -1 means last
300  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*dee*a");
301  names.move(-1, 0);
302  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*dee");
303  names.move(2, 3);
304  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*dee*c");
305  names.move(2, 1);
306  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
307 
308  // test wrap arounds
309  names.move(0, -1);
310  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "dee*b*c*a");
311  names.move(-1, 99999);
312  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
313 }
314 
315 void TEST_StrArray_put_before() { // test after TEST_StrArray_move (cause put_before() depends on move())
317  GBT_split_string(names, "a", '*');
318 
319  names.put_before(-1, "b"); // append at end
320  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b");
321 
322  names.put_before(2, "c"); // append at end (using non-existing index)
323  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c");
324 
325  names.put_before(99, "d"); // append at end (using even bigger non-existing index)
326  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*d");
327 
328  names.put_before(2, "b2"); // insert inside
329  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*b2*c*d");
330 
331  names.put_before(0, "a0"); // insert at beginning
332  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d");
333 
334  names.put_before(5, "d0"); // insert before last
335  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d0*d");
336 }
337 TEST_PUBLISH(TEST_StrArray_put_before);
338 
339 #endif // UNIT_TESTS
340 
#define arb_assert(cond)
Definition: arb_assert.h:245
void put(const char *elem)
Definition: arb_strarray.h:199
size_t size() const
Definition: arb_strarray.h:85
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
void resize(int newsize)
Definition: arb_strarray.h:119
int(* CharPtrArray_compare_fun)(const void *p0, const void *p1, void *client_data)
Definition: arb_strarray.h:29
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void uniq(CharPtrArray_compare_fun compare, void *client_data)
void set_memblock(char *block)
Definition: arb_strarray.h:188
bool empty() const
Definition: arb_strarray.h:86
void GBT_split_string(ConstStrArray &dest, const char *namelist, const char *separator, bool dropEmptyTokens)
Definition: arb_strarray.h:232
static FullNameMap names
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1485
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
#define TEST_EXPECT(cond)
Definition: test_unit.h:1313
void swap(int i1, int i2)
Definition: arb_strarray.h:94
int index_of(const char *search_for) const
#define TEST_REJECT(cond)
Definition: test_unit.h:1315
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
Definition: arb_strbuf.cxx:108
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, bool dropEmptyTokens)
void safe_remove(int i)
Definition: arb_strarray.h:114
void put_before(int insert_before, const char *elem)
Definition: arb_strarray.h:208
void GBS_chrcat(GBS_strstruct *strstr, char ch)
Definition: arb_strbuf.cxx:119
char * GBT_join_strings(const CharPtrArray &strings, char separator)
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1307
void move(int from, int to)
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
void sort(CharPtrArray_compare_fun compare, void *client_data)
#define NULp
Definition: cxxforward.h:97
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
static Score ** U
Definition: align.cxx:67
GB_write_int const char s
Definition: AW_awar.cxx:156