ARB
arb_strarray.cxx
Go to the documentation of this file.
1 // ============================================================ //
2 // //
3 // File : arb_strarray.cxx //
4 // Purpose : handle arrays of strings //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in July 2011 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // ============================================================ //
11 
12 #include "arb_strarray.h"
13 
14 #include <arb_str.h>
15 #include <arb_strbuf.h>
16 #include <arb_sort.h>
17 
18 void CharPtrArray::sort(CharPtrArray_compare_fun compare, void *client_data) {
22  GB_sort((void**)str, 0, size(), compare, client_data);
23 }
24 
25 void CharPtrArray::uniq(CharPtrArray_compare_fun compare, void *client_data) {
29  for (int i = size()-2; i >= 0; --i) {
30  if (compare(str[i], str[i+1], client_data) == 0) {
31  remove(i+1);
32  }
33  }
34 }
35 
36 /* ----------------------------------------
37  * conversion between
38  *
39  * - char ** (heap-allocated array of heap-allocated char*'s)
40  * - one string containing several substrings separated by a separator
41  * (e.g. "name1,name2,name3")
42  */
43 
44 // @@@ search for code which is splitting strings and use GBT_split_string there
45 // @@@ rename to GBS_split_string and move to string functions
46 
47 void GBT_splitNdestroy_string(ConstStrArray& names, char*& namelist, const char *separator, bool dropEmptyTokens) {
66  names.set_memblock(namelist);
67 
68  char *sep = namelist;
69  while (sep) {
70  size_t nonsepcount = strcspn(sep, separator);
71  if (nonsepcount || !dropEmptyTokens) {
72  names.put(sep);
73  sep += nonsepcount;
74  }
75  size_t sepcount = strspn(sep, separator);
76  sep[0] = 0;
77  if (sepcount) {
78  if (!dropEmptyTokens) {
79  for (size_t s = 1; s<sepcount; ++s) names.put(sep);
80  }
81  sep += sepcount;
82  }
83  else {
84  sep = NULp;
85  }
86  }
87  namelist = NULp; // own it
88 }
89 
90 void GBT_splitNdestroy_string(ConstStrArray& dest, char*& namelist, char separator) {
91  char separator_string[] = "x";
92  separator_string[0] = separator;
93  GBT_splitNdestroy_string(dest, namelist, separator_string, false);
94 }
95 
96 char *GBT_join_strings(const CharPtrArray& strings, char separator) {
105  if (!strings[0]) return ARB_strdup("");
106 
107  GBS_strstruct *out = GBS_stropen(1000);
108  GBS_strcat(out, strings[0]);
109  arb_assert(implicated(separator, !strchr(strings[0], separator))); // otherwise you'll never be able to GBT_split_string
110  for (int n = 1; strings[n]; ++n) {
111  if (separator) GBS_chrcat(out, separator);
112  GBS_strcat(out, strings[n]);
113  arb_assert(implicated(separator, !strchr(strings[n], separator))); // otherwise you'll never be able to GBT_split_string
114  }
115  return GBS_strclose(out);
116 }
117 
118 int CharPtrArray::index_of(const char *search_for) const {
119  // return index of 'search_for' or -1 if not found or given
120  int index = -1;
121  if (search_for && allocated) {
122  for (int i = 0; str[i]; ++i) {
123  if (strcmp(str[i], search_for) == 0) {
124  index = i;
125  break;
126  }
127  }
128  }
129  return index;
130 }
131 
132 void CharPtrArray::move(int oidx, int nidx) {
138  int siz = size();
139 
140  if (oidx == -1) oidx = siz-1;
141  if (nidx == -1) nidx = siz-1;
142  else if (nidx >= siz) nidx = 0;
143 
144  arb_assert(nidx<siz);
145 
146  if (oidx != nidx && oidx<siz) {
147  if (oidx>nidx) for (int i = oidx-1; i>= nidx; --i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
148  else for (int i = oidx; i< nidx; ++i) swap(i, i+1); // LOOP_VECTORIZED[!<810]
149  }
150 }
151 
152 // --------------------------------------------------------------------------------
153 
154 #ifdef UNIT_TESTS
155 #include <test_unit.h>
156 
157 void TEST_StrArray() {
158  StrArray array;
159 
160  TEST_EXPECT(array.empty());
161  TEST_EXPECT_EQUAL(array.index_of("whatever"), -1);
162  TEST_EXPECT_EQUAL(array.size(), 0);
163  TEST_EXPECT_NULL(array[0]);
164 
165  array.put(ARB_strdup("first"));
166 
167  TEST_REJECT(array.empty());
168  TEST_EXPECT_EQUAL(array.size(), 1);
169  TEST_EXPECT_EQUAL(array[0], "first");
170  TEST_EXPECT_NULL(array[1]);
171 
172  array.put(ARB_strdup("second"));
173 
174  TEST_EXPECT_EQUAL(array.size(), 2);
175  TEST_EXPECT_EQUAL(array[0], "first");
176  TEST_EXPECT_EQUAL(array[1], "second");
177  TEST_EXPECT_NULL(array[2]);
178 
179  array.remove(0);
180 
181  TEST_EXPECT_EQUAL(array.size(), 1);
182  TEST_EXPECT_EQUAL(array[0], "second");
183  TEST_EXPECT_NULL(array[1]);
184 
185  array.remove(0);
186 
187  TEST_EXPECT(array.empty());
188  TEST_EXPECT_EQUAL(array.size(), 0);
189  TEST_EXPECT_NULL(array[0]);
190 }
191 
192 void TEST_StrArray_truncate() {
193  ConstStrArray parts;
194  GBT_split_string(parts, "test;word;bla", ';');
195 
196  TEST_EXPECT_EQUAL(parts.size(), 3);
197  parts.resize(1000); TEST_EXPECT_EQUAL(parts.size(), 3);
198  parts.resize(2); TEST_EXPECT_EQUAL(parts.size(), 2);
199  parts.resize(1); TEST_EXPECT_EQUAL(parts.size(), 1);
200  parts.resize(0); TEST_EXPECT(parts.empty());
201 }
202 
// Round-trip check: splits 'str' at 'sep' and expects that the resulting
// tokens, joined again using 'sep', reproduce the original 'str'.
#define TEST_SPLIT_JOIN(str,sep)                                \
    do {                                                        \
        ConstStrArray cnames;                                   \
        GBT_split_string(cnames, str, sep);                     \
        TEST_EXPECT_STRARRAY_CONTAINS(cnames, sep, str);        \
    } while(0)
209 
210 void TEST_GBT_split_join_names() {
211  { // simple split
213  GBT_split_string(names, "a*b*c", '*');
214  size_t count = names.size();
215 
216  TEST_EXPECT_EQUAL(count, 3U);
217  TEST_EXPECT_EQUAL(names[0], "a");
218  TEST_EXPECT_EQUAL(names[1], "b");
219  TEST_EXPECT_EQUAL(names[2], "c");
220  }
221  { // split string containing empty tokens
223  GBT_split_string(names, "**a**b*c*", '*');
224  size_t count = names.size();
225 
226  TEST_EXPECT_EQUAL(count, 7U);
227  TEST_EXPECT_EQUAL(names[0], "");
228  TEST_EXPECT_EQUAL(names[1], "");
229  TEST_EXPECT_EQUAL(names[2], "a");
230  TEST_EXPECT_EQUAL(names[3], "");
231  TEST_EXPECT_EQUAL(names[4], "b");
232  TEST_EXPECT_EQUAL(names[5], "c");
233  TEST_EXPECT_EQUAL(names[6], "");
234  TEST_EXPECT_NULL(names[7]);
235  }
236 
237  TEST_SPLIT_JOIN("a.b.c", '.');
238  TEST_SPLIT_JOIN("a.b.c", '*');
239 
240  TEST_SPLIT_JOIN("..a.b.c", '.');
241  TEST_SPLIT_JOIN("a.b.c..", '.');
242  TEST_SPLIT_JOIN("a..b..c", '.');
243  TEST_SPLIT_JOIN(".", '.');
244  TEST_SPLIT_JOIN("....", '.');
245 }
246 
247 void TEST_StrArray_index_of() {
249 
250  TEST_EXPECT_EQUAL(names.index_of("a"), -1);
251 
252  GBT_split_string(names, "**a**b*c*", '*');
253 
254  TEST_EXPECT_EQUAL(names.index_of("a"), 2);
255  TEST_EXPECT_EQUAL(names.index_of("b"), 4);
256  TEST_EXPECT_EQUAL(names.index_of("c"), 5);
257  TEST_EXPECT_EQUAL(names.index_of(""), 0);
258  TEST_EXPECT_EQUAL(names.index_of("no"), -1);
259 }
260 
// Alias for TEST_EXPECT_STRARRAY_CONTAINS, used by the tests below to state
// what 'names' is expected to yield when joined using 'sep'.
#define TEST_EXPECT_NAMES_JOIN_TO(names,sep,expected) TEST_EXPECT_STRARRAY_CONTAINS(names,sep,expected)
262 
263 void TEST_StrArray_safe_remove() {
265  GBT_split_string(names, "a*b*c*d*e", '*');
266 
267  TEST_EXPECT_EQUAL(names.size(), 5U);
268  TEST_EXPECT_NAMES_JOIN_TO(names, 0, "abcde"); // test GBT_join_strings w/o separator
269 
270  names.safe_remove(0);
271  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d*e");
272 
273  names.safe_remove(3);
274  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
275 
276  names.safe_remove(3); // index out of range
277  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
278 
279  names.safe_remove(-1); // illegal index
280  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*d");
281 
282  names.safe_remove(1);
283  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*d");
284 }
285 
286 void TEST_StrArray_move() {
288  GBT_split_string(names, "a*b*c*dee", '*');
289 
290  names.move(0, -1); // -1 means last
291  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "b*c*dee*a");
292  names.move(-1, 0);
293  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*dee");
294  names.move(2, 3);
295  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*dee*c");
296  names.move(2, 1);
297  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
298 
299  // test wrap arounds
300  names.move(0, -1);
301  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "dee*b*c*a");
302  names.move(-1, 99999);
303  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*dee*b*c");
304 }
305 
306 void TEST_StrArray_put_before() { // test after TEST_StrArray_move (cause put_before() depends on move())
308  GBT_split_string(names, "a", '*');
309 
310  names.put_before(-1, "b"); // append at end
311  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b");
312 
313  names.put_before(2, "c"); // append at end (using non-existing index)
314  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c");
315 
316  names.put_before(99, "d"); // append at end (using even bigger non-existing index)
317  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*c*d");
318 
319  names.put_before(2, "b2"); // insert inside
320  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a*b*b2*c*d");
321 
322  names.put_before(0, "a0"); // insert at beginning
323  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d");
324 
325  names.put_before(5, "d0"); // insert before last
326  TEST_EXPECT_NAMES_JOIN_TO(names, '*', "a0*a*b*b2*c*d0*d");
327 }
328 TEST_PUBLISH(TEST_StrArray_put_before);
329 
330 #endif // UNIT_TESTS
331 
#define arb_assert(cond)
Definition: arb_assert.h:245
void put(const char *elem)
Definition: arb_strarray.h:199
size_t size() const
Definition: arb_strarray.h:85
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
void resize(int newsize)
Definition: arb_strarray.h:119
int(* CharPtrArray_compare_fun)(const void *p0, const void *p1, void *client_data)
Definition: arb_strarray.h:29
void GB_sort(void **array, size_t first, size_t behind_last, gb_compare_function compare, void *client_data)
Definition: arb_sort.cxx:27
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
void uniq(CharPtrArray_compare_fun compare, void *client_data)
void set_memblock(char *block)
Definition: arb_strarray.h:188
bool empty() const
Definition: arb_strarray.h:86
void GBT_split_string(ConstStrArray &dest, const char *namelist, const char *separator, bool dropEmptyTokens)
Definition: arb_strarray.h:232
static FullNameMap names
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1502
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
#define TEST_EXPECT(cond)
Definition: test_unit.h:1313
void swap(int i1, int i2)
Definition: arb_strarray.h:94
int index_of(const char *search_for) const
#define TEST_REJECT(cond)
Definition: test_unit.h:1315
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
Definition: arb_strbuf.cxx:108
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, bool dropEmptyTokens)
void safe_remove(int i)
Definition: arb_strarray.h:114
void put_before(int insert_before, const char *elem)
Definition: arb_strarray.h:208
void GBS_chrcat(GBS_strstruct *strstr, char ch)
Definition: arb_strbuf.cxx:119
char * GBT_join_strings(const CharPtrArray &strings, char separator)
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1307
void move(int from, int to)
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
void sort(CharPtrArray_compare_fun compare, void *client_data)
#define NULp
Definition: cxxforward.h:114
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
static Score ** U
Definition: align.cxx:67
GB_write_int const char s
Definition: AW_awar.cxx:154