ARB
adfile.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adfile.cxx //
4 // Purpose : various IO related functions //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <dirent.h>
12 #include <unistd.h>
13 #include <sys/stat.h>
14 
15 #include <arb_sort.h>
16 #include <arb_str.h>
17 #include <arb_strarray.h>
18 #include <arb_file.h>
19 #include <arb_pathlen.h>
20 
21 #include "gb_local.h"
22 #include "gb_load.h"
23 
25  // get the current working directory
26  // (directory from which application has been started)
28 }
29 
30 GB_ERROR gb_scan_directory(char *basename, gb_scandir *sd) {
31  // goes to header: __ATTR__USERESULT_TODO
32  // look for quick saves (basename = yyy/xxx no arb ending !!!!)
33  char *path = ARB_strdup(basename);
34  const char *fulldir = ".";
35  char *file = strrchr(path, '/');
36  DIR *dirp;
37  int curindex;
38  char *suffix;
39  dirent *dp;
40  struct stat st;
41  const char *oldstyle = ".arb.quick";
42  char buffer[ARB_PATH_MAX];
43  int oldstylelen = strlen(oldstyle);
44  int filelen;
45 
46  if (file) {
47  *(file++) = 0;
48  fulldir = path;
49  }
50  else {
51  file = path;
52  }
53 
54  memset((char*)sd, 0, sizeof(*sd));
55  sd->type = GB_SCAN_NO_QUICK;
56  sd->highest_quick_index = -1;
57  sd->newest_quick_index = -1;
58  sd->date_of_quick_file = 0;
59 
60  dirp = opendir(fulldir);
61  if (!dirp) {
62  GB_ERROR error = GBS_global_string("Directory %s of file %s.arb not readable", fulldir, file);
63  free(path);
64  return error;
65  }
66  filelen = strlen(file);
67  for (dp = readdir(dirp); dp; dp = readdir(dirp)) {
68  if (strncmp(dp->d_name, file, filelen)) continue;
69  suffix = dp->d_name + filelen;
70  if (suffix[0] != '.') continue;
71  if (!strncmp(suffix, oldstyle, oldstylelen)) {
72  if (sd->type == GB_SCAN_NEW_QUICK) {
73  printf("Warning: Found new and old changes files, using new\n");
74  continue;
75  }
76  sd->type = GB_SCAN_OLD_QUICK;
77  curindex = atoi(suffix+oldstylelen);
78  goto check_time_and_date;
79  }
80  if (strlen(suffix) == 4 &&
81  suffix[0] == '.' &&
82  suffix[1] == 'a' &&
83  isdigit(suffix[2]) &&
84  isdigit(suffix[3])) {
85  if (sd->type == GB_SCAN_OLD_QUICK) {
86  printf("Warning: Found new and old changes files, using new\n");
87  }
88  sd->type = GB_SCAN_NEW_QUICK;
89  curindex = atoi(suffix+2);
90  goto check_time_and_date;
91  }
92  continue;
93  check_time_and_date :
94  if (curindex > sd->highest_quick_index) sd->highest_quick_index = curindex;
95  sprintf(buffer, "%s/%s", fulldir, dp->d_name);
96  stat(buffer, &st);
97  if ((unsigned long)st.st_mtime > sd->date_of_quick_file) {
98  sd->date_of_quick_file = st.st_mtime;
99  sd->newest_quick_index = curindex;
100  }
101  continue;
102  }
103  closedir(dirp);
104  free(path);
105  return NULp;
106 }
107 
108 
109 char *GB_find_all_files(const char *dir, const char *mask, bool filename_only) {
110  /* Returns a string containing the filenames of all files matching mask.
111  The single filenames are separated by '*'.
112  if 'filename_only' is true -> string contains only filenames w/o path
113 
114  returns 0 if no files found (or directory not found).
115  in this case an error may be exported
116 
117  'mask' may contain wildcards (*?) or
118  it may be a regular expression ('/regexp/')
119  */
120 
121  DIR *dirp;
122  struct stat st;
123  char *result = NULp;
124 
125  dirp = opendir(dir);
126  if (dirp) {
128  if (matcher) {
129  for (dirent *dp = readdir(dirp); dp; dp = readdir(dirp)) {
130  if (GBS_string_matches_regexp(dp->d_name, matcher)) {
131  char buffer[ARB_PATH_MAX];
132  sprintf(buffer, "%s/%s", dir, dp->d_name);
133  if (stat(buffer, &st) == 0 && S_ISREG(st.st_mode)) { // regular file ?
134  if (filename_only) strcpy(buffer, dp->d_name);
135  if (result) {
136  freeset(result, GBS_global_string_copy("%s*%s", result, buffer));
137  }
138  else {
139  result = ARB_strdup(buffer);
140  }
141  }
142  }
143  }
144  GBS_free_matcher(matcher);
145  }
146  closedir(dirp);
147  }
148 
149  return result;
150 }
151 
152 char *GB_find_latest_file(const char *dir, const char *mask) {
153  /* returns the name of the newest file in dir 'dir' matching 'mask'
154  * or NULp (in this case an error may be exported)
155  *
156  * 'mask' may contain wildcards (*?) or
157  * it may be a regular expression ('/regexp/')
158  */
159 
160  DIR *dirp;
161  struct stat st;
162  char *result = NULp;
163 
164  dirp = opendir(dir);
165  if (dirp) {
167  if (matcher) {
168  GB_ULONG newest = 0;
169  for (dirent *dp = readdir(dirp); dp; dp = readdir(dirp)) {
170  if (GBS_string_matches_regexp(dp->d_name, matcher)) {
171  char buffer[ARB_PATH_MAX];
172  sprintf(buffer, "%s/%s", dir, dp->d_name);
173  if (stat(buffer, &st) == 0 &&
174  S_ISREG(st.st_mode) &&
175  (GB_ULONG)st.st_mtime > newest) {
176  newest = st.st_mtime;
177  freedup(result, dp->d_name);
178  }
179  }
180  }
181  GBS_free_matcher(matcher);
182  }
183  closedir(dirp);
184  }
185  return result;
186 }
187 
188 static const char *GB_existing_file(const char *file, bool warn_when_not_found) {
189  // return 'file' if it's an existing readable file
190  // return NULp otherwise
191 
192  gb_assert(file);
193  if (GB_is_readablefile(file)) return file;
194  if (warn_when_not_found) GB_warningf("Could not find '%s'", file);
195  return NULp;
196 }
197 
198 char *GB_lib_file(bool warn_when_not_found, const char *libprefix, const char *filename) {
199  // Search a file in '$ARBHOME/lib/libprefix'
200  // Return NULp if not found
201  return nulldup(GB_existing_file(GB_path_in_ARBLIB(libprefix, filename), warn_when_not_found));
202 }
203 
204 char *GB_property_file(bool warn_when_not_found, const char *filename) {
205  // Search a file in '$ARB_PROP' or its default in '$ARBHOME/lib/arb_default'
206  // Return NULp if neither found
207 
208  char *result = nulldup(GB_existing_file(GB_path_in_arbprop(filename), warn_when_not_found));
209  if (!result) result = GB_lib_file(warn_when_not_found, "arb_default", filename);
210  return result;
211 }
212 
213 void GBS_read_dir(StrArray& names, const char *dir, const char *mask) {
214  /* Return full pathnames of files in directory 'dir'.
215  *
216  * Filter through 'mask':
217  * - mask == NULp -> return all files
218  * - in format '/expr/' -> use regular expression (case sensitive)
219  * - else it does a simple string match with wildcards ("?*")
220  *
221  * Result are inserted into 'names' and 'names' is sorted alphanumerically.
222  * Note: 'names' are not cleared, so several calls with the same StrArray get collected.
223  *
224  * In case of error, 'names' is empty and error is exported.
225  *
226  * Special case: If 'dir' is the name of a file, return an array with file as only element
227  */
228 
229  gb_assert(dir); // missing dir was allowed before 12/2008, forbidden now!
230 
231  if (dir[0]) {
232  char *fulldir = ARB_strdup(GB_canonical_path(dir));
233  DIR *dirstream = opendir(fulldir);
234 
235  if (!dirstream) {
236  if (GB_is_readablefile(fulldir)) { // fixed: returned true for directories before (4/2012)
237  names.put(fulldir); // transfer ownership
238  fulldir = NULp;
239  }
240  else {
241  // fulldir is neither directory nor file -> automatically accept wildcards
242 
243  char *lslash = strrchr(fulldir, '/');
244  if (lslash) {
245  lslash[0] = 0;
246  char *name = lslash+1;
247  if (GB_is_directory(fulldir)) {
248  for (int smartMethod = 1; smartMethod<=2 && names.empty(); ++smartMethod) { // stop getting too smart as soon as some files matched
249  if (smartMethod == 1) {
250  GBS_read_dir(names, fulldir, name); // does auto-accept wildcards (if they were appended to path)
251  }
252  else if (smartMethod == 2) {
253  char *namePlusMask = GBS_global_string_copy("%s%s", name, mask); // same as 1, but concat name+mask
254  GBS_read_dir(names, fulldir, namePlusMask);
255  free(namePlusMask);
256  }
257  if (GB_have_error()) { // trying to be smart did fail ->
258  GB_clear_error(); // ignore all errors
259  names.clear(); // forget results
260  }
261  }
262  }
263  else {
264  GB_export_errorf("can't read directory '%s'", fulldir);
265  }
266  lslash[0] = '/';
267  }
268  }
269  }
270  else {
271  if (!mask) mask = "*";
272 
274  if (matcher) {
275  dirent *entry;
276  while ((entry = readdir(dirstream))) {
277  const char *name = entry->d_name;
278 
279  if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0))) {
280  ; // skip '.' and '..'
281  }
282  else {
283  if (GBS_string_matches_regexp(name, matcher)) {
284  const char *full = GB_concat_path(fulldir, name);
285  if (!GB_is_directory(full)) { // skip directories
286  names.put(ARB_strdup(full));
287  }
288  }
289  }
290  }
291 
292  names.sort(GB_string_comparator, NULp);
293 
294  GBS_free_matcher(matcher);
295  }
296 
297  closedir(dirstream);
298  }
299 
300  free(fulldir);
301  }
302 }
303 
304 const char *GB_get_arb_revision_tag() {
308  static SmartCharPtr tag;
309 
310  if (tag.isNull()) {
311  char *name = GB_lib_file(true, NULp, "revision_info.txt");
312  tag = GB_read_file(name);
313 
314  char *nl = strchr(&*tag, '\n');
315  if (nl) *nl = 0;
316 
317  if (tag.isNull()) tag = GBS_global_string_copy("<failed to read %s>", name);
318  free(name);
319  }
320  return &*tag;
321 }
322 
323 
324 // --------------------------------------------------------------------------------
325 
326 #ifdef UNIT_TESTS
327 #include <test_unit.h>
328 
329 static char *remove_path(const char *fullname, void *cl_path) {
330  const char *path = (const char *)cl_path;
331  return ARB_strdup(fullname+(ARB_strBeginsWith(fullname, path) ? strlen(path) : 0));
332 }
333 
334 static void GBT_transform_names(StrArray& dest, const StrArray& source, char *transform(const char *, void *), void *client_data) {
335  for (int i = 0; source[i]; ++i) dest.put(transform(source[i], client_data));
336 }
337 
// Reads 'fulldir' via GBS_read_dir (filtered by 'mask'), strips the 'fulldir' prefix
// from each result, joins the remaining names with '!' and compares against 'expected'.
// Macro arguments are parenthesized wherever they are used inside an expression;
// 'expected' is passed on untouched to TEST_EXPECT_EQUAL.
#define TEST_JOINED_FULLDIR_CONTENT_EQUALS(fulldir,mask,expected) do {                  \
        StrArray contents;                                                              \
        GBS_read_dir(contents, (fulldir), (mask));                                      \
        StrArray contents_no_path;                                                      \
        GBT_transform_names(contents_no_path, contents, remove_path, (void*)(fulldir)); \
        char *joined = GBT_join_strings(contents_no_path, '!');                         \
        TEST_EXPECT_EQUAL(joined, expected);                                            \
        free(joined);                                                                   \
    } while(0)

// Same as TEST_JOINED_FULLDIR_CONTENT_EQUALS, but 'subdir' is given relative to $ARBHOME.
// The path is copied before use and the copy is freed afterwards.
#define TEST_JOINED_DIR_CONTENT_EQUALS(subdir,mask,expected) do {       \
        char *fulldir = ARB_strdup(GB_path_in_ARBHOME(subdir));         \
        TEST_JOINED_FULLDIR_CONTENT_EQUALS(fulldir,mask,expected);      \
        free(fulldir);                                                  \
    } while(0)
353 
354 void TEST_GBS_read_dir() {
355  TEST_EXPECT(GBS_string_matches("util.c", "????*.c", GB_MIND_CASE));
356  TEST_REJECT(GBS_string_matches("util.c", "?????*.c", GB_MIND_CASE));
357 
358  TEST_JOINED_DIR_CONTENT_EQUALS("GDE/CLUSTAL", "*.c", "/amenu.c!/clustalv.c!/gcgcheck.c!/myers.c!/sequence.c!/showpair.c!/trees.c!/upgma.c!/util.c");
359  TEST_JOINED_DIR_CONTENT_EQUALS("GDE/CLUSTAL", "?????*.c", "/amenu.c!/clustalv.c!/gcgcheck.c!/myers.c!/sequence.c!/showpair.c!/trees.c!/upgma.c");
360  TEST_JOINED_DIR_CONTENT_EQUALS("GDE/CLUSTAL", "??????*.c", "/clustalv.c!/gcgcheck.c" "!/sequence.c!/showpair.c");
361  TEST_JOINED_DIR_CONTENT_EQUALS("GDE/CLUSTAL", "/s.*\\.c/", "/clustalv.c!/myers.c!/sequence.c!/showpair.c!/trees.c");
362 
363  // test a dir containing subdirectories
364  TEST_JOINED_DIR_CONTENT_EQUALS("SL", NULp, "/Makefile!/README");
365  TEST_JOINED_DIR_CONTENT_EQUALS("SL", "*", "/Makefile!/README");
366 
367  TEST_JOINED_FULLDIR_CONTENT_EQUALS("", "", ""); // allow GBS_read_dir to be called with "" -> returns empty filelist
368 }
369 
370 void TEST_find_file() {
371  gb_getenv_hook old = GB_install_getenv_hook(arb_test::fakeenv);
372 
373  TEST_EXPECT_EQUAL(GB_existing_file("min_ascii.arb", false), "min_ascii.arb");
374  TEST_EXPECT_NULL(GB_existing_file("nosuchfile", false));
375 
376  char *tcporg = GB_lib_file(false, "", "arb_tcp_org.dat");
377  TEST_EXPECT_EQUAL(tcporg, GB_path_in_ARBHOME("lib/arb_tcp_org.dat"));
378  TEST_EXPECT_NULL(GB_lib_file(true, "bla", "blub"));
379  free(tcporg);
380 
381  char *status = GB_property_file(false, "status.arb");
382  TEST_EXPECT_EQUAL(status, GB_path_in_ARBHOME("lib/arb_default/status.arb"));
383  TEST_EXPECT_NULL(GB_property_file(true, "undhepp"));
384  free(status);
385 
386  TEST_EXPECT_EQUAL((void*)arb_test::fakeenv, (void*)GB_install_getenv_hook(old));
387 }
388 TEST_PUBLISH(TEST_find_file);
389 
390 #endif // UNIT_TESTS
const char * GB_ERROR
Definition: arb_core.h:25
GBS_string_matcher * GBS_compile_matcher(const char *search_expr, GB_CASE case_flag)
Definition: admatch.cxx:41
string result
#define ARB_PATH_MAX
Definition: arb_pathlen.h:31
GB_CSTR GB_path_in_arbprop(const char *relative_path)
Definition: adsocket.cxx:1109
char * GB_lib_file(bool warn_when_not_found, const char *libprefix, const char *filename)
Definition: adfile.cxx:198
unsigned long date_of_quick_file
Definition: gb_load.h:27
char * GB_find_all_files(const char *dir, const char *mask, bool filename_only)
Definition: adfile.cxx:109
void GBS_free_matcher(GBS_string_matcher *matcher)
Definition: admatch.cxx:94
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
bool GB_have_error()
Definition: arb_msg.cxx:349
char * GB_property_file(bool warn_when_not_found, const char *filename)
Definition: adfile.cxx:204
NOT4PERL gb_getenv_hook GB_install_getenv_hook(gb_getenv_hook hook)
Definition: adsocket.cxx:646
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
GB_CSTR GB_canonical_path(const char *anypath)
Definition: adsocket.cxx:947
unsigned long GB_ULONG
Definition: arbdb_base.h:42
int GB_string_comparator(const void *v0, const void *v1, void *)
Definition: arb_sort.cxx:47
static FullNameMap names
#define TEST_PUBLISH(testfunction)
Definition: test_unit.h:1484
const char * GB_get_arb_revision_tag()
Definition: adfile.cxx:304
#define TEST_EXPECT(cond)
Definition: test_unit.h:1312
void GB_warningf(const char *templat,...)
Definition: arb_msg.cxx:490
void GB_clear_error()
Definition: arb_msg.cxx:365
int highest_quick_index
Definition: gb_load.h:25
gb_scan_quicks_types type
Definition: gb_load.h:28
#define TEST_REJECT(cond)
Definition: test_unit.h:1314
static void error(const char *msg)
Definition: mkptypes.cxx:96
GB_ERROR gb_scan_directory(char *basename, gb_scandir *sd)
Definition: adfile.cxx:30
void GBS_read_dir(StrArray &names, const char *dir, const char *mask)
Definition: adfile.cxx:213
GB_CSTR GB_path_in_ARBHOME(const char *relative_path)
Definition: adsocket.cxx:1100
#define RETURN_ONETIME_ALLOC(allocated)
Definition: smartptr.h:315
const char *(* gb_getenv_hook)(const char *varname)
Definition: arbdb.h:138
GB_CSTR GB_path_in_ARBLIB(const char *relative_path)
Definition: adsocket.cxx:1103
GB_CSTR GB_getcwd()
Definition: adfile.cxx:24
static BasicStatus status
void nl()
Definition: test_unit.h:404
char * GB_find_latest_file(const char *dir, const char *mask)
Definition: adfile.cxx:152
int newest_quick_index
Definition: gb_load.h:26
GB_CSTR GB_concat_path(GB_CSTR anypath_left, GB_CSTR anypath_right)
Definition: adsocket.cxx:1016
GB_ERROR GB_export_errorf(const char *templat,...)
Definition: arb_msg.cxx:264
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1307
#define gb_assert(cond)
Definition: arbdbt.h:11
bool GB_is_directory(const char *path)
Definition: arb_file.cxx:176
#define NULp
Definition: cxxforward.h:97
bool GB_is_readablefile(const char *filename)
Definition: arb_file.cxx:172
bool ARB_strBeginsWith(const char *str, const char *with)
Definition: arb_str.h:42
bool GBS_string_matches_regexp(const char *str, const GBS_string_matcher *expr)
Definition: admatch.cxx:269
static const char * GB_existing_file(const char *file, bool warn_when_not_found)
Definition: adfile.cxx:188
bool GBS_string_matches(const char *str, const char *expr, GB_CASE case_sens)
Definition: admatch.cxx:193
const char * GB_CSTR
Definition: arbdb_base.h:25
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
char * GB_read_file(const char *path)
Definition: adsocket.cxx:287