ARB
arb_zfile.cxx
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : arb_zfile.cxx //
4 // Purpose : Compressed file I/O //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in November 2015 //
7 // http://www.arb-home.de/ //
8 // //
9 // ================================================================ //
10 
11 #include "arb_zfile.h"
12 #include "arb_file.h"
13 #include "arb_msg.h"
14 #include "arb_misc.h"
15 #include "arb_string.h"
16 
17 #include <string>
18 #include <map>
19 
20 using namespace std;
21 
// Record stored for each successfully opened file.
// It keeps what is needed to produce a proper error message on close.
class zinfo {
    bool        writing;  // true -> stream was opened for writing; false -> reading
    std::string filename; // file name as passed to the constructor
    std::string pipe_cmd; // command used for the pipe (empty if none was given)

public:
    // Default record (required by std::map::operator[]).
    // 'writing' is initialized explicitly, so copying a default-constructed
    // record never reads an indeterminate value.
    zinfo() : writing(false) {}

    zinfo(bool writing_, const char *filename_, const char *pipe_cmd_)
        : writing(writing_),
          filename(filename_),
          pipe_cmd(pipe_cmd_)
    {}

    bool isOutputPipe() const { return writing; }
    const char *get_filename() const { return filename.c_str(); }
    const char *get_pipecmd() const { return pipe_cmd.c_str(); }
};

// Maps every open stream to its record.
static std::map<FILE*, zinfo> zfile_info;
41 
42 FILE *ARB_zfopen(const char *name, const char *mode, FileCompressionMode cmode, GB_ERROR& error, bool hideStderr) {
43  arb_assert(!error);
44 
45  if (strchr(mode, 'a')) {
46  error = "Cannot append to file using ARB_zfopen";
47  return NULp;
48  }
49  if (strchr(mode, 't')) {
50  error = "Cannot use textmode for ARB_zfopen";
51  return NULp;
52  }
53  if (strchr(mode, '+')) {
54  error = "Cannot open file in read and write mode with ARB_zfopen";
55  return NULp;
56  }
57 
58  bool forOutput = strchr(mode, 'w');
59  FILE *fp = NULp;
60 
61  if (cmode == ZFILE_AUTODETECT) {
62  if (forOutput) {
63  error = "Autodetecting compression mode only works for input files";
64  }
65  else {
66  fp = fopen(name, "rb");
67  if (!fp) error = GB_IO_error("opening", name);
68  else {
69  // detect compression and set 'cmode'
70  const size_t MAGICSIZE = 5;
71  char buffer[MAGICSIZE];
72 
73  size_t bytes_read = fread(buffer, 1, MAGICSIZE, fp);
74  fclose(fp);
75  fp = NULp;
76 
77  if (bytes_read>=2 && strncmp(buffer, "\x1f\x8b", 2) == 0) cmode = ZFILE_GZIP;
78  else if (bytes_read>=2 && strncmp(buffer, "BZ", 2) == 0) cmode = ZFILE_BZIP2;
79  else if (bytes_read>=5 && strncmp(buffer, "\xfd" "7zXZ", 5) == 0) cmode = ZFILE_XZ;
80  else {
81  cmode = ZFILE_UNCOMPRESSED;
82  }
83  }
84  }
85  }
86 
87  if (cmode == ZFILE_UNCOMPRESSED) {
88  fp = fopen(name, mode);
89  if (!fp) error = GB_IO_error("opening", name);
90  else {
91  zfile_info[fp] = zinfo(forOutput, name, "");
92  }
93  }
94  else {
95  if (!error) {
96  const char *compressor = NULp; // command used to compress (and decompress)
97  const char *decompress_flag = "-d"; // flag needed to decompress (assumes none to compress)
98 
99  switch (cmode) {
100  case ZFILE_GZIP: {
101  static char *pigz = ARB_executable("pigz", ARB_getenv_ignore_empty("PATH"));
102  compressor = pigz ? pigz : "gzip";
103  break;
104  }
105  case ZFILE_BZIP2: compressor = "bzip2"; break;
106  case ZFILE_XZ: compressor = "xz"; break;
107 
108  default:
109  error = GBS_global_string("Invalid compression mode (%i)", int(cmode));
110  break;
111 
112 #if defined(USE_BROKEN_COMPRESSION)
113  case ZFILE_BROKEN:
114  compressor = "arb_weirdo"; // a non-existing command!
115  break;
116 #endif
117  }
118 
119  if (!error) {
120  char *pipeCmd = forOutput
121  ? GBS_global_string_copy("%s > %s", compressor, name)
122  : GBS_global_string_copy("%s %s < %s", compressor, decompress_flag, name);
123 
124  if (hideStderr) {
125  freeset(pipeCmd, GBS_global_string_copy("( %s 2>/dev/null )", pipeCmd));
126  }
127 
128  // remove 'b' from mode (pipes are binary by default)
129  char *impl_b_mode = ARB_strdup(mode);
130  while (1) {
131  char *b = strchr(impl_b_mode, 'b');
132  if (!b) break;
133  strcpy(b, b+1);
134  }
135 
136  if (forOutput) { // write to pipe
137  fp = popen(pipeCmd, impl_b_mode);
138  if (!fp) error = GB_IO_error("writing to pipe", pipeCmd);
139  }
140  else { // read from pipe
141  fp = popen(pipeCmd, impl_b_mode);
142  if (!fp) error = GB_IO_error("reading from pipe", pipeCmd);
143  }
144 
145  if (!error) {
146  zfile_info[fp] = zinfo(forOutput, name, pipeCmd);
147  }
148 
149  free(impl_b_mode);
150  free(pipeCmd);
151  }
152  }
153  }
154 
155  arb_assert(contradicted(fp, error));
156  arb_assert(implicated(error, error[0])); // deny empty error
157  return fp;
158 }
159 
161  bool fifo = GB_is_fifo(fp);
162 
163  arb_assert(zfile_info.find(fp) != zfile_info.end()); // file was not opened using ARB_zfopen!
164 
165  zinfo info = zfile_info[fp];
166  zfile_info.erase(fp);
167 
168  int res;
169  if (fifo) {
170  res = pclose(fp);
171  }
172  else {
173  res = fclose(fp);
174  }
175 
176  GB_ERROR error = NULp;
177  if (res != 0) {
178  int exited = WIFEXITED(res);
179  int status = WEXITSTATUS(res);
180 #if defined(DEBUG)
181  int signaled = WIFSIGNALED(res);
182 #endif
183 
184  if (exited) {
185  if (status) {
186  if (fifo) {
187  error = GBS_global_string("pipe %s\n"
188  " file='%s'\n"
189  " using cmd='%s'\n"
190  " failed with exitcode=%i (broken pipe? corrupted archive?)\n",
191  info.isOutputPipe() ? "writing to" : "reading from",
192  info.get_filename(),
193  info.get_pipecmd(),
194  status);
195  }
196  }
197  }
198  if (!error) error = GB_IO_error("closing", info.get_filename());
199 #if defined(DEBUG)
200  error = GBS_global_string("%s (res=%i, exited=%i, signaled=%i, status=%i)", error, res, exited, signaled, status);
201 #endif
202  }
203  return error;
204 }
205 
206 // --------------------------------------------------------------------------------
207 
208 #ifdef UNIT_TESTS
209 #ifndef TEST_UNIT_H
210 #include <test_unit.h>
211 #endif
212 
213 static char *fileContent(FILE *in, size_t& bytes_read) {
214  const size_t BUFFERSIZE = 1000;
215  char *buffer = ARB_alloc<char>(BUFFERSIZE+1);
216  bytes_read = fread(buffer, 1, BUFFERSIZE, in);
217  arb_assert(bytes_read<BUFFERSIZE);
218  buffer[bytes_read] = 0;
219  return buffer;
220 }
221 
/* Expects that opening 'name' via ARB_zfopen (or, if opening succeeds,
 * the subsequent ARB_zfclose) reports an error containing 'errpart'.
 */
#define TEST_EXPECT_ZFOPEN_FAILS(name,mode,cmode,errpart) do{           \
        GB_ERROR  error = NULp;                                         \
        FILE     *fp    = ARB_zfopen(name, mode, cmode, error, false);  \
                                                                        \
        if (!fp) {                                                      \
            TEST_EXPECT_NULL(fp);                                       \
        }                                                               \
        else {                                                          \
            TEST_EXPECT_NULL(error);                                    \
            error = ARB_zfclose(fp);                                    \
        }                                                               \
        TEST_REJECT_NULL(error);                                        \
        TEST_EXPECT_CONTAINS(error, errpart);                           \
    }while(0)
236 
237 void TEST_compressed_io() {
238  const char *inText = "general/text.input";
239  const char *outFile = "compressed.out";
240 
241  TEST_EXPECT_ZFOPEN_FAILS("", "", ZFILE_UNCOMPRESSED, "Invalid argument");
242  TEST_EXPECT_ZFOPEN_FAILS(outFile, "a", ZFILE_UNCOMPRESSED, "Cannot append to file using ARB_zfopen");
243  TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_UNDEFINED, "Invalid compression mode");
244  TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_AUTODETECT, "only works for input files");
245  TEST_EXPECT_ZFOPEN_FAILS(outFile, "rt", ZFILE_AUTODETECT, "Cannot use textmode");
246  TEST_EXPECT_ZFOPEN_FAILS(outFile, "r+", ZFILE_AUTODETECT, "Cannot open file in read and write mode");
247 
248 #if defined(USE_BROKEN_COMPRESSION)
249  TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_BROKEN, "broken pipe");
250  TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_BROKEN, "broken pipe");
251 #endif
252 
253  char *testText;
254  const size_t TEST_TEXT_SIZE = 428;
255  {
256  GB_ERROR error = NULp;
257  FILE *in = ARB_zfopen(inText, "r", ZFILE_UNCOMPRESSED, error, false);
258  TEST_EXPECT_NULL(error);
259  TEST_REJECT_NULL(in);
260 
261  size_t bytes_read;
262  testText = fileContent(in, bytes_read);
263  TEST_EXPECT_EQUAL(bytes_read, TEST_TEXT_SIZE);
264 
266  }
267 
268  int successful_compressions = 0;
269 
271  cmode != ZFILE_UNDEFINED;
272  cmode = FileCompressionMode(cmode+1))
273  {
274  TEST_ANNOTATE(GBS_global_string("cmode=%i", int(cmode)));
275 
276  bool compressed_save_failed = false;
277  {
278  GB_ERROR error = NULp;
279  FILE *out = ARB_zfopen(outFile, "w", cmode, error, false);
280 
281  TEST_EXPECT_NO_ERROR(error);
282  TEST_REJECT_NULL(out);
283 
284  TEST_EXPECT_DIFFERENT(EOF, fputs(testText, out));
285 
286  error = ARB_zfclose(out);
287  if (error && strstr(error, "failed with exitcode=127") && cmode != ZFILE_UNCOMPRESSED) {
288  // assume compression utility is not installed
289  compressed_save_failed = true;
290  }
291  else {
292  TEST_EXPECT_NO_ERROR(error);
293  }
294  }
295 
296  if (!compressed_save_failed) {
297  for (int detect = 0; detect<=1; ++detect) {
298  TEST_ANNOTATE(GBS_global_string("cmode=%i detect=%i", int(cmode), detect));
299 
300  GB_ERROR error = NULp;
301  FILE *in = ARB_zfopen(outFile, "r", detect ? ZFILE_AUTODETECT : cmode, error, false);
302 
303  TEST_REJECT(error);
304  TEST_REJECT_NULL(in);
305 
306  size_t bytes_read;
307  char *content = fileContent(in, bytes_read);
309  TEST_EXPECT_EQUAL(content, testText); // if this fails for detect==1 -> detection does not work
310  free(content);
311  }
312  successful_compressions++;
313  }
314  }
315 
316  TEST_EXPECT(successful_compressions>=3); // at least ZFILE_UNCOMPRESSED, ZFILE_GZIP and ZFILE_BZIP should succeed
317 
318  free(testText);
319  TEST_EXPECT_DIFFERENT(GB_unlink(outFile), -1);
320 }
321 
322 #endif // UNIT_TESTS
323 
324 // --------------------------------------------------------------------------------
325 
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
FileCompressionMode
Definition: arb_zfile.h:22
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
bool isOutputPipe() const
Definition: arb_zfile.cxx:36
const char * ARB_getenv_ignore_empty(const char *envvar)
Definition: arb_misc.cxx:102
GB_ERROR GB_IO_error(const char *action, const char *filename)
Definition: arb_msg.cxx:293
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
STL namespace.
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
FILE * ARB_zfopen(const char *name, const char *mode, FileCompressionMode cmode, GB_ERROR &error, bool hideStderr)
Definition: arb_zfile.cxx:42
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
char * ARB_executable(const char *exe_name, const char *path)
Definition: arb_misc.cxx:107
#define TEST_EXPECT(cond)
Definition: test_unit.h:1312
zinfo(bool writing_, const char *filename_, const char *pipe_cmd_)
Definition: arb_zfile.cxx:30
#define TEST_REJECT(cond)
Definition: test_unit.h:1314
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1309
static void error(const char *msg)
Definition: mkptypes.cxx:96
zinfo()
Definition: arb_zfile.cxx:29
bool GB_is_fifo(const char *path)
Definition: arb_file.cxx:89
const char * get_filename() const
Definition: arb_zfile.cxx:37
static map< FILE *, zinfo > zfile_info
Definition: arb_zfile.cxx:40
static BasicStatus status
fputs(TRACE_PREFIX, stderr)
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1307
GB_ERROR ARB_zfclose(FILE *fp)
Definition: arb_zfile.cxx:160
const size_t BUFFERSIZE
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1107
#define NULp
Definition: cxxforward.h:97
#define TEST_EXPECT_DIFFERENT(expr, want)
Definition: test_unit.h:1290
const char * get_pipecmd() const
Definition: arb_zfile.cxx:38
static int info[maxsites+1]
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1283
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195