ARB
arb_zfile.cxx
Go to the documentation of this file.
1 // ================================================================ //
2 // //
3 // File : arb_zfile.cxx //
4 // Purpose : Compressed file I/O //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in November 2015 //
7 // http://www.arb-home.de/ //
8 // //
9 // ================================================================ //
10 
11 #include "arb_zfile.h"
12 #include "arb_file.h"
13 #include "arb_msg.h"
14 #include "arb_misc.h"
15 #include "arb_string.h"
16 
17 #include <string>
18 #include <map>
19 
20 using namespace std;
21 
// Bookkeeping record stored for each successfully opened file.
// It is used to produce a proper error message when the file gets closed.
class zinfo {
    bool   writing;  // true -> opened for output, false -> opened for reading
    string filename; // name of the opened file
    string pipe_cmd; // command used for the pipe (empty if the file was opened directly)
public:
    // The default ctor is needed by map::operator[].
    // 'writing' is initialized explicitly, so a default-constructed record
    // never carries an indeterminate value when it gets copied or queried.
    zinfo() : writing(false) {}

    zinfo(bool writing_, const char *filename_, const char *pipe_cmd_)
        : writing(writing_),
          filename(filename_),
          pipe_cmd(pipe_cmd_)
    {}

    bool isOutputPipe() const { return writing; }
    const char *get_filename() const { return filename.c_str(); }
    const char *get_pipecmd() const { return pipe_cmd.c_str(); }
};

// maps each stream registered by ARB_zfopen to its bookkeeping record
static map<FILE*,zinfo> zfile_info;
41 
42 FILE *ARB_zfopen(const char *name, const char *mode, FileCompressionMode cmode, GB_ERROR& error, bool hideStderr) {
43  arb_assert(!error);
44 
45  if (strchr(mode, 'a')) {
46  error = "Cannot append to file using ARB_zfopen";
47  return NULp;
48  }
49  if (strchr(mode, 't')) {
50  error = "Cannot use textmode for ARB_zfopen";
51  return NULp;
52  }
53  if (strchr(mode, '+')) {
54  error = "Cannot open file in read and write mode with ARB_zfopen";
55  return NULp;
56  }
57 
58  bool forOutput = strchr(mode, 'w');
59  FILE *fp = NULp;
60 
61  if (cmode == ZFILE_AUTODETECT) {
62  if (forOutput) {
63  error = "Autodetecting compression mode only works for input files";
64  }
65  else {
66  fp = fopen(name, "rb");
67  if (!fp) error = GB_IO_error("opening", name);
68  else {
69  // detect compression and set 'cmode'
70  const size_t MAGICSIZE = 5;
71  char buffer[MAGICSIZE];
72 
73  size_t bytes_read = fread(buffer, 1, MAGICSIZE, fp);
74  fclose(fp);
75  fp = NULp;
76 
77  if (bytes_read>=2 && strncmp(buffer, "\x1f\x8b", 2) == 0) cmode = ZFILE_GZIP;
78  else if (bytes_read>=2 && strncmp(buffer, "BZ", 2) == 0) cmode = ZFILE_BZIP2;
79  else if (bytes_read>=5 && strncmp(buffer, "\xfd" "7zXZ", 5) == 0) cmode = ZFILE_XZ;
80  else {
81  cmode = ZFILE_UNCOMPRESSED;
82  }
83  }
84  }
85  }
86 
87  if (cmode == ZFILE_UNCOMPRESSED) {
88  fp = fopen(name, mode);
89  if (!fp) error = GB_IO_error("opening", name);
90  else {
91  zfile_info[fp] = zinfo(forOutput, name, "");
92  }
93  }
94  else {
95  if (!error) {
96  const char *compressor = NULp; // command used to compress (and decompress)
97  const char *decompress_flag = "-d"; // flag needed to decompress (assumes none to compress)
98 
99  switch (cmode) {
100  case ZFILE_GZIP: {
101  static char *pigz = ARB_executable("pigz", ARB_getenv_ignore_empty("PATH"));
102  if (pigz) {
103  ARB_warn_about_unwanted_chars(pigz, "the pigz executable (detected as gzip dropin)");
104  compressor = pigz;
105  }
106  else {
107  compressor = "gzip";
108  }
109  break;
110  }
111  case ZFILE_BZIP2: compressor = "bzip2"; break;
112  case ZFILE_XZ: compressor = "xz"; break;
113 
114  default:
115  error = GBS_global_string("Invalid compression mode (%i)", int(cmode));
116  break;
117 
118 #if defined(USE_BROKEN_COMPRESSION)
119  case ZFILE_BROKEN:
120  compressor = "arb_weirdo"; // a non-existing command!
121  break;
122 #endif
123  }
124 
125  if (!error) {
126  char *pipeCmd = forOutput
127  ? GBS_global_string_copy("%s > %s", compressor, name)
128  : GBS_global_string_copy("%s %s < %s", compressor, decompress_flag, name);
129 
130  if (hideStderr) {
131  freeset(pipeCmd, GBS_global_string_copy("( %s 2>/dev/null )", pipeCmd));
132  }
133 
134  // remove 'b' from mode (pipes are binary by default)
135  char *impl_b_mode = ARB_strdup(mode);
136  while (1) {
137  char *b = strchr(impl_b_mode, 'b');
138  if (!b) break;
139  strcpy(b, b+1);
140  }
141 
142  if (forOutput) { // write to pipe
143  fp = popen(pipeCmd, impl_b_mode);
144  if (!fp) error = GB_IO_error("writing to pipe", pipeCmd);
145  }
146  else { // read from pipe
147  fp = popen(pipeCmd, impl_b_mode);
148  if (!fp) error = GB_IO_error("reading from pipe", pipeCmd);
149  }
150 
151  if (!error) {
152  zfile_info[fp] = zinfo(forOutput, name, pipeCmd);
153  }
154 
155  free(impl_b_mode);
156  free(pipeCmd);
157  }
158  }
159  }
160 
161  arb_assert(contradicted(fp, error));
162  arb_assert(implicated(error, error[0])); // deny empty error
163  return fp;
164 }
165 
167  bool fifo = GB_is_fifo(fp);
168 
169  arb_assert(zfile_info.find(fp) != zfile_info.end()); // file was not opened using ARB_zfopen!
170 
171  zinfo info = zfile_info[fp];
172  zfile_info.erase(fp);
173 
174  int res;
175  if (fifo) {
176  res = pclose(fp);
177  }
178  else {
179  res = fclose(fp);
180  }
181 
182  GB_ERROR error = NULp;
183  if (res != 0) {
184  int exited = WIFEXITED(res);
185  int status = WEXITSTATUS(res);
186 #if defined(DEBUG)
187  int signaled = WIFSIGNALED(res);
188 #endif
189 
190  if (exited) {
191  if (status) {
192  if (fifo) {
193  error = GBS_global_string("pipe %s\n"
194  " file='%s'\n"
195  " using cmd='%s'\n"
196  " failed with exitcode=%i (broken pipe? corrupted archive?)\n",
197  info.isOutputPipe() ? "writing to" : "reading from",
198  info.get_filename(),
199  info.get_pipecmd(),
200  status);
201  }
202  }
203  }
204  if (!error) error = GB_IO_error("closing", info.get_filename());
205 #if defined(DEBUG)
206  error = GBS_global_string("%s (res=%i, exited=%i, signaled=%i, status=%i)", error, res, exited, signaled, status);
207 #endif
208  }
209  return error;
210 }
211 
212 // --------------------------------------------------------------------------------
213 
214 #ifdef UNIT_TESTS
215 #ifndef TEST_UNIT_H
216 #include <test_unit.h>
217 #endif
218 
219 static char *fileContent(FILE *in, size_t& bytes_read) {
220  const size_t BUFFERSIZE = 1000;
221  char *buffer = ARB_alloc<char>(BUFFERSIZE+1);
222  bytes_read = fread(buffer, 1, BUFFERSIZE, in);
223  arb_assert(bytes_read<BUFFERSIZE);
224  buffer[bytes_read] = 0;
225  return buffer;
226 }
227 
/* Expects that opening (or, if opening succeeds, closing) the file reports an error */
/* and that the error message contains 'errpart'.                                    */
#define TEST_EXPECT_ZFOPEN_FAILS(name,mode,cmode,errpart) do{           \
        GB_ERROR  error = NULp;                                         \
        FILE     *fp    = ARB_zfopen(name, mode, cmode, error, false);  \
                                                                        \
        if (!fp) {                                                      \
            TEST_EXPECT_NULL(fp);                                       \
        }                                                               \
        else {                                                          \
            /* open succeeded -> the failure has to show up on close */ \
            TEST_EXPECT_NULL(error);                                    \
            error = ARB_zfclose(fp);                                    \
        }                                                               \
        TEST_REJECT_NULL(error);                                        \
        TEST_EXPECT_CONTAINS(error, errpart);                           \
    }while(0)
242 
243 void TEST_compressed_io() {
244  const char *inText = "general/text.input";
245  const char *outFile = "compressed.out";
246 
247  TEST_EXPECT_ZFOPEN_FAILS("", "", ZFILE_UNCOMPRESSED, "Invalid argument");
248  TEST_EXPECT_ZFOPEN_FAILS(outFile, "a", ZFILE_UNCOMPRESSED, "Cannot append to file using ARB_zfopen");
249  TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_UNDEFINED, "Invalid compression mode");
250  TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_AUTODETECT, "only works for input files");
251  TEST_EXPECT_ZFOPEN_FAILS(outFile, "rt", ZFILE_AUTODETECT, "Cannot use textmode");
252  TEST_EXPECT_ZFOPEN_FAILS(outFile, "r+", ZFILE_AUTODETECT, "Cannot open file in read and write mode");
253 
254 #if defined(USE_BROKEN_COMPRESSION)
255  TEST_EXPECT_ZFOPEN_FAILS(outFile, "r", ZFILE_BROKEN, "broken pipe");
256  TEST_EXPECT_ZFOPEN_FAILS(outFile, "w", ZFILE_BROKEN, "broken pipe");
257 #endif
258 
259  char *testText;
260  const size_t TEST_TEXT_SIZE = 428;
261  {
262  GB_ERROR error = NULp;
263  FILE *in = ARB_zfopen(inText, "r", ZFILE_UNCOMPRESSED, error, false);
264  TEST_EXPECT_NULL(error);
265  TEST_REJECT_NULL(in);
266 
267  size_t bytes_read;
268  testText = fileContent(in, bytes_read);
269  TEST_EXPECT_EQUAL(bytes_read, TEST_TEXT_SIZE);
270 
272  }
273 
274  int successful_compressions = 0;
275 
277  cmode != ZFILE_UNDEFINED;
278  cmode = FileCompressionMode(cmode+1))
279  {
280  TEST_ANNOTATE(GBS_global_string("cmode=%i", int(cmode)));
281 
282  bool compressed_save_failed = false;
283  {
284  GB_ERROR error = NULp;
285  FILE *out = ARB_zfopen(outFile, "w", cmode, error, false);
286 
287  TEST_EXPECT_NO_ERROR(error);
288  TEST_REJECT_NULL(out);
289 
290  TEST_EXPECT_DIFFERENT(EOF, fputs(testText, out));
291 
292  error = ARB_zfclose(out);
293  if (error && strstr(error, "failed with exitcode=127") && cmode != ZFILE_UNCOMPRESSED) {
294  // assume compression utility is not installed
295  compressed_save_failed = true;
296  }
297  else {
298  TEST_EXPECT_NO_ERROR(error);
299  }
300  }
301 
302  if (!compressed_save_failed) {
303  for (int detect = 0; detect<=1; ++detect) {
304  TEST_ANNOTATE(GBS_global_string("cmode=%i detect=%i", int(cmode), detect));
305 
306  GB_ERROR error = NULp;
307  FILE *in = ARB_zfopen(outFile, "r", detect ? ZFILE_AUTODETECT : cmode, error, false);
308 
309  TEST_REJECT(error);
310  TEST_REJECT_NULL(in);
311 
312  size_t bytes_read;
313  char *content = fileContent(in, bytes_read);
315  TEST_EXPECT_EQUAL(content, testText); // if this fails for detect==1 -> detection does not work
316  free(content);
317  }
318  successful_compressions++;
319  }
320  }
321 
322  TEST_EXPECT(successful_compressions>=3); // at least ZFILE_UNCOMPRESSED, ZFILE_GZIP and ZFILE_BZIP should succeed
323 
324  free(testText);
325  TEST_EXPECT_DIFFERENT(GB_unlink(outFile), -1);
326 }
327 
328 #endif // UNIT_TESTS
329 
330 // --------------------------------------------------------------------------------
331 
#define arb_assert(cond)
Definition: arb_assert.h:245
const char * GB_ERROR
Definition: arb_core.h:25
FileCompressionMode
Definition: arb_zfile.h:22
#define implicated(hypothesis, conclusion)
Definition: arb_assert.h:289
bool isOutputPipe() const
Definition: arb_zfile.cxx:36
void ARB_warn_about_unwanted_chars(const char *path, const char *path_description)
Definition: arb_misc.cxx:150
const char * ARB_getenv_ignore_empty(const char *envvar)
Definition: arb_misc.cxx:102
GB_ERROR GB_IO_error(const char *action, const char *filename)
Definition: arb_msg.cxx:285
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
STL namespace.
int GB_unlink(const char *path)
Definition: arb_file.cxx:188
FILE * ARB_zfopen(const char *name, const char *mode, FileCompressionMode cmode, GB_ERROR &error, bool hideStderr)
Definition: arb_zfile.cxx:42
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
char * ARB_executable(const char *exe_name, const char *path)
Definition: arb_misc.cxx:107
#define TEST_EXPECT(cond)
Definition: test_unit.h:1328
zinfo(bool writing_, const char *filename_, const char *pipe_cmd_)
Definition: arb_zfile.cxx:30
#define TEST_REJECT(cond)
Definition: test_unit.h:1330
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1325
static void error(const char *msg)
Definition: mkptypes.cxx:96
zinfo()
Definition: arb_zfile.cxx:29
bool GB_is_fifo(const char *path)
Definition: arb_file.cxx:89
const char * get_filename() const
Definition: arb_zfile.cxx:37
static map< FILE *, zinfo > zfile_info
Definition: arb_zfile.cxx:40
static BasicStatus status
fputs(TRACE_PREFIX, stderr)
#define TEST_EXPECT_NULL(n)
Definition: test_unit.h:1322
GB_ERROR ARB_zfclose(FILE *fp)
Definition: arb_zfile.cxx:166
const size_t BUFFERSIZE
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
#define NULp
Definition: cxxforward.h:116
#define TEST_EXPECT_DIFFERENT(expr, want)
Definition: test_unit.h:1301
const char * get_pipecmd() const
Definition: arb_zfile.cxx:38
static int info[maxsites+1]
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194