ARB
adcompr.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : adcompr.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <climits>
12 
13 #include <arbdbt.h>
14 
15 #include "gb_key.h"
16 #include "gb_t_prot.h"
17 #include "gb_compress.h"
18 #include "gb_localdata.h"
19 
20 #include <stdint.h>
21 
22 #if defined(DEBUG)
23 // #define TEST_HUFFMAN_CODE
24 #endif // DEBUG
25 
26 #define GBTUM_COMPRESS_TREE_SIZE 32
27 
28 
29 #define GB_READ_BIT(p, c, bp, result) if (!bp) { c = *(p++); bp = 8; }; result = (c>>7); result &= 1; c <<= 1; bp--
30 
31 #define GB_INIT_WRITE_BITS(p, bp) *p = 0; bp = 8
32 
33 #define GB_WRITE_BITS(p, bp, bitc, bits, i) \
34  if (bp<=0) { bp += 8; p++; *p = 0; } \
35  if ((i=bp-bitc)<0) { \
36  *p |= bits>>(-i); \
37  i += 8; \
38  bp += 8; p++; *p = 0; \
39  } \
40  *p |= bits<<i; \
41  bp-=bitc;
42 
43 #define GB_READ_BITS(p, c, bp, bitc, bits) { \
44  long _i; \
45  int _r; \
46  bits = 0; \
47  for (_i=bitc-1; _i>=0; _i--) { \
48  GB_READ_BIT(p, c, bp, _r); \
49  bits = (bits<<1) + _r; \
50  } }
51 
52 
54  if (t->leaf)
55  return NULp;
56  if (!t->son[0])
57  return GB_export_error("Database entry corrupt (zero left son)");
58  if (!t->son[1])
59  return GB_export_error("Database entry corrupt (zero right son)");
60 
61  {
62  GB_ERROR err = gb_check_huffmann_tree(t->son[0]);
63  if (err) return err;
64  }
65  return gb_check_huffmann_tree(t->son[1]);
66 }
67 
68 #if defined(TEST_HUFFMAN_CODE)
69 static void gb_dump_huffmann_tree(gb_compress_tree *t, const char *prefix) {
70  if (t->leaf) {
71  long command = (long)t->son[1];
72  printf("%s", prefix);
73 
74  switch (command) {
75  case GB_CS_END: printf(" [GB_CS_END]\n"); break;
76  case GB_CS_ID: printf(" [GB_CS_ID]\n"); break;
77  case GB_CS_OK: {
78  long val = (long)t->son[0];
79  printf(": value=%li (='%c')\n", val, (char)val);
80  break;
81  }
82  default: {
83  long val = (long)t->son[0];
84  printf(" other command (%li) value=%li (='%c')\n", command, val, (char)val);
85  break;
86  }
87  }
88  }
89  else {
90  int len = strlen(prefix);
91  char *my_prefix = ARB_alloc<char>(len+2);
92  strcpy(my_prefix, prefix);
93  my_prefix[len+1] = 0;
94  my_prefix[len] = '0';
95  gb_dump_huffmann_tree(t->son[0], my_prefix);
96  my_prefix[len] = '1';
97  gb_dump_huffmann_tree(t->son[1], my_prefix);
98  free(my_prefix);
99  }
100 }
101 static void gb_dump_huffmann_list(gb_compress_list *bc, const char *prefix) {
102  if (bc->command == GB_CD_NODE) {
103  int len = strlen(prefix);
104  char *my_prefix = ARB_alloc<char>(len+2);
105  strcpy(my_prefix, prefix);
106  my_prefix[len+1] = 0;
107  my_prefix[len] = '0';
108  gb_dump_huffmann_list(bc->son[0], my_prefix);
109  my_prefix[len] = '1';
110  gb_dump_huffmann_list(bc->son[1], my_prefix);
111  free(my_prefix);
112  }
113  else {
114  printf("%s value=%i (='%c') count=%li\n", prefix, bc->value, (char)bc->value, bc->count);
115  }
116 }
117 
118 #endif // TEST_HUFFMAN_CODE
119 
120 gb_compress_tree *gb_build_uncompress_tree(const unsigned char *data, long short_flag, char **end) {
121  gb_compress_tree *Main, *t;
122  long bits, mask, i;
123  const unsigned char *p;
124  GB_ERROR error;
125 
127  for (p=data; *p; p+=3+short_flag) {
128  bits = p[0];
129  mask = 0x80;
130  for (i=7; i; i--, mask=mask>>1) if (mask & bits) break; // find first bit
131  if (!i) {
132  GB_internal_error("Data corrupt");
133  return NULp;
134  }
135  t = Main;
136  for (; i; i--) {
137  if (t->leaf) {
138  GB_export_error("Corrupt data !!!");
139  return NULp;
140  }
141  mask = mask>>1;
142  if (mask & bits) {
143  if (!t->son[1]) {
145  }
146  t=t->son[1];
147  }
148  else {
149  if (!t->son[0]) {
151  }
152  t=t->son[0];
153  }
154  }
155  if (t->leaf) {
156  GB_export_error("Corrupt data !!!");
157  return NULp;
158  }
159  t->leaf = 1;
160  if (short_flag) {
161  t->son[0] = (gb_compress_tree *)(long)((p[2]<<8)+p[3]);
162  }
163  else {
164  t->son[0] = (gb_compress_tree *)(long)(p[2]);
165  }
166  t->son[1] = (gb_compress_tree *)(long)p[1]; // command
167  }
168  if (end) *end = ((char *)p)+1;
169  if ((error = gb_check_huffmann_tree(Main))) {
170  GB_internal_errorf("%s", error);
171  gb_free_compress_tree(Main);
172  return NULp;
173  }
174 
175 #if defined(TEST_HUFFMAN_CODE)
176  printf("Huffman tree:\n");
177  gb_dump_huffmann_tree(Main, "");
178 #endif // TEST_HUFFMAN_CODE
179 
180  return Main;
181 }
182 
184  if (tree) {
185  if (!tree->leaf) {
186  if (tree->son[0]) gb_free_compress_tree(tree->son[0]);
187  if (tree->son[1])gb_free_compress_tree(tree->son[1]);
188  }
189  }
191 }
192 
193 gb_compress_list *gb_build_compress_list(const unsigned char *data, long short_flag, long *size) {
194  int i, maxi, bitc;
195  int val, bits, mask, value;
196  const unsigned char *p;
197 
199 
200  maxi = 0;
201  val = bits = mask = value = bitc = 0;
202  for (p=data; *p; p+=3+short_flag) {
203  i = (p[2]);
204  if (short_flag) {
205  i = (i<<8)+p[3];
206  }
207  if (i>maxi) maxi = i;
208  }
209  *size = maxi;
210 
211  gb_compress_list *list = ARB_calloc<gb_compress_list>(maxi+1);
212 
213  maxi = 0;
214  val = -1;
215 
216  for (p=data; *p; p+=3+short_flag) {
217  val = (p[2]);
218  if (short_flag) val = (val<<8)+p[3];
219 
220  for (i=maxi; i<val; i++) { // LOOP_VECTORIZED[!<910]
221  list[i].command = command;
222  list[i].mask = mask;
223  list[i].bitcnt = bitc;
224  list[i].bits = bits;
225  list[i].value = value;
226  }
227  maxi = val;
228 
229  command = (enum gb_compress_list_commands)(p[1]);
230  bits = p[0];
231  mask = 0x80;
232  for (bitc=7; bitc; bitc--, mask=mask>>1) if (mask & bits) break; // find first bit
233  mask = 0xff>>(8-bitc);
234  bits = bits & mask;
235  value = val;
236  }
237  i = val;
238  list[i].command = command;
239  list[i].mask = mask;
240  list[i].bitcnt = bitc;
241  list[i].bits = bits;
242  list[i].value = value;
243  return list;
244 }
245 
246 // ------------------------
247 // bit compression
248 
249 
250 char *gb_compress_bits(const char *source, long size, const unsigned char *c_0, long *msize) {
251  const unsigned char *s = (const unsigned char *)source;
252  char *buffer = GB_give_other_buffer(source, size);
253  char *dest = buffer;
254 
255  long i, j;
256  int bitptr, bits, bitc;
257  int zo_flag = 0;
258  long len;
259  int h_i, command;
260  int isNull[256];
261 
262  for (i = 0; i<256; ++i) isNull[i] = 0;
263  for (i = 0; c_0[i]; ++i) isNull[c_0[i]] = 1;
264 
265  GB_INIT_WRITE_BITS(dest, bitptr);
266  for (len = size, i = 0; len; len--) {
267  if (zo_flag == isNull[*(s++)]) {
268  zo_flag = 1 - zo_flag;
269  for (command = GB_CS_SUB; command != GB_CS_OK;) {
270  if (i>gb_local->bc_size) j = gb_local->bc_size; else j = i;
271  bits = gb_local->bitcompress[j].bits;
272  bitc = gb_local->bitcompress[j].bitcnt;
273  command = gb_local->bitcompress[j].command;
274  i -= gb_local->bitcompress[j].value;
275  GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i);
276  }
277  i = 0;
278  }
279  i++;
280  }
281  for (command = GB_CS_SUB; command != GB_CS_OK;) {
282  if (i>gb_local->bc_size) j = gb_local->bc_size; else j = i;
283  bits = gb_local->bitcompress[j].bits;
284  bitc = gb_local->bitcompress[j].bitcnt;
285  command = gb_local->bitcompress[j].command;
286  i -= gb_local->bitcompress[j].value;
287  GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i);
288  }
289  *msize = dest - buffer + 1;
290  return buffer;
291 }
292 
293 
294 GB_BUFFER gb_uncompress_bits(const char *source, long size, char c_0, char c_1) {
296 
297  uint8_t ch = 0;
298 
299  long bitp = 0;
300  char outc = c_0;
301 
302  const uint8_t *s = (const uint8_t*)(source);
303 
304  char *p = GB_give_other_buffer(source, size+1);
305  char *buffer = p;
306 
307  for (long pos = 0; pos<size;) {
308  long lastpos = pos;
309  for (long command = GB_CS_SUB; command != GB_CS_OK;) {
310  gb_compress_tree *t;
311  for (t = Main; !t->leaf;) {
312  int bit;
313  GB_READ_BIT(s, ch, bitp, bit);
314  t = t->son[bit];
315  }
316  command = (long) t->son[1];
317  pos += (long)t->son[0];
318  }
319  for (; lastpos<pos; lastpos++) *(p++) = outc;
320  if (outc==c_0) outc=c_1; else outc=c_0;
321  }
322  *p = 0;
323  return buffer;
324 }
325 
326 
327 
328 /* Runlength encoding produces..
329  *
330  * from source: string
331  * dest: [command]+
332  *
333  * command data meaning
334  *
335  * -122 low high byte 'byte' was repeated 'low'+256*'high' times
336  * -119..-1 byte 'byte' was repeated -'command' times
337  * 0 --- end
338  * 1..119 [byte]+ 'command' data bytes follow
339  */
340 
341 static char *g_b_write_run(char *dest, int scount, int lastbyte) {
342  while (scount> 0xffff) {
343  *(dest++) = 0x100-122;
344  *(dest++) = 0xff;
345  *(dest++) = 0xff;
346  *(dest++) = lastbyte;
347  scount -= 0xffff;
348  }
349  if (scount >250) {
350  *(dest++) = 0x100-122;
351  *(dest++) = scount & 0xff;
352  *(dest++) = (scount >> 8) & 0xff;
353  *(dest++) = lastbyte;
354  return dest;
355  }
356 
357  while (scount>120) { // 120 blocks
358  *(dest++) = 0x100 - 120;
359  *(dest++) = lastbyte;
360  scount -= 120;
361  }
362 
363  if (scount) { // and rest
364  *(dest++) = -scount;
365  *(dest++) = lastbyte;
366  }
367  return dest;
368 }
369 
370 #define GB_COPY_NONRUN(dest, source, len) \
371  while (len > 120) { \
372  int _i = 120; \
373  char *_p; \
374  len -= _i; \
375  *(dest++) = _i; \
376  _p = dest; dest+=_i; \
377  while (_i--) { \
378  *(_p++) = *(source++); \
379  } \
380  } \
381  if (len >0) { \
382  int _i = len; \
383  char *_p; \
384  len = 0; \
385  *(dest++) = _i; \
386  _p = dest; dest+=_i; \
387  while (_i--) { \
388  *(_p++) = *(source++); \
389  } \
390  }
391 
392 void gb_compress_equal_bytes_2(const char *source, size_t size, size_t *msize, char *dest) {
393  long i; // length counter
394  int last, rbyte; // next and akt value
395  long scount; // same count; count equal bytes
396  char *buffer = dest;
397  const char *sourcenequal; // begin of non equal section
398  long hi; // to temporary store i
399  int hsize;
400 
401  sourcenequal = source;
402  rbyte = *(source ++);
403  last = -1;
404  for (i=size-1; i;) {
405  if (rbyte!=last) { // Search an equal pair
406  last = rbyte;
407  rbyte = *(source ++); i--;
408  continue;
409  } // last rbyte *(source)
410  hi = i+1;
411  while (i) { // get equal bytes
412  rbyte = *(source ++); i--;
413  if (rbyte!=last) break;
414  }
415  scount = hi-i; // number of equal bytes ; at end e.b.-1
416  if (scount <= GB_RUNLENGTH_SIZE) continue; // less than 4-> no runlength compression
417 
418  /* compression !!!
419  * 1. copy unequal bytes
420  * 2. fill rest
421  */
422 
423  hsize = source - sourcenequal - scount -1; // hsize of non equal bytes
424  source = sourcenequal;
425  hi = i; // i=unequal length
426  GB_COPY_NONRUN(dest, source, hsize); // LOOP_VECTORIZED=2
427  dest = g_b_write_run(dest, scount, last);
428  source += scount; // now equal bytes
429 
430  sourcenequal = source++; // no rbyte written yet
431  last = rbyte;
432  i = hi;
433  if (i) {
434  rbyte = *(source++); i--;
435  }
436  }
437  // and now rest which is not compressed
438  hsize = source - sourcenequal;
439  source = sourcenequal;
440  GB_COPY_NONRUN(dest, source, hsize); // LOOP_VECTORIZED=2
441 
442  *(dest++) = 0; // end of data
443  *msize = dest-buffer;
444  if (*msize >size*9/8) printf("ssize %d, dsize %d\n", (int)size, (int)*msize);
445 }
446 
447 static GB_BUFFER gb_compress_equal_bytes(const char *source, size_t size, size_t *msize, int last_flag) {
448  char *dest;
449  char *buffer;
450 
451  dest = buffer = GB_give_other_buffer(source, size*9/8);
452  *(dest++) = GB_COMPRESSION_RUNLENGTH | last_flag;
453  gb_compress_equal_bytes_2(source, size, msize, dest);
454  (*msize) ++; // Tag byte
455  return buffer;
456 }
457 
460  long val;
462 };
463 
465 
466 static void gb_compress_huffmann_add_to_list(long val, gb_compress_list *element) {
467  huffmann_list *dat, *search, *searchlast;
468 
470  dat->val = val;
471  dat->element = element;
472 
473  searchlast = NULp;
474  for (search = huffmann_listhead; search; search = search->next) {
475  if (val<search->val) break;
476  searchlast = search;
477  }
478  if (huffmann_listhead) {
479  if (searchlast) {
480  dat->next = searchlast->next;
481  searchlast->next = dat;
482  }
483  else {
484  dat->next = huffmann_listhead;
485  huffmann_listhead = dat;
486  }
487  }
488  else {
489  huffmann_listhead = dat;
490  }
491 }
492 
493 static long gb_compress_huffmann_pop(long *val, gb_compress_list **element) {
494  huffmann_list *dat;
495  if ((dat = huffmann_listhead)) {
496  huffmann_listhead = dat->next;
497  *val = dat->val;
498  *element = dat->element;
499  gbm_free_mem(dat, sizeof(huffmann_list), GBM_CB_INDEX);
500  return 1;
501  }
502  else {
503  GB_internal_error("huffman compression failed");
504  return 0;
505  }
506 }
507 
508 static char *gb_compress_huffmann_rek(gb_compress_list *bc, int bits, int bitcnt, char *dest) {
509  if (bc->command == GB_CD_NODE) {
510  dest = gb_compress_huffmann_rek(bc->son[0], (bits<<1), bitcnt+1, dest);
511  dest = gb_compress_huffmann_rek(bc->son[1], (bits<<1)+1, bitcnt+1, dest);
513  return dest;
514  }
515  else {
516  *(dest++) = bits;
517  *(dest++) = bc->command;
518  *(dest++) = bc->value;
519  bc->bitcnt = bitcnt;
520  bc->mask = 0xff>>(8-bitcnt);
521  bc->bits = bits&bc->mask;
522  return dest;
523  }
524 }
525 
526 static GB_BUFFER gb_compress_huffmann(GB_CBUFFER source, size_t size, size_t *msize, int last_flag) {
527  const int COMPRESS_LIST_SIZE = 257;
528  gb_compress_list bitcompress[COMPRESS_LIST_SIZE];
529 
530  memset((char *)(&bitcompress[0]), 0, sizeof(gb_compress_list)*COMPRESS_LIST_SIZE);
531 
532  char *buffer = GB_give_other_buffer(source, size*2+3*GBTUM_COMPRESS_TREE_SIZE+1);
533  char *dest = buffer;
534  *(dest++) = GB_COMPRESSION_HUFFMANN | last_flag;
535 
536  long id = 0;
537 
538  {
539  long level;
540  long i;
541  long restcount;
542 
543  long vali[2] = { 0, 0 };
544 
545  gb_compress_list *element1 = NULp;
546  gb_compress_list *element2 = NULp;
547  gb_compress_list *bc = NULp;
548 
549  unsigned char *s = (unsigned char *)source;
550  for (long len = size; len; len--) {
551  bitcompress[*(s++)].count++;
552  }
553  level = size/GBTUM_COMPRESS_TREE_SIZE;
554  restcount = 0;
555  for (i=0; i<256; i++) {
556  bitcompress[i].value = (int)i;
557  if (bitcompress[i].count>level) {
558  gb_compress_huffmann_add_to_list(bitcompress[i].count, &bitcompress[i]);
559  bitcompress[i].command = GB_CS_OK;
560  }
561  else {
562  restcount += bitcompress[i].count;
563  bitcompress[i].count = 0;
564  id = i;
565  bitcompress[i].command = GB_CS_ID;
566  }
567  }
568  bitcompress[COMPRESS_LIST_SIZE-1].command = GB_CS_END;
569 
570  gb_compress_huffmann_add_to_list(restcount, &bitcompress[id]);
571  gb_compress_huffmann_add_to_list(1, &bitcompress[COMPRESS_LIST_SIZE-1]);
572  while (huffmann_listhead->next) {
573  gb_compress_huffmann_pop(&(vali[0]), &element1);
574  gb_compress_huffmann_pop(&(vali[1]), &element2);
575 
577  bc->command = GB_CD_NODE;
578  bc->son[0] = element1;
579  bc->son[1] = element2;
580 
581  if (element1->command == GB_CD_NODE) {
582  bc->bits = element1->bits+1;
583  if (element2->command == GB_CD_NODE && element2->bits >= bc->bits) bc->bits = element2->bits+1;
584  }
585  else {
586  if (element2->command == GB_CD_NODE) {
587  bc->bits = element2->bits+1;
588  }
589  else {
590  bc->bits = 1;
591  }
592  }
593 
594  gb_assert(bc->bits <= 7); // max. 7 bits allowed
595 
596  // if already 6 bits used -> put to end of list; otherwise sort in
597  gb_compress_huffmann_add_to_list(bc->bits >= 6 ? LONG_MAX : vali[0]+vali[1], bc);
598  }
599  gb_compress_huffmann_pop(&(vali[0]), &element1);
600 #if defined(TEST_HUFFMAN_CODE)
601  printf("huffman list:\n");
602  gb_dump_huffmann_list(bc, "");
603 #endif // TEST_HUFFMAN_CODE
604  dest = gb_compress_huffmann_rek(bc, 1, 0, dest);
605  *(dest++) = 0;
606  }
607 
608  gb_compress_list *pbid = &bitcompress[id];
609  unsigned char *s = (unsigned char *)source;
610  {
611  int bitptr, bits, bitc;
612 
613  GB_INIT_WRITE_BITS(dest, bitptr);
614  int h_i;
615  for (long len = size; len; len--) {
616  int val = *(s++);
617  int command = bitcompress[val].command;
618  if (command == GB_CS_OK) {
619  gb_compress_list *pbc = &bitcompress[val];
620  bits = pbc->bits;
621  bitc = pbc->bitcnt;
622  GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i);
623  }
624  else {
625  bits = pbid->bits;
626  bitc = pbid->bitcnt;
627  GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i);
628 
629  bits = val;
630  bitc = 8;
631  GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i);
632  }
633  }
634  bits = bitcompress[COMPRESS_LIST_SIZE-1].bits;
635  bitc = bitcompress[COMPRESS_LIST_SIZE-1].bitcnt;
636  // cppcheck-suppress unreadVariable (bitptr is assigned but never used)
637  GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i);
638  }
639  *msize = dest - buffer + 1;
640 #if defined(TEST_HUFFMAN_CODE)
641  printf("huffman compression %zu -> %zu (%5.2f %%)\n", size, *msize, (double)((double)(*msize)/size*100));
642 #endif // TEST_HUFFMAN_CODE
643  if (*msize >size*2) printf("ssize %d, size %d\n", (int)size, (int)*msize);
644  return buffer;
645 }
646 
647 
648 static GB_BUFFER gb_uncompress_equal_bytes(GB_CBUFFER s, size_t size, size_t *new_size) {
649  const signed char *source = (signed char*)s;
650  char *dest;
651  unsigned int c;
652  long j;
653  long i, k;
654  char *buffer;
655 
656  dest = buffer = GB_give_other_buffer((char *)source, size);
657 
658 #if defined(DEBUG) && 0
659  printf("gb_uncompress_equal_bytes(size=%li):\n", size);
660 #endif // DEBUG
661 
662  for (i=size; i;) {
663  j = *(source++);
664 #if defined(DEBUG) && 0
665  printf("size=%li (code=%i)\n", i, (int)j);
666 #endif // DEBUG
667  if (j>0) { // uncompressed data block
668  if (j>i) j=i;
669  i -= j;
670  for (; j; j--) { // LOOP_VECTORIZED
671  *(dest++) = (char)(*(source++));
672  }
673  }
674  else { // equal bytes compressed
675  if (!j) break; // end symbol
676  if (j == -122) {
677  j = *(source++) & 0xff;
678  j |= (uint16_t(*(source++)) << 8) & 0xff00; // cast to unsigned (left shifting negative values is undefined behavior)
679  j = -j;
680  }
681  c = *(source++);
682  i += j;
683  if (i<0) {
684  j += -i;
685  i = 0;
686  }
687  if (j<-30) { // set multiple bytes
688  j = -j;
689  if (((long)dest) & 1) {
690  *(dest++) = c;
691  j--;
692  }
693  if (((long)dest) &2) {
694  *(dest++) = c;
695  *(dest++) = c;
696  j-=2;
697  }
698  c &= 0xff; // copy c to upper bytes
699  c |= (c<<8);
700  c |= (c<<16);
701  k = j&3;
702  j = j>>2;
703  for (; j; j--) { // LOOP_VECTORIZED
704  *((GB_UINT4 *)dest) = c;
705  dest += sizeof(GB_UINT4);
706  }
707  j = k;
708  for (; j; j--) *(dest++) = c;
709  }
710  else {
711  for (; j; j++) *(dest++) = c;
712  }
713  }
714  }
715 
716  *new_size = dest-buffer;
717  gb_assert(size >= *new_size); // buffer overflow
718 
719  return buffer;
720 }
721 
722 static GB_BUFFER gb_uncompress_huffmann(GB_CBUFFER source, size_t maxsize, size_t *new_size) {
723  gb_compress_tree *un_tree, *t;
724 
725  char *data[1];
726  char *p, *buffer;
727  long bitp;
728  uint8_t ch = 0, *s;
729  long val, command;
730 
731  un_tree = gb_build_uncompress_tree((const unsigned char *)source, 0, data);
732  if (!un_tree) return NULp;
733 
734  bitp = 0;
735  buffer = p = GB_give_other_buffer(source, maxsize);
736  s = (uint8_t*)data[0];
737  while (1) {
738  for (t = un_tree; !t->leaf;) {
739  int bit;
740  GB_READ_BIT(s, ch, bitp, bit);
741  t = t->son[bit];
742  }
743  command = (long) t->son[1];
744  if (command == GB_CS_END) break;
745  if (command == GB_CS_ID) {
746  GB_READ_BITS(s, ch, bitp, 8, val);
747  }
748  else {
749  val = (long) t->son[0];
750  }
751  *(p++) = (int)val;
752  }
753  gb_free_compress_tree(un_tree);
754 
755  *new_size = p-buffer;
756  gb_assert(maxsize >= *new_size); // buffer overflow
757 
758  return buffer;
759 }
760 
761 GB_BUFFER gb_uncompress_bytes(GB_CBUFFER source, size_t size, size_t *new_size) {
762  char *data = gb_uncompress_huffmann(source, size, new_size);
763  if (data) data = gb_uncompress_equal_bytes(data, size, new_size);
764  gb_assert(!data || size >= *new_size); // buffer overflow
765  return data;
766 }
767 
768 // -------------------------------------------------
769 // Compress long and floats (4 byte values)
770 
771 
772 GB_BUFFER gb_uncompress_longs_old(GB_CBUFFER source, size_t size, size_t *new_size) {
773  // size is byte value
774  char *res = NULp;
775  char *data = gb_uncompress_huffmann(source, (size*9)/8, new_size);
776  if (data) {
777  char *p, *s0, *s1, *s2, *s3;
778  GB_UINT4 mi, i;
779 
780  data = gb_uncompress_equal_bytes(data, size, new_size);
781 
782  gb_assert(*new_size == size);
783  res = p = GB_give_other_buffer(data, size);
784 
785  STATIC_ASSERT(sizeof(GB_UINT4) == 4);
786 
787  mi = (GB_UINT4)(size / 4);
788  s0 = data + 0 * mi;
789  s1 = data + 1 * mi;
790  s2 = data + 2 * mi;
791  s3 = data + 3 * mi;
792 
793  for (i = 0; i < mi; i++) { // LOOP_VECTORIZED
794  *(p++) = *(s0++);
795  *(p++) = *(s1++);
796  *(p++) = *(s2++);
797  *(p++) = *(s3++);
798  }
799 
800  *new_size = mi*4;
801  }
802  return res;
803 }
804 
805 
806 static GB_BUFFER gb_uncompress_longs(GB_CBUFFER data, size_t size, size_t *new_size) {
807  const char *s0, *s1, *s2, *s3;
808  char *p, *res;
809  long mi, i;
810 
811  res = p = GB_give_other_buffer(data, size);
812 
813  STATIC_ASSERT(sizeof(GB_UINT4) == 4);
814 
815  mi = size / 4;
816  s0 = data + 0 * mi;
817  s1 = data + 1 * mi;
818  s2 = data + 2 * mi;
819  s3 = data + 3 * mi;
820 
821  for (i = 0; i < mi; i++) { // LOOP_VECTORIZED =2[<5][>=54] =1 // 1x with 5.x; 2x with 4.9.x + 5.4 + 5.5 + 9.5 + 10.x
822  *(p++) = *(s0++);
823  *(p++) = *(s1++);
824  *(p++) = *(s2++);
825  *(p++) = *(s3++);
826  }
827 
828  *new_size = mi*4;
829  return res;
830 }
831 
832 static GB_BUFFER gb_compress_longs(GB_CBUFFER source, long size, int last_flag) {
833  long mi, i;
834  const char *p;
835  char *s0, *s1, *s2, *s3;
836  char *dest = GB_give_other_buffer(source, size+1);
837 
838  mi = size/4;
839  p = source;
840  *(dest) = GB_COMPRESSION_SORTBYTES | last_flag;
841  s0 = dest + 0*mi + 1;
842  s1 = dest + 1*mi + 1;
843  s2 = dest + 2*mi + 1;
844  s3 = dest + 3*mi + 1;
845  for (i=0; i<mi; i++) { // LOOP_VECTORIZED
846  *(s0++) = *(p++);
847  *(s1++) = *(p++);
848  *(s2++) = *(p++);
849  *(s3++) = *(p++);
850  }
851  return dest;
852 }
853 
854 // -----------------------------
855 // Dictionary Functions
856 
858  gb_Key *ks = &Main->keys[key];
859  if (ks->gb_key_disabled) return NULp;
860  if (!ks->gb_key) {
861  gb_load_single_key_data(Main->gb_main(), key);
862  if (Main->gb_key_data && !ks->gb_key) {
863  GB_internal_error("Couldn't load gb_key");
864  }
865  }
866  return Main->keys[key].dictionary;
867 }
868 
870  if (gbd->is_entry()) {
871  GBENTRY *gbe = gbd->as_entry();
872  const char *data = gbe->data();
873 
874  if (data) {
875  if (gbe->flags.compressed_data) {
876  size_t size = gbe->uncompressed_size();
877  int last = 0;
878  GB_ERROR error = NULp;
879  size_t new_size = -1;
880 
881  while (!last) {
882  int c = *((unsigned char *)(data++));
883 
884  if (c & GB_COMPRESSION_LAST) {
885  last = 1;
886  c &= ~GB_COMPRESSION_LAST;
887  }
888 
889  if (c == GB_COMPRESSION_DICTIONARY) {
890  return true;
891  }
892 
893  if (c == GB_COMPRESSION_HUFFMANN) {
894  data = gb_uncompress_huffmann(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size);
895  }
896  else if (c == GB_COMPRESSION_RUNLENGTH) {
897  data = gb_uncompress_equal_bytes(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size);
898  }
899  else if (c == GB_COMPRESSION_SEQUENCE) {
900  data = gb_uncompress_by_sequence(gbe, data, size, &error, &new_size);
901  }
902  else if (c == GB_COMPRESSION_SORTBYTES) {
903  data = gb_uncompress_longs(data, size, &new_size);
904  }
905  else {
906  error = GB_export_errorf("Internal Error: Cannot uncompress data of field '%s'", GB_read_key_pntr(gbe));
907  }
908 
909  if (error) {
910  GB_internal_error(error);
911  break;
912  }
913  }
914  }
915  }
916  }
917  return false;
918 }
919 
920 // -----------------------------------
921 // Top compression algorithms
922 
923 GB_BUFFER gb_compress_data(GBDATA *gbd, int key, GB_CBUFFER source, size_t size, size_t *msize, GB_COMPRESSION_MASK max_compr, bool pre_compressed) {
924  // Compress a data string.
925  //
926  // Returns:
927  // - NULp if no compression makes sense
928  // - otherwise returns compressed data and stores its size in 'msize'
929 
930  char *data;
931  int last_flag = GB_COMPRESSION_LAST;
932 
933  if (pre_compressed) {
934  last_flag = 0;
935  }
936 
937  gb_assert(1);
938 
939  if (max_compr & GB_COMPRESSION_SORTBYTES) {
940  source = gb_compress_longs(source, size, last_flag);
941  last_flag = 0;
942  size++; // @@@ OLI
943  }
944  else if (max_compr & GB_COMPRESSION_DICTIONARY) {
945  GB_MAIN_TYPE *Main = GB_MAIN(gbd);
946  GB_DICTIONARY *dict;
947  if (!key) {
948  key = GB_KEY_QUARK(gbd);
949  }
950  dict = gb_get_dictionary(Main, key);
951  if (dict) {
952  size_t real_size = size-(gbd->type()==GB_STRING); // for strings w/o trailing zero; @@@ or use is_a_string()?
953 
954  if (real_size) {
955  data = gb_compress_by_dictionary(dict, source, real_size, msize, last_flag, 9999, 3);
956 
957  if ((*msize<=10 && size>10) || *msize < size*7/8) { // successful compression
958  source = data;
959  size = *msize;
960  last_flag = 0;
961  }
962  }
963  }
964 
965  }
966  if (max_compr & GB_COMPRESSION_RUNLENGTH && size > GB_RUNLENGTH_MIN_SIZE) {
967  data = gb_compress_equal_bytes(source, size, msize, last_flag);
968  if (*msize < size-10 && *msize < size*7/8) { // successful compression
969  source = data;
970  size = *msize;
971  last_flag = 0;
972  }
973  }
974 
975  if (max_compr & GB_COMPRESSION_HUFFMANN && size > GB_HUFFMAN_MIN_SIZE) {
976  data = gb_compress_huffmann(source, size, msize, last_flag);
977  if (*msize < size-10 && *msize < size*7/8) { // successful compression
978  source = data;
979  size = *msize;
980  last_flag = 0;
981  }
982  }
983 
984  *msize = size;
985 
986  if (last_flag) return NULp; // no compression
987  return (char *)source;
988 }
989 
990 GB_CBUFFER gb_uncompress_data(GBDATA *gbd, GB_CBUFFER source, size_t size) {
991  int last = 0;
992  const char *data = (char *)source;
993  size_t new_size = -1;
994  GB_ERROR error = NULp;
995 
996  while (!last) {
997  int c = *((unsigned char *)(data++));
998  if (c & GB_COMPRESSION_LAST) {
999  last = 1;
1000  c &= ~GB_COMPRESSION_LAST;
1001  }
1002  if (c == GB_COMPRESSION_HUFFMANN) {
1003  data = gb_uncompress_huffmann(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size);
1004  }
1005  else if (c == GB_COMPRESSION_RUNLENGTH) {
1006  data = gb_uncompress_equal_bytes(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size);
1007  }
1008  else if (c == GB_COMPRESSION_DICTIONARY) {
1009  data = gb_uncompress_by_dictionary(gbd, data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size);
1010  }
1011  else if (c == GB_COMPRESSION_SEQUENCE) {
1012  data = gb_uncompress_by_sequence(gbd, data, size, &error, &new_size);
1013  }
1014  else if (c == GB_COMPRESSION_SORTBYTES) {
1015  data = gb_uncompress_longs(data, size, &new_size);
1016  }
1017  else {
1018  error = GBS_global_string("Internal Error: Cannot uncompress data of field '%s'", GB_read_key_pntr(gbd));
1019  }
1020 
1021  if (!data && !error) error = GB_await_error();
1022  if (error) last = 1; // sth went wrong, stop
1023  }
1024 
1025  if (!error && new_size != size) {
1026  error = GBS_global_string("Wrong decompressed size (expected=%zi, got=%zi)", size, new_size);
1027  }
1028 
1029  if (error) {
1030  GB_export_error(error);
1031  data = NULp;
1032  }
1033 
1034  return data;
1035 }
1036 
#define GB_READ_BIT(p, c, bp, result)
Definition: adcompr.cxx:29
const char * GB_ERROR
Definition: arb_core.h:25
#define GB_READ_BITS(p, c, bp, bitc, bits)
Definition: adcompr.cxx:43
GB_CBUFFER gb_uncompress_data(GBDATA *gbd, GB_CBUFFER source, size_t size)
Definition: adcompr.cxx:990
const char * id
Definition: AliAdmin.cxx:17
static char * g_b_write_run(char *dest, int scount, int lastbyte)
Definition: adcompr.cxx:341
void gb_free_compress_tree(gb_compress_tree *tree)
Definition: adcompr.cxx:183
GB_BUFFER gb_compress_data(GBDATA *gbd, int key, GB_CBUFFER source, size_t size, size_t *msize, GB_COMPRESSION_MASK max_compr, bool pre_compressed)
Definition: adcompr.cxx:923
static GB_BUFFER gb_uncompress_equal_bytes(GB_CBUFFER s, size_t size, size_t *new_size)
Definition: adcompr.cxx:648
gb_compress_list_commands command
Definition: gb_compress.h:34
GBDATA * gb_key
Definition: gb_key.h:35
gb_compress_list * element
Definition: adcompr.cxx:461
GB_MAIN_TYPE * GB_MAIN(GBDATA *gbd)
Definition: gb_data.h:291
GB_BUFFER gb_uncompress_longs_old(GB_CBUFFER source, size_t size, size_t *new_size)
Definition: adcompr.cxx:772
bool GB_is_dictionary_compressed(GBDATA *gbd)
Definition: adcompr.cxx:869
gb_compress_tree * bituncompress
Definition: gb_localdata.h:59
huffmann_list * next
Definition: adcompr.cxx:459
long
Definition: AW_awar.cxx:152
#define GB_COPY_NONRUN(dest, source, len)
Definition: adcompr.cxx:370
const unsigned GB_HUFFMAN_MIN_SIZE
Definition: adtune.cxx:29
void GB_internal_errorf(const char *templat,...)
Definition: arb_msg.cxx:504
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
static long gb_compress_huffmann_pop(long *val, gb_compress_list **element)
Definition: adcompr.cxx:493
GB_TYPES type() const
Definition: gb_data.h:139
char * gb_uncompress_by_sequence(GBDATA *gbd, const char *ss, size_t size, GB_ERROR *error, size_t *new_size)
char * gb_compress_bits(const char *source, long size, const unsigned char *c_0, long *msize)
Definition: adcompr.cxx:250
Definition: gb_key.h:28
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
GB_BUFFER GB_give_other_buffer(GB_CBUFFER buffer, long size)
Definition: arbdb.cxx:363
GB_BUFFER gb_uncompress_bytes(GB_CBUFFER source, size_t size, size_t *new_size)
Definition: adcompr.cxx:761
unsigned int GB_UINT4
Definition: arbdb_base.h:37
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:257
GB_DICTIONARY * dictionary
Definition: gb_key.h:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
gb_flag_types flags
Definition: gb_data.h:134
static GB_BUFFER gb_compress_longs(GB_CBUFFER source, long size, int last_flag)
Definition: adcompr.cxx:832
GB_CSTR GB_read_key_pntr(GBDATA *gbd)
Definition: arbdb.cxx:1656
static __ATTR__USERESULT GB_ERROR gb_check_huffmann_tree(gb_compress_tree *t)
Definition: adcompr.cxx:53
GBCONTAINER * gb_key_data
Definition: gb_main.h:121
static void gb_compress_huffmann_add_to_list(long val, gb_compress_list *element)
Definition: adcompr.cxx:466
static GB_BUFFER gb_compress_huffmann(GB_CBUFFER source, size_t size, size_t *msize, int last_flag)
Definition: adcompr.cxx:526
GB_BUFFER gb_uncompress_bits(const char *source, long size, char c_0, char c_1)
Definition: adcompr.cxx:294
static void error(const char *msg)
Definition: mkptypes.cxx:96
gb_compress_list * bitcompress
Definition: gb_localdata.h:60
int gb_key_disabled
Definition: gb_key.h:37
GB_write_int const char GB_write_autoconv_string WRITE_SKELETON(write_pointer, GBDATA *,"%p", GB_write_pointer) char *AW_awa if)(!gb_var) return strdup("")
Definition: AW_awar.cxx:163
void * gbm_get_mem(size_t size, long index)
Definition: gb_memory.h:130
GBDATA * gb_main() const
Definition: gb_main.h:179
#define GB_WRITE_BITS(p, bp, bitc, bits, i)
Definition: adcompr.cxx:33
#define GB_INIT_WRITE_BITS(p, bp)
Definition: adcompr.cxx:31
char * data()
Definition: gb_data.h:219
gb_compress_tree * son[2]
Definition: gb_compress.h:30
void gb_compress_equal_bytes_2(const char *source, size_t size, size_t *msize, char *dest)
Definition: adcompr.cxx:392
GB_ERROR GB_export_errorf(const char *templat,...)
Definition: arb_msg.cxx:262
void GB_internal_error(const char *message)
Definition: arb_msg.cxx:481
GBQUARK GB_KEY_QUARK(GBDATA *gbd)
Definition: gb_key.h:48
gb_compress_list * son[2]
Definition: gb_compress.h:42
#define gb_assert(cond)
Definition: arbdbt.h:11
static GB_BUFFER gb_uncompress_longs(GB_CBUFFER data, size_t size, size_t *new_size)
Definition: adcompr.cxx:806
static char * gb_compress_huffmann_rek(gb_compress_list *bc, int bits, int bitcnt, char *dest)
Definition: adcompr.cxx:508
#define GB_RUNLENGTH_SIZE
Definition: gb_tune.h:14
gb_compress_list_commands
Definition: gb_compress.h:20
size_t uncompressed_size() const
Definition: gb_compress.h:48
bool is_entry() const
Definition: gb_data.h:148
#define __ATTR__USERESULT
Definition: attributes.h:58
char * gb_uncompress_by_dictionary(GBDATA *gbd, GB_CSTR s_source, size_t size, size_t *new_size)
Definition: adoptimize.cxx:578
gb_compress_list * gb_build_compress_list(const unsigned char *data, long short_flag, long *size)
Definition: adcompr.cxx:193
static GB_BUFFER gb_compress_equal_bytes(const char *source, size_t size, size_t *msize, int last_flag)
Definition: adcompr.cxx:447
char * gb_compress_by_dictionary(GB_DICTIONARY *dict, GB_CSTR s_source, size_t size, size_t *msize, int last_flag, int search_backward, int search_forward)
Definition: adoptimize.cxx:591
#define GB_COMPRESSION_TAGS_SIZE_MAX
Definition: gb_compress.h:18
gb_compress_tree * gb_build_uncompress_tree(const unsigned char *data, long short_flag, char **end)
Definition: adcompr.cxx:120
gb_Key * keys
Definition: gb_main.h:134
GBENTRY * as_entry() const
Definition: gb_data.h:150
#define NULp
Definition: cxxforward.h:116
int GB_COMPRESSION_MASK
Definition: arbdb.h:35
static char * command
Definition: arb_a2ps.c:319
GB_DICTIONARY * gb_get_dictionary(GB_MAIN_TYPE *Main, GBQUARK key)
Definition: adcompr.cxx:857
static huffmann_list * huffmann_listhead
Definition: adcompr.cxx:464
const char * GB_CBUFFER
Definition: arbdb_base.h:27
void gbm_free_mem(void *block, size_t size, long index)
Definition: gb_memory.h:131
void gb_load_single_key_data(GBDATA *gb_main, GBQUARK q)
Definition: adsystem.cxx:112
#define GBTUM_COMPRESS_TREE_SIZE
Definition: adcompr.cxx:26
static GB_BUFFER gb_uncompress_huffmann(GB_CBUFFER source, size_t maxsize, size_t *new_size)
Definition: adcompr.cxx:722
#define STATIC_ASSERT(const_expression)
Definition: static_assert.h:37
int GBQUARK
Definition: arbdb_base.h:30
char * GB_BUFFER
Definition: arbdb_base.h:26
gb_local_data * gb_local
Definition: arbdb.cxx:33
const unsigned GB_RUNLENGTH_MIN_SIZE
Definition: adtune.cxx:30
unsigned int compressed_data
Definition: gb_data.h:70
GB_write_int const char s
Definition: AW_awar.cxx:154