ARB
Classes | Macros | Typedefs | Enumerations | Functions | Variables
adoptimize.cxx File Reference
#include <climits>
#include <netinet/in.h>
#include <arb_file.h>
#include <arb_diff.h>
#include <arbdbt.h>
#include "gb_key.h"
#include "gb_compress.h"
#include "gb_dict.h"
#include "arb_progress.h"
Include dependency graph for adoptimize.cxx:

Go to the source code of this file.

Classes

struct  O_gbdByKey
 
union  DictTree
 
struct  FullDictTree
 
struct  SingleDictTree
 

Macros

#define COMPRESSIBLE(type)   ((type) >= GB_BYTES && (type)<=GB_STRING)
 
#define DICT_MEM_WEIGHT   4
 
#define WORD_HELPFUL(wordlen, occurrences)
 
#define MIN_WORD_LEN   8
 
#define MAX_WORD_LEN   50
 
#define MAX_BROTHERS
 
#define MAX_DIFFER
 
#define INCR_DIFFER   1
 
#define DICT_STRING_INCR   1024
 
#define LEN_BITS   4
 
#define INDEX_BITS   2
 
#define INDEX_LEN_BITS   1
 
#define LEN_SHIFT   0
 
#define INDEX_SHIFT   (LEN_SHIFT+LEN_BITS)
 
#define INDEX_LEN_SHIFT   (INDEX_SHIFT+INDEX_BITS)
 
#define BITMASK(bits)   ((1<<(bits))-1)
 
#define GETVAL(tag, typ)   (((tag)>>typ##_SHIFT)&BITMASK(typ##_BITS))
 
#define MIN_SHORTLEN   6
 
#define MAX_SHORTLEN   (BITMASK(LEN_BITS)+MIN_SHORTLEN-1)
 
#define MIN_LONGLEN   (MAX_SHORTLEN+1)
 
#define MAX_LONGLEN   (MIN_LONGLEN+255)
 
#define SHORTLEN_DECR   (MIN_SHORTLEN-1)
 
#define LONGLEN_DECR   MIN_LONGLEN
 
#define MIN_COMPR_WORD_LEN   MIN_SHORTLEN
 
#define MAX_COMPR_WORD_LEN   MAX_LONGLEN
 
#define MAX_SHORT_INDEX   BITMASK(INDEX_BITS+8)
 
#define MAX_LONG_INDEX   BITMASK(INDEX_BITS+16)
 
#define LAST_COMPRESSED_BIT   64
 
#define DUMP_COMPRESSION_TEST   0
 
#define test_dtree(tree)
 
#define testCounts(tree)
 
#define cmp(i1, i2)   (heap2[i1]-heap2[i2])
 
#define swap(i1, i2)
 
#define cmp(i1, i2)   GB_MEMCMP(dict->text+dict->offsets[heap[i1]], dict->text+dict->offsets[heap[i2]], dict->textlen)
 
#define swap(i1, i2)   do { int s = heap[i1]; heap[i1] = heap[i2]; heap[i2] = s; } while (0)
 

Typedefs

typedef unsigned char unsigned_char
 
typedef unsigned char * u_str
 
typedef const unsigned char * cu_str
 

Enumerations

enum  DictNodeType { SINGLE_NODE, FULL_NODE }
 

Functions

cu_str get_data_n_size (GBDATA *gbd, size_t *size)
 
static long min (long a, long b)
 
static void g_b_opti_scanGbdByKey (GB_MAIN_TYPE *Main, GBDATA *gbd, O_gbdByKey *gbk)
 
static O_gbdByKey * g_b_opti_createGbdByKey (GB_MAIN_TYPE *Main)
 
static void g_b_opti_freeGbdByKey (O_gbdByKey *gbk)
 
static GB_ERROR gb_convert_compression (GBDATA *gbd)
 
GB_ERROR gb_convert_V2_to_V3 (GBDATA *gb_main)
 
int INDEX_DICT_OFFSET (int idx, GB_DICTIONARY *dict)
 
int ALPHA_DICT_OFFSET (int idx, GB_DICTIONARY *dict)
 
int GB_MEMCMP (const void *vm1, const void *vm2, long size)
 
static int searchWord (GB_DICTIONARY *dict, cu_str source, long size, unsigned long *wordIndex, int *wordLen)
 
static char * gb_uncompress_by_dictionary_internal (GB_DICTIONARY *dict, GB_CSTR s_source, const size_t size, bool append_zero, size_t *new_size)
 
char * gb_uncompress_by_dictionary (GBDATA *gbd, GB_CSTR s_source, size_t size, size_t *new_size)
 
char * gb_compress_by_dictionary (GB_DICTIONARY *dict, GB_CSTR s_source, size_t size, size_t *msize, int last_flag, int search_backward, int search_forward)
 
static DictTree new_dtree (cu_str text, long len, long *memcount)
 
static DictTree single2full_dtree (DictTree tree, long *memcount)
 
static void free_dtree (DictTree tree)
 
static DictTree cut_dtree (DictTree tree, int cut_count, long *memcount, long *leafcount)
 
static DictTree cut_useless_words (DictTree tree, int deep, long *removed)
 
static DictTree add_dtree_to_dtree (DictTree toAdd, DictTree to, long *memcount)
 
static DictTree add_to_dtree (DictTree tree, cu_str text, long len, long *memcount)
 
static long calcCounts (DictTree tree)
 
static int count_dtree_leafs (DictTree tree, int deep, int *maxdeep)
 
static int COUNT (DictTree tree)
 
static DictTree removeSubsequentString (DictTree *tree_pntr, cu_str buffer, int len, int max_occur)
 
static cu_str memstr (cu_str stringStart, int stringStartLen, cu_str inString, int inStringLen)
 
static int expandBranches (u_str buffer, int deep, int minwordlen, int maxdeep, DictTree tree, DictTree root, int max_percent)
 
static DictTree build_dict_tree (O_gbdByKey *gbk, long maxmem, long maxdeep, size_t minwordlen, long *data_sum)
 
static DictTree remove_word_from_dtree (DictTree tree, cu_str wordStart, int wordLen, u_str resultBuffer, int *resultLen, long *resultFrequency, long *removed)
 
static void downheap (int *heap, int *heap2, int me, int num)
 
static void downheap2 (int *heap, GB_DICTIONARY *dict, int me, int num)
 
static void sort_dict_offsets (GB_DICTIONARY *dict)
 
static GB_DICTIONARY * gb_create_dictionary (O_gbdByKey *gbk, long maxmem)
 
static void gb_free_dictionary (GB_DICTIONARY *&dict)
 
static GB_ERROR readAndWrite (O_gbdByKey *gbkp, size_t &old_size, size_t &new_size)
 
static GB_ERROR gb_create_dictionaries (GB_MAIN_TYPE *Main, long maxmem)
 
GB_ERROR GB_optimize (GBDATA *gb_main)
 

Variables

static int gbdByKey_cnt
 

Macro Definition Documentation

#define COMPRESSIBLE (   type)    ((type) >= GB_BYTES && (type)<=GB_STRING)

Definition at line 69 of file adoptimize.cxx.

Referenced by build_dict_tree(), and readAndWrite().

#define DICT_MEM_WEIGHT   4

Definition at line 70 of file adoptimize.cxx.

#define WORD_HELPFUL (   wordlen,
  occurrences 
)
Value:
((long)((occurrences)*3 + DICT_MEM_WEIGHT*(2*sizeof(GB_NINT)+(wordlen))) \
< \
(long)((occurrences)*(wordlen)))
long
Definition: AW_awar.cxx:152
#define DICT_MEM_WEIGHT
Definition: adoptimize.cxx:70
int GB_NINT
Definition: gb_dict.h:14

Definition at line 72 of file adoptimize.cxx.

Referenced by cut_useless_words().

#define MIN_WORD_LEN   8

Definition at line 82 of file adoptimize.cxx.

Referenced by gb_create_dictionary().

#define MAX_WORD_LEN   50

Definition at line 83 of file adoptimize.cxx.

Referenced by gb_create_dictionary().

#define MAX_BROTHERS
Value:
10 /* maximum no of brothers linked with SingleDictTree
* above we use FullDictTree */

Definition at line 84 of file adoptimize.cxx.

Referenced by add_to_dtree().

#define MAX_DIFFER
Value:
2 /* percentage of difference (of occurrences of strings) below which two
* consecutive parts are treated as EQUAL # of occurrences */

Definition at line 86 of file adoptimize.cxx.

Referenced by build_dict_tree().

#define INCR_DIFFER   1

Definition at line 88 of file adoptimize.cxx.

Referenced by build_dict_tree().

#define DICT_STRING_INCR   1024

Definition at line 90 of file adoptimize.cxx.

Referenced by gb_create_dictionary().

#define LEN_BITS   4

Definition at line 312 of file adoptimize.cxx.

#define INDEX_BITS   2

Definition at line 313 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define INDEX_LEN_BITS   1

Definition at line 314 of file adoptimize.cxx.

#define LEN_SHIFT   0

Definition at line 316 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define INDEX_SHIFT   (LEN_SHIFT+LEN_BITS)

Definition at line 317 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define INDEX_LEN_SHIFT   (INDEX_SHIFT+INDEX_BITS)

Definition at line 318 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define BITMASK (   bits)    ((1<<(bits))-1)

Definition at line 320 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define GETVAL (   tag,
  typ 
)    (((tag)>>typ##_SHIFT)&BITMASK(typ##_BITS))

Definition at line 321 of file adoptimize.cxx.

Referenced by gb_uncompress_by_dictionary_internal().

#define MIN_SHORTLEN   6

Definition at line 323 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define MAX_SHORTLEN   (BITMASK(LEN_BITS)+MIN_SHORTLEN-1)

Definition at line 324 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define MIN_LONGLEN   (MAX_SHORTLEN+1)

Definition at line 325 of file adoptimize.cxx.

#define MAX_LONGLEN   (MIN_LONGLEN+255)

Definition at line 326 of file adoptimize.cxx.

#define SHORTLEN_DECR   (MIN_SHORTLEN-1)
#define LONGLEN_DECR   MIN_LONGLEN
#define MIN_COMPR_WORD_LEN   MIN_SHORTLEN

Definition at line 331 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary(), and searchWord().

#define MAX_COMPR_WORD_LEN   MAX_LONGLEN

Definition at line 332 of file adoptimize.cxx.

Referenced by searchWord().

#define MAX_SHORT_INDEX   BITMASK(INDEX_BITS+8)

Definition at line 334 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary().

#define MAX_LONG_INDEX   BITMASK(INDEX_BITS+16)
#define LAST_COMPRESSED_BIT   64
#define DUMP_COMPRESSION_TEST   0

Definition at line 348 of file adoptimize.cxx.

#define test_dtree (   tree)

Definition at line 1047 of file adoptimize.cxx.

Referenced by build_dict_tree().

#define testCounts (   tree)

Definition at line 1048 of file adoptimize.cxx.

Referenced by build_dict_tree(), and gb_create_dictionary().

#define cmp (   i1,
  i2 
)    (heap2[i1]-heap2[i2])

Definition at line 2122 of file adoptimize.cxx.

Referenced by downheap(), and downheap2().

#define swap (   i1,
  i2 
)
Value:
do \
{ \
int s = heap[i1]; \
heap[i1] = heap[i2]; \
heap[i2] = s; \
\
s = heap2[i1]; \
heap2[i1] = heap2[i2]; \
heap2[i2] = s; \
} \
while (0)
while(1)
GB_write_int const char s
Definition: AW_awar.cxx:154

Definition at line 2123 of file adoptimize.cxx.

Referenced by downheap(), and downheap2().

#define cmp (   i1,
  i2 
)    GB_MEMCMP(dict->text+dict->offsets[heap[i1]], dict->text+dict->offsets[heap[i2]], dict->textlen)

Definition at line 2122 of file adoptimize.cxx.

#define swap (   i1,
  i2 
)    do { int s = heap[i1]; heap[i1] = heap[i2]; heap[i2] = s; } while (0)

Definition at line 2123 of file adoptimize.cxx.

Typedef Documentation

typedef unsigned char unsigned_char

Definition at line 29 of file adoptimize.cxx.

typedef unsigned char* u_str

Definition at line 30 of file adoptimize.cxx.

typedef const unsigned char* cu_str

Definition at line 31 of file adoptimize.cxx.

Enumeration Type Documentation

enum DictNodeType

Enumerator
SINGLE_NODE 
FULL_NODE 

Definition at line 49 of file adoptimize.cxx.

Function Documentation

cu_str get_data_n_size ( GBDATA *  gbd,
size_t *  size 
)
inline
static long min ( long  a,
long  b 
)
inline static

Definition at line 113 of file adoptimize.cxx.

Referenced by gb_compress_by_dictionary(), gb_create_dictionary(), and searchWord().

static void g_b_opti_scanGbdByKey ( GB_MAIN_TYPE *  Main,
GBDATA *  gbd,
O_gbdByKey *  gbk 
)
static
static O_gbdByKey* g_b_opti_createGbdByKey ( GB_MAIN_TYPE *  Main)
static
static void g_b_opti_freeGbdByKey ( O_gbdByKey *  gbk)
static

Definition at line 171 of file adoptimize.cxx.

References gbdByKey_cnt.

Referenced by gb_create_dictionaries().

static GB_ERROR gb_convert_compression ( GBDATA *  gbd)
static
GB_ERROR gb_convert_V2_to_V3 ( GBDATA *  gb_main)
int INDEX_DICT_OFFSET ( int  idx,
GB_DICTIONARY *  dict 
)
inline

Definition at line 298 of file adoptimize.cxx.

References gb_assert, and GB_DICTIONARY::offsets.

Referenced by ALPHA_DICT_OFFSET(), and gb_uncompress_by_dictionary_internal().

int ALPHA_DICT_OFFSET ( int  idx,
GB_DICTIONARY *  dict 
)
inline

Definition at line 302 of file adoptimize.cxx.

References gb_assert, INDEX_DICT_OFFSET(), and GB_DICTIONARY::resort.

Referenced by searchWord().

int GB_MEMCMP ( const void *  vm1,
const void *  vm2,
long  size 
)
inline

Definition at line 424 of file adoptimize.cxx.

References diff().

Referenced by gb_create_dictionary(), memstr(), and searchWord().

static int searchWord ( GB_DICTIONARY *  dict,
cu_str  source,
long  size,
unsigned long *  wordIndex,
int *  wordLen 
)
static
static char* gb_uncompress_by_dictionary_internal ( GB_DICTIONARY *  dict,
GB_CSTR  s_source,
const size_t  size,
bool  append_zero,
size_t *  new_size 
)
static
char* gb_uncompress_by_dictionary ( GBDATA *  gbd,
GB_CSTR  s_source,
size_t  size,
size_t *  new_size 
)
char* gb_compress_by_dictionary ( GB_DICTIONARY *  dict,
GB_CSTR  s_source,
size_t  size,
size_t *  msize,
int  last_flag,
int  search_backward,
int  search_forward 
)
static DictTree new_dtree ( cu_str  text,
long  len,
long *  memcount 
)
static
static DictTree single2full_dtree ( DictTree  tree,
long *  memcount 
)
static
static void free_dtree ( DictTree  tree)
static
static DictTree cut_dtree ( DictTree  tree,
int  cut_count,
long *  memcount,
long *  leafcount 
)
static
static DictTree cut_useless_words ( DictTree  tree,
int  deep,
long *  removed 
)
static
static DictTree add_dtree_to_dtree ( DictTree  toAdd,
DictTree  to,
long *  memcount 
)
static
static DictTree add_to_dtree ( DictTree  tree,
cu_str  text,
long  len,
long *  memcount 
)
static
static long calcCounts ( DictTree  tree)
static
static int count_dtree_leafs ( DictTree  tree,
int  deep,
int *  maxdeep 
)
static
static int COUNT ( DictTree  tree)
static
static DictTree removeSubsequentString ( DictTree *  tree_pntr,
cu_str  buffer,
int  len,
int  max_occur 
)
static
static cu_str memstr ( cu_str  stringStart,
int  stringStartLen,
cu_str  inString,
int  inStringLen 
)
static

Definition at line 1594 of file adoptimize.cxx.

References GB_MEMCMP(), and NULp.

Referenced by expandBranches().

static int expandBranches ( u_str  buffer,
int  deep,
int  minwordlen,
int  maxdeep,
DictTree  tree,
DictTree  root,
int  max_percent 
)
static
static DictTree build_dict_tree ( O_gbdByKey *  gbk,
long  maxmem,
long  maxdeep,
size_t  minwordlen,
long *  data_sum 
)
static
static DictTree remove_word_from_dtree ( DictTree  tree,
cu_str  wordStart,
int  wordLen,
u_str  resultBuffer,
int *  resultLen,
long *  resultFrequency,
long *  removed 
)
static
static void downheap ( int *  heap,
int *  heap2,
int  me,
int  num 
)
static

Definition at line 2094 of file adoptimize.cxx.

References cmp, gb_assert, and swap.

Referenced by sort_dict_offsets().

static void downheap2 ( int *  heap,
GB_DICTIONARY *  dict,
int  me,
int  num 
)
static

Definition at line 2125 of file adoptimize.cxx.

References cmp, gb_assert, and swap.

Referenced by sort_dict_offsets().

static void sort_dict_offsets ( GB_DICTIONARY *  dict)
static
static GB_DICTIONARY* gb_create_dictionary ( O_gbdByKey *  gbk,
long  maxmem 
)
static
static void gb_free_dictionary ( GB_DICTIONARY *&  dict)
static
static GB_ERROR readAndWrite ( O_gbdByKey *  gbkp,
size_t &  old_size,
size_t &  new_size 
)
static
static GB_ERROR gb_create_dictionaries ( GB_MAIN_TYPE *  Main,
long  maxmem 
)
static
GB_ERROR GB_optimize ( GBDATA *  gb_main)

Variable Documentation

int gbdByKey_cnt
static