12 #include <netinet/in.h>
69 #define COMPRESSIBLE(type) ((type) >= GB_BYTES && (type)<=GB_STRING) // GB_TYPES range eligible for dictionary compression
70 #define DICT_MEM_WEIGHT 4 // weighting of dictionary memory vs. saved data bytes (see WORD_HELPFUL below)
72 #define WORD_HELPFUL(wordlen, occurrences) ((long)((occurrences)*3 + DICT_MEM_WEIGHT*(2*sizeof(GB_NINT)+(wordlen))) \
74 (long)((occurrences)*(wordlen)))
82 #define MIN_WORD_LEN 8 // minimum length of words in dictionary
83 #define MAX_WORD_LEN 50 // maximum length of words in dictionary
84 #define MAX_BROTHERS 10 // NOTE(review): presumably max sibling 'single' dtree nodes per level before using a full node — confirm
88 #define INCR_DIFFER 1 // the above percentage is incremented from 0 to MAX_DIFFER by INCR_DIFFER per step
90 #define DICT_STRING_INCR 1024 // dictionary string will be incremented by this size
98 switch (gbd->
type()) {
113 static inline long min(
long a,
long b) {
122 for (
int idx=0; idx < gbc->
d.
nheader; idx++) {
130 gb_assert(gbk[quark].cnt < Main->keys[quark].nref || quark==0);
133 gbk[quark].
gbds[gbk[quark].
cnt] = gbd;
141 gbdByKey_cnt = Main->
keycnt;
161 if (gbk[idx].cnt != Main->
keys[idx].
nref && idx) {
162 printf(
"idx=%i gbk[idx].cnt=%i Main->keys[idx].nref=%li\n",
163 idx, gbk[idx].cnt, Main->
keys[idx].
nref);
172 for (
int idx=0; idx<
gbdByKey_cnt; idx++) free(gbk[idx].gbds);
183 if (gb_child->flags.compressed_data || gb_child->is_container()) {
192 long elems = gbe->
size();
194 size_t new_size = -1;
197 switch (gbd->
type()) {
228 switch (gbd->
type()) {
265 if (
GB_entry(gb_main,
"extended_data")) {
266 GB_warning(
"Converting data from old V2.0 to V2.1 Format:\n"
267 " Please Wait (may take some time)");
300 return ntohl(dict->
offsets[idx]);
305 realIndex = ntohl(dict->
resort[idx]);
314 #define INDEX_LEN_BITS 1 // 1-bit tag flag: 0 = word index uses 1 extra byte, 1 = 2 extra bytes (cf. indexLen handling in compress/decompress)
317 #define INDEX_SHIFT (LEN_SHIFT+LEN_BITS)
318 #define INDEX_LEN_SHIFT (INDEX_SHIFT+INDEX_BITS)
320 #define BITMASK(bits) ((1<<(bits))-1) // mask with the lowest (bits) bits set
321 #define GETVAL(tag,typ) (((tag)>>typ##_SHIFT)&BITMASK(typ##_BITS)) // extract bit-field 'typ' (via typ##_SHIFT/typ##_BITS) from a tag byte
323 #define MIN_SHORTLEN 6 // smallest word length encodable in the short (in-tag) length field
324 #define MAX_SHORTLEN (BITMASK(LEN_BITS)+MIN_SHORTLEN-1) // largest short-form length (LEN_BITS-wide field, biased by MIN_SHORTLEN-1)
325 #define MIN_LONGLEN (MAX_SHORTLEN+1) // lengths from here on need the long form (extra length byte)
326 #define MAX_LONGLEN (MIN_LONGLEN+255) // long form adds exactly one length byte => 256 additional lengths
328 #define SHORTLEN_DECR (MIN_SHORTLEN-1) // !! zero is used as flag for long len !!
329 #define LONGLEN_DECR MIN_LONGLEN
331 #define MIN_COMPR_WORD_LEN MIN_SHORTLEN // only words with length in [MIN_COMPR_WORD_LEN..MAX_COMPR_WORD_LEN] are dictionary-compressed
332 #define MAX_COMPR_WORD_LEN MAX_LONGLEN
334 #define MAX_SHORT_INDEX BITMASK(INDEX_BITS+8) // max word index representable with 1 extra index byte
335 #define MAX_LONG_INDEX BITMASK(INDEX_BITS+16) // max word index representable with 2 extra index bytes
337 #define LAST_COMPRESSED_BIT 64 // NOTE(review): flag bit in the tag byte (uncompressed-block lengths are capped at 63, word tags use bit 128) — confirm exact use
340 # define DUMP_COMPRESSION_TEST 0
348 # define DUMP_COMPRESSION_TEST 0
354 #if defined(COUNT_CHUNKS)
356 static long uncompressedBlocks[64];
359 static void clearChunkCounters() {
362 for (i=0; i<64; i++) uncompressedBlocks[i] = 0;
363 for (i=0; i<
MAX_LONGLEN; i++) compressedBlocks[i] = 0;
366 static void dumpChunkCounters() {
369 printf(
"------------------------------\n" "Uncompressed blocks used:\n");
370 for (i=0; i<64; i++)
if (uncompressedBlocks[i]) printf(
" size=%i used=%li\n", i, uncompressedBlocks[i]);
371 printf(
"------------------------------\n" "Words used:\n");
372 for (i=0; i<
MAX_LONGLEN; i++)
if (compressedBlocks[i]) printf(
" size=%i used=%li\n", i, compressedBlocks[i]);
373 printf(
"------------------------------\n");
375 #endif // COUNT_CHUNKS
388 #if DUMP_COMPRESSION_TEST>=2
396 #if DUMP_COMPRESSION_TEST>=1
398 static void dumpBinary(
u_str data,
long size) {
408 putchar(c&bitval ?
'1' :
'0');
413 cnt = (cnt+1)%PER_LINE;
414 if (!cnt) putchar(
'\n');
417 if (cnt) putchar(
'\n');
424 inline int GB_MEMCMP(
const void *vm1,
const void *vm2,
long size) {
425 char *c1 = (
char*)vm1,
429 while (size-- && !diff) diff = *c1++-*c2++;
438 int h = dict->
words-1;
447 cu_str dictword = text+off;
448 long msize =
min(size, dsize-off);
450 #if DUMP_COMPRESSION_TEST>=4
451 printf(
" %s (%i)\n", lstr(dictword, 20), m);
454 if (
GB_MEMCMP(source, dictword, msize)<=0) h = m;
461 int msize = (
int)
min(size, dsize-off);
465 while (msize-- && *s++==*word++) equal++;
467 #if DUMP_COMPRESSION_TEST>=3
469 printf(
" EQUAL=%i '%s' (%i->%i, off=%i)", equal, lstr(text+off, equal), l, ntohl(resort[l]),
ALPHA_DICT_OFFSET(l, dict));
470 printf(
" (context=%s)\n", lstr(text+off-
min(off, 20),
min(off, 20)+equal+20));
476 idx = ntohl(resort[l]);
491 unsigned long wordIndex;
493 int wordFound =
searchWord(dict, (
cu_str)source, strlen(source), &wordIndex, &wordLen);
496 printf(
"'%s' (idx=%lu, off=%i)\n", lstr(dict->
text+ntohl(dict->
offsets[wordIndex]), wordLen), wordIndex, ntohl(dict->
offsets[wordIndex]));
515 while (left && !done) {
518 if ((c=*source++)&128) {
519 int indexLen =
GETVAL(c, INDEX_LEN);
520 unsigned long idx =
GETVAL(c, INDEX);
529 idx = (idx << 8) | *source++;
532 idx = (((idx << 8) | source[1]) << 8) | source[0];
541 #if DUMP_COMPRESSION_TEST>=2
542 printf(
" word='%s' (idx=%lu, off=%li, len=%i)\n",
543 lstr(word, c), idx, (
long)ntohl(dict->
offsets[idx]), c);
548 gb_assert(((d + c) <= word) || (d >= (word + c)));
549 while (c--) *d++ = *word++;
563 gb_assert(((d + c) <= source) || (d >= (source + c)));
564 while (c--) *d++ = *source++;
570 if (append_zero) *dest++ = 0;
598 #if defined(ASSERTION_USED)
599 const size_t org_size = size;
600 #endif // ASSERTION_USED
608 unsigned long wordIndex;
612 if ((wordFound =
searchWord(dict, source, size, &wordIndex, &wordLen))) {
616 length = source-unknown;
621 int maxShift = (
int)
min(search_forward, wordLen-1);
623 for (shift=1; shift<=maxShift; shift++) {
624 unsigned long wordIndex2;
628 if ((wordFound2 =
searchWord(dict, source+shift, size-shift, &wordIndex2, &wordLen2))) {
629 if (wordLen2>(wordLen+shift)) {
630 wordIndex = wordIndex2;
640 length = source-unknown;
645 int take = (
int)
min(length, 63);
648 uncompressedBlocks[take]++;
651 lastUncompressed = dest;
654 memcpy(dest, unknown, take);
664 int indexHighBits = indexLen==0 ? wordIndex>>8 : wordIndex>>16;
667 unsigned long nextWordIndex;
674 lastUncompressed =
NULp;
677 cu_str source2 = source+wordLen;
678 long size2 = size-wordLen;
680 if (!(nextWordFound=
searchWord(dict, source+wordLen, size-wordLen, &nextWordIndex, &nextWordLen))) {
683 for (shift=1; shift<=search_backward && shift<(wordLen-
MIN_COMPR_WORD_LEN); shift++) {
685 unsigned long wordIndex2;
689 if ((wordFound2=
searchWord(dict, source2-shift, size2+shift, &wordIndex2, &wordLen2))) {
690 if (wordLen2>(shift+1)) {
694 nextWordIndex = wordIndex2;
695 nextWordLen = wordLen2;
704 compressedBlocks[wordLen]++;
707 #if DUMP_COMPRESSION_TEST>=2
708 printf(
" word='%s' (idx=%li, off=%i, len=%i)\n",
709 dict_word(dict, wordIndex, wordLen), wordIndex, (
int)ntohl(dict->
offsets[wordIndex]), wordLen);
725 *dest++ = (
char)wordIndex;
727 *dest++ = (
char)(wordIndex >> 8);
729 unknown = source += wordLen;
732 wordFound = nextWordFound;
733 wordIndex = nextWordIndex;
734 wordLen = nextWordLen;
739 if (--size==0)
goto takeRest;
748 #if defined(ASSERTION_USED)
750 size_t new_size = -1;
753 gb_assert(memcmp(test, s_source, org_size) == 0);
756 #endif // ASSERTION_USED
762 #if defined(TEST_DICT)
765 long uncompressed_sum = 0;
766 long compressed_sum = 0;
770 long char_count[256];
771 for (
int i=0; i<256; i++) char_count[i] = 0;
773 printf(
" * Testing compression..\n");
776 clearChunkCounters();
779 for (
int cnt=0; cnt<gbk->
cnt; cnt++) {
787 if (size<1)
continue;
791 memcpy(copy, data, size);
793 #if DUMP_COMPRESSION_TEST>=1
794 printf(
"----------------------------\n");
795 printf(
"original : %3li b = '%s'\n", size, data);
799 size_t compressedSize;
802 #if DUMP_COMPRESSION_TEST>=1
803 printf(
"compressed : %3li b = '%s'\n", compressedSize, lstr(compressed, compressedSize));
804 dumpBinary(compressed, compressedSize);
807 for (
size_t i=0; i<compressedSize; i++) char_count[compressed[i]]++;
809 size_t new_size = -1;
812 #if DUMP_COMPRESSION_TEST>=1
813 printf(
"copy : %3li b = '%s'\n", size, lstr(copy, size));
814 printf(
"decompressed: %3li b = '%s'\n", size, lstr(uncompressed, size));
817 if (
GB_MEMCMP(copy, uncompressed, size)!=0) {
820 while (copy[byte]==uncompressed[byte]) byte++;
821 printf(
"Error in compression (off=%i, '%s'", byte, lstr(copy+byte, 10));
822 printf(
"!='%s'\n", lstr(uncompressed+byte, 10));
825 if (compressedSize<size) {
826 uncompressed_sum += size;
827 compressed_sum += compressedSize;
830 uncompressed_sum += size;
831 compressed_sum += size;
843 long compressed_plus_dict = compressed_sum+dict_size;
845 long ratio = (compressed_plus_dict*100)/uncompressed_sum;
847 printf(
" uncompressed size = %10li b\n"
848 " compressed size = %10li b\n"
849 " %17s = %10li b (Ratio=%li%%)\n",
852 dict_text, compressed_plus_dict, ratio);
857 *uncompSum += uncompressed_sum;
858 *compSum += compressed_sum+dict_size;
867 #define TEST // test trees?
877 static char *strnstr(
char *s1,
int len,
char *s2) {
879 int len2 = strlen(s2);
881 while (len-->=len2) {
883 if (strncmp(s1, s2, len2)==0)
return s1;
901 for (idx=0; idx<256; idx++) {
906 else if (tree.
full->
count[idx]>0) printf(
" '%s' (%i) [array]\n", buffer, tree.
full->
count[idx]);
915 else printf(
" '%s' (%i) [single]\n", buffer, tree.
single->
count);
927 # define dump_dtree(deep, tree)
959 for (idx=0; idx<256; idx++) {
965 gb_assert(son_cnt<=tree.full->count[idx]);
990 #ifdef TEST_MAX_OCCUR_COUNT
991 #define MAX_OCCUR_COUNT 600000
1000 #if defined(TEST_MAX_OCCUR_COUNT)
1002 #endif // TEST_MAX_OCCUR_COUNT
1019 for (idx=0; idx<256; idx++) {
1020 #if defined(TEST_MAX_OCCUR_COUNT)
1022 #endif // TEST_MAX_OCCUR_COUNT
1047 # define test_dtree(tree) // (tree)
1048 # define testCounts(tree) // 0
1064 (*memcount) +=
sizeof(*tail);
1089 (*memcount) +=
sizeof(*full);
1093 for (idx=0; idx<256; idx++) {
1095 full->
count[idx] = 0;
1111 (*memcount) -=
sizeof(*t);
1157 (*memcount) -=
sizeof(*tree.
single);
1158 if (brother.
exists)
return cut_dtree(brother, cut_count, memcount, leafcount);
1175 for (idx=0; idx<256; idx++) {
1197 (*memcount) -=
sizeof(*(tree.
full));
1220 long removed_single;
1226 *removed += removed_single;
1237 *removed += removed_single;
1248 *removed += removed_single;
1257 for (idx=0; idx<256; idx++) {
1260 tree.
full->
count[idx] -= removed_single;
1261 *removed += removed_single;
1342 (*memcount) -=
sizeof(toAdd.
single);
1364 if (t->
ch==text[0]) {
1376 else if (t->
ch > text[0]) {
1431 for (idx=0; idx<256; idx++) {
1454 if (++deep>*maxdeep) *maxdeep = deep;
1466 for (idx=0; idx<256; idx++) {
1468 else if (tree.
full->
count[idx]) leafs++;
1492 for (idx=0; idx<256; idx++) cnt += tree.
full->
count[idx];
1510 static int restCount;
1519 while (tree.
single->
ch <= buffer[0]) {
1520 if (tree.
single->
ch == buffer[0]) {
1525 restCount =
COUNT(rest);
1543 *tree_pntr = tree = brother;
1561 if (tree.
full->
count[ch] <= max_occur) {
1563 restCount =
COUNT(rest);
1595 if (!inStringLen)
return stringStart;
1597 while (stringStartLen) {
1598 cu_str found = (
cu_str)memchr(stringStart, inString[0], stringStartLen);
1602 stringStartLen -= found-stringStart;
1603 stringStart = found;
1605 if (stringStartLen<inStringLen)
break;
1607 if (
GB_MEMCMP(stringStart, inString, inStringLen)==0)
return stringStart;
1642 u_str buf = buffer+1;
1645 if (len>minwordlen) {
1646 buf += len-minwordlen;
1650 if (len==minwordlen) {
1662 int cnt =
COUNT(rest);
1669 printf(
"expanding '%s'", lstr(buffer, deep+1+DUMP_MORE));
1670 printf(
" (searching for '%s') -> found %i nodes\n", lstr(buf, len+DUMP_MORE), cnt);
1690 for (idx=0; idx<256; idx++) {
1695 u_str buf = buffer+1;
1698 if (len>minwordlen) {
1699 buf += len-minwordlen;
1703 if (len==minwordlen) {
1717 int cnt =
COUNT(rest);
1725 printf(
"expanding '%s'", lstr(buffer, deep+1+DUMP_MORE));
1726 printf(
" (searching for '%s') -> found %i nodes\n", lstr(buf, len+DUMP_MORE), cnt);
1732 int added =
expandBranches(buffer, deep+1, minwordlen, maxdeep, tree.
full->
son[idx], root, max_percent);
1764 long lowmem = (maxmem*9)/10;
1771 for (cnt=0; cnt<gbk->
cnt; cnt++) {
1780 if (size<minwordlen)
continue;
1783 lastWord = data+size-minwordlen;
1786 if (strnstr(data, size, SELECTED_WORDS))
1790 for (; data<=lastWord; data++) {
1791 tree =
add_to_dtree(tree, data, minwordlen, &memcount);
1793 while (memcount>maxmem) {
1795 tree =
cut_dtree(tree, cut_count, &memcount, &leafs);
1796 if (memcount<=lowmem)
break;
1809 tree =
cut_dtree(tree, cutoff, &memcount, &leafs);
1825 printf(
"Directory overflow (%li) -- reducing size (cutoff = %i)\n", leafs, cutoff);
1827 tree =
cut_dtree(tree, cutoff, &memcount, &leafs);
1831 printf(
"----------------------- tree with short branches:\n");
1832 dump_dtree(0, tree);
1833 printf(
"---------------------------\n");
1855 for (idx=0; idx<256; idx++) {
1875 printf(
"----------------------- tree with expanded branches:\n");
1876 dump_dtree(0, tree);
1877 printf(
"-----------------------\n");
1890 long removed_single = 0;
1898 *resultBuffer = *wordStart;
1903 resultBuffer+1, resultLen, resultFrequency,
1908 *removed += removed_single;
1913 *resultLen = wordLen==1;
1923 if (brother.
exists) tree = brother;
1929 resultBuffer, resultLen, resultFrequency,
1931 if (*resultLen) *removed += removed_single;
1945 resultBuffer+1, resultLen, resultFrequency,
1957 *removed += removed_single;
1962 *resultLen = (wordLen==1);
1965 *removed += removed_single = *resultFrequency = tree.
full->
count[ch];
1991 resultBuffer+1, resultLen, resultFrequency,
2007 *removed += removed_single;
2015 if (brother.
exists) tree = brother;
2024 for (idx=0; idx<256; idx++) {
2026 *resultBuffer = idx;
2028 resultBuffer+1, resultLen, resultFrequency,
2039 tree.
full->
count[idx] -= removed_single;
2046 *resultBuffer = idx;
2048 *resultFrequency = tree.
full->
count[idx];
2058 *removed += removed_single;
2081 #define cmp(i1, i2) (heap2[i1]-heap2[i2])
2082 #define swap(i1, i2) do \
2085 heap[i1] = heap[i2]; \
2089 heap2[i1] = heap2[i2]; \
2094 static void downheap(
int *heap,
int *heap2,
int me,
int num) {
2099 if (lson>num)
return;
2101 if (
cmp(lson, me)<0) {
2102 if (rson<=num &&
cmp(lson, rson)>0) {
2111 else if (rson<=num &&
cmp(me, rson)>0) {
2122 #define cmp(i1, i2) GB_MEMCMP(dict->text+dict->offsets[heap[i1]], dict->text+dict->offsets[heap[i2]], dict->textlen)
2123 #define swap(i1, i2) do { int s = heap[i1]; heap[i1] = heap[i2]; heap[i2] = s; } while (0)
2130 if (lson>num)
return;
2132 if (
cmp(lson, me)>0) {
2133 if (rson<=num &&
cmp(lson, rson)<0) {
2142 else if (rson<=num &&
cmp(me, rson)<0) {
2157 int num = dict->
words;
2159 int *heap2 = dict->
resort-1;
2163 for (i=num/2; i>=1; i--)
downheap(heap, heap2, i, num);
2167 int big2 = heap2[1];
2169 heap[1] = heap[num];
2170 heap2[1] = heap2[num];
2182 for (i=0, num=dict->
words; i<num; i++) dict->
resort[i] = i;
2186 for (i=num/2; i>=1; i--)
downheap2(heap2, dict, i, num);
2191 heap2[1] = heap2[num];
2217 long overlap_sum = 0;
2218 long max_overlap = 0;
2233 printf(
" examined data was %li bytes\n", data_sum);
2234 printf(
" tree contains %i words *** maximum tree depth = %i\n", words, maxdeep);
2244 memset(buffer,
'*', maxdeep);
2249 int nextWordLen = 0;
2252 #if DUMP_COMPRESSION_TEST>=4
2253 printf(
"word='%s' (occur=%li overlap=%i)\n", lstr(buffer, wordLen), wordFrequency, overlap);
2257 overlap_sum += overlap;
2258 if (overlap>max_overlap) max_overlap = overlap;
2259 word_sum += wordLen;
2262 if (offset-overlap+wordLen > dict->
textlen) {
2276 word = dict->
text+offset-overlap;
2278 memcpy(word, buffer, wordLen);
2279 offset += wordLen-overlap;
2283 for (len=
min(10, wordLen-1); len>=0 && nextWordLen==0; len--) {
2284 memset(buffer,
'*', maxdeep);
2285 tree =
remove_word_from_dtree(tree, word+wordLen-len, len, buffer, &nextWordLen, &wordFrequency, &dummy);
2289 wordLen = nextWordLen;
2297 printf(
" word_sum=%li overlap_sum=%li (%li%%) max_overlap=%li\n",
2298 word_sum, overlap_sum, (overlap_sum*100)/word_sum, max_overlap);
2301 if (offset<dict->textlen) {
2304 memcpy(ntext, dict->
text, offset);
2337 for (
int i=0; i<gbkp->
cnt && !
error; i++) {
2349 memcpy(data, d, size);
2353 switch (gbd->
type()) {
2385 #if defined(TEST_DICT)
2386 long uncompressed_sum = 0;
2387 long compressed_sum = 0;
2390 printf(
"Creating GBDATA-Arrays..\n");
2396 printf(
"Creating dictionaries..\n");
2401 #if defined(TEST_ONE)
2404 if (gbk[idx].cnt && strcmp(
quark2key(Main, idx),
"tree")==0)
break;
2414 arb_progress progress(
"Optimizing key data",
long(gbdByKey_cnt-1));
2426 strcmp(key_name,
"REF") == 0 ||
2427 strcmp(key_name,
"ref") == 0
2432 if (!gbk[idx].cnt)
continue;
2441 if (strcmp(key_name,
"data") == 0)
continue;
2442 if (strcmp(key_name,
"quality") == 0)
continue;
2445 printf(
"- dictionary for '%s' (idx=%i)\n", key_name, idx);
2453 printf(
" * Uncompressing all with old dictionary ...\n");
2455 size_t old_compressed_size;
2459 int old_compr_mask = compr_mask;
2463 compr_mask &= ~GB_COMPRESSION_DICTIONARY;
2464 error =
readAndWrite(&gbk[idx], old_compressed_size, new_size);
2465 compr_mask = old_compr_mask;
2479 long old_dict_buffer_size;
2480 char *old_dict_buffer;
2486 *nint++ = htonl(dict->
words);
2487 for (n=0; n<dict->
words; n++) *nint++ = htonl(dict->
offsets[n]);
2488 for (n=0; n<dict->
words; n++) *nint++ = htonl(dict->
resort[n]);
2493 const char *key = Main->
keys[idx].
key;
2500 printf(
" * Compressing all with new dictionary ...\n");
2502 size_t old_size, new_compressed_size;
2503 error =
readAndWrite(&gbk[idx], old_size, new_compressed_size);
2506 printf(
" (compressed size: old=%zu new=%zu ratio=%.1f%%)\n",
2507 old_compressed_size, new_compressed_size, (new_compressed_size*100.0)/old_compressed_size);
2525 #if defined(TEST_DICT)
2528 test_dictionary(dict_reloaded, &(gbk[idx]), &uncompressed_sum, &compressed_sum);
2541 printf(
" overall uncompressed size = %li b\n"
2542 " overall compressed size = %li b (Ratio=%li%%)\n",
2543 uncompressed_sum, compressed_sum,
2544 (compressed_sum*100)/uncompressed_sum);
2566 if (maxKB<=(LONG_MAX/1024)) maxMem = maxKB*1024;
2567 else maxMem = LONG_MAX;
2588 void TEST_SLOW_optimize() {
2596 const char *source_ascii =
"TEST_opti_ascii_in.arb";
2597 const char *target_ascii =
"TEST_opti_ascii_out.arb";
2599 const char *nonopti =
"TEST_opti_none.arb";
2600 const char *optimized =
"TEST_opti_initial.arb";
2601 const char *reoptimized =
"TEST_opti_again.arb";
2602 const char *expected =
"TEST_opti_expected.arb";
2620 #if defined(TEST_AUTO_UPDATE)
2621 TEST_COPY_FILE(optimized, expected);
2623 #if defined(TEST_AUTO_UPDATE_ASCII)
2624 TEST_COPY_FILE(target_ascii, source_ascii);
2660 void TEST_ticket_742() {
2664 const char *opti_db =
"TEST_opti_expected.arb";
2667 for (
int suf = 2; suf<=4; ++suf) {
2720 const char *seq1 =
".....................AUUCUGGUU-----GA--U-CC-U-G------------..............";
2721 const char *seq2 =
"........A-A--CU---------------C-A-A-A-G-GA-G--AC---A-C-U-G...............";
2749 void TEST_AFTER_SLOW_streamed_ascii_save_asUsedBy_silva_pipeline() {
2752 const char *loadname =
"TEST_loadsave_ascii.arb";
2753 const char *savename =
"TEST_streamsaved.arb";
2773 for (
int saveWhileTransactionOpen = 0; saveWhileTransactionOpen<=1; ++saveWhileTransactionOpen) {
2826 GBDATA *gb_extended_data2;
2843 #endif // UNIT_TESTS
GB_ERROR GB_begin_transaction(GBDATA *gbd)
GB_ERROR GBK_system(const char *system_command)
int INDEX_DICT_OFFSET(int idx, GB_DICTIONARY *dict)
static DictTree build_dict_tree(O_gbdByKey *gbk, long maxmem, long maxdeep, size_t minwordlen, long *data_sum)
GBDATA * GB_open(const char *path, const char *opent)
GB_ERROR GB_commit_transaction(GBDATA *gbd)
static void downheap(int *heap, int *heap2, int me, int num)
void GB_warning(const char *message)
GB_ERROR GB_start_streamed_save_as(GBDATA *gbd, const char *path, const char *savetype, ArbDBWriter *&writer)
GBDATA * GB_child(GBDATA *father)
GB_ERROR GB_write_bytes(GBDATA *gbd, const char *s, long size)
static DictTree single2full_dtree(DictTree tree, long *memcount)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
const char * quark2key(GB_MAIN_TYPE *Main, GBQUARK key_quark)
#define ASSERT_NO_ERROR(errorExpr)
GB_MAIN_TYPE * GB_MAIN(GBDATA *gbd)
static GB_ERROR readAndWrite(O_gbdByKey *gbkp, size_t &old_size, size_t &new_size)
static int searchWord(GB_DICTIONARY *dict, cu_str source, long size, unsigned long *wordIndex, int *wordLen)
GB_BUFFER gb_uncompress_longs_old(GB_CBUFFER source, size_t size, size_t *new_size)
static GB_ERROR gb_convert_compression(GBDATA *gbd)
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
unsigned char unsigned_char
void GB_disable_quicksave(GBDATA *gbd, const char *reason)
GB_ERROR GB_write_pntr(GBDATA *gbd, const char *s, size_t bytes_size, size_t stored_size)
static int expandBranches(u_str buffer, int deep, int minwordlen, int maxdeep, DictTree tree, DictTree root, int max_percent)
GB_ERROR gb_convert_V2_to_V3(GBDATA *gb_main)
GB_ERROR GB_stream_save_part(ArbDBWriter *writer, GBDATA *from, GBDATA *till)
#define MIN_COMPR_WORD_LEN
const char * GBS_global_string(const char *templat,...)
int GB_unlink(const char *path)
int ALPHA_DICT_OFFSET(int idx, GB_DICTIONARY *dict)
static GB_DICTIONARY * gb_create_dictionary(O_gbdByKey *gbk, long maxmem)
long GB_size_of_file(const char *path)
static DictTree add_to_dtree(DictTree tree, cu_str text, long len, long *memcount)
#define LAST_COMPRESSED_BIT
char buffer[MESSAGE_BUFFERSIZE]
GBDATA * GBT_open(const char *path, const char *opent)
GB_ERROR GB_delete(GBDATA *&source)
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
GB_BUFFER GB_give_other_buffer(GB_CBUFFER buffer, long size)
GB_BUFFER gb_uncompress_bytes(GB_CBUFFER source, size_t size, size_t *new_size)
static GB_ERROR gb_create_dictionaries(GB_MAIN_TYPE *Main, long maxmem)
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
int gb_get_compression_mask(GB_MAIN_TYPE *Main, GBQUARK key, int gb_type)
GB_ERROR GB_export_error(const char *error)
GB_CSTR GB_read_bytes_pntr(GBDATA *gbd)
GB_ERROR GB_await_error()
char * GB_memdup(const char *source, size_t len)
static int diff(int v1, int v2, int v3, int v4, int st, int en)
GBDATA * GB_create_container(GBDATA *father, const char *key)
#define TEST_EXPECT(cond)
const unsigned char * cu_str
GB_ERROR GB_finish_stream_save(ArbDBWriter *&writer)
static cu_str memstr(cu_str stringStart, int stringStartLen, cu_str inString, int inStringLen)
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
static DictTree removeSubsequentString(DictTree *tree_pntr, cu_str buffer, int len, int max_occur)
static DictTree new_dtree(cu_str text, long len, long *memcount)
static void sort_dict_offsets(GB_DICTIONARY *dict)
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
static DictTree add_dtree_to_dtree(DictTree toAdd, DictTree to, long *memcount)
bool is_container() const
#define TEST_REJECT(cond)
#define TEST_REJECT_NULL(n)
static void downheap2(int *heap, GB_DICTIONARY *dict, int me, int num)
static void error(const char *msg)
GB_ERROR gb_load_dictionary_data(GBDATA *gb_main, const char *key, char **dict_data, long *size)
GB_ERROR gb_save_dictionary_data(GBDATA *gb_main, const char *key, const char *dict, int size)
void GB_flush_cache(GBDATA *gbd)
static char * gb_uncompress_by_dictionary_internal(GB_DICTIONARY *dict, GB_CSTR s_source, const size_t size, bool append_zero, size_t *new_size)
GBCONTAINER * as_container() const
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
#define TEST_EXPECT_LESS(val, ref)
void * gbm_get_mem(size_t size, long index)
cu_str get_data_n_size(GBDATA *gbd, size_t *size)
static DictTree cut_useless_words(DictTree tree, int deep, long *removed)
#define COMPRESSIBLE(type)
static SearchTree * tree[SEARCH_PATTERNS]
int GB_read_flag(GBDATA *gbd)
int GB_MEMCMP(const void *vm1, const void *vm2, long size)
static O_gbdByKey * g_b_opti_createGbdByKey(GB_MAIN_TYPE *Main)
GB_CUINT4 * GB_read_ints_pntr(GBDATA *gbd)
GB_ERROR GB_optimize(GBDATA *gb_main)
static int COUNT(DictTree tree)
static void g_b_opti_scanGbdByKey(GB_MAIN_TYPE *Main, GBDATA *gbd, O_gbdByKey *gbk)
GB_ERROR GB_create_index(GBDATA *gbd, const char *key, GB_CASE case_sens, long estimated_size) __ATTR__USERESULT
GB_ERROR GB_set_temporary(GBDATA *gbd) __ATTR__USERESULT
GB_ULONG GB_get_usable_memory(void)
static void copy(double **i, double **j)
GBQUARK GB_KEY_QUARK(GBDATA *gbd)
TYPE * ARB_calloc(size_t nelem)
void GB_write_flag(GBDATA *gbd, long flag)
GB_UNDO_TYPE GB_get_requested_undo_type(GBDATA *gb_main)
static DictTree remove_word_from_dtree(DictTree tree, cu_str wordStart, int wordLen, u_str resultBuffer, int *resultLen, long *resultFrequency, long *removed)
size_t uncompressed_size() const
#define TEST_EXPECT_FILES_EQUAL(f1, f2)
GB_ERROR GB_write_ints(GBDATA *gbd, const GB_UINT4 *i, long size)
char * gb_uncompress_by_dictionary(GBDATA *gbd, GB_CSTR s_source, size_t size, size_t *new_size)
static void gb_free_dictionary(GB_DICTIONARY *&dict)
static DictTree cut_dtree(DictTree tree, int cut_count, long *memcount, long *leafcount)
GB_ERROR GB_copy_dropMarksAndTempstate(GBDATA *dest, GBDATA *source)
GBDATA * GBT_first_species(GBDATA *gb_main)
char * gb_compress_by_dictionary(GB_DICTIONARY *dict, GB_CSTR s_source, size_t size, size_t *msize, int last_flag, int search_backward, int search_forward)
const char * GB_get_db_path(GBDATA *gbd)
#define TEST_EXPECT_NO_ERROR(call)
#define GB_COMPRESSION_TAGS_SIZE_MAX
GBENTRY * as_entry() const
#define WORD_HELPFUL(wordlen, occurrences)
GB_ERROR GB_write_floats(GBDATA *gbd, const float *f, long size)
GBDATA * GBT_next_species(GBDATA *gb_species)
bool GB_is_regularfile(const char *path)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
#define MAX_COMPR_WORD_LEN
static void free_dtree(DictTree tree)
GB_DICTIONARY * gb_get_dictionary(GB_MAIN_TYPE *Main, GBQUARK key)
#define TEST_EXPECT_TEXTFILE_DIFFLINES(fgot, fwant, diff)
static void g_b_opti_freeGbdByKey(O_gbdByKey *gbk)
GB_ERROR GB_request_undo_type(GBDATA *gb_main, GB_UNDO_TYPE type) __ATTR__USERESULT_TODO
GBDATA * GB_nextChild(GBDATA *child)
static long calcCounts(DictTree tree)
GB_CFLOAT * GB_read_floats_pntr(GBDATA *gbd)
GB_transaction ta(gb_var)
static long min(long a, long b)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
void gbm_free_mem(void *block, size_t size, long index)
GBDATA * GBT_get_SAI_data(GBDATA *gb_main)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
static int count_dtree_leafs(DictTree tree, int deep, int *maxdeep)
#define TEST_EXPECT_TEXTFILES_EQUAL(fgot, fwant)
#define TEST_EXPECT_EQUAL(expr, want)
GBDATA * GB_entry(GBDATA *father, const char *key)
void inc_and_check_user_abort(GB_ERROR &error)
unsigned int compressed_data
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
GBDATA * GBT_get_species_data(GBDATA *gb_main)
GB_write_int const char s