ARB
arb_2_bin.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_2_bin.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 
12 #include <arbdbt.h>
13 #include <TreeRead.h>
14 #include <TreeNode.h>
15 
16 int ARB_main(int argc, char *argv[]) {
18 
19  fprintf(stderr, "arb_2_bin - ARB database ascii to binary converter\n");
20 
21  if (argc <= 1 || strcmp(argv[1], "--help") == 0) {
22  fprintf(stderr,
23  "\n"
24  "Purpose: Converts a database to binary format\n"
25  "Syntax: arb_2_bin [Options] database [newdatabase]\n"
26  "Options: -m create map file too\n"
27  " -r try to repair destroyed database\n"
28  " -c[tree_xxx] optimize database using tree_xxx or largest tree\n"
29  " -C<type> use extra compression\n"
30  " (known <type>s: %s)\n"
31  " -e <treefile> optimize the database using the tree from the\n"
32  " file. The tree is not stored in the database.\n"
33  "\n"
34  "database my be '-' in which case arb_2_bin reads from stdin.\n"
35  "\n",
37 
38  if (argc>1 && strcmp(argv[1], "--help") != 0) { error = "Missing arguments"; }
39  }
40  else {
41  char rtype[256];
42  char wtype[256];
43  int ci = 1;
44  int nidx = 0;
45 
46  const char *opt_tree = NULp;
47  const char *ext_tree = NULp;
48 
49  {
50  char *rtypep = rtype;
51  char *wtypep = wtype;
52 
53  memset(rtype, 0, 10);
54  memset(wtype, 0, 10);
55  *(wtypep++) = 'b';
56  *(rtypep++) = 'r';
57  *(rtypep++) = 'w';
58 
59  while (argv[ci] && argv[ci][0] == '-' && argv[ci][1] != 0 && !error) {
60  if (!strcmp(argv[ci], "-m")) { ci++; *(wtypep++) = 'm'; }
61  else if (!strcmp(argv[ci], "-r")) { ci++; *(rtypep++) = 'R'; }
62  else if (!strncmp(argv[ci], "-c", 2)) { opt_tree = argv[ci]+2; ci++; }
63  else if (!strncmp(argv[ci], "-i", 2)) { nidx = atoi(argv[ci]+2); ci++; }
64  else if (!strncmp(argv[ci], "-C", 2)) {
65  char cflag = argv[ci][2]; ci++;
66  if (!strchr(GB_get_supported_compression_flags(false), cflag)) {
67  error = GBS_global_string("Unknown compression flag '%c'", cflag);
68  }
69  else {
70  *(wtypep++) = cflag;
71  }
72  }
73  else if (!strcmp(argv[ci], "-e")) { ci++; ext_tree = argv[ci]; ci++; }
74  else {
75  error = GBS_global_string("Unknown argument '%s'", argv[ci]);
76  break;
77  }
78  }
79  }
80 
81  const char *in = argv[ci++];
82  const char *out = ci >= argc ? in : argv[ci++];
83 
84  if (!error && !in) error = "missing arguments";
85 
86  if (!error) {
87  printf("Reading database...\n");
88  GB_shell shell;
89  GBDATA *gb_main = GBT_open(in, rtype);
90  if (!gb_main) {
91  error = GB_await_error();
92  }
93  else {
94  TreeNode *tree = NULp;
95 
96  if (ext_tree) {
97  printf("Reading tree from '%s' ..\n", ext_tree);
98  {
99  char *warnings = NULp;
100  tree = TREE_load(ext_tree, new SimpleRoot, NULp, false, &warnings);
101  if (tree) {
102  if (warnings) {
103  printf("Warning from TREE_load: %s\n", warnings);
104  free(warnings);
105  }
106  opt_tree = "tree_zzzz_comp_opt_tree_zzzz";
107  error = GB_begin_transaction(gb_main);
108  if (tree->is_leaf()) error = "Cannot load tree (need at least 2 leafs)";
109  if (!error) error = GBT_write_tree(gb_main, opt_tree, tree);
110  error = GB_end_transaction(gb_main, error);
111  }
112  else {
113  error = GBS_global_string("Failed to load tree: %s", GB_await_error());
114  }
115  }
116  }
117 
118  if (!error && opt_tree) {
119  char *ali_name = GBT_get_default_alignment(gb_main);
120  if (!ali_name) {
121  error = GB_await_error();
122  }
123  else {
124  if (!strlen(opt_tree)) opt_tree = NULp;
125 
126  printf("Optimizing database...\n");
127  error = GBT_compress_sequence_tree2(gb_main, opt_tree, ali_name);
128  free(ali_name);
129  }
130  if (error) error = GBS_global_string("Failed to optimize database: %s", error);
131  }
132 
133  if (!error && ext_tree) {
134  printf("Deleting compression tree from database...\n");
135  error = GB_begin_transaction(gb_main);
136  GBDATA *gb_comp_tree = GBT_find_tree(gb_main, opt_tree);
137  if (!error) error = GB_delete(gb_comp_tree);
138  if (error) error = GBS_global_string("Failed to delete optimization tree: %s", error);
139  error = GB_end_transaction(gb_main, error);
140  }
141 
142  if (!error) {
143  GB_set_next_main_idx(nidx);
144  printf("Saving database...\n");
145  error = GB_save(gb_main, out, wtype);
146  }
147  GB_close(gb_main);
148  }
149  }
150  }
151 
152  if (error) {
153  fprintf(stderr, "arb_2_bin: Error: %s\n", error);
154  return EXIT_FAILURE;
155  }
156  return EXIT_SUCCESS;
157 }
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2528
const char * GB_ERROR
Definition: arb_core.h:25
GB_ERROR GB_save(GBDATA *gb, const char *path, const char *savetype)
void GB_set_next_main_idx(long idx)
Definition: ad_load.cxx:918
int ARB_main(int argc, char *argv[])
Definition: arb_2_bin.cxx:16
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2561
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
GBDATA * GBT_open(const char *path, const char *opent)
Definition: adtools.cxx:524
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1916
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
GB_ERROR GBT_write_tree(GBDATA *gb_main, const char *tree_name, TreeNode *tree)
Definition: adtree.cxx:523
static void error(const char *msg)
Definition: mkptypes.cxx:96
TreeNode * TREE_load(const char *path, TreeRoot *troot, char **commentPtr, bool allow_length_scaling, char **warningPtr)
Definition: TreeRead.cxx:620
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
bool is_leaf() const
Definition: TreeNode.h:211
static list< LineAttachedMessage > warnings
GB_ERROR GBT_compress_sequence_tree2(GBDATA *gbd, const char *tree_name, const char *ali_name) __ATTR__USERESULT
Definition: adseqcompr.cxx:889
#define NULp
Definition: cxxforward.h:116
char * GBT_get_default_alignment(GBDATA *gb_main)
Definition: adali.cxx:747
GBDATA * GBT_find_tree(GBDATA *gb_main, const char *tree_name)
Definition: adtree.cxx:993
GBDATA * gb_main
Definition: adname.cxx:32
const char * GB_get_supported_compression_flags(bool verboose)
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655