ARB
arb_export_newick.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_export_newick.cxx //
4 // Purpose : used by the SILVA pipeline to export trees for //
5 // which the tree leaves are labeled by NDS and not //
6 // by the species ID (name) of the sequence. //
7 // //
8 // Institute of Microbiology (Technical University Munich) //
9 // http://www.arb-home.de/ //
10 // //
11 // =============================================================== //
12 
13 #include <TreeWrite.h>
14 #include <TreeNode.h>
15 #include <arb_handlers.h>
16 #include <arb_global_defs.h>
17 #include <gb_aci.h>
18 #include <string>
19 #include <cstdlib>
20 
21 using namespace std;
22 
23 class CLI : virtual Noncopyable {
24  bool helpWanted;
26 
27  string database; // name of input database
28  string tree; // name of the tree to export
29  string newick_file; // name of the file the newick tree is exported to
30  string leaf_aci; // aci to generate the leaf names
31  LabelQuoting quoting_mode; // none, single or double. single and double will be forced
32  bool add_branch_length; // true -> branch lengths added to the newick tree
33  bool add_bootstraps; // true -> bootstrap values added to the newick tree
34  bool add_group_names; // true -> group names added to the newick tree
35  bool replace_problem_chars; // true -> problem chars are replaced in the newick tree
36  bool pretty; // true -> prettify the newick tree
37 
38  static inline const char *getarg(int& argc, const char**& argv) {
39  return argc>0 ? (--argc,*argv++) : NULp;
40  }
41  inline const char *expect_arg(int& argc, const char**& argv) {
42  const char *arg = getarg(argc, argv);
43  if (!arg) {
44  error = "expected argument missing";
45  arg = "";
46  }
47  return arg;
48  }
49  inline LabelQuoting parse_quoting_mode(int& argc, const char**& argv) {
50  const char *quoting_mode_str= expect_arg(argc, argv);
51  if (strcasecmp(quoting_mode_str, "none") == 0) {
52  return LABEL_DISALLOW_QUOTES;
53  } else if (strcasecmp(quoting_mode_str, "single") == 0) {
55  } else if (strcasecmp(quoting_mode_str, "double") == 0) {
57  } else {
58  error = GBS_global_string("unknown quoting mode '%s'", quoting_mode_str);
59  return LABEL_DISALLOW_QUOTES;
60  }
61  }
62 
63  void parse(int& argc, const char**& argv) {
64  const char *arg = getarg(argc, argv);
65  if (arg) {
66  if (strcmp(arg, "--db") == 0) database = expect_arg(argc, argv);
67  else if (strcmp(arg, "--tree") == 0) tree = expect_arg(argc, argv);
68  else if (strcmp(arg, "--newick-file") == 0) newick_file = expect_arg(argc, argv);
69  else if (strcmp(arg, "--leaf-aci") == 0) leaf_aci = expect_arg(argc, argv);
70  else if (strcmp(arg, "--quoting") == 0) quoting_mode = parse_quoting_mode(argc, argv);
71  else if (strcmp(arg, "--add-branch-lengths") == 0) add_branch_length = true;
72  else if (strcmp(arg, "--add-bootstraps") == 0) add_bootstraps = true;
73  else if (strcmp(arg, "--add-group-names") == 0) add_group_names = true;
74  else if (strcmp(arg, "--replace-problem-chars") == 0) replace_problem_chars = true;
75  else if (strcmp(arg, "--pretty") == 0) pretty = true;
76  else if (strcmp(arg, "--help") == 0) helpWanted = true;
77  else {
78  error = GBS_global_string("unexpected argument '%s'", arg);
79  }
80  }
81  }
82  void check_required_arguments() {
83  if (database.empty()) error = "no input database specified";
84  else if (tree.empty()) error = "no tree name specified";
85  else if (newick_file.empty()) error = "no output file specified";
86  }
87 
88 public:
89  CLI(int argc, const char **argv) :
90  helpWanted(false),
91  error(NULp),
92  leaf_aci("readdb(\"name\")"),
93  quoting_mode(LABEL_DISALLOW_QUOTES),
94  add_branch_length(false),
95  add_bootstraps(false),
96  add_group_names(false),
97  replace_problem_chars(false),
98  pretty(false)
99 
100  {
101  --argc; ++argv;
102  while (!error && argc>0 && !helpWanted) {
103  parse(argc, argv);
104  }
105 
106  if (!helpWanted) { // do not check extended conditions, if '--help' seen
107  if (!error) {
108  check_required_arguments();
109  if (error) helpWanted = true;
110  }
111  }
112  }
113 
114  void show_help() const {
115  fputs("\n"
116  "arb_export_newick -- export a tree in newick format\n"
117  "Usage: arb_export_newick [switches]\n"
118  "\n"
119  "mandatory arguments:\n"
120  "--db <dbname> ARB database to export from\n"
121  "--tree <treename> name of the tree to export\n"
122  "--newick-file <outname> name of generated newick file\n"
123  "\n"
124  "switches:\n"
125  "--leaf-aci <aci> specify content for the leaf names using ACI\n"
126  " (default: \"readdb(name)\"; see http://help.arb-home.de/aci.html)\n"
127  "--quoting <mode> none, single, double. Single and double are forced.\n"
128  " (default: none)\n"
129  "--add-branch-lengths add the branch lengths to the newick file.\n"
130  " (default: branch lengths are omitted)\n"
131  "--add-bootstraps add the bootstrap values to the newick file.\n"
132  " (default: bootstrap values are omitted)\n"
133  "--add-group-names add the group names to the newick file.\n"
134  " (default: group names are omitted)\n"
135  "--replace-problem-chars problematic characters in names will be replaced\n"
136  " (default: no characters are replaced)\n"
137  "--pretty prettify the newick tree\n"
138  " (default: tree is not prettified)\n"
139  "--help show this help message\n"
140  "\n"
141  ,stderr);
142  }
143 
144  bool help_wanted() const { return helpWanted; }
145  GB_ERROR get_error() const { return error; }
146 
147  const char *get_database() const { return database.c_str(); }
148  const char *get_tree() const { return tree.c_str(); }
149  const char *get_newick_file() const { return newick_file.c_str(); }
150  const char *get_leaf_aci() const { return leaf_aci.c_str(); }
151  LabelQuoting get_quoting_mode() const { return quoting_mode; }
152 
153  bool shall_add_branch_length() const { return add_branch_length; }
154  bool shall_add_bootstraps() const { return add_bootstraps; }
155  bool shall_add_group_names() const { return add_group_names; }
156  bool shall_replace_problem_chars() const { return replace_problem_chars; }
157  bool shall_be_pretty() const { return pretty; }
158 };
159 
160 
161 class ACI_Labeler: public TreeLabeler {
162  SmartCharPtr leaf_aci;
163 
164 public:
165  explicit ACI_Labeler(const char *leaf_aci_) : leaf_aci(strdup(leaf_aci_)) {}
166 
167  const char *speciesLabel(GBDATA *gb_main, GBDATA *gb_species, TreeNode *, const char *tree_name) const OVERRIDE {
168  GBL_env env(gb_main, tree_name);
169  GBL_call_env callEnv(gb_species, env);
170 
171  char* node_text = GB_command_interpreter_in_env("", leaf_aci.content(), callEnv);
172  if (!node_text) {
173  GB_ERROR ndsError = GB_await_error();
174  node_text = GBS_global_string_copy("<error: %s>", ndsError);
175  GB_export_error(ndsError);
176  }
177 
178  RETURN_LOCAL_ALLOC(node_text);
179  }
180  const char *groupLabel(GBDATA *, GBDATA *, TreeNode *innerNode, const char *) const {
181  // ACI is not used for group names
182  return innerNode->name;
183  }
184 };
185 
186 static GB_ERROR export_newick(const CLI& args) {
187 
188  ARB_redirect_handlers_to(stderr, stderr);
189  GB_ERROR error = NULp;
190 
191  const char *dbname = args.get_database();
192  GB_shell shell;
193  GBDATA *gb_main = GB_open(dbname, "r");
194 
195  if (!gb_main) {
196  error = GB_await_error();
197  }
198  else {
199  ACI_Labeler labeler(args.get_leaf_aci());
200 
201  LabelQuoting quoting_mode = args.get_quoting_mode();
202  if (args.shall_replace_problem_chars()) {
203  quoting_mode = LabelQuoting(quoting_mode|LABEL_FORCE_REPLACE);
204  }
205 
206  error = TREE_write_Newick(gb_main,
207  args.get_tree(),
208  labeler,
210  args.shall_add_bootstraps(),
211  args.shall_add_group_names(),
212  args.shall_be_pretty(),
213  quoting_mode,
214  args.get_newick_file());
215 
216  // get possible NDS error, too
217  if (!error) error = GB_incur_error();
218  GB_close(gb_main);
219  }
220 
221  return error;
222 }
223 
224 int main(int argc, char **argv) {
225 
226  CLI args(argc, const_cast<const char**>(argv));
227  GB_ERROR error = args.get_error();
228 
229  if (!error && args.help_wanted()) {
230  args.show_help();
231  return EXIT_FAILURE;
232  }
233 
234  if (!error) error = export_newick(args);
235 
236  if (error) {
237  fprintf(stderr, "Error: %s\n", error);
238  return EXIT_FAILURE;
239  }
240  return EXIT_SUCCESS;
241 }
const char * speciesLabel(GBDATA *gb_main, GBDATA *gb_species, TreeNode *, const char *tree_name) const OVERRIDE
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
const char * groupLabel(GBDATA *, GBDATA *, TreeNode *innerNode, const char *) const
GB_ERROR GB_incur_error()
Definition: arb_msg.h:49
LabelQuoting get_quoting_mode() const
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
STL namespace.
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
bool shall_replace_problem_chars() const
const char * get_leaf_aci() const
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:257
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
void show_help() const
bool shall_add_branch_length() const
const char * get_tree() const
#define false
Definition: ureadseq.h:13
static void error(const char *msg)
Definition: mkptypes.cxx:96
bool shall_add_bootstraps() const
const char * get_database() const
#define RETURN_LOCAL_ALLOC(mallocation)
Definition: smartptr.h:310
static SearchTree * tree[SEARCH_PATTERNS]
Definition: ED4_search.cxx:629
GB_ERROR TREE_write_Newick(GBDATA *gb_main, const char *tree_name, const TreeLabeler &labeler, bool save_branchlengths, bool save_bootstraps, bool save_groupnames, bool pretty, LabelQuoting quoteMode, const char *path)
Definition: TreeWrite.cxx:359
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
int main(int argc, char **argv)
fputs(TRACE_PREFIX, stderr)
bool shall_add_group_names() const
#define OVERRIDE
Definition: cxxforward.h:112
CLI(int argc, const char **argv)
bool shall_be_pretty() const
char * name
Definition: TreeNode.h:174
bool help_wanted() const
#define NULp
Definition: cxxforward.h:116
LabelQuoting
Definition: TreeWrite.h:18
ACI_Labeler(const char *leaf_aci_)
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
Definition: gb_aci.cxx:361
const char * get_newick_file() const
GBDATA * gb_main
Definition: adname.cxx:32
GB_ERROR get_error() const
void ARB_redirect_handlers_to(FILE *errStream, FILE *outStream)
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
static GB_ERROR export_newick(const CLI &args)