ARB
arb_export_newick.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_export_newick.cxx //
4 // Purpose : used by the SILVA pipeline to export trees for //
5 // which the tree leaves are labeled by NDS and not //
6 // by the species ID (name) of the sequence. //
7 // //
8 // Institute of Microbiology (Technical University Munich) //
9 // http://www.arb-home.de/ //
10 // //
11 // =============================================================== //
12 
13 #include <TreeWrite.h>
14 #include <TreeNode.h>
15 #include <arb_handlers.h>
16 #include <arb_global_defs.h>
17 #include <gb_aci.h>
18 #include <string>
19 #include <cstdlib>
20 
21 using namespace std;
22 
// Command line interface of arb_export_newick.
//
// Parses the switches handed to main() and stores the resulting export
// settings. Problems found while parsing are recorded in 'error' (readable
// via get_error()); nothing is thrown.
23 class CLI : virtual Noncopyable {
    // set by '--help', and also when a mandatory argument is missing
24  bool helpWanted;
    // NOTE(review): listing line 25 is missing from this view. The member
    // 'error' (initialized by the constructor, returned by get_error()) is
    // presumably declared there -- confirm against the original file.
26 
27  string database; // name of input database
28  string tree; // name of the tree to export
29  string newick_file; // name of the file the newick tree is exported to
30  string leaf_aci; // aci to generate the leaf names
31  LabelQuoting quoting_mode; // none, single or double. single and double will be forced
32  bool add_branch_length; // true -> branch lengths added to the newick tree
33  bool add_bootstraps; // true -> bootstrap values added to the newick tree
34  bool force_bootstraps; // true -> implies add_bootstraps==true; save 100% at branches w/o bootstrap value
35  bool add_remarks; // true -> custom remarks added to the newick tree
36  bool add_group_names; // true -> group names added to the newick tree
37  bool replace_problem_chars; // true -> problem chars are replaced in the newick tree
38  bool pretty; // true -> prettify the newick tree
39 
    // Consume and return the next command line argument (decrements argc,
    // advances argv). Returns NULp when no argument is left.
40  static inline const char *getarg(int& argc, const char**& argv) {
41  return argc>0 ? (--argc,*argv++) : NULp;
42  }
    // Like getarg(), but for arguments that have to be present: if none is
    // left, 'error' is set and an empty string is returned (never NULp).
43  inline const char *expect_arg(int& argc, const char**& argv) {
44  const char *arg = getarg(argc, argv);
45  if (!arg) {
46  error = "expected argument missing";
47  arg = "";
48  }
49  return arg;
50  }
    // Consume the value following '--quoting' and translate it into a
    // LabelQuoting value. The comparison is case-insensitive. Unknown values
    // set 'error' and yield LABEL_DISALLOW_QUOTES.
    // NOTE(review): listing lines 56 and 58 are missing from this view; the
    // return statements of the "single" and "double" branches are presumably
    // located there -- confirm against the original file.
51  inline LabelQuoting parse_quoting_mode(int& argc, const char**& argv) {
52  const char *quoting_mode_str= expect_arg(argc, argv);
53  if (strcasecmp(quoting_mode_str, "none") == 0) {
54  return LABEL_DISALLOW_QUOTES;
55  } else if (strcasecmp(quoting_mode_str, "single") == 0) {
57  } else if (strcasecmp(quoting_mode_str, "double") == 0) {
59  } else {
60  error = GBS_global_string("unknown quoting mode '%s'", quoting_mode_str);
61  return LABEL_DISALLOW_QUOTES;
62  }
63  }
64 
    // Consume exactly one switch (plus its value, for switches that take one)
    // and store the result in the matching member. An unknown switch sets
    // 'error'. Does nothing if no argument is left.
65  void parse(int& argc, const char**& argv) {
66  const char *arg = getarg(argc, argv);
67  if (arg) {
68  if (strcmp(arg, "--db") == 0) database = expect_arg(argc, argv);
69  else if (strcmp(arg, "--tree") == 0) tree = expect_arg(argc, argv);
70  else if (strcmp(arg, "--newick-file") == 0) newick_file = expect_arg(argc, argv);
71  else if (strcmp(arg, "--leaf-aci") == 0) leaf_aci = expect_arg(argc, argv);
72  else if (strcmp(arg, "--quoting") == 0) quoting_mode = parse_quoting_mode(argc, argv);
73  else if (strcmp(arg, "--add-branch-lengths") == 0) add_branch_length = true;
74  else if (strcmp(arg, "--add-bootstraps") == 0) add_bootstraps = true;
75  else if (strcmp(arg, "--add-custom-remarks") == 0) add_remarks = true;
76  else if (strcmp(arg, "--force-bootstraps") == 0) force_bootstraps = true;
77  else if (strcmp(arg, "--add-group-names") == 0) add_group_names = true;
78  else if (strcmp(arg, "--replace-problem-chars") == 0) replace_problem_chars = true;
79  else if (strcmp(arg, "--pretty") == 0) pretty = true;
80  else if (strcmp(arg, "--help") == 0) helpWanted = true;
81  else {
82  error = GBS_global_string("unexpected argument '%s'", arg);
83  }
84  }
85  }
    // Set 'error' if one of the mandatory arguments (--db, --tree,
    // --newick-file) was not given. Only the first missing one is reported.
86  void check_required_arguments() {
87  if (database.empty()) error = "no input database specified";
88  else if (tree.empty()) error = "no tree name specified";
89  else if (newick_file.empty()) error = "no output file specified";
90  }
91 
92 public:
    // Parse the complete command line as passed to main(); argv[0] is skipped.
    // Defaults: leaf ACI readdb("name"), no quoting, all optional output
    // features switched off.
93  CLI(int argc, const char **argv) :
94  helpWanted(false),
95  error(NULp),
96  leaf_aci("readdb(\"name\")"),
97  quoting_mode(LABEL_DISALLOW_QUOTES),
98  add_branch_length(false),
99  add_bootstraps(false),
100  force_bootstraps(false),
101  add_remarks(false),
102  add_group_names(false),
103  replace_problem_chars(false),
104  pretty(false)
105 
106  {
107  --argc; ++argv;
    // stop parsing at the first error, at '--help' or when all arguments are consumed
108  while (!error && argc>0 && !helpWanted) {
109  parse(argc, argv);
110  }
111 
112  if (!helpWanted) { // do not check extended conditions, if '--help' seen
113  if (!error) {
114  check_required_arguments();
    // a missing mandatory argument sets the help flag in addition to the error
115  if (error) helpWanted = true;
116  }
117  }
118 
119  if (force_bootstraps) add_bootstraps = true; // implied
120  }
121 
    // Write the usage message to stderr.
122  void show_help() const {
123  fputs("\n"
124  "arb_export_newick -- export a tree in newick format\n"
125  "Usage: arb_export_newick [switches]\n"
126  "\n"
127  "mandatory arguments:\n"
128  "--db <dbname> ARB database to export from\n"
129  "--tree <treename> name of the tree to export\n"
130  "--newick-file <outname> name of generated newick file\n"
131  "\n"
132  "switches:\n"
133  "--leaf-aci <aci> specify content for the leaf names using ACI\n"
134  " (default: \"readdb(name)\"; see http://help.arb-home.de/aci.html)\n"
135  "--quoting <mode> none, single, double. Single and double are forced.\n"
136  " (default: none)\n"
137  "--add-branch-lengths add the branch lengths to the newick file.\n"
138  " (default: branch lengths are omitted)\n"
139  "--add-bootstraps add the bootstrap values to the newick file.\n"
140  " (default: bootstrap values are omitted)\n"
141  "--force-bootstraps for branches without bootstrap values always save 100%\n"
142  " (implies: --add-bootstraps)\n"
143  "--add-custom-remarks add the custom remarks to the newick file.\n"
144  " (default: custom remarks are omitted)\n"
145  "--add-group-names add the group names to the newick file.\n"
146  " (default: group names are omitted)\n"
147  "--replace-problem-chars problematic characters in names will be replaced\n"
148  " (default: no characters are replaced)\n"
149  "--pretty prettify the newick tree\n"
150  " (default: tree is not prettified)\n"
151  "--help show this help message\n"
152  "\n"
153  ,stderr);
154  }
155 
    // Accessors for the parsed state. The returned C strings point into the
    // std::string members of this object.
156  bool help_wanted() const { return helpWanted; }
157  GB_ERROR get_error() const { return error; }
158 
159  const char *get_database() const { return database.c_str(); }
160  const char *get_tree() const { return tree.c_str(); }
161  const char *get_newick_file() const { return newick_file.c_str(); }
162  const char *get_leaf_aci() const { return leaf_aci.c_str(); }
163  LabelQuoting get_quoting_mode() const { return quoting_mode; }
164 
165 
    // NOTE(review): listing lines 166-167 are missing from this view. The
    // statements below presumably form the body of shall_add_bootstraps()
    // (called by export_newick()); its header and the initialization of
    // 'style' are not visible here -- confirm against the original file.
    //
    // Visible selection logic:
    //   remarks + bootstraps -> SAVE_ANY_BRANCH_REMARKS (_INCL_100 if forced)
    //   remarks only         -> SAVE_CUSTOM_REMARKS
    //   bootstraps only      -> SAVE_BOOTSTRAPS (_INCL_100 if forced)
    //   neither              -> 'style' keeps its (not visible) initial value
168  if (add_remarks) {
169  if (add_bootstraps) {
170  style = force_bootstraps ? SAVE_ANY_BRANCH_REMARKS_INCL_100 : SAVE_ANY_BRANCH_REMARKS;
171  }
172  else {
173  style = SAVE_CUSTOM_REMARKS;
174  }
175  }
176  else if (add_bootstraps) {
177  style = force_bootstraps ? SAVE_BOOTSTRAPS_INCL_100 : SAVE_BOOTSTRAPS;
178  }
179  return style;
180  }
181 
182  bool shall_add_branch_length() const { return add_branch_length; }
183  bool shall_add_group_names() const { return add_group_names; }
184  bool shall_replace_problem_chars() const { return replace_problem_chars; }
185  bool shall_be_pretty() const { return pretty; }
186 };
187 
188 
189 class ACI_Labeler: public TreeLabeler {
190  SmartCharPtr leaf_aci;
191 
192 public:
193  explicit ACI_Labeler(const char *leaf_aci_) : leaf_aci(strdup(leaf_aci_)) {}
194 
195  const char *speciesLabel(GBDATA *gb_main, GBDATA *gb_species, TreeNode *leafNode, const char *tree_name) const OVERRIDE {
196  if (!gb_species) return leafNode->name; // required for zombies
197 
198  GBL_env env(gb_main, tree_name);
199  GBL_call_env callEnv(gb_species, env);
200 
201  char* node_text = GB_command_interpreter_in_env("", leaf_aci.content(), callEnv);
202  if (!node_text) {
203  GB_ERROR ndsError = GB_await_error();
204  node_text = GBS_global_string_copy("<error: %s>", ndsError);
205  GB_export_error(ndsError);
206  }
207  RETURN_LOCAL_ALLOC(node_text);
208  }
209  const char *groupLabel(GBDATA *, GBDATA *, TreeNode *innerNode, const char *) const {
210  // ACI is not used for group names
211  return innerNode->name;
212  }
213 };
214 
215 static GB_ERROR export_newick(const CLI& args) {
216 
217  ARB_redirect_handlers_to(stderr, stderr);
218  GB_ERROR error = NULp;
219 
220  const char *dbname = args.get_database();
221  GB_shell shell;
222  GBDATA *gb_main = GB_open(dbname, "r");
223 
224  if (!gb_main) {
225  error = GB_await_error();
226  }
227  else {
228  ACI_Labeler labeler(args.get_leaf_aci());
229 
230  LabelQuoting quoting_mode = args.get_quoting_mode();
231  if (args.shall_replace_problem_chars()) {
232  quoting_mode = LabelQuoting(quoting_mode|LABEL_FORCE_REPLACE);
233  }
234 
235  error = TREE_write_Newick(gb_main,
236  args.get_tree(),
237  labeler,
239  args.shall_add_bootstraps(),
240  args.shall_add_group_names(),
241  args.shall_be_pretty(),
242  quoting_mode,
243  args.get_newick_file());
244 
245  // get possible NDS error, too
246  if (!error) error = GB_incur_error();
247  GB_close(gb_main);
248  }
249 
250  return error;
251 }
252 
253 int main(int argc, char **argv) {
254 
255  CLI args(argc, const_cast<const char**>(argv));
256  GB_ERROR error = args.get_error();
257 
258  if (!error && args.help_wanted()) {
259  args.show_help();
260  return EXIT_FAILURE;
261  }
262 
263  if (!error) error = export_newick(args);
264 
265  if (error) {
266  fprintf(stderr, "Error: %s\n", error);
267  return EXIT_FAILURE;
268  }
269  return EXIT_SUCCESS;
270 }
const char * speciesLabel(GBDATA *gb_main, GBDATA *gb_species, TreeNode *leafNode, const char *tree_name) const OVERRIDE
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
const char * groupLabel(GBDATA *, GBDATA *, TreeNode *innerNode, const char *) const
GB_ERROR GB_incur_error()
Definition: arb_msg.h:49
BootstrapSaveStyle shall_add_bootstraps() const
LabelQuoting get_quoting_mode() const
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
STL namespace.
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
bool shall_replace_problem_chars() const
const char * get_leaf_aci() const
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:257
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
void show_help() const
bool shall_add_branch_length() const
const char * get_tree() const
#define false
Definition: ureadseq.h:13
GB_ERROR TREE_write_Newick(GBDATA *gb_main, const char *tree_name, const TreeLabeler &labeler, bool save_branchlengths, BootstrapSaveStyle bootstrap_style, bool save_groupnames, bool pretty, LabelQuoting quoteMode, const char *path)
Definition: TreeWrite.cxx:406
static void error(const char *msg)
Definition: mkptypes.cxx:96
const char * get_database() const
#define RETURN_LOCAL_ALLOC(mallocation)
Definition: smartptr.h:310
static SearchTree * tree[SEARCH_PATTERNS]
Definition: ED4_search.cxx:629
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
int main(int argc, char **argv)
fputs(TRACE_PREFIX, stderr)
bool shall_add_group_names() const
#define OVERRIDE
Definition: cxxforward.h:112
CLI(int argc, const char **argv)
bool shall_be_pretty() const
char * name
Definition: TreeNode.h:226
bool help_wanted() const
#define NULp
Definition: cxxforward.h:116
LabelQuoting
Definition: TreeWrite.h:18
ACI_Labeler(const char *leaf_aci_)
NOT4PERL char * GB_command_interpreter_in_env(const char *str, const char *commands, const GBL_call_env &callEnv)
Definition: gb_aci.cxx:361
const char * get_newick_file() const
GBDATA * gb_main
Definition: adname.cxx:32
GB_ERROR get_error() const
void ARB_redirect_handlers_to(FILE *errStream, FILE *outStream)
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
static GB_ERROR export_newick(const CLI &args)
BootstrapSaveStyle
Definition: TreeWrite.h:29