29 #define sio_assert(cond) arb_assert(cond)
32 using namespace SEQIO;
53 for (
int i = 0; internal_export_commands[i]; ++i) {
54 if (strcmp(command, internal_export_commands[i]) == 0) {
93 if (!file || !file[0]) {
94 error =
"No export format selected";
97 char *fullfile =
NULp;
105 FILE *in = fopen(fullfile,
"r");
107 if (!in) error =
GB_IO_error(
"reading export form", fullfile);
111 bool seen_BEGIN =
false;
113 size_t linenumber = 0;
116 if (!strcmp(s1,
"SYSTEM")) { reassign(efo->
system, s2); }
117 else if (!strcmp(s1,
"PRE_FORMAT")) { reassign(efo->
pre_format, s2); }
118 else if (!strcmp(s1,
"SUFFIX")) { reassign(efo->
suffix, s2); }
120 else if (!strcmp(s1,
"INTERNAL")) {
126 else if (!strcmp(s1,
"BEGIN")) {
128 error =
"'BEGIN' not allowed when 'INTERNAL' is used";
159 if (efo->
system && !efo->
pre_format) error =
"Missing 'PRE_FORMAT' (needed by 'SYSTEM')";
160 else if (efo->
pre_format && !efo->
system) error =
"Missing 'SYSTEM' (needed by 'PRE_FORMAT')";
162 if (efo->
system) error =
"'SYSTEM' is not allowed together with 'INTERNAL'";
163 if (efo->
pre_format) error =
"'PRE_FORMAT' is not allowed together with 'INTERNAL'";
181 const char *one_species;
186 one_species(one_species_)
209 GBDATA *last_species_read;
219 size_t species_count;
225 size_t *export_column;
233 last_species_read(
NULp),
238 whichSpecies(which, one_species),
239 species_count(
size_t(-1)),
241 cut_stop_codon(CutStopCodon),
256 if (cut_stop_codon) {
260 GB_warning(
"Cutting stop codon makes no sense - ignored");
261 cut_stop_codon =
false;
266 GB_warningf(
"Warning: Your filter is shorter than the alignment (%zu<%li)",
273 delete [] export_column;
289 const unsigned char *get_seq_data(
GBDATA *gb_species,
size_t& slen,
GB_ERROR& error)
const;
294 if (species_count ==
size_t(-1)) {
303 return species_count;
311 const char *data =
NULp;
323 return (
const unsigned char *)data;
335 size_t gap_columns = filter->get_filtered_length();
336 size_t *gap_column =
new size_t[gap_columns+1];
338 const size_t *filterpos_2_seqpos = filter->get_filterpos_2_seqpos();
339 memcpy(gap_column, filterpos_2_seqpos, gap_columns*
sizeof(*gap_column));
340 gap_column[gap_columns] = max_ali_len;
342 arb_progress progress(
"Calculating vertical gaps", count_species());
344 for (
GBDATA *gb_species = first_species();
346 gb_species = next_species(gb_species))
349 const unsigned char *sdata = get_seq_data(gb_species, slen, err);
354 for (i = 0; i<gap_columns; ++i) {
355 if (
isGap(sdata[gap_column[i]])) {
356 gap_column[j++] = gap_column[i];
362 size_t skipped_columns = i-j;
364 gap_columns -= skipped_columns;
370 columns = filter->get_filtered_length() - gap_columns;
371 export_column =
new size_t[columns];
375 size_t flen = filter->get_filtered_length();
377 for (a = 0; a<flen && gpos<gap_columns; ++a) {
378 size_t fpos = filterpos_2_seqpos[a];
379 if (fpos == gap_column[gpos]) {
385 export_column[epos++] = fpos;
388 for (; a<flen; ++a) {
389 export_column[epos++] = filterpos_2_seqpos[a];
395 delete [] gap_column;
398 const size_t *filterpos_2_seqpos = filter->get_filterpos_2_seqpos();
400 columns = filter->get_filtered_length();
401 export_column =
new size_t[columns];
403 memcpy(export_column, filterpos_2_seqpos, columns*
sizeof(*filterpos_2_seqpos));
406 seq =
new char[columns+1];
412 if (gb_species != last_species_read) {
417 const unsigned char *data = get_seq_data(gb_species, len, curr_error);
420 error = strdup(curr_error);
424 const uchar *simplify = filter->get_simplify_table();
426 if (cut_stop_codon) {
427 const unsigned char *stop_codon = (
const unsigned char *)memchr(data,
'*', len);
429 len = stop_codon-data;
435 for (i = 0; i<columns; ++i) {
436 size_t seq_pos = export_column[i];
438 unsigned char c = data[seq_pos];
440 seq[j++] = simplify[c];
448 for (i = 0; i<columns; ++i) {
449 size_t seq_pos = export_column[i];
451 seq[i] = simplify[data[seq_pos]];
454 seq[i] = simplify[
'.'];
490 if (depth == 1 && strncmp(key_name,
"ali_", 4) == 0) {
495 tag =
new XML_Tag(
"ALIGNMENT");
496 tag->add_attribute(
"name", key_name+4);
503 XML_Tag dtag(
"data");
509 tag =
new XML_Tag(key_name);
513 if (name) tag->add_attribute(
"name", name);
522 if (strcmp(sub_key_name,
"name") != 0) {
534 tag->add_attribute(
"error",
"unsavable");
552 for (l = p; l>o; l--)
if (*l==
'\n')
break;
553 r = strchr(p,
'\n');
if (!r) r = p + strlen(p);
554 fwrite(o, 1, l-o, out);
590 static int export_depth = 0;
593 *resulting_outname =
NULp;
602 free(unique_outname);
606 else *resulting_outname = strdup(outname);
618 char *intermediate_export;
636 free(intermediate_export);
639 FILE *out = fopen(*resulting_outname,
"wt");
640 if (!out) error =
GB_IO_error(
"writing", *resulting_outname);
642 XML_Document *xml =
NULp;
646 gb_species && !
error;
656 xml =
new XML_Document(
"ARB_SEQ_EXPORT",
"arb_seq_export.dtd", out);
658 xml->add_attribute(
"database", db_name);
662 XML_Comment rem(
"There is a basic version of ARB_seq_export.dtd in $ARBHOME/lib/dtd\n"
663 "but you might need to expand it by yourself,\n"
664 "because the ARB-database may contain any kind of fields.");
671 gb_species && !
error;
674 if (ruleset.
isSet()) {
677 if (clone.has_error()) {
678 error = clone.get_error();
698 if (*resulting_outname) {
700 freenull(*resulting_outname);
713 char *path, *name, *suffix;
715 *resulting_outname =
NULp;
720 gb_species && !
error;
724 if (!species_name) error =
"Can't export unnamed species";
727 progress.subtitle(fname);
736 if (!*resulting_outname ||
737 (res_oname && strcmp(*resulting_outname, res_oname)>0))
739 reassign(*resulting_outname, res_oname);
746 progress.inc_and_check_user_abort(error);
764 AP_filter *filter,
int cut_stop_codon,
int compress,
765 const char *dbname,
const char *
formname,
const char *field_transfer_set,
766 const char *outname,
int multiple,
char **real_outname)
770 if (field_transfer_set && !field_transfer_set[0]) {
771 field_transfer_set =
NULp;
778 if (field_transfer_set) {
834 if (!error && efs.
form) {
850 error =
"exports all fields";
853 error =
"unsupported filter type";
857 error =
"no form loaded";
863 char *nameOnly =
NULp;
866 const char *shownName = nameOnly ? nameOnly : eft_formname;
884 #define TEST_AUTO_UPDATE_ONLY_MISSING // do auto-update only if file is missing
886 void TEST_sequence_export() {
914 for (
int e = 0; eft[e]; ++e) {
915 for (
int complete = 0; complete <= 1; ++complete) {
916 const char *name = strrchr(eft[e],
'/');
925 if (strcmp(name,
"fasta_wacc.eft") == 0) {
927 "Exports sequences to fasta-format.\n"
928 "Header exported as: >ID SEQLENGTH bp SEQTYPE ACC");
933 const char *outname =
"impexp/exported";
934 char *used_outname =
NULp;
940 "DBname", eft[e],
NULp,
941 outname, 0, &used_outname));
946 #if defined(TEST_AUTO_UPDATE)
947 #if defined(TEST_AUTO_UPDATE_ONLY_MISSING)
954 TEST_COPY_FILE(outname, expected);
960 #endif // TEST_AUTO_UPDATE
GB_ERROR GBK_system(const char *system_command)
static bool isGap(char c)
GBDATA * select_next(GBDATA *gb_previous) const
GBDATA * GB_open(const char *path, const char *opent)
char * GB_read_fp(FILE *in)
static export_sequence_data * esd
void GB_warning(const char *message)
GB_ERROR detectVerticalGaps()
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
GBDATA * GB_child(GBDATA *father)
return string(buffer, length)
char * GBS_string_eval_in_env(const char *insource, const char *icommand, const GBL_call_env &callEnv)
long GBT_mark_all(GBDATA *gb_main, int flag)
static gb_export_sequence_cb get_export_sequence
void GB_unlink_or_warn(const char *path, GB_ERROR *error)
NOT4PERL void GB_set_export_sequence_hook(gb_export_sequence_cb escb)
bool read_string_pair(FILE *in, char *&s1, char *&s2, size_t &lineNr)
GB_ERROR GB_IO_error(const char *action, const char *filename)
char * ARB_strdup(const char *str)
GBDATA * get_gb_main() const
char * GB_read_as_string(GBDATA *gbd)
const char * ARB_date_string()
const char * GBS_global_string(const char *templat,...)
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
char * GBS_string_eval(const char *insource, const char *icommand)
GBDATA * select_first(GBDATA *gb_main) const
void set_single_mode(GBDATA *gb_species)
void auto_subtitles(const char *prefix)
SpeciesSelector(ExportWhich which_, const char *one_species_)
static EXPORT_CMD check_internal(const char *command)
GBDATA * GB_get_father(GBDATA *gbd)
GB_CSTR GB_canonical_path(const char *anypath)
GB_CSTR GBS_find_string(GB_CSTR cont, GB_CSTR substr, int match_mode)
static GB_ERROR export_format_single(const char *db_name, const char *formname, const char *outname, char **resulting_outname, RuleSetPtr ruleset)
static const char * internal_export_commands[]
GB_ERROR GB_await_error()
char * GB_create_tempfile(const char *name)
long GB_read_count(GBDATA *gbd)
GB_TYPES GB_read_type(GBDATA *gbd)
static GB_ERROR export_species_using_form(FILE *out, const char *form, const GBL_call_env &callEnv)
void GB_warningf(const char *templat,...)
GB_CSTR GB_read_key_pntr(GBDATA *gbd)
bool isSet() const
test if SmartPtr is not NULp
static GB_ERROR export_format_multiple(const char *dbname, const char *formname, const char *outname, bool multiple, char **resulting_outname, RuleSetPtr ruleset)
static GB_ERROR XML_recursive(GBDATA *gbd, int depth)
#define TEST_REJECT(cond)
#define TEST_REJECT_NULL(n)
static void error(const char *msg)
GB_CSTR GB_path_in_ARBHOME(const char *relative_path)
size_t get_length() const
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
GB_ERROR get_exportFormat_information(const char *eft_formname, ExportFormatInfo &info)
const char * get_export_sequence(GBDATA *gb_species, size_t &seq_len, GB_ERROR &error)
GB_ERROR export_by_format(GBDATA *gb_main, ExportWhich which, const char *one_species, AP_filter *filter, int cut_stop_codon, int compress, const char *dbname, const char *formname, const char *field_transfer_set, const char *outname, int multiple, char **real_outname)
GB_alignment_type GBT_get_alignment_type(GBDATA *gb_main, const char *aliname)
#define TEST_EXPECT_ZERO_OR_SHOW_ERRNO(iocond)
bool GB_is_container(GBDATA *gbd)
#define TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(fgot, fwant, diff)
GB_CSTR GB_path_in_ARBLIB(const char *relative_path)
static GB_ERROR read_export_format(export_format *efo, const char *file, bool load_complete_form)
void GBS_read_dir(StrArray &names, const char *dir, const char *mask)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
GB_CSTR GB_append_suffix(const char *name, const char *suffix)
char * GB_unique_filename(const char *name_prefix, const char *suffix)
void appendTo(char *&content, char sep, char *&toAppend)
fputs(TRACE_PREFIX, stderr)
GB_CSTR GB_concat_path(GB_CSTR anypath_left, GB_CSTR anypath_right)
void GB_write_flag(GBDATA *gbd, long flag)
export_sequence_data(GBDATA *Gb_Main, ExportWhich which, const char *one_species, AP_filter *Filter, bool CutStopCodon, int Compress)
GBDATA * first_species() const
const struct formatTable formname[]
GB_ERROR GB_failedTo_error(const char *do_something, const char *special, GB_ERROR error)
GBDATA * GBT_first_species(GBDATA *gb_main)
GB_ERROR is_invalid() const
char * get_exportFormat_evalForm(const char *eft_formname, GB_ERROR &error)
const char * GBS_static_string(const char *str)
#define TEST_EXPECT_NO_ERROR(call)
void GB_split_full_path(const char *fullpath, char **res_dir, char **res_fullname, char **res_name_only, char **res_suffix)
bool is_std_gap(const char c)
GBDATA * GBT_next_species(GBDATA *gb_species)
bool GB_is_regularfile(const char *path)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
void GB_informationf(const char *templat,...)
const char * getAlignment() const
bool in_single_mode() const
char * GBT_get_default_alignment(GBDATA *gb_main)
ARB_ERROR getError() const
GBDATA * GB_nextChild(GBDATA *child)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
static int info[maxsites+1]
static const char * exported_sequence(GBDATA *gb_species, size_t *seq_len, GB_ERROR *error)
GBDATA * next_species(GBDATA *gb_prev) const
const char * GBT_read_char_pntr(GBDATA *gb_container, const char *fieldpath)
#define TEST_EXPECT_EQUAL(expr, want)
static GB_ERROR export_write_species(GBDATA *gb_species, FILE *out, const GBL_env &env, const export_format &efo)
void inc_and_check_user_abort(GB_ERROR &error)
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
bool GB_is_privatefile(const char *path, bool read_private)
const unsigned char * get_seq_data(GBDATA *gb_species, size_t &slen, GB_ERROR &error) const