41 AppendNA((
NA_Base *)sequfilt, strlen((
const char *)sequfilt), this_elem);
47 unsigned char **the_names,
48 unsigned char **the_sequences,
49 unsigned long numberspecies,
50 unsigned long maxalignlen,
53 bool cutoff_stop_codon,
60 gde_assert(contradicted(the_species, the_names));
63 allocatedFilter =
new AP_filter(maxalignlen);
64 filter = allocatedFilter;
68 if (fl < maxalignlen) {
69 aw_message(
"Warning: Your filter is shorter than the alignment len");
76 size_t *seqlen = ARB_calloc<size_t>(numberspecies);
80 for (i=0; i<numberspecies; i++) {
81 seqlen[i] = strlen((
char *)the_sequences[i]);
85 if (cutoff_stop_codon) {
87 fputs(
"[CUTTING STOP CODONS]\n", stdout);
88 for (i=0; i<numberspecies; i++) {
92 long pos = stop_codon-
seq;
93 long restlen = maxalignlen-pos;
94 memset(stop_codon,
'.', restlen);
100 uchar **sequfilt = ARB_calloc<uchar*>(numberspecies+1);
108 for (i=0; i<numberspecies; i++) {
111 for (
unsigned long col=0; (col<maxalignlen); col++) {
112 char c = the_sequences[i][col];
115 sequfilt[i][newcount++] = c;
127 for (i=0; i<256; i++) isInfo[i] =
true;
128 isInfo[
UINT(
'-')] =
false;
129 isInfo[
UINT(
'.')] =
false;
134 isInfo[
UINT(
'N')] =
false;
135 isInfo[
UINT(
'n')] =
false;
138 isInfo[
UINT(
'X')] =
false;
139 isInfo[
UINT(
'x')] =
false;
147 bool modified =
false;
148 char *filterString = filter->
to_string();
150 for (i=0; i<maxalignlen; i++) {
152 bool wantColumn =
false;
154 for (
size_t n=0; n<numberspecies; n++) {
156 if (isInfo[
UINT(the_sequences[n][i])]) {
163 filterString[i] =
'0';
172 delete allocatedFilter;
173 filter = allocatedFilter =
new AP_filter(filterString,
NULp, len);
185 for (i=0; i<numberspecies; i++) {
190 sequfilt[i][len] = 0;
191 memset(sequfilt[i],
'.', len);
194 for (
size_t col=0; (col<maxalignlen) && (c=the_sequences[i][col]); col++) {
196 sequfilt[i][newcount++] = simplify[c];
217 int elementtype =
TEXT;
218 int elementtype_init =
RNA;
232 elementtype_init = elementtype;
237 arb_progress progress(
"Read data from DB", numberspecies);
239 for (gb_species = the_species[number]; gb_species && !
error; gb_species = the_species[++number]) {
241 this_elem = &(dataset.
element[curelem]);
246 #define GET_FIELD_CONTENT(fieldname,buffer,bufsize) do { \
247 gbd = GB_entry(gb_species, fieldname); \
249 const char *val = GB_read_char_pntr(gbd); \
250 strcpy_truncate(buffer, val, bufsize); \
252 else buffer[0] = 0; \
270 unsigned char *species_name;
272 for (species_name=the_names[number]; species_name && !
error; species_name=the_names[++number]) {
274 this_elem = &(dataset.
element[curelem]);
282 this_elem->
id[0] = 0;
296 "External program call may fail or produce invalid results.\n"
297 "You might want to use 'Species/Synchronize IDs' and read the associated help.",
307 for (i=0; i<numberspecies; i++) {
316 delete allocatedFilter;
329 uchar **the_sequences;
331 long numberspecies = 0;
334 numberspecies, maxalignlen);
336 gde_assert(contradicted(the_species, the_names));
343 int res =
InsertDatainGDE(dataset,
NULp, the_names, (
unsigned char **)the_sequences, numberspecies, maxalignlen, filter, compress, cutoff_stop_codon, typeinfo);
344 for (
long i=0; i<numberspecies; i++) {
345 delete the_sequences[i];
347 delete the_sequences;
348 if (the_species)
delete the_species;
349 else delete the_names;
359 long numberspecies = 0;
360 long missingdata = 0;
386 the_species[numberspecies]=gb_species;
397 char **the_sequences;
ARB_calloc(the_sequences, numberspecies+1);
399 for (
long i=0; the_species[i]; i++) {
400 ARB_alloc(the_sequences[i], maxalignlen+1);
401 the_sequences[i][maxalignlen] = 0;
402 memset(the_sequences[i],
'.', (
size_t)maxalignlen);
404 int size = strlen(data);
405 if (size > maxalignlen) size = (
int)maxalignlen;
409 int res =
InsertDatainGDE(dataset, the_species,
NULp, (
unsigned char **)the_sequences, numberspecies, maxalignlen, filter, compress, cutoff_stop_codon, typeinfo);
410 for (
long i=0; i<numberspecies; i++) {
411 free(the_sequences[i]);
420 if (a->
seqlen == 0)
return -1;
427 case TEXT:
return '~';
428 case MASK:
return '0';
438 Warning(
"Putelem:insert beyond end of sequence space ignored");
440 else if (b >= (a->
offset)) {
448 for (
int j=b; j<a->
offset; j++) temp[j-b] =
'0';
453 for (
int j=b; j<a->
offset; j++) temp[j-b] =
'\0';
457 for (
int j=b; j<a->
offset; j++) temp[j-b] =
'-';
462 for (
int j=b; j<a->
offset; j++) temp[j-b] =
' ';
char short_name[SIZE_SHORT_NAME]
GBDATA * GBT_first_marked_species_rel_species_data(GBDATA *gb_species_data)
char seq_name[SIZE_SEQ_NAME]
int ReadArbdb2(NA_Alignment &dataset, AP_filter *filter, GapCompression compress, bool cutoff_stop_codon, TypeInfo typeinfo)
char * ARB_strdup(const char *str)
const char * GBS_global_string(const char *templat,...)
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
const uchar * get_simplify_table() const
int ReadArbdb(NA_Alignment &dataset, bool marked, AP_filter *filter, GapCompression compress, bool cutoff_stop_codon, TypeInfo typeinfo)
#define AWAR_GDE_EXPORT_FILTER
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
void putelem(NA_Sequence *a, int b, NA_Base c)
void strcpy_truncate(char *dest, const char *source, size_t dest_size)
TYPE * ARB_alloc(size_t nelem)
void AppendNA(NA_Base *buffer, int len, NA_Sequence *seq)
GDE_get_sequences_cb get_sequences
struct gde_database_access db_access
int AWTC_name_quality(const char *short_name)
#define GET_FIELD_CONTENT(fieldname, buffer, bufsize)
static void error(const char *msg)
size_t get_length() const
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
GB_alignment_type GBT_get_alignment_type(GBDATA *gb_main, const char *aliname)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
size_t get_filtered_length() const
void Warning(const char *s)
static __ATTR__USERESULT int InsertDatainGDE(NA_Alignment &dataset, GBDATA **the_species, unsigned char **the_names, unsigned char **the_sequences, unsigned long numberspecies, unsigned long maxalignlen, const AP_filter *filter, GapCompression compress, bool cutoff_stop_codon, TypeInfo typeinfo)
static void set_constant_fields(NA_Sequence *this_elem)
fputs(TRACE_PREFIX, stderr)
TYPE * ARB_calloc(size_t nelem)
int Default_PROColor_LKUP[]
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
#define __ATTR__USERESULT
void ARB_realloc(TYPE *&tgt, size_t nelem)
int getelem(NA_Sequence *a, int b)
static void AppendNA_and_free(NA_Sequence *this_elem, uchar *&sequfilt)
GB_ERROR is_invalid() const
void aw_message(const char *msg)
bool is_std_gap(const char c)
GBDATA * GBT_next_species(GBDATA *gb_species)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
bool use_position(size_t pos) const
char authority[SIZE_AUTHORITY]
void inc_and_check_user_abort(GB_ERROR &error)
void InitNASeq(NA_Sequence *seq, int type)
int Arbdb_get_curelem(NA_Alignment &dataset)
GBDATA * GBT_get_species_data(GBDATA *gb_main)