104 if (!name || !name[0]) {
135 return GB_entry(gb_gene_data,
"gene");
158 size_t pos_size = parts*
sizeof(pos->
start_pos[0]);
159 size_t comp_size = parts*
sizeof(pos->
complement[0]);
160 size_t data_size = 2*pos_size+3*comp_size;
164 if (lastFreedPosition && lastFreedPosition->
parts == parts) {
166 lastFreedPosition =
NULp;
172 pos->
start_pos = (
size_t*)ARB_calloc<char>(data_size);
197 if (lastFreedPosition) {
199 free(lastFreedPosition);
202 lastFreedPosition = pos;
217 if (!gb_field) error =
GBS_global_string(
"Expected entry '%s' missing", field_name);
224 if (parseTable.
size() != parts_expected) {
236 for (p = 0; p<parts_expected && !
error; p++) {
238 results[p] = strtol(parseTable[p], &end, 10);
239 if (end == parseTable[p]) {
252 bool joinable =
false;
260 if (parts>1) joinable =
true;
261 else if (parts<-1) parts = -parts;
277 error =
parseCSV(gb_gene,
"pos_complement", parts, parseTable);
278 for (p = 0; p<parts && !
error; p++) {
279 const char *val = parseTable[p];
280 if ((val[0] !=
'0' && val[0] !=
'1') || val[1] != 0) {
281 error =
GBS_global_string(
"Invalid content '%s' in 'pos_complement' (expected: \"01\")", val);
292 if (gb_pos_certain) {
293 error =
parseCSV(gb_gene,
"pos_certain", parts, parseTable);
295 for (p = 0; p<parts && !
error; p++) {
296 const unsigned char *val = (
unsigned char *)(parseTable[p]);
299 for (vp = 0; vp<2; vp++) {
300 unsigned char c = val[vp];
301 if (c !=
'<' && c !=
'=' && c !=
'>' && (c !=
"+-"[vp])) {
302 error =
GBS_global_string(
"Invalid content '%s' in 'pos_certain' (expected 2 from \"<=>\")", val);
334 GBDATA *gb_pos_complement;
353 if (!gb_pos_certain) {
359 if (gb_pos_certain) {
361 gb_pos_certain =
NULp;
394 else if (pos->
stop_pos[p] > length) {
402 if (!c || !strchr(
"<=>+", c)) error =
GBS_global_string(
"Invalid uncertainty '%c'", c);
403 else if (!c2 || !strchr(
"<=>-", c2)) error =
GBS_global_string(
"Invalid uncertainty '%c'", c2);
405 if (c ==
'+' || c2 ==
'-') {
406 if (c ==
'+' && c2 ==
'-') {
412 error =
"uncertainties '+' and '-' can only be used together";
423 if (pos->
parts == 1) {
424 if (gb_pos_joined) error =
GB_delete(gb_pos_joined);
430 if (!error && gb_pos_certain) {
435 if (!gb_pos_joined) {
447 for (p = 0; p<pos->
parts; ++p) {
457 if (gb_pos_certain) {
489 int parts = location->
parts;
490 int *idx = ARB_alloc<int>(parts);
493 for (p = 0; p<parts; ++p) idx[p] = p;
495 location2sort = location;
497 location2sort =
NULp;
499 for (p = 0; p<parts; ++p) {
502 #define swap(a, b, type) do { type tmp = (a); (a) = (b); (b) = (tmp); } while (0)
507 swap(idx[i], idx[p],
int);
511 #if defined(DEBUG) && 0
512 printf(
"Locations sorted:\n");
513 for (p = 0; p<parts; ++p) {
519 for (p = 1; p<parts; p++) {
531 location->
parts = i+1;
533 #if defined(DEBUG) && 0
534 parts = location->
parts;
535 printf(
"Locations merged:\n");
536 for (p = 0; p<parts; ++p) {
569 return GB_export_errorf(
"The gene-species '%s' refers to an unknown organism (%s)\n"
570 "This occurs if you rename or delete the organism or change the entry\n"
571 "'ARB_origin_species' and will most likely cause serious problems.",
629 if (strcmp(gene_name, origin_gene_name) == 0) {
631 if (strcmp(organism_name, origin_species_name) == 0) {
649 const char *origin_species_name;
654 if (origin_species_name) {
667 const char *origin_gene_name;
672 if (origin_gene_name) {
732 fprintf(stderr,
"ARBDB-warning: found unspecific species named '%s', but expected an 'organism' with that name\n", name);
759 while (gb_organism) {
799 static struct arb_unit_test::test_alignment_data TestAlignmentData_Genome[] = {
800 { 0,
"spec",
"AUCUCCUAAACCCAACCGUAGUUCGAAUUGAG" },
803 #define TEST_EXPECT_MEMBER_EQUAL(s1,s2,member) TEST_EXPECT_EQUAL((s1)->member, (s2)->member)
805 #define TEST_EXPECT_GENPOS_EQUAL(p1,p2) do { \
806 TEST_EXPECT_MEMBER_EQUAL(p1, p2, parts); \
807 TEST_EXPECT_MEMBER_EQUAL(p1, p2, joinable); \
808 for (int p = 0; p<(p1)->parts; ++p) { \
809 TEST_EXPECT_MEMBER_EQUAL(p1, p2, start_pos[p]); \
810 TEST_EXPECT_MEMBER_EQUAL(p1, p2, stop_pos[p]); \
811 TEST_EXPECT_MEMBER_EQUAL(p1, p2, complement[p]); \
812 if ((p1)->start_uncertain) { \
813 TEST_EXPECT_MEMBER_EQUAL(p1, p2, start_uncertain[p]); \
814 TEST_EXPECT_MEMBER_EQUAL(p1, p2, stop_uncertain[p]); \
819 #define TEST_WRITE_READ_GEN_POSITION(pos) \
821 error = GEN_write_position(gb_gene, (pos), 0); \
823 GEN_position *rpos = GEN_read_position(gb_gene); \
825 error = GB_await_error(); \
828 TEST_EXPECT_GENPOS_EQUAL((pos), rpos); \
829 GEN_free_position(rpos); \
832 TEST_EXPECT_NULL(error.deliver()); \
835 #define TEST_WRITE_GEN_POSITION_ERROR(pos,exp_error) do { \
836 error = GEN_write_position(gb_gene, &*(pos), 0); \
837 TEST_EXPECT_EQUAL(error.deliver(), exp_error); \
840 #define TEST_GENPOS_FIELD(field,value) do { \
841 GBDATA *gb_field = GB_entry(gb_gene, (field)); \
843 TEST_REJECT_NULL(gb_field); \
844 TEST_EXPECT_EQUAL(GB_read_char_pntr(gb_field), (value)); \
847 TEST_EXPECT_NULL(gb_field); \
851 #define TEST_GENPOS_FIELDS(start,stop,complement,certain) do { \
852 TEST_GENPOS_FIELD("pos_start", start); \
853 TEST_GENPOS_FIELD("pos_stop", stop); \
854 TEST_GENPOS_FIELD("pos_complement", complement); \
855 TEST_GENPOS_FIELD("pos_certain", certain); \
858 #define TEST_GENE_SEQ_AND_LENGTH(werr,wseq,wlen) do { \
860 char *seq = GBT_read_gene_sequence_and_length(gb_gene, true, '-', &len); \
861 TEST_EXPECT_EQUAL(GB_have_error(), werr); \
863 TEST_EXPECT_EQUAL(len, (size_t)(wlen)); \
864 TEST_EXPECT_EQUAL(seq, (wseq)); \
872 void TEST_GEN_position() {
877 GBDATA *
gb_main = TEST_CREATE_DB(error,
"ali_genom", TestAlignmentData_Genome,
false);
889 GEN_position_Ptr pos;
893 TEST_WRITE_GEN_POSITION_ERROR(pos, "Illegal
start position 0");
895 pos->start_pos[0] = 5;
896 pos->stop_pos[0] = 10;
897 pos->complement[0] = 1;
901 TEST_WRITE_READ_GEN_POSITION(&*pos);
902 TEST_GENPOS_FIELDS("5", "10", "1", "==");
904 TEST_GENE_SEQ_AND_LENGTH(false, "TTTAGG", 6);
908 pos = GEN_new_position(3, false);
910 TEST_WRITE_GEN_POSITION_ERROR(pos, "Illegal
start position 0");
914 pos->start_pos[0] = 5; pos->start_pos[1] = 10; pos->start_pos[2] = 25;
915 pos->stop_pos[0] = 15; pos->stop_pos[1] = 20; pos->stop_pos[2] = 25;
916 pos->complement[0] = 0; pos->complement[1] = 1; pos->complement[2] = 0;
918 pos->start_uncertain[0] = '<';
919 pos->stop_uncertain[2] = '>';
921 TEST_WRITE_READ_GEN_POSITION(&*pos);
922 TEST_GENPOS_FIELDS("5,10,25", "15,20,25", "0,1,0", "<=,==,=>");
924 TEST_GENE_SEQ_AND_LENGTH(false, "CCUAAACCCAA-TACGGTTGGGT-
G", 25);
926 pos->stop_uncertain[2] = 'x';
927 TEST_WRITE_GEN_POSITION_ERROR(pos, "Invalid uncertainty 'x'");
929 pos->stop_uncertain[2] = '+';
930 TEST_WRITE_GEN_POSITION_ERROR(pos, "Invalid uncertainty '+'");
932 pos->start_uncertain[2] = '+';
933 pos->stop_uncertain[2] = '-';
934 TEST_WRITE_GEN_POSITION_ERROR(pos, "Invalid positions 25^25 for uncertainties +-");
936 pos->stop_pos[2] = 26;
937 TEST_WRITE_GEN_POSITION_ERROR(pos, (
void*)
NULp);
939 pos->stop_pos[0] = 100;
940 TEST_WRITE_GEN_POSITION_ERROR(pos, "Illegal stop position 100 (>
length(=32))");
GBDATA * GEN_create_nonexisting_gene(GBDATA *gb_species, const char *name)
GBDATA * GEN_next_pseudo_species(GBDATA *gb_species)
static GBDATA * GEN_read_pseudo_species_from_hash(const GB_HASH *pseudo_hash, const char *organism_name, const char *gene_name)
GBDATA * GEN_first_gene(GBDATA *gb_species)
unsigned char * complement
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
long GB_read_int(GBDATA *gbd)
static GB_ERROR parsePositions(GBDATA *gb_gene, const char *field_name, int parts_expected, size_t *results, ConstStrArray &parseTable)
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
GB_ERROR GEN_write_position(GBDATA *gb_gene, const GEN_position *pos, long seqLength)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
GBDATA * GEN_find_origin_organism(GBDATA *gb_pseudo, const GB_HASH *organism_hash)
static GEN_position * lastFreedPosition
const char * GEN_origin_organism(GBDATA *gb_pseudo)
GBDATA * GBT_expect_item_rel_item_data(GBDATA *gb_item_data, const char *id_field, const char *id_value)
GBDATA * GB_nextEntry(GBDATA *entry)
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
bool GEN_is_organism(GBDATA *gb_species)
GBDATA * GEN_expect_gene(GBDATA *gb_species, const char *name)
const char * GBS_global_string(const char *templat,...)
void GBK_terminatef(const char *templat,...)
void cat(const char *from)
char * GEN_global_gene_identifier(GBDATA *gb_gene, GBDATA *gb_organism)
GEN_position * GEN_new_position(int parts, bool joinable)
GBDATA * GEN_find_origin_gene(GBDATA *gb_pseudo, const GB_HASH *organism_hash)
GBDATA * GEN_next_marked_organism(GBDATA *gb_organism)
GBDATA * GB_get_grandfather(GBDATA *gbd)
GBDATA * GB_get_father(GBDATA *gbd)
GB_ERROR GB_push_transaction(GBDATA *gbd)
static struct GEN_position_mem_handler GEN_position_dealloc
GBDATA * GEN_first_marked_pseudo_species(GBDATA *gb_main)
static GB_ERROR parseCSV(GBDATA *gb_gene, const char *field_name, size_t parts_expected, ConstStrArray &parseTable)
GB_ERROR GB_delete(GBDATA *&source)
static HelixNrInfo * start
static const char * pseudo_species_hash_key(const char *organism_name, const char *gene_name)
#define SmartCustomPtr(type, deallocator)
GB_ERROR GB_export_error(const char *error)
GB_ERROR GB_await_error()
GBDATA * GB_create_container(GBDATA *father, const char *key)
long GB_read_count(GBDATA *gbd)
long GEN_get_organism_count(GBDATA *gb_main)
void GEN_add_pseudo_species_to_hash(GBDATA *gb_pseudo, GB_HASH *pseudo_hash)
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
GBDATA * GEN_next_marked_gene(GBDATA *gb_gene)
unsigned char * start_uncertain
static int cmp_location_parts(const void *v1, const void *v2)
static GEN_position * location2sort
GBDATA * GEN_first_gene_rel_gene_data(GBDATA *gb_gene_data)
const char * GEN_origin_gene(GBDATA *gb_pseudo)
GB_HASH * GEN_create_pseudo_species_hash(GBDATA *gb_main, long additionalSize)
GBDATA * GEN_find_gene_rel_gene_data(GBDATA *gb_gene_data, const char *name)
#define TEST_REJECT_NULL(n)
static void error(const char *msg)
GBDATA * GB_get_root(GBDATA *gbd)
bool GEN_is_pseudo_gene_species(GBDATA *gb_species)
GB_ERROR GEN_organism_not_found(GBDATA *gb_pseudo)
~GEN_position_mem_handler()
bool GEN_is_genome_db(GBDATA *gb_main, int default_value)
GBDATA * GEN_first_organism(GBDATA *gb_main)
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
GEN_position * GEN_read_position(GBDATA *gb_gene)
GBDATA * GEN_next_gene(GBDATA *gb_gene)
void GEN_use_uncertainties(GEN_position *pos)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
void GEN_free_position(GEN_position *pos)
GBDATA * GEN_first_marked_organism(GBDATA *gb_main)
bool GB_has_key(GBDATA *gbd, const char *key)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
GB_ERROR GB_write_int(GBDATA *gbd, long i)
GBDATA * GEN_find_organism(GBDATA *gb_main, const char *name)
void GEN_sortAndMergeLocationParts(GEN_position *location)
GB_ERROR GB_export_errorf(const char *templat,...)
static GBDATA * GEN_create_nonexisting_gene_rel_gene_data(GBDATA *gb_gene_data, const char *name)
TYPE * ARB_calloc(size_t nelem)
GBDATA * GEN_findOrCreate_gene_data(GBDATA *gb_species)
#define TEST_EXPECT_NULL(n)
static GBDATA * GEN_next_marked_pseudo_species(GBDATA *gb_species)
GBDATA * GEN_find_gene(GBDATA *gb_species, const char *name)
GBDATA * GB_next_marked(GBDATA *gbd, const char *keystring)
#define assert_or_exit(cond)
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
GBDATA * GEN_find_pseudo_species(GBDATA *gb_main, const char *organism_name, const char *gene_name, const GB_HASH *pseudo_hash)
char * GB_read_string(GBDATA *gbd)
GBDATA * GBT_first_species(GBDATA *gb_main)
GBDATA * GEN_find_or_create_gene_rel_gene_data(GBDATA *gb_gene_data, const char *name)
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
GBDATA * GBT_next_species(GBDATA *gb_species)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
void reserve(size_t forElems)
const char * get_data() const
GBDATA * GEN_first_marked_gene(GBDATA *gb_species)
GBDATA * GEN_find_gene_data(GBDATA *gb_species)
GBDATA * GB_first_marked(GBDATA *gbd, const char *keystring)
long GBT_get_species_count(GBDATA *gb_main)
GBDATA * GBT_find_item_rel_item_data(GBDATA *gb_item_data, const char *id_field, const char *id_value)
GBDATA * GEN_first_pseudo_species(GBDATA *gb_main)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, SplitMode mode)
long GBS_read_hash(const GB_HASH *hs, const char *key)
GBDATA * GB_entry(GBDATA *father, const char *key)
GBDATA * GEN_next_organism(GBDATA *gb_organism)
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
GBDATA * GEN_expect_gene_data(GBDATA *gb_species)
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
unsigned char * stop_uncertain