104 if (!name || !name[0]) {
135 return GB_entry(gb_gene_data,
"gene");
158 size_t pos_size = parts*
sizeof(pos->
start_pos[0]);
159 size_t comp_size = parts*
sizeof(pos->
complement[0]);
160 size_t data_size = 2*pos_size+3*comp_size;
164 if (lastFreedPosition && lastFreedPosition->
parts == parts) {
166 lastFreedPosition =
NULp;
172 pos->
start_pos = (
size_t*)ARB_calloc<char>(data_size);
197 if (lastFreedPosition) {
199 free(lastFreedPosition);
202 lastFreedPosition = pos;
217 if (!gb_field) error =
GBS_global_string(
"Expected entry '%s' missing", field_name);
224 if (parseTable.
size() != parts_expected) {
236 for (p = 0; p<parts_expected && !
error; p++) {
238 results[p] = strtol(parseTable[p], &end, 10);
239 if (end == parseTable[p]) {
252 bool joinable =
false;
260 if (parts>1) joinable =
true;
261 else if (parts<-1) parts = -parts;
277 error =
parseCSV(gb_gene,
"pos_complement", parts, parseTable);
278 for (p = 0; p<parts && !
error; p++) {
279 const char *val = parseTable[p];
280 if ((val[0] !=
'0' && val[0] !=
'1') || val[1] != 0) {
281 error =
GBS_global_string(
"Invalid content '%s' in 'pos_complement' (expected: \"01\")", val);
292 if (gb_pos_certain) {
293 error =
parseCSV(gb_gene,
"pos_certain", parts, parseTable);
295 for (p = 0; p<parts && !
error; p++) {
296 const unsigned char *val = (
unsigned char *)(parseTable[p]);
299 for (vp = 0; vp<2; vp++) {
300 unsigned char c = val[vp];
301 if (c !=
'<' && c !=
'=' && c !=
'>' && (c !=
"+-"[vp])) {
302 error =
GBS_global_string(
"Invalid content '%s' in 'pos_certain' (expected 2 from \"<=>\")", val);
334 GBDATA *gb_pos_complement;
353 if (!gb_pos_certain) {
359 if (gb_pos_certain) {
361 gb_pos_certain =
NULp;
394 else if (pos->
stop_pos[p] > length) {
402 if (!c || !strchr(
"<=>+", c)) error =
GBS_global_string(
"Invalid uncertainty '%c'", c);
403 else if (!c2 || !strchr(
"<=>-", c2)) error =
GBS_global_string(
"Invalid uncertainty '%c'", c2);
405 if (c ==
'+' || c2 ==
'-') {
406 if (c ==
'+' && c2 ==
'-') {
412 error =
"uncertainties '+' and '-' can only be used together";
423 if (pos->
parts == 1) {
424 if (gb_pos_joined) error =
GB_delete(gb_pos_joined);
430 if (!error && gb_pos_certain) {
435 if (!gb_pos_joined) {
447 for (p = 0; p<pos->
parts; ++p) {
457 if (gb_pos_certain) {
471 if (!error && gb_pos_certain) error =
GB_write_string(gb_pos_certain, suncertain);
499 int parts = location->
parts;
500 int *idx = ARB_alloc<int>(parts);
503 for (p = 0; p<parts; ++p) idx[p] = p;
505 location2sort = location;
507 location2sort =
NULp;
509 for (p = 0; p<parts; ++p) {
512 #define swap(a, b, type) do { type tmp = (a); (a) = (b); (b) = (tmp); } while (0)
517 swap(idx[i], idx[p],
int);
521 #if defined(DEBUG) && 0
522 printf(
"Locations sorted:\n");
523 for (p = 0; p<parts; ++p) {
529 for (p = 1; p<parts; p++) {
541 location->
parts = i+1;
543 #if defined(DEBUG) && 0
544 parts = location->
parts;
545 printf(
"Locations merged:\n");
546 for (p = 0; p<parts; ++p) {
579 return GB_export_errorf(
"The gene-species '%s' refers to an unknown organism (%s)\n"
580 "This occurs if you rename or delete the organism or change the entry\n"
581 "'ARB_origin_species' and will most likely cause serious problems.",
639 if (strcmp(gene_name, origin_gene_name) == 0) {
641 if (strcmp(organism_name, origin_species_name) == 0) {
659 const char *origin_species_name;
664 if (origin_species_name) {
677 const char *origin_gene_name;
682 if (origin_gene_name) {
742 fprintf(stderr,
"ARBDB-warning: found unspecific species named '%s', but expected an 'organism' with that name\n", name);
769 while (gb_organism) {
809 static struct arb_unit_test::test_alignment_data TestAlignmentData_Genome[] = {
810 { 0,
"spec",
"AUCUCCUAAACCCAACCGUAGUUCGAAUUGAG" },
813 #define TEST_EXPECT_MEMBER_EQUAL(s1,s2,member) TEST_EXPECT_EQUAL((s1)->member, (s2)->member)
815 #define TEST_EXPECT_GENPOS_EQUAL(p1,p2) do { \
816 TEST_EXPECT_MEMBER_EQUAL(p1, p2, parts); \
817 TEST_EXPECT_MEMBER_EQUAL(p1, p2, joinable); \
818 for (int p = 0; p<(p1)->parts; ++p) { \
819 TEST_EXPECT_MEMBER_EQUAL(p1, p2, start_pos[p]); \
820 TEST_EXPECT_MEMBER_EQUAL(p1, p2, stop_pos[p]); \
821 TEST_EXPECT_MEMBER_EQUAL(p1, p2, complement[p]); \
822 if ((p1)->start_uncertain) { \
823 TEST_EXPECT_MEMBER_EQUAL(p1, p2, start_uncertain[p]); \
824 TEST_EXPECT_MEMBER_EQUAL(p1, p2, stop_uncertain[p]); \
829 #define TEST_WRITE_READ_GEN_POSITION(pos) \
831 error = GEN_write_position(gb_gene, (pos), 0); \
833 GEN_position *rpos = GEN_read_position(gb_gene); \
835 error = GB_await_error(); \
838 TEST_EXPECT_GENPOS_EQUAL((pos), rpos); \
839 GEN_free_position(rpos); \
842 TEST_EXPECT_NULL(error.deliver()); \
845 #define TEST_WRITE_GEN_POSITION_ERROR(pos,exp_error) do { \
846 error = GEN_write_position(gb_gene, &*(pos), 0); \
847 TEST_EXPECT_EQUAL(error.deliver(), exp_error); \
850 #define TEST_GENPOS_FIELD(field,value) do { \
851 GBDATA *gb_field = GB_entry(gb_gene, (field)); \
853 TEST_REJECT_NULL(gb_field); \
854 TEST_EXPECT_EQUAL(GB_read_char_pntr(gb_field), (value)); \
857 TEST_EXPECT_NULL(gb_field); \
861 #define TEST_GENPOS_FIELDS(start,stop,complement,certain) do { \
862 TEST_GENPOS_FIELD("pos_start", start); \
863 TEST_GENPOS_FIELD("pos_stop", stop); \
864 TEST_GENPOS_FIELD("pos_complement", complement); \
865 TEST_GENPOS_FIELD("pos_certain", certain); \
868 #define TEST_GENE_SEQ_AND_LENGTH(werr,wseq,wlen) do { \
870 char *seq = GBT_read_gene_sequence_and_length(gb_gene, true, '-', &len); \
871 TEST_EXPECT_EQUAL(GB_have_error(), werr); \
873 TEST_EXPECT_EQUAL(len, (size_t)(wlen)); \
874 TEST_EXPECT_EQUAL(seq, (wseq)); \
882 void TEST_GEN_position() {
887 GBDATA *
gb_main = TEST_CREATE_DB(error,
"ali_genom", TestAlignmentData_Genome,
false);
899 GEN_position_Ptr pos;
903 TEST_WRITE_GEN_POSITION_ERROR(pos, "Illegal
start position 0");
905 pos->start_pos[0] = 5;
906 pos->stop_pos[0] = 10;
907 pos->complement[0] = 1;
911 TEST_WRITE_READ_GEN_POSITION(&*pos);
912 TEST_GENPOS_FIELDS("5", "10", "1", "==");
914 TEST_GENE_SEQ_AND_LENGTH(false, "TTTAGG", 6);
918 pos = GEN_new_position(3, false);
920 TEST_WRITE_GEN_POSITION_ERROR(pos, "Illegal
start position 0");
924 pos->start_pos[0] = 5; pos->start_pos[1] = 10; pos->start_pos[2] = 25;
925 pos->stop_pos[0] = 15; pos->stop_pos[1] = 20; pos->stop_pos[2] = 25;
926 pos->complement[0] = 0; pos->complement[1] = 1; pos->complement[2] = 0;
928 pos->start_uncertain[0] = '<';
929 pos->stop_uncertain[2] = '>';
931 TEST_WRITE_READ_GEN_POSITION(&*pos);
932 TEST_GENPOS_FIELDS("5,10,25", "15,20,25", "0,1,0", "<=,==,=>");
934 TEST_GENE_SEQ_AND_LENGTH(false, "CCUAAACCCAA-TACGGTTGGGT-
G", 25);
936 pos->stop_uncertain[2] = 'x';
937 TEST_WRITE_GEN_POSITION_ERROR(pos, "Invalid uncertainty 'x'");
939 pos->stop_uncertain[2] = '+';
940 TEST_WRITE_GEN_POSITION_ERROR(pos, "Invalid uncertainty '+'");
942 pos->start_uncertain[2] = '+';
943 pos->stop_uncertain[2] = '-';
944 TEST_WRITE_GEN_POSITION_ERROR(pos, "Invalid positions 25^25 for uncertainties +-");
946 pos->stop_pos[2] = 26;
947 TEST_WRITE_GEN_POSITION_ERROR(pos, (
void*)
NULp);
949 pos->stop_pos[0] = 100;
950 TEST_WRITE_GEN_POSITION_ERROR(pos, "Illegal stop position 100 (>
length(=32))");
GBDATA * GEN_create_nonexisting_gene(GBDATA *gb_species, const char *name)
GBDATA * GEN_next_pseudo_species(GBDATA *gb_species)
static GBDATA * GEN_read_pseudo_species_from_hash(const GB_HASH *pseudo_hash, const char *organism_name, const char *gene_name)
GBDATA * GEN_first_gene(GBDATA *gb_species)
unsigned char * complement
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
long GB_read_int(GBDATA *gbd)
static GB_ERROR parsePositions(GBDATA *gb_gene, const char *field_name, int parts_expected, size_t *results, ConstStrArray &parseTable)
long GBS_write_hash(GB_HASH *hs, const char *key, long val)
GB_ERROR GEN_write_position(GBDATA *gb_gene, const GEN_position *pos, long seqLength)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
GBDATA * GEN_find_origin_organism(GBDATA *gb_pseudo, const GB_HASH *organism_hash)
static GEN_position * lastFreedPosition
const char * GEN_origin_organism(GBDATA *gb_pseudo)
GBDATA * GBT_expect_item_rel_item_data(GBDATA *gb_item_data, const char *id_field, const char *id_value)
GBDATA * GB_nextEntry(GBDATA *entry)
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
bool GEN_is_organism(GBDATA *gb_species)
GBDATA * GEN_expect_gene(GBDATA *gb_species, const char *name)
const char * GBS_global_string(const char *templat,...)
void GBK_terminatef(const char *templat,...)
char * GEN_global_gene_identifier(GBDATA *gb_gene, GBDATA *gb_organism)
GEN_position * GEN_new_position(int parts, bool joinable)
GBDATA * GEN_find_origin_gene(GBDATA *gb_pseudo, const GB_HASH *organism_hash)
GBDATA * GEN_next_marked_organism(GBDATA *gb_organism)
GBDATA * GB_get_grandfather(GBDATA *gbd)
GBDATA * GB_get_father(GBDATA *gbd)
GB_ERROR GB_push_transaction(GBDATA *gbd)
static struct GEN_position_mem_handler GEN_position_dealloc
GBDATA * GEN_first_marked_pseudo_species(GBDATA *gb_main)
static GB_ERROR parseCSV(GBDATA *gb_gene, const char *field_name, size_t parts_expected, ConstStrArray &parseTable)
GB_ERROR GB_delete(GBDATA *&source)
static HelixNrInfo * start
static const char * pseudo_species_hash_key(const char *organism_name, const char *gene_name)
#define SmartCustomPtr(type, deallocator)
GB_ERROR GB_export_error(const char *error)
GBS_strstruct * GBS_stropen(long init_size)
GB_ERROR GB_await_error()
GBDATA * GB_create_container(GBDATA *father, const char *key)
long GB_read_count(GBDATA *gbd)
long GEN_get_organism_count(GBDATA *gb_main)
void GEN_add_pseudo_species_to_hash(GBDATA *gb_pseudo, GB_HASH *pseudo_hash)
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
GBDATA * GEN_next_marked_gene(GBDATA *gb_gene)
unsigned char * start_uncertain
static int cmp_location_parts(const void *v1, const void *v2)
static GEN_position * location2sort
GBDATA * GEN_first_gene_rel_gene_data(GBDATA *gb_gene_data)
const char * GEN_origin_gene(GBDATA *gb_pseudo)
GB_HASH * GEN_create_pseudo_species_hash(GBDATA *gb_main, long additionalSize)
GBDATA * GEN_find_gene_rel_gene_data(GBDATA *gb_gene_data, const char *name)
#define TEST_REJECT_NULL(n)
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
static void error(const char *msg)
GBDATA * GB_get_root(GBDATA *gbd)
bool GEN_is_pseudo_gene_species(GBDATA *gb_species)
GB_ERROR GEN_organism_not_found(GBDATA *gb_pseudo)
void GBT_splitNdestroy_string(ConstStrArray &names, char *&namelist, const char *separator, bool dropEmptyTokens)
~GEN_position_mem_handler()
bool GEN_is_genome_db(GBDATA *gb_main, int default_value)
GBDATA * GEN_first_organism(GBDATA *gb_main)
GBDATA * GBT_next_marked_species(GBDATA *gb_species)
GEN_position * GEN_read_position(GBDATA *gb_gene)
GBDATA * GEN_next_gene(GBDATA *gb_gene)
void GEN_use_uncertainties(GEN_position *pos)
GBDATA * GBT_find_species_rel_species_data(GBDATA *gb_species_data, const char *name)
void GEN_free_position(GEN_position *pos)
GBDATA * GEN_first_marked_organism(GBDATA *gb_main)
bool GB_has_key(GBDATA *gbd, const char *key)
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
GB_ERROR GB_write_int(GBDATA *gbd, long i)
GBDATA * GEN_find_organism(GBDATA *gb_main, const char *name)
void GBS_chrcat(GBS_strstruct *strstr, char ch)
void GEN_sortAndMergeLocationParts(GEN_position *location)
GB_ERROR GB_export_errorf(const char *templat,...)
static GBDATA * GEN_create_nonexisting_gene_rel_gene_data(GBDATA *gb_gene_data, const char *name)
TYPE * ARB_calloc(size_t nelem)
GBDATA * GEN_findOrCreate_gene_data(GBDATA *gb_species)
#define TEST_EXPECT_NULL(n)
static GBDATA * GEN_next_marked_pseudo_species(GBDATA *gb_species)
GBDATA * GEN_find_gene(GBDATA *gb_species, const char *name)
GBDATA * GB_next_marked(GBDATA *gbd, const char *keystring)
#define assert_or_exit(cond)
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
GBDATA * GEN_find_pseudo_species(GBDATA *gb_main, const char *organism_name, const char *gene_name, const GB_HASH *pseudo_hash)
char * GB_read_string(GBDATA *gbd)
char * GBS_strclose(GBS_strstruct *strstr)
GBDATA * GBT_first_species(GBDATA *gb_main)
GBDATA * GEN_find_or_create_gene_rel_gene_data(GBDATA *gb_gene_data, const char *name)
GBDATA * GB_find_string(GBDATA *gbd, const char *key, const char *str, GB_CASE case_sens, GB_SEARCH_TYPE gbs)
GBDATA * GBT_next_species(GBDATA *gb_species)
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
void reserve(size_t forElems)
GBDATA * GEN_first_marked_gene(GBDATA *gb_species)
GBDATA * GEN_find_gene_data(GBDATA *gb_species)
GBDATA * GB_first_marked(GBDATA *gbd, const char *keystring)
long GBT_get_species_count(GBDATA *gb_main)
GBDATA * GBT_find_item_rel_item_data(GBDATA *gb_item_data, const char *id_field, const char *id_value)
GBDATA * GEN_first_pseudo_species(GBDATA *gb_main)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
long GBS_read_hash(const GB_HASH *hs, const char *key)
GBDATA * GB_entry(GBDATA *father, const char *key)
GBDATA * GEN_next_organism(GBDATA *gb_organism)
char * GBS_global_string_copy(const char *templat,...)
void GB_close(GBDATA *gbd)
GBDATA * GEN_expect_gene_data(GBDATA *gb_species)
GB_HASH * GBS_create_hash(long estimated_elements, GB_CASE case_sens)
unsigned char * stop_uncertain