ARB
GEN_gene.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : GEN_gene.cxx //
4 // Purpose : //
5 // //
6 // Coded by Ralf Westram (coder@reallysoft.de) in 2001 //
7 // Institute of Microbiology (Technical University Munich) //
8 // http://www.arb-home.de/ //
9 // //
10 // =============================================================== //
11 
12 #include "GEN_gene.hxx"
13 #include "GEN_local.hxx"
14 #include "GEN_nds.hxx"
15 
16 #include <aw_awar.hxx>
17 #include <aw_msg.hxx>
18 #include <aw_root.hxx>
19 #include <arbdbt.h>
20 #include <adGene.h>
21 
22 // Standard fields of a gb_gene entry:
23 // -----------------------------------
24 // name = short name of gene (unique in one species)
25 // type = type of gene (e.g. 'gene', 'CDS', 'tRNA', 'misc_feature')
26 // pos_start = start-position(s) of gene(-parts); range is 1...genomeLength
27 // pos_stop = end-position(s) of gene(-parts); range is 1...genomeLength
28 // pos_certain = contains pairs of chars (1. for start-pos, 2. for end-pos)
29 //
30 // '=' means 'pos is exact'
31 // '<' means 'pos may be lower'
32 // '>' means 'pos may be higher'
33 // '+' means 'pos is directly behind'
34 // '-' means 'pos is directly before'
35 //
36 // if pos_certain is missing -> like '=='
37 //
38 // pos_complement = 1 -> CDS is on opposite strand
39 
40 // fields for split genes:
41 // --------------------------
42 // pos_joined = xxx -> gene consists of abs(xxx) parts (if missing xxx == 1 is assumed)
43 //
44 // if abs(xxx)>1, the gene consists of several parts.
45 // In that case the fields 'pos_start', 'pos_stop', 'pos_certain' and 'pos_complement'
46 // contain multiple comma-separated values - one for each joined part.
47 //
48 // if xxx is < -1, then joining the parts does not make sense (or nothing is known about it)
49 //
50 // Note: Please do not access these fields manually - use GEN_read_position!
51 
52 // other fields added by importer:
53 // -------------------------------
54 //
55 // During import ARB tries to reproduce existing translations.
56 // If it succeeds, it removes the translation.
57 //
58 // ARB_translation = written if ARB translation differs from original translation
59 // (original translation is not deleted in this case)
60 // ARB_translation_note = additional info about failed translation
61 // ARB_translation_rm = 1 -> translation was reproduced and deleted
62 //
63 // if a gene with type 'gene' exists and another gene with different type, but
64 // identical location exists as well, ARB sets ARB_display_hidden to 1 for
65 // the 'gene'. For the other gene with diff. type ARB sets a reference to the
66 // hidden 'gene':
67 //
68 // ARB_is_gene = shortname of related hidden gene
69 
70 
71 // fields used for display:
72 // ------------------------
73 // ARB_display_hidden = 1 -> do not display this gene (depends on AWAR_GENMAP_SHOW_HIDDEN too)
74 // ARB_color = color group
75 
76 
77 
78 // Old format standard fields of a gb_gene entry:
79 // ----------------------------------------------
80 // name = short name of gene (unique in one species)
81 // pos_begin = start-position of gene
82 // pos_end = end-position of gene
83 // pos_uncertain = contains 2 chars (1. for start-pos, 2. for end-pos); = means 'pos is exact'; < means 'pos may be lower'; > means 'pos may be higher'; missing -> like ==
84 // complement = 1 -> encoding from right to left
85 //
86 // fields for split genes:
87 // --------------------------
88 // pos_joined = xxx -> gene consists of xxx parts (may not exist if xxx == 1)
89 // pos_beginxxx, pos_endxxx = start-/end-positions for parts 2...n
90 // pos_uncertainxxx = like above for parts 2...n
91 //
92 
93 using namespace std;
94 
95 static const GEN_position *loadPositions4gene(GBDATA *gb_gene) {
96  static GEN_position *loaded_position = NULp;
97  static GBDATA *positionLoaded4gene = NULp;
98 
99  if (positionLoaded4gene != gb_gene) {
100  if (loaded_position) {
101  GEN_free_position(loaded_position);
102  loaded_position = NULp;
103  positionLoaded4gene = NULp;
104  }
105 
106  if (gb_gene) {
107  loaded_position = GEN_read_position(gb_gene);
108  if (loaded_position) positionLoaded4gene = gb_gene;
109  }
110  }
111  return loaded_position;
112 }
113 
114 void GEN_gene::init() {
115  name = GBT_get_name_or_description(gb_gene);
116 
117  GBDATA *gbd = GB_entry(gb_gene, "complement");
118  complement = gbd ? GB_read_byte(gbd) == 1 : false;
119 }
120 
121 void GEN_gene::load_location(int part, const GEN_position *location) {
122  gen_assert(part >= 1);
123  gen_assert(part <= location->parts);
124 
125  pos1 = location->start_pos[part-1];
126  pos2 = location->stop_pos[part-1];
127  complement = location->complement[part-1];
128 
129  gen_assert(pos1 <= pos2);
130 }
131 
132 GEN_gene::GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location) :
133  gb_gene(gb_gene_),
134  root(root_)
135 {
136  init();
137  load_location(1, location);
138  nodeInfo = GEN_make_node_text_nds(gb_gene, 0);
139 }
140 
141 GEN_gene::GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location, int partNumber) :
142  gb_gene(gb_gene_),
143  root(root_)
144 {
145  // partNumber 1..n which part of a split gene
146  // maxParts 1..n of how many parts consists this gene?
147 
148  init();
149  load_location(partNumber, location);
150 
151  {
152  char buffer[30];
153  sprintf(buffer, " (%i/%i)", partNumber, location->parts);
154  nodeInfo = name+buffer;
155  }
156 }
157 
158 void GEN_gene::reinit_NDS() const {
159  nodeInfo = GEN_make_node_text_nds(gb_gene, 0);
160 }
161 
162 // ------------------
163 // GEN_root
164 
165 GEN_root::GEN_root(const char *organism_name_, const char *gene_name_, GBDATA *gb_main_, AW_root *aw_root, GEN_graphic *gen_graphic_) :
166  gb_main(gb_main_),
167  gen_graphic(gen_graphic_),
168  organism_name(organism_name_),
169  gene_name(gene_name_),
170  error_reason(""),
171  length(-1),
172  gb_gene_data(NULp)
173 {
175  GBDATA *gb_organism = GBT_find_species(gb_main, organism_name.c_str());
176 
177  if (!gb_organism) {
178  error_reason = ARB_strdup("Please select a species.");
179  }
180  else {
181  GBDATA *gb_data = GBT_find_sequence(gb_organism, GENOM_ALIGNMENT);
182  if (!gb_data) {
183  error_reason = GBS_global_string_copy("'%s' has no data in '%s'", organism_name.c_str(), GENOM_ALIGNMENT);
184  }
185  else {
186  length = GB_read_count(gb_data);
187 
188  gb_gene_data = GEN_find_gene_data(gb_organism);
189  GBDATA *gb_gene = gb_gene_data ? GEN_first_gene_rel_gene_data(gb_gene_data) : NULp;
190 
191  if (!gb_gene) {
192  error_reason = GBS_global_string("Species '%s' has no gene-information", organism_name.c_str());
193  }
194  else {
195  bool show_hidden = aw_root->awar(AWAR_GENMAP_SHOW_HIDDEN)->read_int() != 0;
196 
197  while (gb_gene) {
198  bool show_this = show_hidden;
199 
200  if (!show_this) {
201  GBDATA *gbd = GB_entry(gb_gene, ARB_HIDDEN);
202 
203  if (!gbd || !GB_read_byte(gbd)) { // gene is not hidden
204  show_this = true;
205  }
206  }
207 
208  if (show_this) {
209  const GEN_position *location = loadPositions4gene(gb_gene);
210 
211  if (!location) {
213  char *id = GEN_global_gene_identifier(gb_gene, gb_organism);
214  aw_message(GBS_global_string("Can't load gene '%s':\nReason: %s", id, warning));
215  free(id);
216  }
217  else {
218  int parts = location->parts;
219  if (parts == 1) {
220  gene_set.insert(GEN_gene(gb_gene, this, location));
221  }
222  else { // joined gene
223  for (int p = 1; p <= parts; ++p) {
224  gene_set.insert(GEN_gene(gb_gene, this, location, p));
225  }
226  }
227  }
228  }
229  gb_gene = GEN_next_gene(gb_gene);
230  }
231  }
232  }
233  }
234 }
235 
236 void GEN_root::reinit_NDS() const {
237  GEN_iterator end = gene_set.end();
238  for (GEN_iterator gene = gene_set.begin(); gene != end; ++gene) {
239  gene->reinit_NDS();
240  }
241 }
char * GEN_make_node_text_nds(GBDATA *gbd, int mode)
Definition: GEN_nds.cxx:78
unsigned char * complement
Definition: adGene.h:41
GBDATA * GEN_next_gene(GBDATA *gb_gene)
Definition: adGene.cxx:138
#define AWAR_GENMAP_SHOW_HIDDEN
Definition: GEN_local.hxx:40
void GEN_free_position(GEN_position *pos)
Definition: adGene.cxx:195
#define ARB_HIDDEN
Definition: adGene.h:22
static int pos1
Definition: ClustalV.cxx:58
int parts
Definition: adGene.h:37
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
long read_int() const
Definition: AW_awar.cxx:184
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
void warning(int warning_num, const char *warning_message)
Definition: util.cxx:61
STL namespace.
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
#define gen_assert(bed)
Definition: GEN_local.hxx:19
GEN_root(const char *organism_name_, const char *gene_name_, GBDATA *gb_main_, AW_root *aw_root, GEN_graphic *gen_graphic_)
Definition: GEN_gene.cxx:165
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
long GB_read_count(GBDATA *gbd)
Definition: arbdb.cxx:758
void reinit_NDS() const
Definition: GEN_gene.cxx:158
size_t * stop_pos
Definition: adGene.h:40
GBDATA * GEN_find_gene_data(GBDATA *gb_species)
Definition: adGene.cxx:50
static const GEN_position * loadPositions4gene(GBDATA *gb_gene)
Definition: GEN_gene.cxx:95
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
static int pos2
Definition: ClustalV.cxx:59
void reinit_NDS() const
Definition: GEN_gene.cxx:236
#define GENOM_ALIGNMENT
Definition: adGene.h:19
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:708
GEN_gene_set::iterator GEN_iterator
Definition: GEN_gene.hxx:80
GEN_gene(GBDATA *gb_gene_, GEN_root *root_, const GEN_position *location)
Definition: GEN_gene.cxx:132
char * GEN_global_gene_identifier(GBDATA *gb_gene, GBDATA *gb_organism)
Definition: adGene.cxx:783
int GB_read_byte(GBDATA *gbd)
Definition: arbdb.cxx:734
static ARB_init_perl_interface init
Definition: ARB_ext.c:101
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
size_t * start_pos
Definition: adGene.h:39
#define NULp
Definition: cxxforward.h:116
GBDATA * GBT_find_species(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:139
GB_transaction ta(gb_var)
GBDATA * gb_main
Definition: adname.cxx:32
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
GEN_position * GEN_read_position(GBDATA *gb_gene)
Definition: adGene.cxx:250
size_t length
GBDATA * GEN_first_gene_rel_gene_data(GBDATA *gb_gene_data)
Definition: adGene.cxx:134
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:194