ARB
arb_gene_probe.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : arb_gene_probe.cxx //
4 // Purpose : Prepares a genome database for Gene-PT-Server //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include <arbdbt.h>
12 #include <adGene.h>
13 
#include <functional>
14 #include <map>
15 #include <list>
16 #include <set>
17 #include <string>
18 
19 #include <unistd.h>
20 #include <sys/types.h>
21 
22 #define gp_assert(cond) arb_assert(cond)
23 
24 using namespace std;
25 
26 #if defined(DEBUG)
27 // #define CREATE_DEBUG_FILES
28 // #define DUMP_OVERLAP_CALC
29 #endif // DEBUG
30 
31 // --------------------------------------------------------------------------------
32 
// Global counters used to build the unique internal names "n<hex>", "s<hex>" and "i<hex>".
// Each counter is post-incremented when a name is generated, i.e. its current
// value equals the number of names of that kind generated so far.
33 static int gene_counter = 0; // non-split genes
34 static int split_gene_counter = 0; // split genes
35 static int intergene_counter = 0; // intergene regions
36 
// Strict ordering for the 'const char*' keys of FullNameMap.
struct nameOrder {
    bool operator()(const char *name1, const char *name2) const {
        // Normally it is sufficient to have any order, as long as it is strict.
        // But for UNIT_TESTS we need a reproducible order, which does not
        // depend on memory layout of DB elements.
#if defined(UNIT_TESTS) // UT_DIFF
        return strcmp(name1, name2)<0; // slow, determined by species names
#else
        // Order by address: fast, but depends on memory layout (e.g. on MEMORY_TEST in gb_memory.h).
        // std::less is used instead of subtracting the pointers, because pointer
        // subtraction is only defined for pointers into the same array, whereas
        // std::less guarantees a strict total order for arbitrary pointers.
        return std::less<const char*>()(name1, name2);
#endif
    }
};
49 
50 typedef map<const char *, string, nameOrder> FullNameMap;
52 
53 // --------------------------------------------------------------------------------
54 
// Closed interval [begin, end] of positions inside a genome.
struct PositionPair {
    int begin; // both positions are expected in range [0 .. genome_length-1]
    int end;

    static int genome_length; // upper limit used by check_legal()

#if defined(DEBUG)
    // Asserts that the interval is non-empty and lies inside the genome.
    void check_legal() const {
        gp_assert(begin >= 0);
        gp_assert(begin <= end);
        gp_assert(end < genome_length);
    }
#endif // DEBUG

    // Default-constructed pairs hold the marker values (-1, -1) and are not checked.
    PositionPair()
        : begin(-1),
          end(-1)
    {}

    PositionPair(int begin_, int end_)
        : begin(begin_),
          end(end_)
    {
#if defined(DEBUG)
        check_legal();
#endif // DEBUG
    }

    // Number of positions covered (both borders are inclusive).
    int length() const {
        return (end - begin) + 1;
    }

    // True if both intervals share at least one position.
    bool overlapsWith(const PositionPair& other) const {
#if defined(DEBUG)
        check_legal();
        other.check_legal();
#endif // DEBUG
        const bool starts_before_other_ends = (begin <= other.end);
        const bool ends_after_other_starts  = (other.begin <= end);
        return ends_after_other_starts && starts_before_other_ends;
    }

#if defined(DUMP_OVERLAP_CALC)
    // Prints the interval, prefixed by 'note'.
    void dump(const char *note) const {
        printf("%s begin=%i end=%i\n", note, begin, end);
    }
#endif // DUMP_OVERLAP_CALC
};
92 
94 
95 typedef list<PositionPair> PositionPairList;
96 
97 struct ltNonOverlap {
98  // sorting with this operator identifies all overlapping PositionPair's as "equal"
99  bool operator ()(const PositionPair& p1, const PositionPair& p2) const {
100  return p1.end < p2.begin;
101  }
102 };
103 
// Members of class GenePositionMap (named by the constructor below; the opening
// line of the class is not part of this listing).
// The class collects the genome ranges used by genes and keeps overlap statistics.
105  typedef set<PositionPair, ltNonOverlap> OverlappingGeneSet;
106 
107  OverlappingGeneSet usedRanges; // known ranges; ltNonOverlap treats overlapping ranges as equal
108  unsigned long overlapSize; // value reported by getOverlap()
109  unsigned long geneSize; // value reported by getAllGeneSize()
110 public:
111  GenePositionMap() : overlapSize(0), geneSize(0) {}
112 
// registers the range of one gene (or of one part of a split gene)
113  void announceGene(PositionPair gene);
// fills 'intergeneList' with ranges of 'wholeGenome'; returns an error message or NULp
114  GB_ERROR buildIntergeneList(const PositionPair& wholeGenome, PositionPairList& intergeneList) const;
115  unsigned long getOverlap() const { return overlapSize; }
116  unsigned long getAllGeneSize() const { return geneSize; }
117 
118 #if defined(DUMP_OVERLAP_CALC)
119  void dump() const;
120 #endif // DUMP_OVERLAP_CALC
121 };
122 
123 // ____________________________________________________________
124 // start of implementation of class GenePositionMap:
125 
// Body of GenePositionMap::announceGene(PositionPair gene); the defining line
// itself is not part of this listing.
// Registers 'gene' in usedRanges. If 'gene' overlaps already known ranges, these
// are removed one by one and merged with 'gene' into one combined range, which
// is inserted at the end. overlapSize and geneSize are updated while merging.
127  OverlappingGeneSet::iterator found = usedRanges.find(gene);
128  if (found == usedRanges.end()) { // gene does not overlap with currently known ranges
129  usedRanges.insert(gene); // add to known ranges
130  }
131  else {
132  // 'found' overlaps with 'gene'
133  int gene_length = gene.length();
134 
135  do {
136  gp_assert(gene.overlapsWith(*found));
137 
138  gene = PositionPair(min(found->begin, gene.begin), max(found->end, gene.end)); // calc combined range
139  int combined_length = gene.length();
140 
// number of positions shared by 'found' and the range merged so far
141  size_t overlap = (found->length()+gene_length)-combined_length;
142  overlapSize += overlap;
// NOTE(review): geneSize is only increased in this overlap branch (not for genes
// inserted above without overlap) and, from the 2nd iteration on, adds the length
// of the already merged range - confirm that this is the intended statistic.
143  geneSize += gene_length;
144 
145  usedRanges.erase(found);
146 
147  gene_length = combined_length; // continue with the merged range
148  found = usedRanges.find(gene); // search for further overlaps
149  } while (found != usedRanges.end());
150 
151  usedRanges.insert(gene); // insert the combined range
152  }
153 }
154 
// Body of GenePositionMap::buildIntergeneList(const PositionPair& wholeGenome,
// PositionPairList& intergeneList) const; the defining line itself is not part
// of this listing.
// Appends to 'intergeneList' every range of 'wholeGenome' which is not covered
// by a range stored in usedRanges (before the first, between neighbours, after
// the last). Returns NULp on success or an error message if two neighbouring
// ranges overlap.
156  OverlappingGeneSet::iterator end = usedRanges.end();
157  OverlappingGeneSet::iterator curr = usedRanges.begin();
158  OverlappingGeneSet::iterator prev = end;
159 
160  if (curr == end) { // nothing defined -> use whole genome as one big intergene
161  intergeneList.push_back(wholeGenome);
162  }
163  else {
164  if (curr->begin > wholeGenome.begin) { // intergene before first gene range ?
165  intergeneList.push_back(PositionPair(wholeGenome.begin, curr->begin-1));
166  }
167 
168  prev = curr; ++curr;
169 
// walk over all pairs of neighbouring ranges (prev, curr)
170  while (curr != end) {
171  if (prev->end < curr->begin) {
172  if (prev->end != (curr->begin-1)) { // not directly adjacent
173  intergeneList.push_back(PositionPair(prev->end+1, curr->begin-1));
174  }
175  }
176  else {
177  return "Internal error: Overlapping gene ranges";
178  }
179 
180  prev = curr; ++curr;
181  }
182 
// here 'prev' refers to the last stored range: intergene behind it ?
183  if (prev != end && prev->end < wholeGenome.end) {
184  intergeneList.push_back(PositionPair(prev->end+1, wholeGenome.end));
185  }
186  }
187  return NULp;
188 }
189 
#if defined(DUMP_OVERLAP_CALC)
// Debugging aid: prints all stored ranges followed by the accumulated overlap.
void GenePositionMap::dump() const {
    printf("List of ranges used by genes:\n");

    OverlappingGeneSet::iterator range = usedRanges.begin();
    while (range != usedRanges.end()) {
        range->dump("- ");
        ++range;
    }

    printf("Overlap: %lu bases\n", getOverlap());
}
#endif // DUMP_OVERLAP_CALC
199 
200 // -end- of implementation of class GenePositionMap.
201 
202 static GB_ERROR create_data_entry(GBDATA *gb_species2, const char *sequence, int seqlen) {
203  GB_ERROR error = NULp;
204  char *gene_sequence = new char[seqlen+1];
205 
206  memcpy(gene_sequence, sequence, seqlen); // @@@ FIXME: avoid this copy!
207  gene_sequence[seqlen] = 0;
208 
209  GBDATA *gb_ali = GB_create_container(gb_species2, "ali_ptgene");
210  if (!gb_ali) error = GB_await_error();
211  else error = GBT_write_string(gb_ali, "data", gene_sequence);
212 
213  delete [] gene_sequence;
214  return error;
215 }
216 
#if defined(DEBUG)
// Asserts that 'name' contains no unescaped ';'.
// A backslash escapes the character following it (i.e. "\\;" and "\\\\" are accepted).
static void CHECK_SEMI_ESCAPED(const char *name) {
    while (*name) {
        gp_assert(*name != ';'); // oops, unescaped ';'
        // Skip the escaped character - but never step over the terminating 0
        // (a trailing backslash would otherwise cause a read behind the string).
        if (*name == '\\' && name[1]) ++name;
        ++name;
    }
}
#else
#define CHECK_SEMI_ESCAPED(s)
#endif // DEBUG
229 
230 
231 static GBDATA *create_gene_species(GBDATA *gb_species_data2, const char *internal_name, const char *long_name, int abspos, const char *sequence, int length) {
232  // Note: 'sequence' is not necessarily 0-terminated!
233 
234 #if defined(DEBUG)
235  const char *firstSem = strchr(long_name, ';');
236  gp_assert(firstSem);
237  CHECK_SEMI_ESCAPED(firstSem+1);
238 #endif // DEBUG
239 
240  GB_ERROR error = GB_push_transaction(gb_species_data2);
241  GBDATA *gb_species2 = NULp;
242 
243  if (!error) {
244  gb_species2 = GB_create_container(gb_species_data2, "species");
245  if (!gb_species2) error = GB_await_error();
246  }
247 
248  if (!error) {
249  GBDATA *gb_name = GB_create(gb_species2, "name", GB_STRING);
250 
251  if (!gb_name) error = GB_await_error();
252  else {
253  error = GB_write_string(gb_name, internal_name);
254  if (!error) {
255  const char *static_internal_name = GB_read_char_pntr(gb_name); // use static copy from db as map-index (internal_name is temporary)
256  error = create_data_entry(gb_species2, sequence, length);
257  if (!error) {
258  names[static_internal_name] = long_name;
259  error = GBT_write_int(gb_species2, "abspos", abspos);
260  }
261  }
262  }
263  }
264 
265  error = GB_end_transaction(gb_species_data2, error);
266 
267  if (error) { // be more verbose :
268  error = GBS_global_string("%s (internal_name='%s', long_name='%s')", error, internal_name, long_name);
269  GB_export_error(error);
270  gb_species2 = NULp;
271  }
272 
273  return gb_species2;
274 }
275 
276 static GB_ERROR create_genelike_entry(const char *internal_name, GBDATA *gb_species_data2, int start_pos, int end_pos, const char *ali_genome, const char *long_name) {
277  GBDATA *gb_genespecies = create_gene_species(gb_species_data2, internal_name, long_name, start_pos, ali_genome+start_pos, end_pos-start_pos+1);
278  return gb_genespecies ? NULp : GB_await_error();
279 }
280 
281 static GB_ERROR create_intergene(GBDATA *gb_species_data2, int start_pos, int end_pos, const char *ali_genome, const char *long_gene_name) {
282  if (start_pos <= end_pos) {
283  char internal_name[128];
284  sprintf(internal_name, "i%x", intergene_counter++);
285  return create_genelike_entry(internal_name, gb_species_data2, start_pos, end_pos, ali_genome, long_gene_name);
286  }
287  return "Illegal inter-gene positions (start behind end)";
288 }
289 
290 static GB_ERROR create_gene(GBDATA *gb_species_data2, int start_pos, int end_pos, const char *ali_genome, const char *long_gene_name) {
291  if (start_pos <= end_pos) {
292  char internal_name[128];
293  sprintf(internal_name, "n%x", gene_counter++);
294  return create_genelike_entry(internal_name, gb_species_data2, start_pos, end_pos, ali_genome, long_gene_name);
295  }
296  return "Illegal gene positions (start behind end)";
297 }
298 
299 static GB_ERROR create_split_gene(GBDATA *gb_species_data2, PositionPairList& part_list, const char *ali_genome, const char *long_gene_name) {
300  GB_ERROR error = NULp;
301  PositionPairList::iterator list_end = part_list.end();
302 
303  int gene_size = 0;
304  for (PositionPairList::iterator part = part_list.begin(); part != list_end; ++part) {
305  int part_size = part->end-part->begin+1;
306  gp_assert(part_size > 0);
307  gene_size += part_size;
308  }
309  gp_assert(gene_size > 0);
310  char *gene_sequence = new char[gene_size+1];
311  int gene_off = 0;
312 
313  char *split_pos_list = NULp; // contains split information: 'gene pos of part2,abs pos of part2;gene pos of part3,abs pos of part3;...'
314 
315  for (PositionPairList::iterator part = part_list.begin(); part != list_end;) {
316  int part_size = part->end-part->begin+1;
317  int genome_pos = part->begin;
318  memcpy(gene_sequence+gene_off, ali_genome+part->begin, part_size);
319  gene_off += part_size;
320 
321  ++part;
322 
323  if (!split_pos_list) { // first part
324  split_pos_list = GBS_global_string_copy("%i", gene_off); // gene offset of part 2
325  }
326  else { // next parts
327  char *next_split_pos_list;
328  if (part != list_end) { // not last
329  next_split_pos_list = GBS_global_string_copy("%s,%i;%i", split_pos_list, genome_pos, gene_off);
330  }
331  else { // last part
332  next_split_pos_list = GBS_global_string_copy("%s,%i", split_pos_list, genome_pos);
333  }
334  freeset(split_pos_list, next_split_pos_list);
335  }
336  }
337 
338  char internal_name[128];
339  sprintf(internal_name, "s%x", split_gene_counter++);
340 
341  const PositionPair& first_part = part_list.front();
342  GBDATA *gb_species2 = create_gene_species(gb_species_data2, internal_name, long_gene_name, first_part.begin,
343  gene_sequence, first_part.end-first_part.begin+1);
344 
345  if (!gb_species2) error = GB_await_error();
346  else {
347 #if defined(DEBUG) && 0
348  printf("split gene: long_gene_name='%s' internal_name='%s' split_pos_list='%s'\n",
349  long_gene_name, internal_name, split_pos_list);
350 #endif // DEBUG
351  error = GBT_write_string(gb_species2, "splitpos", split_pos_list);
352  }
353 
354  free(split_pos_list);
355  delete [] gene_sequence;
356 
357  return error;
358 }
359 
// Reads the location of 'gb_gene' and appends one PositionPair per location part
// to 'part_list'. Returns NULp on success or the error of GEN_read_position().
360 static GB_ERROR scan_gene_positions(GBDATA *gb_gene, PositionPairList& part_list) {
361  GB_ERROR error = NULp;
362  GEN_position *location = GEN_read_position(gb_gene);
363 
364  if (!location) error = GB_await_error();
365  else {
367  int parts = location->parts;
368  for (int p = 0; p<parts; ++p) {
// stored positions are decremented by 1 (presumably they are 1-based, while
// PositionPair uses positions starting at 0 - confirm against GEN_position)
369  part_list.push_back(PositionPair(location->start_pos[p]-1, location->stop_pos[p]-1));
370  }
371  GEN_free_position(location);
372  }
373  return error;
374 }
375 
376 static GB_ERROR insert_genes_of_organism(GBDATA *gb_organism, GBDATA *gb_species_data2) {
377  // insert all genes of 'gb_organism' as pseudo-species
378  // into new 'species_data' (gb_species_data2)
379 
380  GB_ERROR error = NULp;
381  const char *organism_name = GBT_get_name(gb_organism);
382 
383  GenePositionMap geneRanges;
384 
385  int gene_counter_old = gene_counter; // used for statistics only (see end of function)
386  int split_gene_counter_old = split_gene_counter;
387  int intergene_counter_old = intergene_counter;
388 
389  GBDATA *gb_ali_genom = GBT_find_sequence(gb_organism, GENOM_ALIGNMENT);
390  gp_assert(gb_ali_genom); // existence has to be checked by caller!
391 
392  const char *ali_genom = GB_read_char_pntr(gb_ali_genom);
393  if (!ali_genom) error = GB_await_error();
394  PositionPair::genome_length = GB_read_count(gb_ali_genom); // this affects checks in PositionPair
395 
396  if (!organism_name && !error) {
397  error = "encountered invalid organism (lacks 'name' entry)";
398  }
399 
400  for (GBDATA *gb_gene = GEN_first_gene(gb_organism);
401  gb_gene && !error;
402  gb_gene = GEN_next_gene(gb_gene))
403  {
404  const char *gene_name = GBT_get_name(gb_gene);
405 
406  PositionPairList part_list;
407  error = scan_gene_positions(gb_gene, part_list);
408 
409  if (!error && !gene_name) error = "encountered invalid gene (lacks 'name' entry)";
410  if (!error && part_list.empty()) error = "empty position list";
411  if (!error) {
412  int split_count = part_list.size();
413  PositionPair first_part = *part_list.begin();
414 
415  if (!error) {
416  char *esc_gene_name = GBS_escape_string(gene_name, ";", '\\');
417  char *long_gene_name = GBS_global_string_copy("%s;%s", organism_name, esc_gene_name);
418  if (split_count == 1) { // normal gene
419  error = create_gene(gb_species_data2, first_part.begin, first_part.end, ali_genom, long_gene_name);
420  geneRanges.announceGene(first_part);
421  }
422  else { // split gene
423  error = create_split_gene(gb_species_data2, part_list, ali_genom, long_gene_name);
424 
425  for (PositionPairList::iterator p = part_list.begin(); p != part_list.end(); ++p) {
426  geneRanges.announceGene(*p);
427  }
428  }
429  free(long_gene_name);
430  free(esc_gene_name);
431  }
432  }
433 
434  if (error && gene_name) error = GBS_global_string("in gene '%s': %s", gene_name, error);
435  }
436 
437  if (!error) { // add intergenes
438  PositionPairList intergenes;
439  PositionPair wholeGenome(0, PositionPair::genome_length-1);
440  error = geneRanges.buildIntergeneList(wholeGenome, intergenes);
441 
442  for (PositionPairList::iterator i = intergenes.begin(); !error && i != intergenes.end(); ++i) {
443  char *long_intergene_name = GBS_global_string_copy("%s;intergene_%i_%i", organism_name, i->begin, i->end);
444  error = create_intergene(gb_species_data2, i->begin, i->end, ali_genom, long_intergene_name);
445  free(long_intergene_name);
446  }
447  }
448 
449  if (error && organism_name) error = GBS_global_string("in organism '%s': %s", organism_name, error);
450 
451  if (!error) {
452  int new_genes = gene_counter-gene_counter_old; // only non-split genes
453  int new_split_genes = split_gene_counter-split_gene_counter_old;
454  int new_intergenes = intergene_counter-intergene_counter_old;
455 
456  unsigned long genesSize = geneRanges.getAllGeneSize();
457  unsigned long overlaps = geneRanges.getOverlap();
458  double data_grow = overlaps/double(PositionPair::genome_length)*100;
459  double gene_overlap = overlaps/double(genesSize)*100;
460 
461  if (new_split_genes) {
462 
463  printf(" - %s: %i genes (%i split), %i intergenes",
464  organism_name, new_genes+new_split_genes, new_split_genes, new_intergenes);
465  }
466  else {
467  printf(" - %s: %i genes, %i intergenes",
468  organism_name, new_genes, new_intergenes);
469  }
470  printf(" (data grow: %5.2f%%, gene overlap: %5.2f%%=%lu bp)\n", data_grow, gene_overlap, overlaps);
471  }
472 
473 #if defined(DUMP_OVERLAP_CALC)
474  geneRanges.dump();
475 #endif // DUMP_OVERLAP_CALC
476 
477  return error;
478 }
479 
// Program entry: arb_gene_probe input_database output_database
//
// Opens the input database, creates a second 'species_data' container and fills
// it (via insert_genes_of_organism) with one pseudo-species per gene and intergene
// of every species having data in GENOM_ALIGNMENT. Afterwards the old
// 'species_data' is deleted, the collected name map is stored as
// "gene_map/map_string", "ali_ptgene" becomes the default alignment and the
// database is saved as 'output_database'.
// Returns EXIT_SUCCESS, or EXIT_FAILURE after printing an error message.
480 int ARB_main(int argc, char *argv[]) {
481 
482  printf("\n"
483  "arb_gene_probe 1.2 -- (C) 2003/2004 Lehrstuhl fuer Mikrobiologie - TU Muenchen\n"
484  "written by Tom Littschwager, Bernd Spanfelner, Conny Wolf, Ralf Westram.\n");
485 
486  if (argc != 3) {
487  printf("Usage: arb_gene_probe input_database output_database\n");
488  printf(" Prepares a genome database for Gene-PT-Server\n");
489  return EXIT_FAILURE;
490  }
491 
492  const char *inputname = argv[1];
493  const char *outputname = argv[2];
494 
495  // GBK_terminate("test-crash of arb_gene_probe");
496 
497  printf("Converting '%s' -> '%s' ..\n", inputname, outputname);
498 
499  GB_ERROR error = NULp;
500  GB_shell shell;
501  GBDATA *gb_main = GB_open(inputname, "rw"); // sets the root pointer
502  if (!gb_main) {
503  error = GBS_global_string("Database '%s' not found", inputname);
504  }
505  else {
506  GB_request_undo_type(gb_main, GB_UNDO_NONE); // disable arbdb builtin undo
507  GB_begin_transaction(gb_main);
508 
// NOTE(review): 'gb_species_data' (the old 'species_data' container) is used below,
// but its declaration is not part of this listing.
510  GBDATA *gb_species_data_new = GBT_create(gb_main, "species_data", 7); // create a second 'species_data' container
511 
512  if (!gb_species_data_new) error = GB_await_error();
513 
514  int non_ali_genom_species = 0;
515  int ali_genom_species = 0;
516 
// convert all species of the old container which have data in GENOM_ALIGNMENT
517  for (GBDATA *gb_species = GBT_first_species_rel_species_data(gb_species_data);
518  gb_species && !error;
519  gb_species = GBT_next_species(gb_species))
520  {
521  GBDATA *gb_ali_genom = GBT_find_sequence(gb_species, GENOM_ALIGNMENT);
522  if (!gb_ali_genom) {
523  // skip species w/o alignment 'GENOM_ALIGNMENT' (genome DBs often contain pseudo species)
524  ++non_ali_genom_species;
525  }
526  else {
527  error = insert_genes_of_organism(gb_species, gb_species_data_new);
528  ++ali_genom_species;
529  }
530  }
531 
532  if (non_ali_genom_species) {
533  printf("%i species had no alignment in '" GENOM_ALIGNMENT "' and have been skipped.\n", non_ali_genom_species);
534  }
535  if (!error && ali_genom_species == 0) {
536  error = "no species with data in alignment '" GENOM_ALIGNMENT "' were found";
537  }
538 
539  if (!error) {
540  printf("%i species had data in alignment '" GENOM_ALIGNMENT "'.\n"
541  "Found %i genes (%i were split) and %i intergene regions.\n",
542  ali_genom_species, gene_counter, split_gene_counter, intergene_counter);
543  }
544 
545  if (!error) {
546  error = GB_delete(gb_species_data); // delete first (old) 'species_data' container
547  }
548 
549  if (!error) {
550  // create map-string
// (format: "internal_name;long_name;" repeated for every entry of 'names')
551  char* map_string;
552  {
553  FullNameMap::iterator NameEnd = names.end();
554  FullNameMap::iterator NameIter;
555 
// 1st pass: calculate needed size (both names plus 2 separators per entry)
556  size_t mapsize = 0;
557  for (NameIter = names.begin(); NameIter != NameEnd; ++NameIter) {
558  mapsize += strlen(NameIter->first)+NameIter->second.length()+2;
559  }
560 
561  map_string = new char[mapsize+1];
562  size_t moff = 0;
563 
// 2nd pass: copy names and separators
564  for (NameIter = names.begin(); NameIter != NameEnd; ++NameIter) {
565  int len1 = strlen(NameIter->first);
566  int len2 = NameIter->second.length();
567 
568  memcpy(map_string+moff, NameIter->first, len1);
569  map_string[moff+len1] = ';';
570  moff += len1+1;
571 
572  memcpy(map_string+moff, NameIter->second.c_str(), len2);
573  map_string[moff+len2] = ';';
574  moff += len2+1;
575  }
576  map_string[moff] = 0;
577 
578  gp_assert(moff <= mapsize);
579  }
580 
581  GBDATA *gb_gene_map = GB_create_container(gb_main, "gene_map");
582  if (!gb_gene_map) error = GB_await_error();
583  else error = GBT_write_string(gb_gene_map, "map_string", map_string);
584 
585  delete [] map_string;
586  }
587 
588  if (!error) {
589  // set default alignment for pt_server
590  error = GBT_set_default_alignment(gb_main, "ali_ptgene");
591 
592  if (!error) {
593  GBDATA *gb_use = GB_search(gb_main, "presets/alignment/alignment_name", GB_STRING);
594  if (!gb_use) error = GB_await_error();
595  else {
596  GB_topSecurityLevel unsecured(gb_main);
597  error = GB_write_string(gb_use, "ali_ptgene");
598  }
599  }
600  }
601 
602  error = GB_end_transaction(gb_main, error);
603 
604  if (!error) {
605  printf("Saving '%s' ..\n", outputname);
606  error = GB_save_as(gb_main, outputname, "bfm");
607  if (error) unlink(outputname); // remove the output file if saving failed
608  }
609 
610  GB_close(gb_main);
611  }
612 
613  if (error) {
614  printf("Error in arb_gene_probe: %s\n", error);
615  return EXIT_FAILURE;
616  }
617 
618  printf("arb_gene_probe done.\n");
619  return EXIT_SUCCESS;
620 }
621 
GB_ERROR GB_begin_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2516
int length() const
const char * GB_ERROR
Definition: arb_core.h:25
static GB_ERROR create_gene(GBDATA *gb_species_data2, int start_pos, int end_pos, const char *ali_genome, const char *long_gene_name)
static int split_gene_counter
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
static GB_ERROR create_data_entry(GBDATA *gb_species2, const char *sequence, int seqlen)
GBDATA * GEN_next_gene(GBDATA *gb_gene)
Definition: adGene.cxx:138
unsigned long getAllGeneSize() const
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
Definition: arbdb.cxx:1385
void announceGene(PositionPair gene)
void GEN_free_position(GEN_position *pos)
Definition: adGene.cxx:195
static int gene_counter
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
Definition: arbdb.cxx:2549
int parts
Definition: adGene.h:37
static GB_ERROR create_genelike_entry(const char *internal_name, GBDATA *gb_species_data2, int start_pos, int end_pos, const char *ali_genome, const char *long_name)
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
STL namespace.
static GB_ERROR scan_gene_positions(GBDATA *gb_gene, PositionPairList &part_list)
void GEN_sortAndMergeLocationParts(GEN_position *location)
Definition: adGene.cxx:497
char * GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char)
Definition: adstring.cxx:124
static int intergene_counter
#define EXIT_SUCCESS
Definition: arb_a2ps.c:154
map< const char *, string, nameOrder > FullNameMap
GB_ERROR GB_push_transaction(GBDATA *gbd)
Definition: arbdb.cxx:2482
static int genome_length
GB_ERROR GB_delete(GBDATA *&source)
Definition: arbdb.cxx:1904
GBDATA * GBT_first_species_rel_species_data(GBDATA *gb_species_data)
Definition: aditem.cxx:121
PositionPair(int begin_, int end_)
#define CHECK_SEMI_ESCAPED(s)
static FullNameMap names
static GB_ERROR insert_genes_of_organism(GBDATA *gb_organism, GBDATA *gb_species_data2)
GB_ERROR GB_export_error(const char *error)
Definition: arb_msg.cxx:259
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
GBDATA * GB_create_container(GBDATA *father, const char *key)
Definition: arbdb.cxx:1827
long GB_read_count(GBDATA *gbd)
Definition: arbdb.cxx:752
GB_ERROR buildIntergeneList(const PositionPair &wholeGenome, PositionPairList &intergeneList) const
bool overlapsWith(const PositionPair &other) const
GBDATA * GB_create(GBDATA *father, const char *key, GB_TYPES type)
Definition: arbdb.cxx:1779
GBDATA * gb_species_data
Definition: adname.cxx:34
GB_ERROR GBT_set_default_alignment(GBDATA *gb_main, const char *alignment_name)
Definition: adali.cxx:680
GB_ERROR GB_save_as(GBDATA *gbd, const char *path, const char *savetype)
size_t * stop_pos
Definition: adGene.h:40
static void error(const char *msg)
Definition: mkptypes.cxx:96
static GB_ERROR create_split_gene(GBDATA *gb_species_data2, PositionPairList &part_list, const char *ali_genome, const char *long_gene_name)
#define GENOM_ALIGNMENT
Definition: adGene.h:19
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
Definition: adali.cxx:670
#define EXIT_FAILURE
Definition: arb_a2ps.c:157
list< PositionPair > PositionPairList
int ARB_main(int argc, char *argv[])
unsigned long getOverlap() const
GB_ERROR GBT_write_string(GBDATA *gb_container, const char *fieldpath, const char *content)
Definition: adtools.cxx:451
#define gp_assert(cond)
GBDATA * GBT_create(GBDATA *father, const char *key, long delete_level)
Definition: adtools.cxx:26
size_t * start_pos
Definition: adGene.h:39
GBDATA * GEN_first_gene(GBDATA *gb_species)
Definition: adGene.cxx:130
GBDATA * GBT_next_species(GBDATA *gb_species)
Definition: aditem.cxx:128
#define NULp
Definition: cxxforward.h:97
const char * GBT_get_name(GBDATA *gb_item)
Definition: aditem.cxx:450
GB_ERROR GB_request_undo_type(GBDATA *gb_main, GB_UNDO_TYPE type) __ATTR__USERESULT_TODO
Definition: adindex.cxx:718
GB_ERROR GBT_write_int(GBDATA *gb_container, const char *fieldpath, long content)
Definition: adtools.cxx:471
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
GBDATA * gb_main
Definition: adname.cxx:33
static GBDATA * create_gene_species(GBDATA *gb_species_data2, const char *internal_name, const char *long_name, int abspos, const char *sequence, int length)
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
static GB_ERROR create_intergene(GBDATA *gb_species_data2, int start_pos, int end_pos, const char *ali_genome, const char *long_gene_name)
GEN_position * GEN_read_position(GBDATA *gb_gene)
Definition: adGene.cxx:250
size_t length
#define min(a, b)
Definition: f2c.h:153
bool operator()(const char *name1, const char *name2) const
char * GBS_global_string_copy(const char *templat,...)
Definition: arb_msg.cxx:195
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:649
GBDATA * GBT_get_species_data(GBDATA *gb_main)
Definition: aditem.cxx:105
#define max(a, b)
Definition: f2c.h:154