ARB
PT_etc.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : PT_etc.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "probe.h"
12 
13 #include <PT_server_prototypes.h>
14 #include "pt_prototypes.h"
15 
16 #include <struct_man.h>
17 #include <arb_strbuf.h>
18 
19 void pt_export_error(PT_local *locs, const char *error) {
20  freedup(locs->ls_error, error);
21 }
23  if (error) pt_export_error(locs, error.deliver());
24  else error.expect_no_error();
25 }
26 
27 static const gene_struct *get_gene_struct_by_internal_gene_name(const char *gene_name) {
28  gene_struct to_search(gene_name, "", "");
29 
30  gene_struct_index_internal::const_iterator found = gene_struct_internal2arb.find(&to_search);
31  return (found == gene_struct_internal2arb.end()) ? NULp : *found;
32 }
33 static const gene_struct *get_gene_struct_by_arb_species_gene_name(const char *species_gene_name) {
34  const char *slash = strchr(species_gene_name, '/');
35  if (!slash) {
36  fprintf(stderr, "Internal error: '%s' should be in format 'organism/gene'\n", species_gene_name);
37  return NULp;
38  }
39 
40  int slashpos = slash-species_gene_name;
41  char *organism = ARB_strdup(species_gene_name);
42  organism[slashpos] = 0;
43 
44  gene_struct to_search("", organism, species_gene_name+slashpos+1);
45  free(organism);
46 
47  gene_struct_index_arb::const_iterator found = gene_struct_arb2internal.find(&to_search);
48  return (found == gene_struct_arb2internal.end()) ? NULp : *found;
49 }
50 
51 static const char *arb2internal_name(const char *name) {
52  // convert arb name ('species/gene') into internal shortname
54  return found ? found->get_internal_gene_name() : NULp;
55 }
56 
57 const char *virt_name(const PT_probematch *ml) {
58  // get the name with a virtual function
59  if (gene_flag) {
61  return gs ? gs->get_arb_species_name() : "<cantResolveName>";
62  }
63  else {
64  pt_assert(psg.data[ml->name].get_shortname());
65  return psg.data[ml->name].get_shortname();
66  }
67 }
68 
69 const char *virt_fullname(const PT_probematch * ml) {
70  if (gene_flag) {
72  return gs ? gs->get_arb_gene_name() : "<cantResolveGeneFullname>";
73  }
74  else {
75  return psg.data[ml->name].get_fullname() ? psg.data[ml->name].get_fullname() : "<undefinedFullname>";
76  }
77 }
78 
79 #define MAX_LIST_PART_SIZE 50
80 
81 static const char *get_list_part(const char *list, int& offset) {
82  // scans strings with format "xxxx#yyyy#zzzz"
83  // your first call should be with offset == 0
84  //
85  // returns : static copy of each part or 0 when done
86  // offset is incremented by this function and set to -1 when all parts were returned
87 
88  static char buffer[2][MAX_LIST_PART_SIZE+1];
89  static int curr_buff = 0; // toggle buffer to allow 2 parallel gets w/o invalidation
90 
91  if (offset<0) return NULp; // already done
92  curr_buff ^= 1; // toggle buffer
93 
94  const char *numsign = strchr(list+offset, '#');
95  int num_offset;
96  if (numsign) {
97  num_offset = numsign-list;
98  pt_assert(list[num_offset] == '#');
99  }
100  else { // last list part
101  num_offset = offset+strlen(list+offset);
102  pt_assert(list[num_offset] == 0);
103  }
104 
105  // now num_offset points to next '#' or to end-of-string
106 
107  int len = num_offset-offset;
109 
110  memcpy(buffer[curr_buff], list+offset, len);
111  buffer[curr_buff][len] = 0; // EOS
112 
113  offset = (list[num_offset] == '#') ? num_offset+1 : -1; // set offset for next part
114 
115  return buffer[curr_buff];
116 }
117 
118 #undef MAX_LIST_PART_SIZE
119 
120 char *ptpd_read_names(PT_local *locs, const char *names_list, const char *checksums, ARB_ERROR& error) {
121  /* read the name list separated by '#' and set the flag for the group members,
122  + returns a list of names which have not been found
123  */
124 
125  // clear 'is_group'
126  for (int i = 0; i < psg.data_count; i++) {
127  psg.data[i].set_group_state(0); // Note: probes are designed for species with is_group == 1
128  }
129  locs->group_count = 0;
130 
131  error = NULp;
132 
133  if (!names_list) {
134  error = "Can't design probes for no species (species list is empty)";
135  return NULp;
136  }
137 
138  int noff = 0;
139  int coff = 0;
140 
141  GBS_strstruct *not_found = NULp;
142 
143  while (noff >= 0) {
144  pt_assert(coff >= 0); // otherwise 'checksums' contains less elements than 'names_list'
145  const char *arb_name = get_list_part(names_list, noff);
146  const char *internal_name = arb_name; // differs only for gene pt server
147 
148  if (arb_name[0] == 0) {
149  pt_assert(names_list[0] == 0);
150  break; // nothing marked
151  }
152 
153  if (gene_flag) {
154  const char *slash = strchr(arb_name, '/');
155 
156  if (!slash) {
157  // ARB has to send 'species/gene'.
158  // If it did not, user did not mark 'Gene probes ?' flag
159 
160  error = GBS_global_string("Expected '/' in '%s' (this PT-server can only design probes for genes)", arb_name);
161  break;
162  }
163 
164  internal_name = arb2internal_name(arb_name);
165  pt_assert(internal_name);
166  }
167 
168  int idx = GBS_read_hash(psg.namehash, internal_name);
169  bool found = false;
170 
171  if (idx) {
172  --idx; // because 0 means not found!
173 
174  if (checksums) {
175  const char *checksum = get_list_part(checksums, coff);
176  // if sequence checksum changed since pt server was updated -> not found
177  found = atol(checksum) == psg.data[idx].get_checksum();
178  }
179  else {
180  found = true;
181  }
182 
183  if (found) {
184  psg.data[idx].set_group_state(1); // mark
185  locs->group_count++;
186  }
187  }
188 
189  if (!found) { // name not found -> put into result
190  if (!not_found) not_found = new GBS_strstruct(1000);
191  else not_found->put('#');
192  not_found->cat(arb_name);
193  }
194  }
195 
196  char *result = NULp;
197  if (not_found && !error) {
198  result = not_found->release();
199  }
200  delete not_found;
201  return result;
202 }
203 
204 bytestring *PT_unknown_names(const PT_pdc *pdc) {
205  PT_local *locs = (PT_local*)pdc->mh.parent->parent;
206  static bytestring unknown = { NULp, 0 };
207  delete unknown.data;
208 
210  unknown.data = ptpd_read_names(locs, pdc->names.data, pdc->checksums.data, error);
211  if (unknown.data) {
212  unknown.size = strlen(unknown.data) + 1;
213  pt_assert(!error);
214  }
215  else {
216  unknown.data = ARB_strdup("");
217  unknown.size = 1;
218  }
219  pt_export_error_if(locs, error);
220  return &unknown;
221 }
222 
struct probe_input_data * data
Definition: probe.h:356
string result
void pt_export_error_if(PT_local *locs, ARB_ERROR &error)
Definition: PT_etc.cxx:22
const char * get_fullname() const
Definition: probe.h:177
static const char * arb2internal_name(const char *name)
Definition: PT_etc.cxx:51
probe_struct_global psg
Definition: PT_main.cxx:36
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
static const char * get_list_part(const char *list, int &offset)
Definition: PT_etc.cxx:81
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
char * release()
Definition: arb_strbuf.h:129
GB_HASH * namehash
Definition: probe.h:353
void cat(const char *from)
Definition: arb_strbuf.h:199
const char * virt_name(const PT_probematch *ml)
Definition: PT_etc.cxx:57
char buffer[MESSAGE_BUFFERSIZE]
Definition: seq_search.cxx:34
gene_struct_index_internal gene_struct_internal2arb
Definition: PT_main.cxx:47
gene_struct_index_arb gene_struct_arb2internal
Definition: PT_main.cxx:46
GB_ERROR deliver() const
Definition: arb_error.h:116
const char * get_arb_gene_name() const
Definition: probe.h:437
static void error(const char *msg)
Definition: mkptypes.cxx:96
const char * get_shortname() const
Definition: probe.h:170
void expect_no_error() const
Definition: arb_error.h:138
static const gene_struct * get_gene_struct_by_internal_gene_name(const char *gene_name)
Definition: PT_etc.cxx:27
const char * virt_fullname(const PT_probematch *ml)
Definition: PT_etc.cxx:69
#define pt_assert(bed)
Definition: PT_tools.h:22
const char * get_arb_species_name() const
Definition: probe.h:436
long get_checksum() const
Definition: probe.h:204
void set_group_state(bool isGroupMember)
Definition: probe.h:220
static const gene_struct * get_gene_struct_by_arb_species_gene_name(const char *species_gene_name)
Definition: PT_etc.cxx:33
void pt_export_error(PT_local *locs, const char *error)
Definition: PT_etc.cxx:19
T_PT_LOCS locs
int gene_flag
Definition: PT_main.cxx:39
const char * get_internal_gene_name() const
Definition: probe.h:435
#define NULp
Definition: cxxforward.h:116
#define MAX_LIST_PART_SIZE
Definition: PT_etc.cxx:79
#define offset(field)
Definition: GLwDrawA.c:73
char * ptpd_read_names(PT_local *locs, const char *names_list, const char *checksums, ARB_ERROR &error)
Definition: PT_etc.cxx:120
bytestring * PT_unknown_names(const PT_pdc *pdc)
Definition: PT_etc.cxx:204
long GBS_read_hash(const GB_HASH *hs, const char *key)
Definition: adhash.cxx:392
void put(char c)
Definition: arb_strbuf.h:174