ARB
MP_sonde.cxx
Go to the documentation of this file.
1 // ============================================================= //
2 // //
3 // File : MP_sonde.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // ============================================================= //
10 
11 #include "MP_externs.hxx"
12 #include "MultiProbe.hxx"
13 
14 #include <aw_msg.hxx>
15 #include <arbdbt.h>
16 #include <client.h>
17 
18 #include <cmath>
19 #include <servercntrl.h>
20 
21 Sonde::Sonde(const char* bezeichner, int num_probes, int allowed_mis, double outside_mis) {
22  kennung = ARB_strdup(bezeichner);
23  bitkennung = NULp;
24  // fuer Basissonden haben die Bitvektoren noch nicht die Volle laenge, da noch nicht bekannt ist, wieviele Sonden eingetragen werden
25  hitliste = NULp;
26  length_hitliste = 0;
27  minelem = 0;
28  maxelem = 0;
29 
30  mp_assert(num_probes>0);
31 
32  Allowed_Mismatch = new long[num_probes];
33  Outside_Mismatch = new double[num_probes];
34  for (int i=0; i<num_probes; i++) { // LOOP_VECTORIZED=2[!>=910<11] // completely fails with 9.x and 10.x series
35  Allowed_Mismatch[i]=0;
36  Outside_Mismatch[i]=0;
37  }
38 
39  Allowed_Mismatch[0] = allowed_mis;
40  Outside_Mismatch[0] = outside_mis;
41 
42 
43 }
44 
46  int i;
47 
48  free(kennung);
49 
50  for (i=0; i<length_hitliste; i++) {
51  delete hitliste[i]; // Hits loeschen
52  }
53  delete [] hitliste;
54 
55  delete [] Allowed_Mismatch;
56  delete [] Outside_Mismatch;
57  delete bitkennung;
58 }
59 
60 void Sonde::print() {
61  printf("\nSonde %s\n------------------------------------------------\n", kennung);
62  bitkennung->print();
63  printf("Laenge hitliste %ld mit minelem %ld und maxelem %ld\n", length_hitliste, minelem, maxelem);
64  printf("Far %ld, Mor %ld, AllMM %ld, OutMM %f\n\n", kombi_far, kombi_mor, *Allowed_Mismatch, *Outside_Mismatch);
65 }
66 
67 
68 MO_Mismatch** Sonde::get_matching_species(int ptserver_id, bool match_also_revcompl, int match_weight, int match_mis, const char *match_seq, MO_Liste *convert, long *number_of_species, GB_ERROR& error) {
69  MO_Mismatch **ret_list = NULp;
70  const char *servername = arb_look_and_start_ptserver(AISC_MAGIC_NUMBER, ptserver_id, error);
71 
72  error = NULp;
73 
74  if (servername) {
75  char *match_name, *match_mismatches, *match_wmismatches;
76  T_PT_MATCHLIST match_list;
77  long match_list_cnt = -1;
78  bytestring bs;
79  int i = 0;
80 
81  // @@@ maybe DRY section below with similar section (in this directory)
82  mp_gl_struct mp_pd_gl;
83  mp_pd_gl.link = aisc_open(servername, mp_pd_gl.com, AISC_MAGIC_NUMBER, &error);
84  mp_pd_gl.locs.clear();
85 
86  if (!error && !mp_pd_gl.link) {
87  error = "Cannot contact Probe bank server";
88  }
89 
90  if (!error && MP_init_local_com_struct(mp_pd_gl) != 0) {
91  error = "Cannot contact Probe bank server (2)";
92  }
93 
94  if (!error &&
95  aisc_put(mp_pd_gl.link, PT_LOCS, mp_pd_gl.locs,
96  LOCS_MATCH_ALSO_REVCOMP, (long)match_also_revcompl, // also match reverse-complement?
97  LOCS_COMPLEMENT_FIRST, (long)0, // (use sequence passed below as is. do not complement it.)
98  LOCS_MATCH_SORT_BY, (long)match_weight, // Weighted
99  LOCS_MATCH_MAX_MISMATCHES, (long)match_mis, // Mismatches
100  LOCS_SEARCHMATCH, match_seq, // Sequence
101  NULp))
102  {
103  error = "Connection to PT_SERVER lost (4)";
104  }
105 
106  bs.data = NULp;
107  if (!error) {
108  char *locs_error = NULp;
109 
110  aisc_get(mp_pd_gl.link, PT_LOCS, mp_pd_gl.locs,
111  LOCS_MATCH_LIST, match_list.as_result_param(),
112  LOCS_MATCH_LIST_CNT, &match_list_cnt,
113  LOCS_MP_MATCH_STRING, &bs, // @@@ want unittest for this function
114  LOCS_ERROR, &locs_error,
115  NULp);
116 
117  if (locs_error[0]) {
118  error = GBS_static_string(locs_error);
119  }
120  free(locs_error);
121  }
122 
123  if (bs.data) {
124  char toksep[2];
125  toksep[0] = 1;
126  toksep[1] = 0;
127 
128  ret_list = new MO_Mismatch*[match_list_cnt];
129 
130  match_name = strtok(bs.data, toksep);
131  match_mismatches = strtok(NULp, toksep);
132  match_wmismatches = strtok(NULp, toksep);
133 
134  mp_assert(convert->get_mo_liste() != NULp); // failed to populate MO_Liste
135 
136  while (match_name && match_mismatches && match_wmismatches) {
137  ret_list[i] = new MO_Mismatch;
138  ret_list[i]->nummer = convert->get_index_by_entry(match_name);
139  if (match_weight == NON_WEIGHTED)
140  ret_list[i]->mismatch = atof(match_mismatches);
141  else // WEIGHTED und WEIGHTED_PLUS_POS
142  ret_list[i]->mismatch = atof(match_wmismatches);
143 
144 
145  match_name = strtok(NULp, toksep);
146  match_mismatches = strtok(NULp, toksep);
147  match_wmismatches = strtok(NULp, toksep);
148 
149  i++;
150  }
151  }
152  else {
153  error = "No matching species found.";
154  }
155 
156  *number_of_species = match_list_cnt;
157 
158  aisc_close(mp_pd_gl.link, mp_pd_gl.com);
159  free(bs.data);
160  }
161 
162  return ret_list;
163 }
164 
165 
166 double Sonde::check_for_min(long k, MO_Mismatch** probebacts, long laenge) {
167  long i = k+1;
168  double min;
169 
170  min = probebacts[k]->mismatch; // min ist gleich mismatch des ersten MOs
171  while ((i<laenge) && (probebacts[k]->nummer == probebacts[i]->nummer)) {
172  if (min > probebacts[i]->mismatch) {
173  // wenn min groesser ist als mismatch des naechsten MOs -> setze min auf groesse des naechsten
174  min = probebacts[i]->mismatch;
175  }
176  i++; // checke naechsten MO
177  }
178  return min;
179 }
180 
181 
182 
183 int Sonde::gen_Hitliste(MO_Liste *Bakterienliste) {
184  // Angewandt auf eine frische Sonde generiert diese Methode die Hitliste durch eine
185  // Anfrage an die Datenbank, wobei der Name der Sonde uebergeben wird
186 
187  MO_Mismatch** probebacts;
188  long i, k; // Zaehlervariable
189  long laenge = 0;
190  double mm_to_search = 0;
191  int mm_int_to_search = 0;
192 
193 
194  // DATENBANKAUFRUF
196  if (mm_to_search > (int) mm_to_search)
197  mm_int_to_search = (int) mm_to_search + 1;
198  else
199  mm_int_to_search = (int) mm_to_search;
200 
201  GB_ERROR error;
205  mm_int_to_search,
206  kennung,
207  Bakterienliste,
208  &laenge,
209  error);
210 
211  // ACHTUNG probebacts mit laenge enthaelt nur laenge-1 Eintraege von 0 bis laenge -2
212  if (!laenge || !probebacts) {
213  mp_assert(error);
214  aw_message(error);
215  if (!laenge) aw_message("This probe matches no species!");
216  if (!probebacts) {
217  aw_message("This probe matches no species!");
218  return 11;
219  }
220  return 1;
221  }
222  else {
223  mp_assert(!error);
224  }
225 
226  // Ptrliste ist Nullterminiert
227  // Sortieren des Baktnummernfeldes:
228 
229  heapsort(laenge, probebacts);
230 
231  double min_mm; // Minimaler Mismatch
232  // laenge ist die Anzahl der Eintraege in probebact
233  // Korrekturschleife, um Mehrfachtreffer auf das gleiche Bakterium abzufangen
234 
235  for (k=0; k < laenge-1; k++) {
236  if (probebacts[k]->nummer == probebacts[k+1]->nummer) {
237  min_mm = check_for_min(k, probebacts, laenge);
238  probebacts[k]->mismatch = min_mm;
239  while ((k<laenge-1) && (probebacts[k]->nummer == probebacts[k+1]->nummer)) {
240  probebacts[k+1]->mismatch = min_mm;
241  k++;
242  }
243  }
244  }
245 
246  // Das hier funktioniert, da Liste sortiert ist
247  minelem = probebacts[0]->nummer;
248  maxelem = probebacts[laenge-1]->nummer;
249 
250  // Probebacts besteht aus eintraegen der Art (Nummer, Mismatch)
251  hitliste = new Hit*[laenge+1];
252  for (i=0; i<laenge+1; i++)
253  hitliste[i]=NULp;
254 
255  for (i=0; i<laenge; i++) {
256  hitliste[i] = new Hit(probebacts[i]->nummer);
257  hitliste[i]->set_mismatch_at_pos(0, probebacts[i]->mismatch);
258  }
259  length_hitliste = laenge;
260 
261  // Loesche hitflags wieder
262  long bl_index = 0;
263  Bakt_Info** baktliste = Bakterienliste->get_mo_liste();
264  Bakt_Info** bl_elem = baktliste+1;
265  while (bl_elem[bl_index]) {
266  bl_elem[bl_index]->kill_flag();
267  bl_index++;
268  }
269  // Loeschen der Temps
270  for (i=0; i<laenge; i++) {
271  delete probebacts[i];
272  }
273  delete [] probebacts;
274  return 0;
275 }
276 
277 
278 
280  // Gibt Zeiger auf ein Hit Element zurueck, welches an Stelle index steht, vorerst nur zur Ausgabe gedacht
281  if (hitliste && (index < length_hitliste))
282  return hitliste[index];
283  else
284  return NULp;
285 }
286 
287 
288 
289 
290 void Sonde::heapsort(long feldlaenge, MO_Mismatch** Nr_Mm_Feld) {
291  // Heapsortfunktion, benutzt sink(), sortiert Feld von longs
292  long m=0, i=0;
293  MO_Mismatch* tmpmm;
294 
295  for (i=(feldlaenge-1)/2; i>-1; i--) {
296  sink(i, feldlaenge-1, Nr_Mm_Feld);
297  }
298  for (m=feldlaenge-1; m>0; m--) {
299  tmpmm = Nr_Mm_Feld[0];
300  Nr_Mm_Feld[0] = Nr_Mm_Feld[m];
301  Nr_Mm_Feld[m] = tmpmm;
302 
303  sink(0, m-1, Nr_Mm_Feld);
304  }
305 }
306 
307 void Sonde::sink(long i, long t, MO_Mismatch** A) {
308  // Algorithmus fuer den Heapsort
309  long j, k;
310  MO_Mismatch* tmpmm;
311 
312  j = 2*i;
313  k = j+1;
314  if (j <= t) {
315  if (A[i]->nummer >= A[j]->nummer) j = i;
316  if (k <= t && A[k]->nummer > A[j]->nummer) j = k;
317 
318  if (i != j) {
319  tmpmm = A[i]; A[i] = A[j]; A[j] = tmpmm;
320  sink(j, t, A);
321  }
322  }
323 }
324 
326  bitkennung = bv;
327 }
328 
329 
330 
331 // ########################################################################################################
332 /* Bakt_Info haengt in der MO_Liste drinnen. Hier werden u.a. die Hitflags gespeichert
333  */
334 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Methoden Bakt_Info~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
335 
336 Bakt_Info::Bakt_Info(const char* n) {
337  name = ARB_strdup(n); // MEL (match_name in mo_liste)
338  hit_flag = 0;
339 }
340 
342  free(name);
343  hit_flag = 0;
344 }
345 
346 
347 // ##########################################################################################################
348 // Hit speichert die Trefferinformation
349 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Methoden Hit
350 
351 Hit::Hit(long baktnummer) {
352  // Mismatch Array mit Laenge = anzahl Sonden in Experiment
353  int i=0;
354  mismatch = new double[mp_gl_awars.no_of_probes+1];
355  for (i=0; i<mp_gl_awars.no_of_probes+1; i++) // LOOP_VECTORIZED=2
356  mismatch[i]=101;
357 
358  baktid = baktnummer;
359 }
360 
362  delete [] mismatch;
363 }
364 
365 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
366 
367 // --------------------------------------------------------------------------------
368 
369 #ifdef UNIT_TESTS
370 #ifndef TEST_UNIT_H
371 #include <test_unit.h>
372 #endif
373 
374 #include <arb_strbuf.h>
375 
376 static char *create_list(const MO_Liste& list) {
377  GBS_strstruct buf(1000);
378  int count = list.get_laenge();
379  for (int c = 0; c<=count+1; c++) {
380  if (c) buf.put(',');
381  const char *name = list.get_entry_by_index(c);
382  buf.cat(name ? name : "(null)");
383  }
384  return buf.release();
385 }
386 
387 static char *create_list(MO_Mismatch **list, int count, const MO_Liste& spec) {
388  GBS_strstruct buf(1000);
389  for (int c = 0; c<count; c++) {
390  if (c) buf.put(',');
391  const MO_Mismatch* mm = list[c];
392  const char *name = spec.get_entry_by_index(mm->nummer);
393  buf.nprintf(30, "%s/%3.1f", name, mm->mismatch);
394  }
395  return buf.release();
396 }
397 
398 void TEST_get_matching_species() {
399  // tests ptserver functions 'MP_MATCH_STRING' + 'MP_ALL_SPECIES_STRING' (against regression)
400 
401  // test here runs versus database ../UNIT_TESTER/run/TEST_pt_src.arb
402  TEST_SETUP_GLOBAL_ENVIRONMENT("ptserver");
403 
404  GB_shell shell;
405  GBDATA *gb_main = GB_open("TEST_pt_src.arb", "rw");
406  TEST_REJECT_NULL(gb_main);
407 
408  Sonde s("some-probe", 5, 3, 20);
409 
410  GB_ERROR error;
411 
412  MO_Liste::set_gb_main(gb_main);
413  MO_Liste *Bakterienliste;
414  Bakterienliste = new MO_Liste;
415 
416  for (int pass = 0; pass<=1; ++pass) {
417  error = Bakterienliste->get_all_species(TEST_SERVER_ID);
418  TEST_EXPECT_NO_ERROR(error);
419 
420  // test content of 'Bakterienliste':
421  TEST_EXPECT_EQUAL(Bakterienliste->get_laenge(), 22);
422  TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(create_list(*Bakterienliste),
423  "(null),BcSSSS00,Bl0LLL00,ClnCorin,CltBotul,CPPParap,ClfPerfr,DlcTolu2,PbcAcet2,PbrPropi,Stsssola,DsssDesu,LgtLytic,DcdNodos,FrhhPhil,PsAAAA00,PslFlave,HllHalod,VbrFurni,VblVulni,VbhChole,AclPleur,PtVVVulg,(null)");
424 
425  if (pass == 0) { delete Bakterienliste; Bakterienliste = new MO_Liste; }
426  }
427 
428  for (int pass = 0; pass<=1; ++pass) {
429  long laenge = 0;
430  double mm_to_search = 0.0 + 1.0 + 0;
431  int mm_int_to_search = int(mm_to_search-0.000001)+1;
432 
433  MO_Mismatch** probebacts = s.get_matching_species(TEST_SERVER_ID,
434  1, // mp_gl_awars.complement,
435  2, // mp_gl_awars.weightedmismatches,
436  mm_int_to_search,
437  "atgatgatg", // kennung,
438  Bakterienliste,
439  &laenge,
440  error);
441 
442  TEST_EXPECT_NO_ERROR(error);
443 
444  // test content of probebacts:
445  TEST_EXPECT_EQUAL(laenge, 11);
446  TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(create_list(probebacts, laenge, *Bakterienliste),
447  "BcSSSS00/0.2,ClfPerfr/1.0,LgtLytic/1.0,FrhhPhil/1.0,ClfPerfr/1.1,VbrFurni/1.1,VblVulni/1.1,Bl0LLL00/1.1,AclPleur/1.2,VbrFurni/1.5,VblVulni/1.5");
448 
449  // cleanup
450  for (int i=0; i<laenge; i++) {
451  delete probebacts[i];
452  }
453  delete [] probebacts;
454  }
455 
456  delete Bakterienliste;
457 
458  GB_close(gb_main);
459 }
460 
461 #endif // UNIT_TESTS
462 
463 // --------------------------------------------------------------------------------
464 
Hit * get_hitdata_by_number(long index)
Definition: MP_sonde.cxx:279
const char * GB_ERROR
Definition: arb_core.h:25
GBDATA * GB_open(const char *path, const char *opent)
Definition: ad_load.cxx:1363
T_PT_LOCS locs
Definition: mpdefs.h:51
void set_bitkennung(Bitvector *bv)
Definition: MP_sonde.cxx:325
void print()
Definition: MP_permute.cxx:80
static void set_gb_main(GBDATA *gb_main_)
Definition: MultiProbe.hxx:324
int aisc_close(aisc_com *link, AISC_Object &object)
Definition: client.c:249
Definition: trnsprob.h:20
#define TEST_SETUP_GLOBAL_ENVIRONMENT(modulename)
Definition: test_unit.h:1491
char * ARB_strdup(const char *str)
Definition: arb_string.h:27
double mismatch
Definition: mpdefs.h:40
void convert(const FormattedFile &in, const FormattedFile &out)
Definition: convert.cxx:154
void print()
Definition: MP_sonde.cxx:60
Bakt_Info(const char *n)
Definition: MP_sonde.cxx:336
#define NON_WEIGHTED
Definition: mpdefs.h:18
void heapsort(long feldlaenge, MO_Mismatch **Nr_Mm_Feld)
Definition: MP_sonde.cxx:290
int aisc_put(aisc_com *link, int o_type, const AISC_Object &object,...)
Definition: client.c:539
void sink(long i, long t, MO_Mismatch **A)
Definition: MP_sonde.cxx:307
MO_Mismatch ** get_matching_species(int ptserver_id, bool match_also_revcompl, int match_weight, int match_mis, const char *match_seq, MO_Liste *convert, long *number_of_species, GB_ERROR &error)
Definition: MP_sonde.cxx:68
GB_ERROR get_all_species(int ptserver_id)
Definition: MP_mo_liste.cxx:44
void kill_flag()
Definition: MultiProbe.hxx:189
void set_mismatch_at_pos(int pos, double mm)
Definition: MultiProbe.hxx:202
long nummer
Definition: mpdefs.h:39
char * data
Definition: bytestring.h:16
double get_Allowed_Mismatch_no(int no)
Definition: MultiProbe.hxx:242
#define TEST_REJECT_NULL(n)
Definition: test_unit.h:1325
int MP_init_local_com_struct(struct mp_gl_struct &mp_pd_gl)
Definition: MP_noclass.cxx:764
static void error(const char *msg)
Definition: mkptypes.cxx:96
long get_index_by_entry(const char *key)
long complement
Definition: MultiProbe.hxx:94
const char * get_entry_by_index(long index) const
~Hit()
Definition: MP_sonde.cxx:361
long get_laenge() const
int gen_Hitliste(MO_Liste *Bakterienliste)
Definition: MP_sonde.cxx:183
long ptserver
Definition: MultiProbe.hxx:98
const char * arb_look_and_start_ptserver(int magic_number, int ptserver_id, GB_ERROR &error)
float outside_mismatches_difference
Definition: MultiProbe.hxx:92
float greyzone
Definition: MultiProbe.hxx:105
double check_for_min(long k, MO_Mismatch **probebacts, long laenge)
Definition: MP_sonde.cxx:166
#define AISC_MAGIC_NUMBER
Definition: client_privat.h:51
long weightedmismatches
Definition: MultiProbe.hxx:93
aisc_com * aisc_open(const char *path, AISC_Object &main_obj, long magic, GB_ERROR *error)
Definition: client.c:205
Bakt_Info ** get_mo_liste()
Definition: MultiProbe.hxx:322
const char * GBS_static_string(const char *str)
Definition: arb_msg.cxx:212
#define TEST_EXPECT_NO_ERROR(call)
Definition: test_unit.h:1118
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
#define NULp
Definition: cxxforward.h:116
long no_of_probes
Definition: MultiProbe.hxx:96
aisc_com * link
Definition: mpdefs.h:50
int aisc_get(aisc_com *link, int o_type, const AISC_Object &object,...)
Definition: client.c:266
#define mp_assert(cond)
Definition: mkptypes.cxx:32
Sonde(const char *bezeichner, int num_probes, int allowed_mis, double outside_mis)
Definition: MP_sonde.cxx:21
awar_vars mp_gl_awars
Definition: MP_main.cxx:23
Hit(long baktnummer)
Definition: MP_sonde.cxx:351
GBDATA * gb_main
Definition: adname.cxx:32
T_PT_MAIN com
Definition: mpdefs.h:52
#define min(a, b)
Definition: f2c.h:153
#define TEST_EXPECT_EQUAL(expr, want)
Definition: test_unit.h:1294
void GB_close(GBDATA *gbd)
Definition: arbdb.cxx:655
~Sonde()
Definition: MP_sonde.cxx:45
GB_write_int const char s
Definition: AW_awar.cxx:154