ARB
rns.c
Go to the documentation of this file.
1 #include <limits.h>
2 #include <stdlib.h>
3 #include <memory.h>
4 
5 #include "rns.h"
6 #include "spreadin.h"
7 
8 
9 // -----------------------------
10 // Erzeugung der Ur-RNS
11 
12 int orgLen; // Laenge der Ur-RNS
13 double orgHelixPart; // Anteil Helix-Bereich
14 static int rnsCreated; // Anzahl bisher erzeugter RNS-Sequenzen
15 
16 // -----------------
17 // Mutation
18 
19 int timeSteps; // Anzahl Zeitschritte
20 Frand mrpb_Init, // Initialisierungsfunktion fuer 'mutationRatePerBase'
21  l2hrpb_Init, // Initialisierungsfunktion fuer 'loop2helixRatePerBase'
22  pairPart, // Anteil paarender Helix-Bindungen
23  mutationRate, // Mutationsrate
24  splitRate, // Spaltungsrate
25  helixGcDruck, // G-C-Druck im Helix-Bereich
26  helixGcRate, // Verhaeltnis G:C im Helix-Bereich
27  helixAtRate, // Verhaeltnis A:T im Helix-Bereich
28  loopGcDruck, // G-C-Druck im Loop-Bereich
29  loopGcRate, // Verhaeltnis G:C im Loop-Bereich
30  loopAtRate; // Verhaeltnis A:T im Loop-Bereich
31 double transitionRate, // Transition-Rate
32  transversionRate; // Transversion-Rate
33 
34 static double *mutationRatePerBase, // positionsspez. Mutationsrate (wird nur einmal bestimmt und bleibt dann konstant)
35  *loop2helixRatePerBase; // positionsspez. Rate fuer Wechsel Loop-Base in Helix-Base und vv. (wird nur einmal bestimmt und bleibt dann konstant)
36 static int mrpb_anz, // Anzahl Positionen
37  mrpb_allocated, // wirklich Groesse des Arrays
38  l2hrpb_anz, // Anzahl Positionen
39  l2hrpb_allocated; // wirklich Groesse des Arrays
40 static DoubleProb helixMutationMatrix, // Mutationsmatrix fuer Helix-Bereiche
41  loopMutationMatrix; // Mutationsmatrix fuer Loop-Bereiche
42 
43 // ---------------------------
44 // Ausgabefilepointer
45 
46 FILE *topo, // Topologie
47  *seq; // Sequenzen
48 
49 // ------------------
50 // Sonstiges
51 
52 static int minDepth = INT_MAX, // minimale Tiefe (Astanzahl) der Blattspitzen
53  maxDepth = INT_MIN; // maximale Tiefe der Blattspitzen
54 
55 void dumpDepths() {
56  printf("Minimale Baumtiefe = %i\n", minDepth);
57  printf("Maximale Baumtiefe = %i\n", maxDepth);
58 }
59 static void dumpRNS(RNS rns) {
60  int b,
61  b1,
62  b2;
63  static int cleared,
64  h_cnt[BASETYPES+1][BASETYPES+1],
65  l_cnt[BASETYPES+1],
66  loop,
67  helix;
68 
69  if (!cleared) {
70  for (b1 = 0; b1<(BASETYPES+1); b1++) {
71  for (b2 = 0; b2<(BASETYPES+1); b2++) h_cnt[b1][b2] = 0;
72  l_cnt[b1] = 0;
73  }
74 
75  loop = 0;
76  helix = 0;
77 
78  cleared = 1;
79  }
80 
81  if (rns) {
82  for (b = 0; b<(rns->bases); b++) {
83  char base = rns->base[b];
84 
85  if (isHelical(base)) {
86  int bt1 = char2BaseType(base),
87  bt2 = char2BaseType(rns->base[b+1]);
88 
89  h_cnt[bt1][bt2]++;
90  helix++;
91  b++;
92  }
93  else {
94  int bt = char2BaseType(base);
95 
96  l_cnt[bt]++;
97  loop++;
98  }
99  }
100  }
101  else {
102  printf("Helix-Basenpaare = %i\n"
103  "Loop-Basen = %i\n"
104  "Helix:Loop = %f\n",
105  helix,
106  loop,
107  (double)helix/(double)loop);
108 
109  {
110  int gc = h_cnt[BASE_C][BASE_G]+h_cnt[BASE_G][BASE_C],
111  at = h_cnt[BASE_A][BASE_T]+h_cnt[BASE_T][BASE_A],
112  paarend = gc+at;
113 
114  printf("GC-Paare = %i\n"
115  "AT-Paare = %i\n"
116  "Paare:Helix-Bindungen = %f\n"
117  "GC-Paare:Paare = %f\n",
118  gc,
119  at,
120  (double)paarend/(double)helix,
121  (double)gc/(double)paarend);
122  }
123 
124  printf("\n");
125  }
126 }
127 static void initBaseSpecificProbs(int bases) {
128  int b;
129 
130  mrpb_anz = bases;
131  mrpb_allocated = bases;
132  mutationRatePerBase = malloc(bases*sizeof(double));
133 
134  l2hrpb_anz = bases;
135  l2hrpb_allocated = bases;
136  loop2helixRatePerBase = malloc(bases*sizeof(double));
137 
139 
140  for (b = 0; b<bases; b++) {
141  mutationRatePerBase[b] = getFrand(mrpb_Init);
142  loop2helixRatePerBase[b] = getFrand(l2hrpb_Init);
143  }
144 }
145 static RNS allocRNS(int len) {
146  RNS rns = malloc(sizeof(*rns));
147 
148  if (!rns) outOfMemory();
149 
150  rns->bases = len;
151  rns->base = malloc(sizeof(*(rns->base))*len);
152 
153  if (!rns->base) outOfMemory();
154 
155  return rns;
156 }
158  // Erzeugt eine Ur-RNS
159  RNS rns = allocRNS(orgLen);
160  int helixLen = orgLen*orgHelixPart,
161  l;
162  str base = rns->base;
163 
164  printf("Generating origin species..\n");
165 
166  initBaseSpecificProbs(orgLen);
167 
168  rns->laufNr = rnsCreated++;
169 
170  // -----------------------------------------
171  // Helix erzeugen (im Loop-Bereich)
172 
173  if (helixLen%1) helixLen--; // muss gerade Laenge haben, da nur Paare!
174 
175  assert(helixLen<=orgLen);
176 
177  rns->helix = helixLen/2;
178  rns->pairing = 0;
179 
180  {
181  DoubleProb orgHelixProb;
182  Spreading s;
183  int b1,
184  b2;
185  double actPairPart = getFrand(pairPart),
186  actHelixGcDruck = getFrand(helixGcDruck),
187  actHelixGcRate = getFrand(helixGcRate),
188  actHelixAtRate = getFrand(helixAtRate),
189  nonPairProb = (1.0-actPairPart)/2.0;
190 
191  for (b1 = 0; b1<BASETYPES; b1++) {
192  for (b2 = 0; b2<BASETYPES; b2++) {
193  if (isPairing(b1, b2)) {
194  switch (b1) {
195  case BASE_A:
196  case BASE_T: {
197  orgHelixProb[b1][b2] = (actPairPart*(1.0-actHelixGcDruck))/2.0;
198  break;
199  }
200  case BASE_C:
201  case BASE_G: {
202  orgHelixProb[b1][b2] = (actPairPart*actHelixGcDruck)/2.0;
203  break;
204  }
205  }
206  }
207  else {
208  double prob = nonPairProb;
209  int b = b1;
210 
211  while (1) { // wird je einmal mit b1 und b2 ausgefuehrt
212  switch (b) {
213  case BASE_A: {
214  prob = prob*(1.0-actHelixGcDruck)*actHelixAtRate;
215  break;
216  }
217  case BASE_C: {
218  prob = prob*actHelixGcDruck*(1.0-actHelixGcRate);
219  break;
220  }
221  case BASE_G: {
222  prob = prob*actHelixGcDruck*actHelixGcRate;
223  break;
224  }
225  case BASE_T: {
226  prob = prob*(1.0-actHelixGcDruck)*(1.0-actHelixAtRate);
227  break;
228  }
229  }
230 
231  if (b==b2) break;
232  b = b2;
233  }
234 
235  orgHelixProb[b1][b2] = prob;
236  }
237  }
238  }
239 
240  s = newSpreading((double*)orgHelixProb, BASEQUAD);
241 
242  for (l = 0; l<helixLen; l+=2) {
243  int val = spreadRand(s),
244  B1 = val%BASETYPES,
245  B2 = val/BASETYPES;
246 
247  base[l] = helixBaseChar[B1];
248  base[l+1] = helixBaseChar[B2];
249 
250  rns->pairing += isPairing(B1, B2);
251  }
252 
253  freeSpreading(s);
254  }
255 
256  // ----------------------
257  // Loop erzeugen
258 
259  {
260  SingleProb orgLoopProb;
261  Spreading s;
262  double actLoopGcDruck = getFrand(loopGcDruck),
263  actLoopGcRate = getFrand(loopGcRate),
264  actLoopAtRate = getFrand(loopAtRate);
265 
266  orgLoopProb[BASE_A] = (1.0-actLoopGcDruck)*actLoopAtRate;
267  orgLoopProb[BASE_C] = actLoopGcDruck*(1.0-actLoopGcRate);
268  orgLoopProb[BASE_G] = actLoopGcDruck*actLoopGcRate;
269  orgLoopProb[BASE_T] = (1.0-actLoopGcDruck)*(1.0-actLoopAtRate);
270 
271  s = newSpreading((double*)orgLoopProb, BASETYPES);
272  for (; l<orgLen; l++) base[l] = loopBaseChar[spreadRand(s)];
273  freeSpreading(s);
274  }
275 
276  return rns;
277 }
278 void freeRNS(RNS rns) {
279  free(rns->base);
280  free(rns);
281 }
282 static RNS dupRNS(RNS rns) {
283  RNS neu = malloc(sizeof(*rns));
284 
285  if (!neu) outOfMemory();
286 
287  memcpy(neu, rns, sizeof(*rns));
288 
289  neu->base = malloc(rns->bases*sizeof(*(neu->base)));
290  memcpy(neu->base, rns->base, rns->bases);
291 
292  neu->laufNr = rnsCreated++;
293 
294  return neu;
295 }
296 static void calcMutationMatrix(DoubleProb mutationMatrix, double gcDruck, double gcRate, double atRate, double *pairProb) {
297  double k = transitionRate/transversionRate,
298  fa = (1.0-gcDruck)*atRate,
299  fc = gcDruck*(1.0-gcRate),
300  fg = gcDruck*gcRate,
301  ft = (1.0-gcDruck)*(1.0-atRate),
302  bfa = transversionRate*fa,
303  bfc = transversionRate*fc,
304  bfg = transversionRate*fg,
305  bft = transversionRate*ft,
306  kag = k/(fa+fg),
307  kct = k/(fc+ft);
308 
309  // Matrix besetzen
310 
311  mutationMatrix[BASE_A][BASE_A] = 1.0-(kag+3.0)*bfa;
312  mutationMatrix[BASE_C][BASE_A] = bfa;
313  mutationMatrix[BASE_G][BASE_A] = (kag+1.0)*bfa;
314  mutationMatrix[BASE_T][BASE_A] = bfa;
315 
316  mutationMatrix[BASE_A][BASE_C] = bfc;
317  mutationMatrix[BASE_C][BASE_C] = 1.0-(kct+3.0)*bfc;
318  mutationMatrix[BASE_G][BASE_C] = bfc;
319  mutationMatrix[BASE_T][BASE_C] = (kct+1.0)*bfc;
320 
321  mutationMatrix[BASE_A][BASE_G] = (kag+1.0)*bfg;
322  mutationMatrix[BASE_C][BASE_G] = bfg;
323  mutationMatrix[BASE_G][BASE_G] = 1.0-(kag+3.0)*bfg;
324  mutationMatrix[BASE_T][BASE_G] = bfg;
325 
326  mutationMatrix[BASE_A][BASE_T] = bft;
327  mutationMatrix[BASE_C][BASE_T] = (kct+1.0)*bft;
328  mutationMatrix[BASE_G][BASE_T] = bft;
329  mutationMatrix[BASE_T][BASE_T] = 1.0-(kct+3.0)*bft;
330 
331  if (pairProb) { // soll pairProb berechnet werden?
332  double mutatesTo[BASETYPES],
333  freq[BASETYPES]; // Haeufigkeit der einzelnen Basen
334  int von,
335  nach;
336 
337  freq[BASE_A] = fa;
338  freq[BASE_C] = fc;
339  freq[BASE_G] = fg;
340  freq[BASE_T] = ft;
341 
342  for (nach = 0; nach<BASETYPES; nach++)
343  mutatesTo[nach] = 0.0;
344 
345  for (von = 0; von<BASETYPES; von++)
346  for (nach = 0; nach<BASETYPES; nach++)
347  mutatesTo[nach] += mutationMatrix[von][nach]*freq[von];
348 
349  *pairProb = 2.0*mutatesTo[BASE_A]*mutatesTo[BASE_T] + 2.0*mutatesTo[BASE_C]*mutatesTo[BASE_G];
350  }
351 }
352 static int calcPairTrials(double pairProb, double actPairPart) {
353  // Berechnet die Anzahl Mutations-Wiederholungen, die notwendig sind, um
354  // mindestens 'actPairPart' Prozent paarende Bindungen zu erhalten, falls
355  // die Wahrscheinlichkeit eine paarende Bindung zu erzeugen gleich
356  // 'pairProb' ist.
357  int trials = 1;
358  double failProb = 1.0-pairProb,
359  succProb = pairProb;
360 
361  while (succProb<actPairPart) {
362  pairProb *= failProb;
363  succProb += pairProb;
364  trials++;
365  }
366 
367  return trials;
368 }
369 static void mutateRNS(int no_of_father, RNS rns, int steps, int depth) {
370  // Mutiert eine RNS bis zur naechsten Spaltung
371  // 'steps' Anzahl noch zu durchlaufender Zeitschritte
372  int splitInSteps,
373  s;
374  double mutationTime = 0.0;
375 
376  // --------------------------------------------
377  // Schritte bis zur Spaltung berechnen
378 
379  {
380  double actualSplitRate = getFrand(splitRate);
381 
382  assert(actualSplitRate!=0);
383 
384  splitInSteps = (int)(1.0/actualSplitRate);
385  if (splitInSteps>steps) splitInSteps = steps;
386 
387  assert(splitInSteps>=1);
388  }
389 
390  // ---------------------------------
391  // Zeitschritte durchlaufen
392 
393  for (s = 0; s<splitInSteps; s++) {
394  int b,
395  pairTrials; // Anzahl Versuche eine paarende Helixbindung herzustellen
396  double actMutationRate = getFrand(mutationRate),
397  actPairPart = getFrand(pairPart);
398  Spreading s_helix[BASETYPES],
399  s_loop[BASETYPES];
400 
401  {
402  double pairProb; // Wahrscheinlichkeit, dass ein Paar im helikalen Bereich entsteht
403 
404  calcMutationMatrix(helixMutationMatrix, /*1.0,*/ getFrand(helixGcDruck), getFrand(helixGcRate), getFrand(helixAtRate), &pairProb);
405  calcMutationMatrix(loopMutationMatrix, /*actMutationRate,*/ getFrand(loopGcDruck), getFrand(loopGcRate), getFrand(loopAtRate), NULL);
406 
407  pairTrials = calcPairTrials(pairProb, actPairPart);
408  }
409 
410  for (b = 0; b<BASETYPES; b++) {
411  s_helix[b] = newSpreading(&(helixMutationMatrix[b][0]), BASETYPES);
412  s_loop[b] = newSpreading(&(loopMutationMatrix[b][0]), BASETYPES);
413  }
414 
415  mutationTime += actMutationRate; // Mutationszeit aufaddieren (Einheit ist Mutationsrate*Zeitschritte)
416 
417  // ---------------------------------------
418  // Alle Basen(-paare) durchlaufen
419 
420  for (b = 0; b<(rns->bases);) {
421  char base = rns->base[b];
422 
423  if (!isDeleted(base)) { // Deletes ignorieren
424  // --------------------------
425  // Helicale Bereiche
426 
427  if (isHelical(base)) {
428  int trials = pairTrials,
429  mut1 = randProb()<mutationRatePerBase[b]*actMutationRate,
430  mut2 = randProb()<mutationRatePerBase[b+1]*actMutationRate;
431  char base2 = rns->base[b+1];
432 
433  assert(isHelical(base2));
434 
435  if (mut1 || mut2) {
436  int bt1 = char2BaseType(base),
437  bt2 = char2BaseType(base2);
438 
439  if (isPairing(bt1, bt2)) {
440  rns->pairing--;
441  }
442 
443  while (trials--) {
444  if (mut1) {
445  if (mut2) { // beide Basen mutieren
446  bt1 = spreadRand(s_helix[bt1]);
447  bt2 = spreadRand(s_helix[bt2]);
448  }
449  else { // nur 1.Base mutieren
450  bt1 = spreadRand(s_helix[bt1]);
451  }
452  }
453  else { // nur 2.Base mutieren
454  bt2 = spreadRand(s_helix[bt2]);
455  }
456 
457  if (isPairing(bt1, bt2)) { // paarend? ja->abbrechen
458  rns->pairing++;
459  break;
460  }
461  }
462 
463  rns->base[b] = helixBaseChar[bt1];
464  rns->base[b+1] = helixBaseChar[bt2];
465  }
466 
467  b++;
468  }
469 
470  // ----------------------
471  // Loop-Bereiche
472 
473  else {
474  double mutationProb = actMutationRate*mutationRatePerBase[b];
475 
476  if (randProb()<mutationProb) {
477  rns->base[b] = loopBaseChar[spreadRand(s_loop[char2BaseType(base)])];
478  }
479  }
480  }
481 
482  b++;
483  }
484 
485  for (b = 0; b<BASETYPES; b++) {
486  freeSpreading(s_helix[b]);
487  freeSpreading(s_loop[b]);
488  }
489  }
490 
491  splitRNS(no_of_father, rns, mutationTime, steps-splitInSteps, depth+1);
492 }
493 void splitRNS(int no_of_father, RNS origin, double age, int steps, int depth) {
494  // Spaltet eine RNS in zwei Species auf
495  int x;
496 
497  dumpRNS(origin);
498 
499  // --------------------------
500  // Sequenz schreiben
501 
502  if (no_of_father != -1) {
503  fprintf(seq, ">no%i son of no%i\n", origin->laufNr, no_of_father);
504  }
505  else {
506  fprintf(seq, ">no%i father of all species\n", origin->laufNr);
507  }
508  no_of_father = origin->laufNr; // now i'm the father
509  for (x = 0; x<(origin->bases); x++) fputc(origin->base[x], seq);
510  fputc('\n', seq);
511 
512  if (steps) { // Species splitten!
513  double gcDruck_val = helixGcDruck->val, // Frand-Werte merken
514  pairPart_val = pairPart->val,
515  mutationRate_val = mutationRate->val,
516  splitRate_val = splitRate->val;
517 
518  fprintf(topo, "(no%i:%f,\n", origin->laufNr, age);
519 
520  {
521  RNS left = dupRNS(origin); // linker Sohn
522 
523  mutateRNS(no_of_father, left, steps, depth);
524  freeRNS(left);
525  }
526 
527  fputs(",\n", topo);
528 
529  helixGcDruck->val = gcDruck_val; // Frand-Werte wiederherstellen
530  pairPart->val = pairPart_val;
531  mutationRate->val = mutationRate_val;
532  splitRate->val = splitRate_val;
533 
534  {
535  RNS right = dupRNS(origin); // rechter Sohn
536 
537  mutateRNS(no_of_father, right, steps, depth);
538  freeRNS(right);
539  }
540 
541  fputc(')', topo);
542  }
543  else { // Baumspitze
544  if (depth>maxDepth) maxDepth = depth;
545  else if (depth<minDepth) minDepth = depth;
546 
547  fprintf(topo, "no%i:%f", origin->laufNr, age);
548 
549  if ((origin->laufNr%100) == 0) {
550  printf("generated Species: %i\n", origin->laufNr);
551  }
552  }
553 
554  if (age==0.0) dumpRNS(NULL);
555 }
556 
557 
static DoubleProb helixMutationMatrix
Definition: rns.c:40
char loopBaseChar[BASECHARS]
Definition: base.c:5
Definition: frand.h:8
int laufNr
Definition: rns.h:52
Definition: base.h:9
Frand helixGcRate
Definition: rns.c:20
void freeRNS(RNS rns)
Definition: rns.c:278
double SingleProb[BASETYPES]
Definition: rns.h:14
void freeSpreading(Spreading s)
Definition: spreadin.c:31
Frand splitRate
Definition: rns.c:20
static RNS dupRNS(RNS rns)
Definition: rns.c:282
Definition: base.h:10
static int maxDepth
Definition: rns.c:53
static int l2hrpb_allocated
Definition: rns.c:36
double val
Definition: frand.h:9
static void dumpRNS(RNS rns)
Definition: rns.c:59
int orgLen
Definition: rns.c:12
static int rnsCreated
Definition: rns.c:14
int timeSteps
Definition: rns.c:19
#define isHelical(b)
Definition: base.h:25
static int mrpb_anz
Definition: rns.c:36
char helixBaseChar[BASECHARS]
Definition: base.c:4
double randProb()
Definition: frand.c:47
static int calcPairTrials(double pairProb, double actPairPart)
Definition: rns.c:352
FILE * seq
Definition: rns.c:46
static int minDepth
Definition: rns.c:52
Frand pairPart
Definition: rns.c:20
Definition: rns.h:50
Frand l2hrpb_Init
Definition: rns.c:20
static int mrpb_allocated
Definition: rns.c:36
RNS createOriginRNS()
Definition: rns.c:157
void dumpDepths()
Definition: rns.c:55
double DoubleProb[BASETYPES][BASETYPES]
Definition: rns.h:15
Definition: base.h:12
#define char2BaseType(c)
Definition: base.h:35
int helix
Definition: rns.h:52
static void initBaseSpecificProbs(int bases)
Definition: rns.c:127
static double * loop2helixRatePerBase
Definition: rns.c:34
static void calcMutationMatrix(DoubleProb mutationMatrix, double gcDruck, double gcRate, double atRate, double *pairProb)
Definition: rns.c:296
Definition: base.h:11
fputc('\n', stderr)
#define BASEQUAD
Definition: base.h:19
static double * mutationRatePerBase
Definition: rns.c:34
#define isPairing(b1, b2)
Definition: base.h:26
char * base
Definition: rns.h:51
Frand helixAtRate
Definition: rns.c:20
#define isDeleted(b)
Definition: base.h:24
FILE * topo
Definition: rns.c:46
static RNS allocRNS(int len)
Definition: rns.c:145
#define BASETYPES
Definition: base.h:17
fputs(TRACE_PREFIX, stderr)
Spreading newSpreading(double *value, int values)
Definition: spreadin.c:5
Frand helixGcDruck
Definition: rns.c:20
int spreadRand(Spreading s)
Definition: spreadin.c:35
Frand mutationRate
Definition: rns.c:20
double getFrand(Frand f)
Definition: frand.c:39
static DoubleProb loopMutationMatrix
Definition: rns.c:40
int bases
Definition: rns.h:52
double transitionRate
Definition: rns.c:31
Frand loopGcRate
Definition: rns.c:20
#define assert(bed)
Frand loopGcDruck
Definition: rns.c:20
#define outOfMemory()
Definition: defines.h:14
void splitRNS(int no_of_father, RNS origin, double age, int steps, int depth)
Definition: rns.c:493
static void mutateRNS(int no_of_father, RNS rns, int steps, int depth)
Definition: rns.c:369
Frand loopAtRate
Definition: rns.c:20
double transversionRate
Definition: rns.c:31
int pairing
Definition: rns.h:52
static int l2hrpb_anz
Definition: rns.c:36
Frand mrpb_Init
Definition: rns.c:20
double orgHelixPart
Definition: rns.c:13
GB_write_int const char s
Definition: AW_awar.cxx:154