ARB
ColumnStat.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ColumnStat.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "ColumnStat.hxx"
12 #include "awt_sel_boxes.hxx"
13 #include "ga_local.h"
14 #include <AP_filter.hxx>
15 #include <BI_helix.hxx>
16 
17 #include <aw_root.hxx>
18 #include <aw_awar.hxx>
19 #include <aw_msg.hxx>
20 #include <aw_select.hxx>
21 
22 #include <arbdbt.h>
23 #include <arb_strbuf.h>
24 #include <arb_defs.h>
25 
26 #define SRT_AWARCOLSTAT_NAME "/name=/name"
27 #define SRT_AWARCOLSTAT_ALIGNMENT "/name=/alignment"
28 #define SRT_AWARCOLSTAT_SMOOTH "/name=/smooth"
29 #define SRT_AWARCOLSTAT_ENABLE_HELIX "/name=/enable_helix"
30 
32  GB_transaction ta(gb_main);
33 
34  freeset(alignment_name, awr->awar(awar_alignment)->read_string());
35  freeset(type_path, GBS_string_eval(alignment_name, "*=*1/_TYPE"));
36 
37  if (saisel) saisel->refresh();
38 }
39 
40 static void refresh_columnstat_selection(AW_root*, ColumnStat *column_stat) {
41  column_stat->refresh_sai_selection_list();
42 }
43 
44 ColumnStat::ColumnStat(GBDATA *gb_main_, AW_root *awr_, const char *awar_template, AW_awar *awar_used_alignment) :
45  gb_main(gb_main_),
46  awr(awr_),
48  type_path(NULp),
49  saisel(NULp),
50  seq_len(0),
51  weights(NULp),
52  rates(NULp),
53  ttratio(NULp),
54  is_helix(NULp),
55  mut_sum(NULp),
56  freq_sum(NULp),
57  desc(NULp)
58 {
59  /* awar_template == ".../name"
60  * -> generated ".../alignment"
61  * ".../smooth"
62  * ".../enable_helix"
63  */
64 
65  memset(frequency, 0, ARRAY_ELEMS(frequency)*sizeof(*frequency));
66 
67  awar_name = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_NAME);
68  awar_alignment = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_ALIGNMENT);
69  awar_smooth = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_SMOOTH);
70  awar_enable_helix = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_ENABLE_HELIX);
71 
72  ga_assert(strcmp(awar_name, awar_alignment) != 0); // awar_template must end with (or contain) "/name"
73 
74  awr->awar_string(awar_name, "");
75  awr->awar_string(awar_alignment)->map(awar_used_alignment);
76  awr->awar_int(awar_smooth);
77  awr->awar_int(awar_enable_helix, 1);
78 
79  awr->awar(awar_alignment)->add_callback(makeRootCallback(refresh_columnstat_selection, this));
80  refresh_sai_selection_list(); // same as refresh_columnstat_selection(this)
81 }
82 
84  delete [] weights; weights = NULp;
85  delete [] rates; rates = NULp;
86  delete [] ttratio; ttratio = NULp;
87  delete [] is_helix; is_helix = NULp;
88  delete [] mut_sum; mut_sum = NULp;
89  delete [] freq_sum; freq_sum = NULp;
90  delete desc; desc = NULp;
91 
92  for (int i=0; i<256; i++) {
93  delete [] frequency[i];
94  frequency[i] = NULp;
95  }
96  seq_len = 0; // mark invalid
97 }
98 
100  forget();
101  delete awar_name;
102  delete awar_alignment;
103  delete awar_smooth;
104  delete awar_enable_helix;
105 }
106 
108  forget(); // delete previously calculated stats
109 
110  GB_transaction ta(gb_main);
111  GB_ERROR error = filter ? filter->is_invalid() : NULp;
112  size_t alignment_length = 0;
113 
114  if (!error) {
115  long alignment_length_l = GBT_get_alignment_len(gb_main, alignment_name);
116  if (alignment_length_l<=0) {
117  error = GB_await_error();
118  }
119  else if (alignment_length_l == 1) {
120  error = GB_append_exportedError(GBS_global_string("bad size for alignment '%s'", alignment_name));
121  }
122  else {
123  alignment_length = alignment_length_l;
124  if (filter && filter->get_length() != alignment_length) {
125  error = GBS_global_string("Incompatible filter (filter=%zu bp, alignment=%zu bp)",
126  filter->get_length(), alignment_length);
127  }
128  }
129  }
130  seq_len = 0;
131 
132  if (!error) {
133  char *sai_name = awr->awar(awar_name)->read_string();
134  GBDATA *gb_sai = GBT_find_SAI(gb_main, sai_name);
135 
136  if (!gb_sai) {
137  if (strcmp(sai_name, "") != 0) { // no error if SAI name is empty
138  error = GBS_global_string("Column statistic: no such SAI: '%s'", sai_name);
139  }
140  }
141 
142  if (gb_sai) {
143  GBDATA *gb_ali = NULp;
144  GBDATA *gb_freqs = NULp;
145  if (!error) {
146  gb_ali = GB_entry(gb_sai, alignment_name);
147  if (!gb_ali) error = GBS_global_string("SAI '%s' does not exist in alignment '%s'", sai_name, alignment_name);
148  }
149  if (!error) {
150  gb_freqs = GB_entry(gb_ali, "FREQUENCIES");
151  if (!gb_freqs) error = GBS_global_string("Column statistic '%s' is invalid (has no FREQUENCIES)", sai_name);
152  }
153 
154  if (!error) {
155  seq_len = filter ? filter->get_filtered_length() : alignment_length;
156 
157  delete [] weights; weights = new GB_UINT4[seq_len];
158  delete [] rates; rates = new float[seq_len];
159  delete [] ttratio; ttratio = new float[seq_len];
160  delete [] is_helix; is_helix = new bool[seq_len];
161  delete [] mut_sum; mut_sum = new GB_UINT4[seq_len];
162  delete [] freq_sum; freq_sum = new GB_UINT4[seq_len];
163 
164  delete desc; desc = NULp;
165 
166  size_t i;
167  size_t j;
168  for (i=0; i<256; i++) { delete frequency[i]; frequency[i]=NULp; }
169 
170  long use_helix = awr->awar(awar_enable_helix)->read_int();
171 
172  for (i=0; i<seq_len; i++) { // LOOP_VECTORIZED
173  is_helix[i] = false;
174  weights[i] = 1;
175  }
176 
177  if (!error && use_helix) { // calculate weights and helix filter
178  BI_helix helix;
179  error = helix.init(this->gb_main, alignment_name);
180  if (error) {
181  aw_message(error);
182  error = NULp;
183  goto no_helix;
184  }
185  error = NULp;
186  for (j=i=0; i<alignment_length; i++) {
187  if (!filter || filter->use_position(i)) {
188  if (helix.is_pairpos(i)) {
189  is_helix[j] = true;
190  weights[j] = 1;
191  }
192  else {
193  is_helix[j] = false;
194  weights[j] = 2;
195  }
196  j++;
197  }
198  }
199  }
200  no_helix :
201 
202  for (i=0; i<seq_len; i++) freq_sum[i] = 0;
203 
204  GBDATA *gb_freq;
205  GB_UINT4 *freqi[256];
206  for (i=0; i<256; i++) freqi[i] = NULp;
207 
208  int wf; // ********* read the frequency statistic
209  for (gb_freq = GB_child(gb_freqs); gb_freq; gb_freq = GB_nextChild(gb_freq)) {
210  char *key = GB_read_key(gb_freq);
211  if (key[0] == 'N' && key[1] && !key[2]) {
212  wf = key[1];
213  freqi[wf] = GB_read_ints(gb_freq);
214  }
215  free(key);
216  }
217 
218  GB_UINT4 *minmut = NULp;
219  GB_UINT4 *transver = NULp;
220  GBDATA *gb_minmut = GB_entry(gb_freqs, "TRANSITIONS");
221  GBDATA *gb_transver = GB_entry(gb_freqs, "TRANSVERSIONS");
222 
223  if (!gb_minmut) error = GBS_global_string("Column statistic '%s' is invalid (has no FREQUENCIES/TRANSITIONS)", sai_name);
224  if (!gb_transver) error = GBS_global_string("Column statistic '%s' is invalid (has no FREQUENCIES/TRANSVERSIONS)", sai_name);
225 
226  if (gb_minmut) {
227  minmut = GB_read_ints(gb_minmut);
228  if (!minmut) error = GBS_global_string("Column statistic '%s' is invalid (error reading FREQUENCIES/TRANSITIONS: %s)", sai_name, GB_await_error());
229  }
230  if (gb_transver) {
231  transver = GB_read_ints(gb_transver);
232  if (!transver) error = GBS_global_string("Column statistic '%s' is invalid (error reading FREQUENCIES/TRANSVERSIONS: %s)", sai_name, GB_await_error());
233  }
234 
235  if (!error) {
236  unsigned long max_freq_sum = 0;
237  for (wf = 0; wf<256; wf++) { // ********* calculate sum of mutations
238  if (!freqi[wf]) continue; // ********* calculate nbases for each col.
239  for (j=i=0; i<alignment_length; i++) {
240  if (filter && !filter->use_position(i)) continue;
241  freq_sum[j] += freqi[wf][i];
242  mut_sum[j] = minmut[i];
243  j++;
244  }
245  }
246  for (i=0; i<seq_len; i++)
247  if (freq_sum[i] > max_freq_sum)
248  max_freq_sum = freq_sum[i];
249  unsigned long min_freq_sum = DIST_MIN_SEQ(max_freq_sum);
250 
251  for (wf = 0; wf<256; wf++) {
252  if (!freqi[wf]) continue;
253  frequency[wf] = new float[seq_len];
254  for (j=i=0; i<alignment_length; i++) {
255  if (filter && !filter->use_position(i)) continue;
256  if (freq_sum[j] > min_freq_sum) {
257  frequency[wf][j] = freqi[wf][i]/(float)freq_sum[j];
258  }
259  else {
260  frequency[wf][j] = 0;
261  weights[j] = 0;
262  }
263  j++;
264  }
265  }
266 
267  for (j=i=0; i<alignment_length; i++) { // ******* calculate rates
268  if (filter && !filter->use_position(i)) continue;
269  if (!weights[j]) {
270  rates[j] = 1.0;
271  ttratio[j] = 0.5;
272  j++;
273  continue;
274  }
275  rates[j] = (mut_sum[j] / (float)freq_sum[j]);
276  if (transver[i] > 0) {
277  ttratio[j] = (minmut[i] - transver[i]) / (float)transver[i];
278  }
279  else {
280  ttratio[j] = 2.0;
281  }
282  if (ttratio[j] < 0.05) ttratio[j] = 0.05;
283  if (ttratio[j] > 5.0) ttratio[j] = 5.0;
284  j++;
285  }
286 
287  // ****** normalize rates
288 
289  double sum_rates = 0;
290  for (i=0; i<seq_len; i++) sum_rates += rates[i];
291  sum_rates /= seq_len;
292  for (i=0; i<seq_len; i++) rates[i] /= sum_rates; // LOOP_VECTORIZED
293  }
294 
295  free(transver);
296  free(minmut);
297  for (i=0; i<256; i++) free(freqi[i]);
298  }
299  }
300  free(sai_name);
301  }
302 
303  return error;
304 }
305 
307  for (size_t i = 0; i<seq_len; ++i) {
308  if (rates[i]>0.0000001) {
309  weights[i] *= (int)(2.0 / rates[i]); // before weights is in [0 .. 2] => resulting weights <= 10 million
310  }
311  }
312 }
313 
314 
315 
317  if (seq_len) {
318  double sum_rates[2] = { 0.0, 0.0 };
319  double sum_tt[2] = { 0.0, 0.0 };
320  long count[2] = { 0, 0 };
321 
322  for (unsigned j=0; j<seq_len; j++) {
323  if (weights[j]) {
324  fputc(".#"[is_helix[j]], stdout);
325  printf("%u: minmut %5i freqs %5i rates %5f tt %5f ",
326  j, mut_sum[j], freq_sum[j], rates[j], ttratio[j]);
327  for (int wf = 0; wf<256; wf++) {
328  if (frequency[wf]) printf("%c:%5f ", wf, frequency[wf][j]);
329  }
330  sum_rates[is_helix[j]] += rates[j];
331  sum_tt[is_helix[j]] += ttratio[j];
332  count[is_helix[j]]++;
333  printf("w: %i\n", weights[j]);
334  }
335  }
336  printf("Helical Rates %5f Non Hel. Rates %5f\n",
337  sum_rates[1]/count[1], sum_rates[0]/count[0]);
338  printf("Helical TT %5f Non Hel. TT %5f\n",
339  sum_tt[1]/count[1], sum_tt[0]/count[0]);
340  }
341 }
342 
343 static char *filter_columnstat_SAIs(GBDATA *gb_extended, ColumnStat *column_stat) {
344  // returns NULp for non-columnstat SAIs
345  char *result = NULp;
346  if (column_stat->has_valid_alignment()) {
347  GBDATA *gb_type = GB_search(gb_extended, column_stat->get_type_path(), GB_FIND);
348 
349  if (gb_type) {
350  const char *type = GB_read_char_pntr(gb_type);
351 
352  if (GBS_string_matches(type, "PV?:*", GB_MIND_CASE)) {
353  GBS_strstruct buf(100);
354 
355  buf.cat(GBT_get_name_or_description(gb_extended));
356  buf.cat(": <");
357  buf.cat(type);
358  buf.put('>');
359 
360  result = buf.release();
361  }
362  }
363  }
364  return result;
365 }
366 
368  GB_transaction ta(gb_main);
369  saisel = awt_create_SAI_selection_list(gb_main, aww, awar_name, makeSaiSelectionlistFilterCallback(filter_columnstat_SAIs, this));
370 }
371 
373  column_stat->create_sai_selection_list(aws);
374 }
375 
377  GB_transaction ta(column_stat->get_gb_main());
378  AW_window_simple *aws = new AW_window_simple;
379  aws->init(aw_root, "SELECT_CSP", "Select Column Statistic");
380  aws->load_xfig("awt/col_statistic.fig");
381 
382  aws->at("close");
383  aws->callback(AW_POPDOWN);
384  aws->create_button("CLOSE", "CLOSE", "C");
385 
386  aws->at("help"); aws->callback(makeHelpCallback("awt_csp.hlp"));
387  aws->create_button("HELP", "HELP", "H");
388 
389  aws->at("box");
390  COLSTAT_create_selection_list(aws, column_stat);
391 
392  aws->at("smooth");
393  aws->create_toggle_field(column_stat->get_awar_smooth());
394  aws->insert_toggle("Calculate each column (nearly) independently", "D", 0);
395  aws->insert_toggle("Smooth parameter estimates a little", "M", 1);
396  aws->insert_toggle("Smooth parameter estimates across columns", "S", 2);
397  aws->update_toggle_field();
398 
399  aws->at("helix");
400  aws->label("Use Helix Information (SAI 'HELIX')");
401  aws->create_toggle(column_stat->get_awar_enable_helix());
402  return aws;
403 }
const char * get_awar_smooth() const
Definition: ColumnStat.hxx:80
bool is_pairpos(size_t pos) const
Definition: BI_helix.hxx:117
string result
GB_TYPES type
AW_DB_selection * awt_create_SAI_selection_list(GBDATA *gb_main, AW_window *aws, const char *varname, const SaiSelectionlistFilterCallback &fcb)
char * GB_read_key(GBDATA *gbd)
Definition: arbdb.cxx:1652
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
const char * get_type_path() const
Definition: ColumnStat.hxx:82
GB_ERROR GB_append_exportedError(GB_ERROR error)
Definition: arb_msg.cxx:387
AW_window * COLSTAT_create_selection_window(AW_root *aw_root, ColumnStat *column_stat)
Definition: ColumnStat.cxx:376
void print()
Definition: ColumnStat.cxx:316
const char * get_awar_enable_helix() const
Definition: ColumnStat.hxx:81
long read_int() const
Definition: AW_awar.cxx:184
void COLSTAT_create_selection_list(AW_window *aws, ColumnStat *column_stat)
Definition: ColumnStat.cxx:372
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:203
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:833
static char * alignment_name
char * GBS_string_eval(const char *insource, const char *icommand)
Definition: admatch.cxx:699
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
char * release()
Definition: arb_strbuf.h:129
#define SRT_AWARCOLSTAT_ENABLE_HELIX
Definition: ColumnStat.cxx:29
void cat(const char *from)
Definition: arb_strbuf.h:199
#define SRT_AWARCOLSTAT_ALIGNMENT
Definition: ColumnStat.cxx:27
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:231
ColumnStat(GBDATA *gb_main_, AW_root *awr_, const char *awar_template, AW_awar *awar_used_alignment)
Definition: ColumnStat.cxx:44
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:177
unsigned int GB_UINT4
Definition: arbdb_base.h:37
void refresh_sai_selection_list()
Definition: ColumnStat.cxx:31
#define ga_assert(cond)
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:342
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
void create_sai_selection_list(AW_window *aww)
Definition: ColumnStat.cxx:367
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
static void error(const char *msg)
Definition: mkptypes.cxx:96
fputc('\n', stderr)
#define SRT_AWARCOLSTAT_NAME
Definition: ColumnStat.cxx:26
size_t get_length() const
Definition: AP_filter.hxx:83
__ATTR__USERESULT GB_ERROR calculate(AP_filter *filter)
Definition: ColumnStat.cxx:107
GB_UINT4 * GB_read_ints(GBDATA *gbd)
Definition: arbdb.cxx:1013
char * read_string() const
Definition: AW_awar.cxx:198
void forget()
Definition: ColumnStat.cxx:83
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
Definition: arbdb.h:86
bool has_valid_alignment() const
Definition: ColumnStat.hxx:78
size_t get_filtered_length() const
Definition: AP_filter.hxx:82
GBDATA * get_gb_main() const
Definition: ColumnStat.hxx:76
#define DIST_MIN_SEQ(seq_anz)
Definition: ColumnStat.hxx:38
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
static double ttratio
AW_awar * map(const char *awarn)
Definition: AW_awar.cxx:521
GB_ERROR is_invalid() const
Definition: AP_filter.hxx:123
static char * filter_columnstat_SAIs(GBDATA *gb_extended, ColumnStat *column_stat)
Definition: ColumnStat.cxx:343
void aw_message(const char *msg)
Definition: AW_status.cxx:1142
#define SRT_AWARCOLSTAT_SMOOTH
Definition: ColumnStat.cxx:28
#define NULp
Definition: cxxforward.h:116
void weight_by_inverseRates() const
Definition: ColumnStat.cxx:306
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:904
GBDATA * gb_main
Definition: adname.cxx:32
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
bool GBS_string_matches(const char *str, const char *expr, GB_CASE case_sens)
Definition: admatch.cxx:193
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:459
bool use_position(size_t pos) const
Definition: AP_filter.hxx:85
GB_ERROR init(GBDATA *gb_main)
Definition: BI_helix.cxx:327
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void put(char c)
Definition: arb_strbuf.h:174
void refresh()
Definition: AW_select.cxx:184
static void refresh_columnstat_selection(AW_root *, ColumnStat *column_stat)
Definition: ColumnStat.cxx:40