ARB
ColumnStat.cxx
Go to the documentation of this file.
1 // =============================================================== //
2 // //
3 // File : ColumnStat.cxx //
4 // Purpose : //
5 // //
6 // Institute of Microbiology (Technical University Munich) //
7 // http://www.arb-home.de/ //
8 // //
9 // =============================================================== //
10 
11 #include "ColumnStat.hxx"
12 #include "awt_sel_boxes.hxx"
13 #include "ga_local.h"
14 #include <AP_filter.hxx>
15 #include <BI_helix.hxx>
16 
17 #include <aw_root.hxx>
18 #include <aw_awar.hxx>
19 #include <aw_msg.hxx>
20 #include <aw_select.hxx>
21 
22 #include <arbdbt.h>
23 #include <arb_strbuf.h>
24 #include <arb_defs.h>
25 
26 #define SRT_AWARCOLSTAT_NAME "/name=/name"
27 #define SRT_AWARCOLSTAT_ALIGNMENT "/name=/alignment"
28 #define SRT_AWARCOLSTAT_SMOOTH "/name=/smooth"
29 #define SRT_AWARCOLSTAT_ENABLE_HELIX "/name=/enable_helix"
30 
32  GB_transaction ta(gb_main);
33 
34  freeset(alignment_name, awr->awar(awar_alignment)->read_string());
35  freeset(type_path, GBS_string_eval(alignment_name, "*=*1/_TYPE"));
36 
37  if (saisel) saisel->refresh();
38 }
39 
40 static void refresh_columnstat_selection(AW_root*, ColumnStat *column_stat) {
41  column_stat->refresh_sai_selection_list();
42 }
43 
44 ColumnStat::ColumnStat(GBDATA *gb_main_, AW_root *awr_, const char *awar_template, AW_awar *awar_used_alignment) :
45  gb_main(gb_main_),
46  awr(awr_),
48  type_path(NULp),
49  saisel(NULp),
50  seq_len(0),
51  weights(NULp),
52  rates(NULp),
53  ttratio(NULp),
54  is_helix(NULp),
55  mut_sum(NULp),
56  freq_sum(NULp),
57  desc(NULp)
58 {
59  /* awar_template == ".../name"
60  * -> generated ".../alignment"
61  * ".../smooth"
62  * ".../enable_helix"
63  */
64 
65  memset(frequency, 0, ARRAY_ELEMS(frequency)*sizeof(*frequency));
66 
67  awar_name = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_NAME);
68  awar_alignment = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_ALIGNMENT);
69  awar_smooth = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_SMOOTH);
70  awar_enable_helix = GBS_string_eval(awar_template, SRT_AWARCOLSTAT_ENABLE_HELIX);
71 
72  ga_assert(strcmp(awar_name, awar_alignment) != 0); // awar_template must end with (or contain) "/name"
73 
74  awr->awar_string(awar_name, "");
75  awr->awar_string(awar_alignment)->map(awar_used_alignment);
76  awr->awar_int(awar_smooth);
77  awr->awar_int(awar_enable_helix, 1);
78 
79  awr->awar(awar_alignment)->add_callback(makeRootCallback(refresh_columnstat_selection, this));
80  refresh_sai_selection_list(); // same as refresh_columnstat_selection(this)
81 }
82 
84  delete [] weights; weights = NULp;
85  delete [] rates; rates = NULp;
86  delete [] ttratio; ttratio = NULp;
87  delete [] is_helix; is_helix = NULp;
88  delete [] mut_sum; mut_sum = NULp;
89  delete [] freq_sum; freq_sum = NULp;
90  delete desc; desc = NULp;
91 
92  for (int i=0; i<256; i++) {
93  delete [] frequency[i];
94  frequency[i] = NULp;
95  }
96  seq_len = 0; // mark invalid
97 }
98 
100  forget();
101  delete awar_name;
102  delete awar_alignment;
103  delete awar_smooth;
104  delete awar_enable_helix;
105 }
106 
108  forget(); // delete previously calculated stats
109 
110  GB_transaction ta(gb_main);
111  GB_ERROR error = filter ? filter->is_invalid() : NULp;
112  size_t alignment_length = 0;
113 
114  if (!error) {
115  long alignment_length_l = GBT_get_alignment_len(gb_main, alignment_name);
116  if (alignment_length_l <= 1) {
117  error = GBS_global_string("Unknown size for alignment '%s'", alignment_name);
118  }
119  else {
120  alignment_length = alignment_length_l;
121  if (filter && filter->get_length() != alignment_length) {
122  error = GBS_global_string("Incompatible filter (filter=%zu bp, alignment=%zu bp)",
123  filter->get_length(), alignment_length);
124  }
125  }
126  }
127  seq_len = 0;
128 
129  if (!error) {
130  char *sai_name = awr->awar(awar_name)->read_string();
131  GBDATA *gb_sai = GBT_find_SAI(gb_main, sai_name);
132 
133  if (!gb_sai) {
134  if (strcmp(sai_name, "") != 0) { // no error if SAI name is empty
135  error = GBS_global_string("Column statistic: no such SAI: '%s'", sai_name);
136  }
137  }
138 
139  if (gb_sai) {
140  GBDATA *gb_ali = NULp;
141  GBDATA *gb_freqs = NULp;
142  if (!error) {
143  gb_ali = GB_entry(gb_sai, alignment_name);
144  if (!gb_ali) error = GBS_global_string("SAI '%s' does not exist in alignment '%s'", sai_name, alignment_name);
145  }
146  if (!error) {
147  gb_freqs = GB_entry(gb_ali, "FREQUENCIES");
148  if (!gb_freqs) error = GBS_global_string("Column statistic '%s' is invalid (has no FREQUENCIES)", sai_name);
149  }
150 
151  if (!error) {
152  seq_len = filter ? filter->get_filtered_length() : alignment_length;
153 
154  delete [] weights; weights = new GB_UINT4[seq_len];
155  delete [] rates; rates = new float[seq_len];
156  delete [] ttratio; ttratio = new float[seq_len];
157  delete [] is_helix; is_helix = new bool[seq_len];
158  delete [] mut_sum; mut_sum = new GB_UINT4[seq_len];
159  delete [] freq_sum; freq_sum = new GB_UINT4[seq_len];
160 
161  delete desc; desc = NULp;
162 
163  size_t i;
164  size_t j;
165  for (i=0; i<256; i++) { delete frequency[i]; frequency[i]=NULp; }
166 
167  long use_helix = awr->awar(awar_enable_helix)->read_int();
168 
169  for (i=0; i<seq_len; i++) { // LOOP_VECTORIZED
170  is_helix[i] = false;
171  weights[i] = 1;
172  }
173 
174  if (!error && use_helix) { // calculate weights and helix filter
175  BI_helix helix;
176  error = helix.init(this->gb_main, alignment_name);
177  if (error) {
178  aw_message(error);
179  error = NULp;
180  goto no_helix;
181  }
182  error = NULp;
183  for (j=i=0; i<alignment_length; i++) {
184  if (!filter || filter->use_position(i)) {
185  if (helix.pairtype(i) == HELIX_PAIR) {
186  is_helix[j] = true;
187  weights[j] = 1;
188  }
189  else {
190  is_helix[j] = false;
191  weights[j] = 2;
192  }
193  j++;
194  }
195  }
196  }
197  no_helix :
198 
199  for (i=0; i<seq_len; i++) freq_sum[i] = 0;
200 
201  GBDATA *gb_freq;
202  GB_UINT4 *freqi[256];
203  for (i=0; i<256; i++) freqi[i] = NULp;
204 
205  int wf; // ********* read the frequency statistic
206  for (gb_freq = GB_child(gb_freqs); gb_freq; gb_freq = GB_nextChild(gb_freq)) {
207  char *key = GB_read_key(gb_freq);
208  if (key[0] == 'N' && key[1] && !key[2]) {
209  wf = key[1];
210  freqi[wf] = GB_read_ints(gb_freq);
211  }
212  free(key);
213  }
214 
215  GB_UINT4 *minmut = NULp;
216  GB_UINT4 *transver = NULp;
217  GBDATA *gb_minmut = GB_entry(gb_freqs, "TRANSITIONS");
218  GBDATA *gb_transver = GB_entry(gb_freqs, "TRANSVERSIONS");
219 
220  if (!gb_minmut) error = GBS_global_string("Column statistic '%s' is invalid (has no FREQUENCIES/TRANSITIONS)", sai_name);
221  if (!gb_transver) error = GBS_global_string("Column statistic '%s' is invalid (has no FREQUENCIES/TRANSVERSIONS)", sai_name);
222 
223  if (gb_minmut) {
224  minmut = GB_read_ints(gb_minmut);
225  if (!minmut) error = GBS_global_string("Column statistic '%s' is invalid (error reading FREQUENCIES/TRANSITIONS: %s)", sai_name, GB_await_error());
226  }
227  if (gb_transver) {
228  transver = GB_read_ints(gb_transver);
229  if (!transver) error = GBS_global_string("Column statistic '%s' is invalid (error reading FREQUENCIES/TRANSVERSIONS: %s)", sai_name, GB_await_error());
230  }
231 
232  if (!error) {
233  unsigned long max_freq_sum = 0;
234  for (wf = 0; wf<256; wf++) { // ********* calculate sum of mutations
235  if (!freqi[wf]) continue; // ********* calculate nbases for each col.
236  for (j=i=0; i<alignment_length; i++) {
237  if (filter && !filter->use_position(i)) continue;
238  freq_sum[j] += freqi[wf][i];
239  mut_sum[j] = minmut[i];
240  j++;
241  }
242  }
243  for (i=0; i<seq_len; i++)
244  if (freq_sum[i] > max_freq_sum)
245  max_freq_sum = freq_sum[i];
246  unsigned long min_freq_sum = DIST_MIN_SEQ(max_freq_sum);
247 
248  for (wf = 0; wf<256; wf++) {
249  if (!freqi[wf]) continue;
250  frequency[wf] = new float[seq_len];
251  for (j=i=0; i<alignment_length; i++) {
252  if (filter && !filter->use_position(i)) continue;
253  if (freq_sum[j] > min_freq_sum) {
254  frequency[wf][j] = freqi[wf][i]/(float)freq_sum[j];
255  }
256  else {
257  frequency[wf][j] = 0;
258  weights[j] = 0;
259  }
260  j++;
261  }
262  }
263 
264  for (j=i=0; i<alignment_length; i++) { // ******* calculate rates
265  if (filter && !filter->use_position(i)) continue;
266  if (!weights[j]) {
267  rates[j] = 1.0;
268  ttratio[j] = 0.5;
269  j++;
270  continue;
271  }
272  rates[j] = (mut_sum[j] / (float)freq_sum[j]);
273  if (transver[i] > 0) {
274  ttratio[j] = (minmut[i] - transver[i]) / (float)transver[i];
275  }
276  else {
277  ttratio[j] = 2.0;
278  }
279  if (ttratio[j] < 0.05) ttratio[j] = 0.05;
280  if (ttratio[j] > 5.0) ttratio[j] = 5.0;
281  j++;
282  }
283 
284  // ****** normalize rates
285 
286  double sum_rates = 0;
287  for (i=0; i<seq_len; i++) sum_rates += rates[i];
288  sum_rates /= seq_len;
289  for (i=0; i<seq_len; i++) rates[i] /= sum_rates; // LOOP_VECTORIZED
290  }
291 
292  free(transver);
293  free(minmut);
294  for (i=0; i<256; i++) free(freqi[i]);
295  }
296  }
297  free(sai_name);
298  }
299 
300  return error;
301 }
302 
304  for (size_t i = 0; i<seq_len; ++i) {
305  if (rates[i]>0.0000001) {
306  weights[i] *= (int)(2.0 / rates[i]); // before weights is in [0 .. 2] => resulting weights <= 10 million
307  }
308  }
309 }
310 
311 
312 
314  if (seq_len) {
315  double sum_rates[2] = { 0.0, 0.0 };
316  double sum_tt[2] = { 0.0, 0.0 };
317  long count[2] = { 0, 0 };
318 
319  for (unsigned j=0; j<seq_len; j++) {
320  if (weights[j]) {
321  fputc(".#"[is_helix[j]], stdout);
322  printf("%u: minmut %5i freqs %5i rates %5f tt %5f ",
323  j, mut_sum[j], freq_sum[j], rates[j], ttratio[j]);
324  for (int wf = 0; wf<256; wf++) {
325  if (frequency[wf]) printf("%c:%5f ", wf, frequency[wf][j]);
326  }
327  sum_rates[is_helix[j]] += rates[j];
328  sum_tt[is_helix[j]] += ttratio[j];
329  count[is_helix[j]]++;
330  printf("w: %i\n", weights[j]);
331  }
332  }
333  printf("Helical Rates %5f Non Hel. Rates %5f\n",
334  sum_rates[1]/count[1], sum_rates[0]/count[0]);
335  printf("Helical TT %5f Non Hel. TT %5f\n",
336  sum_tt[1]/count[1], sum_tt[0]/count[0]);
337  }
338 }
339 
340 static char *filter_columnstat_SAIs(GBDATA *gb_extended, ColumnStat *column_stat) {
341  // returns NULp for non-columnstat SAIs
342  char *result = NULp;
343  if (column_stat->has_valid_alignment()) {
344  GBDATA *gb_type = GB_search(gb_extended, column_stat->get_type_path(), GB_FIND);
345 
346  if (gb_type) {
347  const char *type = GB_read_char_pntr(gb_type);
348 
349  if (GBS_string_matches(type, "PV?:*", GB_MIND_CASE)) {
350  GBS_strstruct *strstruct = GBS_stropen(100);
351 
352  GBS_strcat(strstruct, GBT_get_name_or_description(gb_extended));
353  GBS_strcat(strstruct, ": <");
354  GBS_strcat(strstruct, type);
355  GBS_strcat(strstruct, ">");
356 
357  result = GBS_strclose(strstruct);
358  }
359  }
360  }
361  return result;
362 }
363 
365  GB_transaction ta(gb_main);
366  saisel = awt_create_SAI_selection_list(gb_main, aww, awar_name, true, makeSaiSelectionlistFilterCallback(filter_columnstat_SAIs, this));
367 }
368 
370  column_stat->create_sai_selection_list(aws);
371 }
372 
374  GB_transaction ta(column_stat->get_gb_main());
375  AW_window_simple *aws = new AW_window_simple;
376  aws->init(aw_root, "SELECT_CSP", "Select Column Statistic");
377  aws->load_xfig("awt/col_statistic.fig");
378 
379  aws->at("close");
380  aws->callback(AW_POPDOWN);
381  aws->create_button("CLOSE", "CLOSE", "C");
382 
383  aws->at("help"); aws->callback(makeHelpCallback("awt_csp.hlp"));
384  aws->create_button("HELP", "HELP", "H");
385 
386  aws->at("box");
387  COLSTAT_create_selection_list(aws, column_stat);
388 
389  aws->at("smooth");
390  aws->create_toggle_field(column_stat->get_awar_smooth());
391  aws->insert_toggle("Calculate each column (nearly) independently", "D", 0);
392  aws->insert_toggle("Smooth parameter estimates a little", "M", 1);
393  aws->insert_toggle("Smooth parameter estimates across columns", "S", 2);
394  aws->update_toggle_field();
395 
396  aws->at("helix");
397  aws->label("Use Helix Information (SAI 'HELIX')");
398  aws->create_toggle(column_stat->get_awar_enable_helix());
399  return aws;
400 }
const char * get_awar_smooth() const
Definition: ColumnStat.hxx:80
string result
GB_TYPES type
char * GB_read_key(GBDATA *gbd)
Definition: arbdb.cxx:1650
GBDATA * GB_child(GBDATA *father)
Definition: adquery.cxx:322
const char * get_type_path() const
Definition: ColumnStat.hxx:82
AW_window * COLSTAT_create_selection_window(AW_root *aw_root, ColumnStat *column_stat)
Definition: ColumnStat.cxx:373
void print()
Definition: ColumnStat.cxx:313
const char * get_awar_enable_helix() const
Definition: ColumnStat.hxx:81
long read_int() const
Definition: AW_awar.cxx:187
void COLSTAT_create_selection_list(AW_window *aws, ColumnStat *column_stat)
Definition: ColumnStat.cxx:369
const char * GBS_global_string(const char *templat,...)
Definition: arb_msg.cxx:204
long GBT_get_alignment_len(GBDATA *gb_main, const char *aliname)
Definition: adali.cxx:706
static char * alignment_name
char * GBS_string_eval(const char *insource, const char *icommand)
Definition: admatch.cxx:699
void AW_POPDOWN(AW_window *window)
Definition: AW_window.cxx:52
#define SRT_AWARCOLSTAT_ENABLE_HELIX
Definition: ColumnStat.cxx:29
#define SRT_AWARCOLSTAT_ALIGNMENT
Definition: ColumnStat.cxx:27
#define ARRAY_ELEMS(array)
Definition: arb_defs.h:19
AW_awar * add_callback(const RootCallback &cb)
Definition: AW_awar.cxx:234
ColumnStat(GBDATA *gb_main_, AW_root *awr_, const char *awar_template, AW_awar *awar_used_alignment)
Definition: ColumnStat.cxx:44
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
Definition: aditem.cxx:177
unsigned int GB_UINT4
Definition: arbdb_base.h:37
void refresh_sai_selection_list()
Definition: ColumnStat.cxx:31
GBS_strstruct * GBS_stropen(long init_size)
Definition: arb_strbuf.cxx:39
GB_ERROR GB_await_error()
Definition: arb_msg.cxx:353
WindowCallback makeHelpCallback(const char *helpfile)
Definition: aw_window.hxx:106
void create_sai_selection_list(AW_window *aww)
Definition: ColumnStat.cxx:364
static int weights[MAX_BASETYPES][MAX_BASETYPES]
Definition: ClustalV.cxx:71
void GBS_strcat(GBS_strstruct *strstr, const char *ptr)
Definition: arb_strbuf.cxx:108
static void error(const char *msg)
Definition: mkptypes.cxx:96
fputc('\n', stderr)
#define SRT_AWARCOLSTAT_NAME
Definition: ColumnStat.cxx:26
size_t get_length() const
Definition: AP_filter.hxx:83
__ATTR__USERESULT GB_ERROR calculate(AP_filter *filter)
Definition: ColumnStat.cxx:107
GB_UINT4 * GB_read_ints(GBDATA *gbd)
Definition: arbdb.cxx:1007
char * read_string() const
Definition: AW_awar.cxx:201
void forget()
Definition: ColumnStat.cxx:83
AW_awar * awar(const char *awar)
Definition: AW_root.cxx:554
#define ga_assert(cond)
Definition: ga_local.h:15
Definition: arbdb.h:86
bool has_valid_alignment() const
Definition: ColumnStat.hxx:78
size_t get_filtered_length() const
Definition: AP_filter.hxx:82
GBDATA * get_gb_main() const
Definition: ColumnStat.hxx:76
#define DIST_MIN_SEQ(seq_anz)
Definition: ColumnStat.hxx:38
AW_awar * awar_int(const char *var_name, long default_value=0, AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:580
BI_PAIR_TYPE pairtype(size_t pos) const
Definition: BI_helix.hxx:101
static double ttratio
AW_awar * map(const char *awarn)
Definition: AW_awar.cxx:523
char * GBS_strclose(GBS_strstruct *strstr)
Definition: arb_strbuf.cxx:69
GB_ERROR is_invalid() const
Definition: AP_filter.hxx:123
static char * filter_columnstat_SAIs(GBDATA *gb_extended, ColumnStat *column_stat)
Definition: ColumnStat.cxx:340
void aw_message(const char *msg)
Definition: AW_status.cxx:932
#define SRT_AWARCOLSTAT_SMOOTH
Definition: ColumnStat.cxx:28
#define NULp
Definition: cxxforward.h:97
AW_DB_selection * awt_create_SAI_selection_list(GBDATA *gb_main, AW_window *aws, const char *varname, bool fallback2default, const SaiSelectionlistFilterCallback &fcb)
void weight_by_inverseRates() const
Definition: ColumnStat.cxx:303
GBDATA * GB_nextChild(GBDATA *child)
Definition: adquery.cxx:326
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
Definition: arbdb.cxx:898
GBDATA * gb_main
Definition: adname.cxx:33
AW_awar * awar_string(const char *var_name, const char *default_value="", AW_default default_file=AW_ROOT_DEFAULT)
Definition: AW_root.cxx:570
bool GBS_string_matches(const char *str, const char *expr, GB_CASE case_sens)
Definition: admatch.cxx:193
GBDATA * GB_search(GBDATA *gbd, const char *fieldpath, GB_TYPES create)
Definition: adquery.cxx:531
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
Definition: aditem.cxx:441
bool use_position(size_t pos) const
Definition: AP_filter.hxx:85
GBDATA * GB_entry(GBDATA *father, const char *key)
Definition: adquery.cxx:334
void refresh()
Definition: AW_select.cxx:184
static void refresh_columnstat_selection(AW_root *, ColumnStat *column_stat)
Definition: ColumnStat.cxx:40