17 inline int ALI_PROFILE::is_binding_marker(
char c) {
18 return c ==
'~' || c ==
'x';
24 char message_buffer[100];
55 pt_member = pt_fam_list->
first();
58 weight = 1 + d * number;
59 sprintf(message_buffer,
"%s (weight = %f, matches = %d)",
69 ali_warning(
"Sequence not found in Database (Sequence ignored)");
82 pt_member = pt_ext_list->
first();
87 sprintf(message_buffer,
"%s (weight = %f, matches = %d)",
97 ali_warning(
"Sequence not found in Database (Sequence ignored)");
111 float a[7], w[7], w_sum, sm[7][7];
112 float base_gap_weights[5], w_bg_sum;
121 float (*w_Del)[], (*percent)[];
125 l = (
unsigned long *)
CALLOC((
unsigned int) members,
sizeof(
long));
126 g = (
float *)
CALLOC((
unsigned int) members,
sizeof(float));
127 seq = (
unsigned char **)
CALLOC((
unsigned int) members,
sizeof(
char *));
128 seq_len = (
long *)
CALLOC((
unsigned int) members,
sizeof(
long));
132 family_member = family_list->
first();
136 g[0] = family_member->
weight;
138 sub_costs_maximum = 0.0;
141 family_member = family_list->
next();
144 g[i] = family_member->
weight;
147 ali_warning(
"Family members have different length");
153 for (i = 0; i < 5; i++)
154 for (j = 0; j < 5; j++)
158 for (i = 0; i < members; i++)
163 base_weights = (float (**) [4])
CALLOC((
unsigned int) prof_len,
sizeof(
float [4]));
164 sub_costs = (float (**) [6])
CALLOC((
unsigned int) prof_len,
sizeof(
float [6]));
165 binding_costs = (float (*) [5][5])
CALLOC((
unsigned int) 5,
sizeof(
float [5]));
166 lmin = (
unsigned long *)
CALLOC((
unsigned int) prof_len,
sizeof(
unsigned long));
167 lmax = (
unsigned long *)
CALLOC((
unsigned int) prof_len,
sizeof(
unsigned long));
168 gap_costs = (
float ***)
CALLOC((
unsigned int) prof_len,
sizeof(
float *));
169 gap_percents = (
float***)
CALLOC((
unsigned int) prof_len,
sizeof(
float *));
171 ali_out_of_memory_if(!binding_costs || !sub_costs || !lmin || !lmax || !gap_costs || !gap_percents || !base_weights);
175 for (i = 0; i < 5; i++)
176 for (j = 0; j < 5; j++) {
178 if ((*binding_costs)[i][j] > w_bind_maximum)
179 w_bind_maximum = (*binding_costs)[i][j];
184 for (p = 0; p < prof_len; p++) {
186 for (i = 0; i < 7; i++)
187 a[i] = w[i] = sm[5][i] = sm[i][5] = sm[6][i] = sm[i][6] = 0.0;
188 for (i = 0; i < 6; i++)
189 (*sub_costs)[p][i] = 0.0;
194 for (i = 0; i < members; i++) {
195 if (p <
size_t(seq_len[i])) {
197 w[seq[i][p]] += g[i];
211 for (i = 0; i < 4; i++)
212 (*base_weights)[p][i] = w[i] / w_sum;
214 for (i = 0; i < 4; i++)
215 (*base_weights)[p][i] = 0.25;
219 for (i = 0; i < 5; i++)
220 base_gap_weights[i] = w[i] / w_bg_sum;
222 for (i = 0; i < 5; i++)
223 base_gap_weights[i] = 0.2;
226 for (j = 0; j < 5; j++) {
227 for (i = 0; i < 4; i++) {
228 sm[5][j] += (*base_weights)[p][i] * sm[i][j];
229 sm[j][5] += (*base_weights)[p][i] * sm[j][i];
232 for (i = 0; i < 4; i++)
233 sm[5][5] += (*base_weights)[p][i] * sm[i][i];
236 for (j = 0; j < 6; j++)
237 for (i = 0; i < 5; i++) {
238 sm[6][j] += base_gap_weights[i] * sm[i][j];
239 sm[j][6] += base_gap_weights[i] * sm[j][i];
241 for (i = 0; i < 5; i++)
242 sm[6][6] += base_gap_weights[i] * sm[i][i];
245 for (i = 0; i < members; i++) {
246 if (p <
size_t(seq_len[i])) {
247 for (j = 0; j < 6; j++) {
248 (*sub_costs)[p][j] += g[i] * sm[seq[i][p]][j];
252 for (j = 0; j < 6; j++) {
257 for (j = 0; j < 6; j++) {
258 (*sub_costs)[p][j] /= members;
259 if ((*sub_costs)[p][j] > sub_costs_maximum)
260 sub_costs_maximum = (*sub_costs)[p][j];
266 for (i = 0; i < members; i++)
273 if (lmin[p] == p && lmax[p] == 0) {
274 lmin[p] = lmax[p] = p;
277 w_Del = (float (*) [])
CALLOC((
unsigned int) (lmax[p]-lmin[p]+2),
sizeof(
float));
278 percent = (float (*) [])
CALLOC((
unsigned int) (lmax[p]-lmin[p]+2),
sizeof(
float));
280 (*gap_costs)[p] = (
float *) w_Del;
281 (*gap_percents)[p] = (
float *) percent;
284 for (j = 0; j <= lmax[p] - lmin[p] + 1; j++) {
286 for (i = 0; i < members; i++) {
288 if (p <
size_t(seq_len[i])) {
289 if (l[i] == prof_len + 1 || l[i] >= j + lmin[p]) {
293 (*w_Del)[j] += g[i] * sm[seq[i][p]][4];
298 if (l[i] >= j + lmin[p] && l[i] < prof_len+1) {
306 (*w_Del)[j] /= members;
310 for (i = 0; i < members; i++) {
316 for (j = 0; j <= lmax[p] - lmin[p] + 1; j++) {
318 for (i = 0; i < members; i++) {
319 if (l[i] >= j + lmin[p] && l[i] < prof_len+1) {
320 (*percent)[j] += 1.0;
323 (*percent)[j] /= members;
335 int ALI_PROFILE::find_next_helix(
char h[],
unsigned long h_len,
337 unsigned long *helix_nr,
338 unsigned long *
start,
unsigned long *end)
343 for (i = pos; i < h_len && !isdigit(h[i]); i++) ;
344 if (i >= h_len)
return -1;
347 sscanf(&h[i],
"%ld", helix_nr);
348 for (; i < h_len && isdigit(h[i]); i++) ;
349 for (; i < h_len && !isdigit(h[i]); i++) ;
355 int ALI_PROFILE::find_comp_helix(
char h[],
unsigned long h_len,
356 unsigned long pos,
unsigned long helix_nr,
357 unsigned long *start,
unsigned long *end)
364 for (; i < h_len && !isdigit(h[i]); i++) ;
365 if (i >= h_len)
return -1;
367 sscanf(&h[i],
"%ld", &nr);
368 for (; i < h_len && isdigit(h[i]); i++) ;
369 }
while (helix_nr != nr);
371 for (; i < h_len && !isdigit(h[i]); i++) ;
377 void ALI_PROFILE::delete_comp_helix(
char h1[],
char h2[],
unsigned long h_len,
378 unsigned long start,
unsigned long end)
382 for (i = start; i < h_len && i <= end; i++) {
391 const char *error_string;
400 helix_len = bi_helix.
size();
401 helix = (
long **)
CALLOC((
unsigned int) helix_len,
sizeof(
long));
402 helix_borders = (
char **)
CALLOC((
unsigned int) helix_len,
sizeof(
long));
406 for (i = 0; i < helix_len; i++) {
413 char message_buffer[100];
421 initialize_helix(context);
423 family_list = find_family(seq, context);
425 ali_error(
"Family not found (maybe incompatible PT and DB Servers)");
428 calculate_costs(family_list, context);
433 sprintf(message_buffer,
"Multi gap factor = %f", multi_gap_factor);
435 sprintf(message_buffer,
"Maximal substitution costs = %f", sub_costs_maximum);
437 sprintf(message_buffer,
"Normal insert costs = %f", insert_cost);
439 sprintf(message_buffer,
"Multiple insert costs = %f", multi_insert_cost);
443 family_member = family_list->
first();
445 delete family_member;
447 family_member = family_list->
next();
449 delete family_member;
462 for (i = 0; i < prof_len; i++)
464 free((*gap_costs)[i]);
468 for (i = 0; i < prof_len; i++)
469 if ((*gap_percents)[i])
470 free((*gap_percents)[i]);
475 delete norm_sequence;
486 switch ((*helix_borders)[pos]) {
489 for (i = (
long) pos + 1; i < (
long) prof_len; i++)
491 *last = (
unsigned long) i;
498 for (i = (
long) pos - 1; i >= 0; i--)
500 *first = (
unsigned long) i;
506 for (i = (
long) pos - 1; i >= 0; i--)
507 switch ((*helix_borders)[i]) {
511 *first = (
unsigned long) i;
512 for (i = (
long) pos + 1; i < (
long) prof_len; i++)
513 switch ((*helix_borders)[i]) {
516 printf(
"pos = %ld\n", pos);
519 *last = (
unsigned long) i;
531 switch ((*helix_borders)[pos]) {
537 for (i = (
long) pos - 1; i >= 0; i--)
538 switch ((*helix_borders)[i]) {
542 *first = (
unsigned long) i + 1;
543 for (i = (
long) pos + 1; i < (
long) prof_len; i++)
544 switch ((*helix_borders)[i]) {
546 *last = (
unsigned long) i - 1;
552 *last = prof_len - 1;
556 for (i = (
long) pos + 1; i < (
long) prof_len; i++)
557 switch ((*helix_borders)[i]) {
559 *last = (
unsigned long) i - 1;
565 *last = prof_len - 1;
580 seq = (
char *)
CALLOC((
unsigned int) prof_len + 1,
sizeof(
char));
583 for (p = 0; p < prof_len; p++) {
584 min = (*sub_costs)[p][0];
586 for (i = 1; i < 5; i++) {
587 if (min > (*sub_costs)[p][i]) {
588 min = (*sub_costs)[p][i];
592 if (min == (*sub_costs)[p][i])
605 seq[prof_len] =
'\0';
612 unsigned long pos_1_seq, pos_2_seq, last_seq_pos;
616 last_seq_pos = first_seq_pos + seq->
length() - 1;
617 for (pos_1_seq = first_seq_pos; pos_1_seq <= last_seq_pos; pos_1_seq++) {
618 pos_compl = (*helix)[pos_1_seq];
619 if (pos_compl >= 0) {
620 pos_2_seq = (
unsigned long) pos_compl;
621 if (pos_2_seq > pos_1_seq && pos_2_seq <= last_seq_pos)
622 costs +=
w_bind(pos_1_seq, seq->
base(pos_1_seq),
623 pos_2_seq, seq->
base(pos_2_seq));
625 if (pos_2_seq < first_seq_pos || pos_2_seq > last_seq_pos)
626 costs += w_bind_maximum;
float w_bind(unsigned long pos_1, unsigned char base_1, unsigned long pos_2, unsigned char base_2)
bool is_pairpos(size_t pos) const
int ali_is_real_base(char c)
void ali_out_of_memory_if(bool cond)
unsigned char base(unsigned long position)
ALI_PROFILE(ALI_SEQUENCE *sequence, ALI_PROFILE_CONTEXT *context)
int mark_family_extension_flag
char ali_number_to_base(unsigned char n)
int find_family(ALI_SEQUENCE *sequence, int find_type=1)
float w_binding(unsigned long first_position_of_sequence, ALI_SEQUENCE *sequence)
int is_in_helix(unsigned long pos, unsigned long *first, unsigned long *last)
size_t opposite_position(size_t pos) const
unsigned char * sequence()
#define ALI_PROFILE_BORDER_RIGHT
static HelixNrInfo * start
char * cheapest_sequence()
void * CALLOC(long i, long j)
static int weight[maxsites+1]
ALI_NORM_SEQUENCE * sequence()
ALI_SEQUENCE * get_sequence(char *name, int and_mark=0)
void ali_error(const char *message, const char *func)
ALI_TLIST< ali_pt_member * > * get_family_list()
float multi_insert_factor
ALI_TLIST< ali_pt_member * > * get_extension_list()
void ali_warning(const char *message)
void ali_message(const char *message)
void commit_transaction()
int is_outside_helix(unsigned long pos, unsigned long *first, unsigned long *last)
double binding_matrix[5][5]
GB_ERROR init(GBDATA *gb_main)
double substitute_matrix[5][5]
#define ALI_PROFILE_BORDER_LEFT