31 #define e4_assert(bed) arb_assert(bed)
62 ARB_strdup(
"HG HI HS EB ES TS H- G- I- T- E- B- S-"),
72 freenull(pfold_pairs[i]);
73 freenull(pfold_pair_chars[i]);
185 { 1.34, 0.00, 0.00, 0.00 },
186 { 0.00, 0.00, 0.00, 0.00 },
187 { 0.50, 0.00, 0.00, 1.39 },
188 { 0.00, 0.00, 1.03, 0.00 },
189 { 0.00, 1.13, 0.00, 0.00 },
190 { 1.42, 0.00, 0.00, 2.03 },
191 { 1.05, 1.05, 0.00, 0.00 },
192 { 0.00, 0.00, 1.21, 1.00 },
193 { 0.50, 0.00, 0.00, 0.00 },
194 { 1.02, 1.52, 0.00, 0.00 },
195 { 1.14, 1.24, 0.00, 0.00 },
196 { 1.09, 0.00, 0.00, 1.01 },
197 { 1.37, 1.00, 0.00, 0.00 },
198 { 1.07, 1.31, 0.00, 0.00 },
199 { 0.00, 0.00, 1.21, 1.36 },
200 { 0.00, 0.00, 0.00, 1.00 },
201 { 0.00, 1.13, 0.00, 0.00 },
202 { 1.02, 1.30, 0.00, 0.00 },
203 { 0.00, 1.40, 1.00, 0.00 },
204 { 1.00, 1.62, 0.00, 0.00 } };
249 { 142, 83, 66, 0.060, 0.076, 0.035, 0.058 },
250 { 98, 93, 95, 0.070, 0.106, 0.099, 0.085 },
251 { 101, 54, 146, 0.147, 0.110, 0.179, 0.081 },
252 { 67, 89, 156, 0.161, 0.083, 0.191, 0.091 },
253 { 70, 119, 119, 0.149, 0.050, 0.117, 0.128 },
254 { 151, 37, 74, 0.056, 0.060, 0.077, 0.064 },
255 { 111, 110, 98, 0.074, 0.098, 0.037, 0.098 },
256 { 57, 75, 156, 0.102, 0.085, 0.190, 0.152 },
257 { 100, 87, 95, 0.140, 0.047, 0.093, 0.054 },
258 { 108, 160, 47, 0.043, 0.034, 0.013, 0.056 },
259 { 121, 130, 59, 0.061, 0.025, 0.036, 0.070 },
260 { 116, 74, 101, 0.055, 0.115, 0.072, 0.095 },
261 { 145, 105, 60, 0.068, 0.082, 0.014, 0.055 },
262 { 113, 138, 60, 0.059, 0.041, 0.065, 0.065 },
263 { 57, 55, 152, 0.102, 0.301, 0.034, 0.068 },
264 { 77, 75, 143, 0.120, 0.139, 0.125, 0.106 },
265 { 83, 119, 96, 0.086, 0.108, 0.065, 0.079 },
266 { 108, 137, 96, 0.077, 0.013, 0.064, 0.167 },
267 { 69, 147, 114, 0.082, 0.065, 0.114, 0.125 },
268 { 106, 170, 50, 0.062, 0.048, 0.028, 0.053 } };
298 char2AA =
new int [256];
299 for (
int i = 0; i < 256; i++) {
302 for (
int i = 0; amino_acids[i]; i++) {
303 char2AA[(
unsigned char)amino_acids[i]] = i;
326 cout << endl <<
"Searching for nucleation sites:" << endl;
333 double sumOfFormVal = 0, sumOfBreakVal = 0;
337 for (
int i = 0; i < ((length + 1) - windowSize); i++) {
341 for (count = 0; count < windowSize; count++) {
343 while (pos < ((length + 1) - windowSize) &&
344 strchr(gap_chars, sequence[pos + count])) {
347 aa = char2AA[sequence[pos + count]];
356 if ((sumOfFormVal > (windowSize - 2)) && (sumOfBreakVal < 2)) {
357 for (
int j = i; j < (pos + count); j++) {
358 if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[
s];
361 if (aa == -1) i = pos + count;
362 sumOfFormVal = 0, sumOfBreakVal = 0;
367 cout << structure << endl;
388 cout << endl <<
"Extending nucleation sites:" << endl;
393 bool break_structure =
false;
394 int start = 0, end = 0;
400 for (
int indStruct = 0; indStruct <
length; indStruct++) {
403 while (indStruct < length &&
404 ((structure[indStruct] ==
' ') || strchr(gap_chars, sequence[indStruct]))
407 if (indStruct >= length)
break;
409 start = indStruct - 1;
410 while (indStruct < length &&
411 (structure[indStruct] !=
' ' || strchr(gap_chars, sequence[indStruct]))) {
419 while (start > 1 && strchr(gap_chars, sequence[start])) {
423 if (start >= 0) break_structure = (char2AA[sequence[
start]] == -1);
424 while (!break_structure && (start > 1) && (structure[
start] ==
' ')) {
426 break_structure = (sequence[
start] ==
'P');
428 if (break_structure)
break;
430 break_structure = strchr(structure_breaker[s], sequence[start]);
431 neighbour = start - 1;
432 while (neighbour > 0 && strchr(gap_chars, sequence[neighbour])) {
436 if (neighbour <= 0 || char2AA[sequence[neighbour]] == -1) {
441 strchr(structure_breaker[s], sequence[neighbour]) ||
442 strchr(structure_indifferent[s], sequence[neighbour]);
443 if (!break_structure) {
444 structure[
start] = structure_chars[
s];
450 while (end < (length - 2) && strchr(gap_chars, sequence[end])) {
454 if (end <= (length - 1)) break_structure = (char2AA[sequence[end]] == -1);
455 while (!break_structure && (end < (length - 2))) {
457 break_structure = (sequence[end] ==
'P');
458 if (s ==
BETA_SHEET) break_structure |= (sequence[end] ==
'E');
459 if (break_structure)
break;
461 break_structure = strchr(structure_breaker[s], sequence[end]);
463 while (neighbour < (length - 2) && strchr(gap_chars, sequence[neighbour])) {
467 if (neighbour >= (length - 1) || char2AA[sequence[neighbour]] == -1) {
473 strchr(structure_breaker[s], sequence[neighbour]) ||
474 strchr(structure_indifferent[s], sequence[neighbour]);
475 if (!break_structure) {
476 structure[end] = structure_chars[
s];
485 cout << structure << endl;
506 cout << endl <<
"Searching for beta-turns: " << endl;
511 const int windowSize = 4;
512 double P_a = 0, P_b = 0, P_turn = 0;
518 for (
int i = 0; i < ((length + 1) - windowSize); i++) {
520 for (count = 0; count < windowSize; count++) {
522 while (pos < ((length + 1) - windowSize) &&
523 strchr(gap_chars, sequence[pos + count])) {
526 aa = char2AA[sequence[pos + count]];
539 if ((p_t > 0.000075) && (P_turn > 100) && (P_turn > P_a) && (P_turn > P_b)) {
540 for (
int j = i; j < (pos + count); j++) {
541 if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[
BETA_TURN];
545 if (aa == -1) i = pos + count;
546 p_t = 1, P_a = 0, P_b = 0, P_turn = 0;
551 cout << structure << endl;
575 cout << endl <<
"Resolving overlaps: " << endl;
587 for (
int pos = 0; pos <
length; pos++) {
606 for (
int i = start; i < end; i++) {
608 while (i < end && strchr(gap_chars, sequence[i])) {
611 int aa = char2AA[sequence[i]];
622 for (
int i = start; i < end; i++) {
637 else if (structures[
BETA_SHEET][pos] !=
' ') {
645 cout << structures[summary] << endl;
670 cout << endl <<
"Predicting secondary structure for sequence:" << endl << sequence << endl;
673 e4_assert((
int)strlen((
const char *)sequence) == length);
702 size_t end_minus_start =
size_t(end-start);
704 size_t length = strlen((
const char *)structure_sai);
705 size_t match_end =
std::min(
std::min(end_minus_start, length), strlen((
const char *)structure_cmp));
707 enum { BEND = 3, NOSTRUCT = 4 };
708 char *struct_chars[] = {
724 for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
731 int struct_start =
start;
732 int struct_end =
start;
734 int current_struct = 4;
739 if (strlen(pair_chars_2) != 10) {
744 switch (match_method) {
748 for (count = 0; count < match_end; count++) {
751 int len = strlen(pairs[n_pt])-1;
752 char *p = pairs[n_pt];
753 for (
int j = 0; j < len; j += 3) {
754 if ((p[j] == structure_sai[count + start] && p[j+1] == structure_cmp[count + start]) ||
755 (p[j] == structure_cmp[count + start] && p[j+1] == structure_sai[count + start])) {
756 result_buffer[count] = *pair_chars[n_pt];
765 while (count <= end_minus_start) {
766 result_buffer[count] =
' ';
773 for (
size_t i = 0; i <= end_minus_start; i++) result_buffer[i] =
' ';
776 while (structure_sai[struct_start] !=
'\0' && structure_cmp[struct_start] !=
'\0' &&
777 strchr(gap_chars, structure_sai[struct_start]) &&
778 strchr(gap_chars, structure_cmp[struct_start])) {
781 if (structure_sai[struct_start] ==
'\0' || structure_cmp[struct_start] ==
'\0')
break;
784 for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
787 if (current_struct != BEND && current_struct != NOSTRUCT) {
789 while (struct_start >= 0) {
791 while (struct_start > 0 &&
792 strchr(gap_chars, structure_sai[struct_start]) &&
793 strchr(gap_chars, structure_cmp[struct_start])) {
796 aa = char2AA[structure_cmp[struct_start]];
797 if (struct_start == 0 && aa == -1) {
800 else if (strchr(struct_chars[current_struct], structure_sai[struct_start]) && aa != -1) {
812 struct_start =
start;
814 while (structure_sai[struct_start] !=
'\0' && structure_cmp[struct_start] !=
'\0' &&
815 strchr(gap_chars, structure_sai[struct_start]) &&
816 strchr(gap_chars, structure_cmp[struct_start])) {
819 if (structure_sai[struct_start] ==
'\0' || structure_cmp[struct_start] ==
'\0')
break;
820 struct_end = struct_start;
821 while (struct_end < end) {
822 aa = char2AA[structure_cmp[struct_end]];
823 if (current_struct == NOSTRUCT) {
827 result_buffer[struct_end -
start] = pair_chars_2[0];
830 else if (current_struct == BEND) {
831 result_buffer[struct_end -
start] = pair_chars_2[9];
835 while (structure_sai[struct_end] !=
'\0') {
837 while (strchr(gap_chars, structure_sai[struct_end]) &&
838 strchr(gap_chars, structure_cmp[struct_end]) &&
839 structure_sai[struct_end] !=
'\0' && structure_cmp[struct_end] !=
'\0') {
842 aa = char2AA[structure_cmp[struct_end]];
843 if (structure_sai[struct_end] !=
'\0' && structure_cmp[struct_end] !=
'\0' &&
844 strchr(struct_chars[current_struct], structure_sai[struct_end]) && aa != -1) {
859 #if 0 // code w/o effect
864 if (prob_normalized >= 0 && prob_normalized <= 9) {
865 prob_symbol = pair_chars_2[prob_normalized];
872 if (structure_sai[struct_end] ==
'\0' || structure_cmp[struct_end] ==
'\0') {
878 struct_start = struct_end;
879 for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
889 for (
int i = 0; i < 4 && !
error; i++) {
890 structures[i] =
new char [length + 1];
891 if (!structures[i]) {
892 error =
"Out of memory";
895 for (
size_t j = 0; j <
length; j++) {
896 structures[i][j] =
' ';
898 structures[i][
length] =
'\0';
903 for (count = 0; count < match_end; count++) {
905 if (!strchr(gap_chars, structure_sai[count + start]) && strchr(gap_chars, structure_cmp[count + start])) {
907 }
else if (strchr(gap_chars, structure_sai[count + start]) ||
916 int len = strlen(pairs[n_pt])-1;
917 char *p = pairs[n_pt];
918 for (
int n_struct = 0; n_struct < 3; n_struct++) {
919 for (
int j = 0; j < len; j += 3) {
920 if ((p[j] == structures[n_struct][count + start] && p[j+1] == structure_sai[count + start]) ||
921 (p[j] == structure_sai[count + start] && p[j+1] == structures[n_struct][count + start])) {
922 result_buffer[count] = *pair_chars[n_pt];
933 while (count <= end_minus_start) {
934 result_buffer[count] =
' ';
939 for (
int i = 0; i < 4; i++) {
941 delete structures[i];
942 structures[i] =
NULp;
955 for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
961 for (
size_t i = 0; i <= end_minus_start; i++) result_buffer[i] =
' ';
974 freenull(*protstruct);
982 if (protstruct_len) *protstruct_len = (
long)strlen(*protstruct);
985 if (protstruct_len) protstruct_len =
NULp;
987 error =
GBS_global_string(
"SAI \"%s\" does not exist.\nDisabled protein structure display!", SAI_name);
1032 if (strcmp(sai_name, selected_sai) != 0 && strstr(sai_name, sai_filter)) {
1053 for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
1054 const char *name = pfold_match_type_awars[i].
name;
1056 cdef.
add(awar, name);
1063 AW_window_simple *aws =
new AW_window_simple;
1064 aws->init(awr,
"PFOLD_PROPS",
"PROTEIN_MATCH_SETTINGS");
1068 aws->auto_space(5, 2);
1070 aws->create_button(
"CLOSE",
"CLOSE",
"C");
1074 aws->create_button(
"HELP",
"HELP");
1080 aws->label_length(27);
1085 aws->label(
"Show protein structure match?");
1086 aws->callback(*refreshCallback);
1091 aws->label_length(30);
1092 aws->label(
"Selected Protein Structure SAI");
1096 aws->label(
"-> Filter SAI names for");
1103 aws->label_length(12);
1104 aws->label(
"Match Method");
1106 for (
int i = 0;
const char *mm_aw = pfold_match_method_awars[i].
name; i++) {
1107 aws->callback(*refreshCallback);
1108 if (match_method == pfold_match_method_awars[i].value) {
1109 aws->insert_default_option(mm_aw,
"", match_method);
1112 aws->insert_option(mm_aw,
"", pfold_match_method_awars[i].value);
1115 aws->update_option_menu();
1120 aws->label_length(40);
1121 aws->label(
"Match Symbols (Range 0-100% in steps of 10%)");
1122 aws->callback(*refreshCallback);
1125 for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
1126 aws->label_length(12);
1128 aws->label(pfold_match_type_awars[i].name);
1129 aws->callback(*refreshCallback);
1130 aws->create_input_field(awar, 30);
1133 if (!i) aws->get_at_position(&ex, &ey);
1135 aws->callback(*refreshCallback);
1136 aws->create_input_field(awar, 3);
void insert_option(AW_label choice_label, const char *mnemonic, const char *var_value, const char *name_of_color=NULp)
Compare two protein secondary structures.
GBDATA * GBT_first_SAI(GBDATA *gb_main)
void add(const char *awar_name, const char *config_name)
AW_window * ED4_pfold_create_props_window(AW_root *awr, const WindowCallback *refreshCallback)
Creates the "Protein Match Settings" window.
static int * char2AA
Maps character to amino acid one letter code.
static name_value_pair pfold_match_method_awars[4]
Awars for the match method; binds the PFOLD_MATCH_METHOD to the corresponding name that is used to cr...
static double cf_parameters[20][4]
Former and breaker values for alpha-helices and beta-sheets (= strands).
#define PFOLD_AWAR_SAI_FILTER
Filter SAIs for given criteria (string); used in option menu for SAI selection.
char * ARB_strdup(const char *str)
void AWT_insert_config_manager(AW_window *aww, AW_default default_file_, const char *id, const StoreConfigCallback &store_cb, const RestoreConfigCallback &load_or_reset_cb, const char *macro_id, const AWT_predefined_config *predef)
const char * GBS_global_string(const char *templat,...)
#define cf_former(aa, strct)
Returns the former value of an amino acid depending on the given structure type.
#define PFOLD_AWAR_SYMBOL_TEMPLATE_2
Symbols for the match quality as used for match method SECSTRUCT_SEQUENCE.
static char * alignment_name
void AW_POPDOWN(AW_window *window)
char * pfold_pair_chars[PFOLD_PAIRS]
Symbols for the match quality (defined by PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECST...
Compare an amino acid sequence with a reference protein secondary structure.
name_value_pair pfold_match_type_awars[]
Awars for the match type; binds the PFOLD_MATCH_TYPE to the corresponding awar name.
static double max_breaker_value[3]
Maximum breaker value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (no breaker values...
void update_option_menu()
char * pfold_pairs[PFOLD_PAIRS]
Match pair definition (see PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECSTRUCT and SECSTR...
static void ED4_pfold_select_SAI_and_update_option_menu(AW_window *aww, AW_option_menu_struct *oms, bool set_sai)
Callback function to select the reference protein structure SAI and to update the SAI option menu...
static const char * structure_indifferent[2]
Amino acids that are indifferent for a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
static void ED4_pfold_find_turns(const unsigned char *sequence, char *structure, int length)
Predicts beta-turns from the given amino acid sequence.
static HelixNrInfo * start
#define PFOLD_AWAR_MATCH_METHOD
Selected method for computing the match quality (see PFOLD_MATCH_METHOD).
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
static double cf_parameters_norm[20][7]
Normalized former values for alpha-helices, beta-sheets (= strands) and beta-turns as well as beta-tu...
GB_ERROR GB_export_error(const char *error)
WindowCallback makeHelpCallback(const char *helpfile)
int ED4_pfold_round_sym(double d)
Symmetric arithmetic rounding of a double value to an integer value.
#define cf_breaker(aa, strct)
Returns the breaker value of an amino acid depending on the given structure type. ...
GB_ERROR ED4_pfold_set_SAI(char **protstruct, GBDATA *gb_main, const char *alignment_name, long *protstruct_len)
Sets the reference protein secondary structure SAI.
#define PFOLD_AWAR_SYMBOL_TEMPLATE
Symbols for the match quality as used for match methods SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PR...
GBDATA * get_gb_main() const
static double min_former_value[3]
Minimum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_norm).
static const char * structure_breaker[2]
Amino acids that break a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
static void error(const char *msg)
PFOLD_MATCH_METHOD
Defines the methods for match computation. For details refer to ED4_pfold_calculate_secstruct_match()...
#define PFOLD_AWAR_PAIR_TEMPLATE
Structure pairs that define the match quality (see pfold_pairs) as used for match methods SECSTRUCT_S...
char * read_string() const
const char * name
Name or description.
AW_awar * awar(const char *awar)
Defines a name-value pair (e.g. for awars, menu entries, etc.).
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
void clear_option_menu(AW_option_menu_struct *oms)
Compare a full prediction of the protein secondary structure from its amino acid sequence with a reference protein secondary structure.
void insert_default_option(AW_label choice_label, const char *mnemonic, const char *var_value, const char *name_of_color=NULp)
static struct pfold_mem_handler pfold_dealloc
GBDATA * GBT_next_SAI(GBDATA *gb_sai)
static char structure_chars[3]
Characters representing protein secondary structure.
#define PFOLD_AWAR_ENABLE
Enable structure match.
PFOLD_STRUCTURE
Protein secondary structure types.
static double max_former_value[3]
Maximum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_norm).
static void ED4_pfold_find_nucleation_sites(const unsigned char *sequence, char *structure, int length, const PFOLD_STRUCTURE s)
Finds nucleation sites that initiate the specified structure.
static void ED4_pfold_init_statics()
Initializes static variables.
const char * get_alignment_name() const
char * GB_read_string(GBDATA *gbd)
static const char * amino_acids
Specifies the characters used for amino acid one letter code.
void aw_message(const char *msg)
static void setup_pfold_config(AWT_config_definition &cdef)
static void ED4_pfold_resolve_overlaps(const unsigned char *sequence, char *structures[4], int length)
Resolves overlaps of predicted secondary structures and creates structure summary.
#define ED4_AWAR_GAP_CHARS
static void ED4_pfold_extend_nucleation_sites(const unsigned char *sequence, char *structure, int length, const PFOLD_STRUCTURE s)
Extends the found nucleation sites in both directions.
static GB_ERROR ED4_pfold_predict_structure(const unsigned char *sequence, char *structures[4], int length)
Predicts protein secondary structures from the amino acid sequence.
GB_transaction ta(gb_var)
void callback(const WindowCallback &cb)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
GB_ERROR ED4_pfold_calculate_secstruct_match(const unsigned char *structure_sai, const unsigned char *structure_cmp, const int start, const int end, char *result_buffer, PFOLD_MATCH_METHOD match_method)
Compares a protein secondary structure with a primary structure or another secondary structure...
GB_ERROR write_int(long aw_int)
#define PFOLD_AWAR_SELECTED_SAI
Selected reference protein secondary structure SAI (i.e. the SAI that is used for structure comparison).
Adds support for protein structure prediction, comparison of two protein secondary structures and of ...
GB_write_int const char s