31 #define e4_assert(bed) arb_assert(bed) 
   62     ARB_strdup(
"HG HI HS EB ES TS H- G- I- T- E- B- S-"), 
 
   72             freenull(pfold_pairs[i]);
 
   73             freenull(pfold_pair_chars[i]);
 
  185     { 1.34,          0.00,           0.00,           0.00 },          
 
  186     { 0.00,          0.00,           0.00,           0.00 },          
 
  187     { 0.50,          0.00,           0.00,           1.39 },          
 
  188     { 0.00,          0.00,           1.03,           0.00 },          
 
  189     { 0.00,          1.13,           0.00,           0.00 },          
 
  190     { 1.42,          0.00,           0.00,           2.03 },          
 
  191     { 1.05,          1.05,           0.00,           0.00 },          
 
  192     { 0.00,          0.00,           1.21,           1.00 },          
 
  193     { 0.50,          0.00,           0.00,           0.00 },          
 
  194     { 1.02,          1.52,           0.00,           0.00 },          
 
  195     { 1.14,          1.24,           0.00,           0.00 },          
 
  196     { 1.09,          0.00,           0.00,           1.01 },          
 
  197     { 1.37,          1.00,           0.00,           0.00 },          
 
  198     { 1.07,          1.31,           0.00,           0.00 },          
 
  199     { 0.00,          0.00,           1.21,           1.36 },          
 
  200     { 0.00,          0.00,           0.00,           1.00 },          
 
  201     { 0.00,          1.13,           0.00,           0.00 },          
 
  202     { 1.02,          1.30,           0.00,           0.00 },          
 
  203     { 0.00,          1.40,           1.00,           0.00 },          
 
  204     { 1.00,          1.62,           0.00,           0.00 } };        
 
  249     { 142,   83,   66,     0.060,  0.076,  0.035,  0.058 },   
 
  250     {  98,   93,   95,     0.070,  0.106,  0.099,  0.085 },   
 
  251     { 101,   54,  146,     0.147,  0.110,  0.179,  0.081 },   
 
  252     {  67,   89,  156,     0.161,  0.083,  0.191,  0.091 },   
 
  253     {  70,  119,  119,     0.149,  0.050,  0.117,  0.128 },   
 
  254     { 151,   37,   74,     0.056,  0.060,  0.077,  0.064 },   
 
  255     { 111,  110,   98,     0.074,  0.098,  0.037,  0.098 },   
 
  256     {  57,   75,  156,     0.102,  0.085,  0.190,  0.152 },   
 
  257     { 100,   87,   95,     0.140,  0.047,  0.093,  0.054 },   
 
  258     { 108,  160,   47,     0.043,  0.034,  0.013,  0.056 },   
 
  259     { 121,  130,   59,     0.061,  0.025,  0.036,  0.070 },   
 
  260     { 116,   74,  101,     0.055,  0.115,  0.072,  0.095 },   
 
  261     { 145,  105,   60,     0.068,  0.082,  0.014,  0.055 },   
 
  262     { 113,  138,   60,     0.059,  0.041,  0.065,  0.065 },   
 
  263     {  57,   55,  152,     0.102,  0.301,  0.034,  0.068 },   
 
  264     {  77,   75,  143,     0.120,  0.139,  0.125,  0.106 },   
 
  265     {  83,  119,   96,     0.086,  0.108,  0.065,  0.079 },   
 
  266     { 108,  137,   96,     0.077,  0.013,  0.064,  0.167 },   
 
  267     {  69,  147,  114,     0.082,  0.065,  0.114,  0.125 },   
 
  268     { 106,  170,   50,     0.062,  0.048,  0.028,  0.053 } }; 
 
  298         char2AA = 
new int [256];
 
  299         for (
int i = 0; i < 256; i++) {
 
  302         for (
int i = 0; amino_acids[i]; i++) {
 
  303             char2AA[(
unsigned char)amino_acids[i]] = i;
 
  326     cout << endl << 
"Searching for nucleation sites:" << endl;
 
  333     double  sumOfFormVal = 0, sumOfBreakVal = 0; 
 
  337     for (
int i = 0; i < ((length + 1) - windowSize); i++) {
 
  341         for (count = 0; count < windowSize; count++) {
 
  343             while (pos < ((length + 1) - windowSize) &&
 
  344                     strchr(gap_chars, sequence[pos + count])) {
 
  347             aa = char2AA[sequence[pos + count]];
 
  356         if ((sumOfFormVal > (windowSize - 2)) && (sumOfBreakVal < 2)) {
 
  357             for (
int j = i; j < (pos + count); j++) {
 
  358                 if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[
s];
 
  361         if (aa == -1) i = pos + count; 
 
  362         sumOfFormVal = 0, sumOfBreakVal = 0;
 
  367     cout << structure << endl;
 
  388     cout << endl << 
"Extending nucleation sites:" << endl;
 
  393     bool break_structure = 
false;       
 
  394     int  start           = 0, end = 0;  
 
  400     for (
int indStruct = 0; indStruct < 
length; indStruct++) {
 
  403         while (indStruct < length &&
 
  404                ((structure[indStruct] == 
' ') || strchr(gap_chars, sequence[indStruct]))
 
  407         if (indStruct >= length) 
break;
 
  409         start = indStruct - 1;
 
  410         while (indStruct < length &&
 
  411                 (structure[indStruct] != 
' ' || strchr(gap_chars, sequence[indStruct]))) {
 
  419         while (start > 1 && strchr(gap_chars, sequence[start])) {
 
  423         if (start >= 0) break_structure = (char2AA[sequence[
start]] == -1);
 
  424         while (!break_structure && (start > 1) && (structure[
start] == 
' ')) {
 
  426             break_structure = (sequence[
start] == 
'P');
 
  428             if (break_structure) 
break;
 
  430             break_structure = strchr(structure_breaker[s], sequence[start]);
 
  431             neighbour = start - 1; 
 
  432             while (neighbour > 0 && strchr(gap_chars, sequence[neighbour])) {
 
  436             if (neighbour <= 0 || char2AA[sequence[neighbour]] == -1) {
 
  441                 strchr(structure_breaker[s], sequence[neighbour]) ||
 
  442                 strchr(structure_indifferent[s], sequence[neighbour]);
 
  443             if (!break_structure) {
 
  444                 structure[
start] = structure_chars[
s];
 
  450         while (end < (length - 2) && strchr(gap_chars, sequence[end])) {
 
  454         if (end <= (length - 1)) break_structure = (char2AA[sequence[end]] == -1);
 
  455         while (!break_structure && (end < (length - 2))) {
 
  457             break_structure = (sequence[end] == 
'P');
 
  458             if (s == 
BETA_SHEET) break_structure |= (sequence[end] == 
'E');
 
  459             if (break_structure) 
break;
 
  461             break_structure = strchr(structure_breaker[s], sequence[end]);
 
  463             while (neighbour < (length - 2) && strchr(gap_chars, sequence[neighbour])) {
 
  467             if (neighbour >= (length - 1) || char2AA[sequence[neighbour]] == -1) {
 
  473                 strchr(structure_breaker[s], sequence[neighbour]) ||
 
  474                 strchr(structure_indifferent[s], sequence[neighbour]);
 
  475             if (!break_structure) {
 
  476                 structure[end] = structure_chars[
s];
 
  485     cout << structure << endl;
 
  506     cout << endl << 
"Searching for beta-turns: " << endl;
 
  511     const int windowSize = 4; 
 
  512     double P_a = 0, P_b = 0, P_turn = 0; 
 
  518     for (
int i = 0; i < ((length + 1) - windowSize); i++) {
 
  520         for (count = 0; count < windowSize; count++) {
 
  522             while (pos < ((length + 1) - windowSize) &&
 
  523                     strchr(gap_chars, sequence[pos + count])) {
 
  526             aa = char2AA[sequence[pos + count]];
 
  539             if ((p_t > 0.000075) && (P_turn > 100) && (P_turn > P_a) && (P_turn > P_b)) {
 
  540                 for (
int j = i; j < (pos + count); j++) {
 
  541                     if (char2AA[sequence[j]] != -1) structure[j] = structure_chars[
BETA_TURN];
 
  545         if (aa == -1) i = pos + count; 
 
  546         p_t = 1, P_a = 0, P_b = 0, P_turn = 0;
 
  551     cout << structure << endl;
 
  575     cout << endl << 
"Resolving overlaps: " << endl;
 
  587     for (
int pos = 0; pos < 
length; pos++) {
 
  606             for (
int i = start; i < end; i++) {
 
  608                 while (i < end && strchr(gap_chars, sequence[i])) {
 
  611                 int aa = char2AA[sequence[i]];
 
  622             for (
int i = start; i < end; i++) {
 
  637             else if (structures[
BETA_SHEET][pos] != 
' ') {
 
  645     cout << structures[summary] << endl;
 
  670     cout << endl << 
"Predicting secondary structure for sequence:" << endl << sequence << endl;
 
  673     e4_assert((
int)strlen((
const char *)sequence) == length);
 
  702     size_t end_minus_start = 
size_t(end-start); 
 
  704     size_t length    = strlen((
const char *)structure_sai);
 
  705     size_t match_end = 
std::min(
std::min(end_minus_start, length), strlen((
const char *)structure_cmp));
 
  707     enum { BEND = 3, NOSTRUCT = 4 };
 
  708     char *struct_chars[] = {
 
  724     for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
 
  731     int    struct_start   = 
start;
 
  732     int    struct_end     = 
start;
 
  734     int    current_struct = 4;
 
  739     if (strlen(pair_chars_2) != 10) {
 
  744         switch (match_method) {
 
  748             for (count = 0; count < match_end; count++) {
 
  751                     int len = strlen(pairs[n_pt])-1;
 
  752                     char *p = pairs[n_pt];
 
  753                     for (
int j = 0; j < len; j += 3) {
 
  754                         if ((p[j] == structure_sai[count + start] && p[j+1] == structure_cmp[count + start]) ||
 
  755                              (p[j] == structure_cmp[count + start] && p[j+1] == structure_sai[count + start])) {
 
  756                             result_buffer[count] = *pair_chars[n_pt];
 
  765             while (count <= end_minus_start) { 
 
  766                 result_buffer[count] = 
' ';
 
  773             for (
size_t i = 0; i <= end_minus_start; i++) result_buffer[i] = 
' '; 
 
  776             while (structure_sai[struct_start] != 
'\0' && structure_cmp[struct_start] != 
'\0' &&
 
  777                     strchr(gap_chars, structure_sai[struct_start]) &&
 
  778                     strchr(gap_chars, structure_cmp[struct_start])) {
 
  781             if (structure_sai[struct_start] == 
'\0' || structure_cmp[struct_start] == 
'\0') 
break;
 
  784             for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
 
  787             if (current_struct != BEND && current_struct != NOSTRUCT) {
 
  789                 while (struct_start >= 0) {
 
  791                     while (struct_start > 0 &&
 
  792                             strchr(gap_chars, structure_sai[struct_start]) &&
 
  793                             strchr(gap_chars, structure_cmp[struct_start])) {
 
  796                     aa = char2AA[structure_cmp[struct_start]];
 
  797                     if (struct_start == 0 && aa == -1) { 
 
  800                     else if (strchr(struct_chars[current_struct], structure_sai[struct_start]) && aa != -1) {
 
  812             struct_start = 
start;
 
  814             while (structure_sai[struct_start] != 
'\0' && structure_cmp[struct_start] != 
'\0' &&
 
  815                     strchr(gap_chars, structure_sai[struct_start]) &&
 
  816                     strchr(gap_chars, structure_cmp[struct_start])) {
 
  819             if (structure_sai[struct_start] == 
'\0' || structure_cmp[struct_start] == 
'\0') 
break;
 
  820             struct_end = struct_start;
 
  821             while (struct_end < end) {
 
  822                 aa = char2AA[structure_cmp[struct_end]];
 
  823                 if (current_struct == NOSTRUCT) { 
 
  827                     result_buffer[struct_end - 
start] = pair_chars_2[0];
 
  830                 else if (current_struct == BEND) { 
 
  831                     result_buffer[struct_end - 
start] = pair_chars_2[9];
 
  835                     while (structure_sai[struct_end] != 
'\0') {
 
  837                         while (strchr(gap_chars, structure_sai[struct_end]) &&
 
  838                                 strchr(gap_chars, structure_cmp[struct_end]) &&
 
  839                                 structure_sai[struct_end] != 
'\0' && structure_cmp[struct_end] != 
'\0') {
 
  842                         aa = char2AA[structure_cmp[struct_end]];
 
  843                         if (structure_sai[struct_end] != 
'\0' && structure_cmp[struct_end] != 
'\0' &&
 
  844                              strchr(struct_chars[current_struct], structure_sai[struct_end]) && aa != -1) {
 
  859 #if 0 // code w/o effect 
  864                         if (prob_normalized >= 0 && prob_normalized <= 9) {
 
  865                             prob_symbol = pair_chars_2[prob_normalized];
 
  872                 if (structure_sai[struct_end] == 
'\0' || structure_cmp[struct_end] == 
'\0') {
 
  878                     struct_start = struct_end;
 
  879                     for (current_struct = 0; current_struct < 4 && !strchr(struct_chars[current_struct], structure_sai[struct_start]); current_struct++) {
 
  889             for (
int i = 0; i < 4 && !
error; i++) {
 
  890                 structures[i] = 
new char [length + 1];
 
  891                 if (!structures[i]) {
 
  892                     error = 
"Out of memory";
 
  895                     for (
size_t j = 0; j < 
length; j++) { 
 
  896                         structures[i][j] = 
' ';
 
  898                     structures[i][
length] = 
'\0';
 
  903                 for (count = 0; count < match_end; count++) {
 
  905                     if (!strchr(gap_chars, structure_sai[count + start]) && strchr(gap_chars, structure_cmp[count + start])) {
 
  907                     } 
else if (strchr(gap_chars, structure_sai[count + start]) ||
 
  916                             int len = strlen(pairs[n_pt])-1;
 
  917                             char *p = pairs[n_pt];
 
  918                             for (
int n_struct = 0; n_struct < 3; n_struct++) {
 
  919                                 for (
int j = 0; j < len; j += 3) {
 
  920                                     if ((p[j] == structures[n_struct][count + start] && p[j+1] == structure_sai[count + start]) ||
 
  921                                          (p[j] == structure_sai[count + start] && p[j+1] == structures[n_struct][count + start])) {
 
  922                                         result_buffer[count] = *pair_chars[n_pt];
 
  933                 while (count <= end_minus_start) { 
 
  934                     result_buffer[count] = 
' ';
 
  939             for (
int i = 0; i < 4; i++) {
 
  941                     delete structures[i];
 
  942                     structures[i] = 
NULp;
 
  955     for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
 
  961         for (
size_t i = 0; i <= end_minus_start; i++) result_buffer[i] = 
' '; 
 
  974     freenull(*protstruct);
 
  982         if (protstruct_len) *protstruct_len = (
long)strlen(*protstruct);
 
  985         if (protstruct_len) protstruct_len = 
NULp;
 
  987             error = 
GBS_global_string(
"SAI \"%s\" does not exist.\nDisabled protein structure display!", SAI_name);
 
 1032         if (strcmp(sai_name, selected_sai) != 0 && strstr(sai_name, sai_filter)) {
 
 1053     for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
 
 1054         const char *name = pfold_match_type_awars[i].
name;
 
 1056         cdef.
add(awar, name);
 
 1063     AW_window_simple *aws = 
new AW_window_simple;
 
 1064     aws->init(awr, 
"PFOLD_PROPS", 
"PROTEIN_MATCH_SETTINGS");
 
 1068     aws->auto_space(5, 2);
 
 1070     aws->create_button(
"CLOSE", 
"CLOSE", 
"C");
 
 1074     aws->create_button(
"HELP", 
"HELP");
 
 1080     aws->label_length(27);
 
 1085     aws->label(
"Show protein structure match?");
 
 1086     aws->callback(*refreshCallback); 
 
 1091     aws->label_length(30);
 
 1092     aws->label(
"Selected Protein Structure SAI");
 
 1096     aws->label(
"-> Filter SAI names for");
 
 1103     aws->label_length(12);
 
 1104     aws->label(
"Match Method");
 
 1106     for (
int i = 0; 
const char *mm_aw = pfold_match_method_awars[i].
name; i++) {
 
 1107         aws->callback(*refreshCallback); 
 
 1108         if (match_method == pfold_match_method_awars[i].value) {
 
 1109             aws->insert_default_option(mm_aw, 
"", match_method);
 
 1112             aws->insert_option(mm_aw, 
"", pfold_match_method_awars[i].value);
 
 1115     aws->update_option_menu();
 
 1120     aws->label_length(40);
 
 1121     aws->label(
"Match Symbols (Range 0-100% in steps of 10%)");
 
 1122     aws->callback(*refreshCallback); 
 
 1125     for (
int i = 0; pfold_match_type_awars[i].
name; i++) {
 
 1126         aws->label_length(12);
 
 1128         aws->label(pfold_match_type_awars[i].name);
 
 1129         aws->callback(*refreshCallback); 
 
 1130         aws->create_input_field(awar, 30);
 
 1133         if (!i) aws->get_at_position(&ex, &ey);
 
 1135         aws->callback(*refreshCallback); 
 
 1136         aws->create_input_field(awar, 3);
 
void insert_option(AW_label choice_label, const char *mnemonic, const char *var_value, const char *name_of_color=NULp)
Compare two protein secondary structures. 
GBDATA * GBT_first_SAI(GBDATA *gb_main)
void add(const char *awar_name, const char *config_name)
AW_window * ED4_pfold_create_props_window(AW_root *awr, const WindowCallback *refreshCallback)
Creates the "Protein Match Settings" window. 
static int * char2AA
Maps character to amino acid one letter code. 
static name_value_pair pfold_match_method_awars[4]
Awars for the match method; binds the PFOLD_MATCH_METHOD to the corresponding name that is used to cr...
static double cf_parameters[20][4]
Former and breaker values for alpha-helices and beta-sheets (= strands). 
#define PFOLD_AWAR_SAI_FILTER
Filter SAIs for given criteria (string); used in option menu for SAI selection. 
char * ARB_strdup(const char *str)
void AWT_insert_config_manager(AW_window *aww, AW_default default_file_, const char *id, const StoreConfigCallback &store_cb, const RestoreConfigCallback &load_or_reset_cb, const char *macro_id, const AWT_predefined_config *predef)
const char * GBS_global_string(const char *templat,...)
#define cf_former(aa, strct)
Returns the former value of an amino acid depending on the given structure type. 
#define PFOLD_AWAR_SYMBOL_TEMPLATE_2
Symbols for the match quality as used for match method SECSTRUCT_SEQUENCE. 
static char * alignment_name
void AW_POPDOWN(AW_window *window)
char * pfold_pair_chars[PFOLD_PAIRS]
Symbols for the match quality (defined by PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECST...
Compare an amino acid sequence with a reference protein secondary structure. 
name_value_pair pfold_match_type_awars[]
Awars for the match type; binds the PFOLD_MATCH_TYPE to the corresponding awar name. 
static double max_breaker_value[3]
Maximum breaker value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (no breaker values...
void update_option_menu()
char * pfold_pairs[PFOLD_PAIRS]
Match pair definition (see PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECSTRUCT and SECSTR...
static void ED4_pfold_select_SAI_and_update_option_menu(AW_window *aww, AW_option_menu_struct *oms, bool set_sai)
Callback function to select the reference protein structure SAI and to update the SAI option menu...
static const char * structure_indifferent[2]
Amino acids that are indifferent for a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_p...
static void ED4_pfold_find_turns(const unsigned char *sequence, char *structure, int length)
Predicts beta-turns from the given amino acid sequence. 
static HelixNrInfo * start
#define PFOLD_AWAR_MATCH_METHOD
Selected method for computing the match quality (see PFOLD_MATCH_METHOD). 
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
static double cf_parameters_norm[20][7]
Normalized former values for alpha-helices, beta-sheets (= strands) and beta-turns as well as beta-tu...
GB_ERROR GB_export_error(const char *error)
WindowCallback makeHelpCallback(const char *helpfile)
int ED4_pfold_round_sym(double d)
Symmetric arithmetic rounding of a double value to an integer value. 
#define cf_breaker(aa, strct)
Returns the breaker value of an amino acid depending on the given structure type. ...
GB_ERROR ED4_pfold_set_SAI(char **protstruct, GBDATA *gb_main, const char *alignment_name, long *protstruct_len)
Sets the reference protein secondary structure SAI. 
#define PFOLD_AWAR_SYMBOL_TEMPLATE
Symbols for the match quality as used for match methods SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PR...
GBDATA * get_gb_main() const 
static double min_former_value[3]
Minimum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_n...
static const char * structure_breaker[2]
Amino acids that break a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nu...
static void error(const char *msg)
PFOLD_MATCH_METHOD
Defines the methods for match computation. For details refer to ED4_pfold_calculate_secstruct_match()...
#define PFOLD_AWAR_PAIR_TEMPLATE
Structure pairs that define the match quality (see pfold_pairs) as used for match methods SECSTRUCT_S...
char * read_string() const 
const char * name
Name or description. 
AW_awar * awar(const char *awar)
Defines a name-value pair (e.g. for awars, menu entries, etc.). 
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
void clear_option_menu(AW_option_menu_struct *oms)
Compare a full prediction of the protein secondary structure from its amino acid sequence with a refe...
void insert_default_option(AW_label choice_label, const char *mnemonic, const char *var_value, const char *name_of_color=NULp)
static struct pfold_mem_handler pfold_dealloc
GBDATA * GBT_next_SAI(GBDATA *gb_sai)
static char structure_chars[3]
Characters representing protein secondary structure. 
#define PFOLD_AWAR_ENABLE
Enable structure match. 
PFOLD_STRUCTURE
Protein secondary structure types. 
static double max_former_value[3]
Maximum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_n...
static void ED4_pfold_find_nucleation_sites(const unsigned char *sequence, char *structure, int length, const PFOLD_STRUCTURE s)
Finds nucleation sites that initiate the specified structure. 
static void ED4_pfold_init_statics()
Initializes static variables. 
const char * get_alignment_name() const 
char * GB_read_string(GBDATA *gbd)
static const char * amino_acids
Specifies the characters used for amino acid one letter code. 
void aw_message(const char *msg)
static void setup_pfold_config(AWT_config_definition &cdef)
static void ED4_pfold_resolve_overlaps(const unsigned char *sequence, char *structures[4], int length)
Resolves overlaps of predicted secondary structures and creates structure summary. 
#define ED4_AWAR_GAP_CHARS
static void ED4_pfold_extend_nucleation_sites(const unsigned char *sequence, char *structure, int length, const PFOLD_STRUCTURE s)
Extends the found nucleation sites in both directions. 
static GB_ERROR ED4_pfold_predict_structure(const unsigned char *sequence, char *structures[4], int length)
Predicts protein secondary structures from the amino acid sequence. 
GB_transaction ta(gb_var)
void callback(const WindowCallback &cb)
GB_CSTR GBT_get_name_or_description(GBDATA *gb_item)
GB_ERROR ED4_pfold_calculate_secstruct_match(const unsigned char *structure_sai, const unsigned char *structure_cmp, const int start, const int end, char *result_buffer, PFOLD_MATCH_METHOD match_method)
Compares a protein secondary structure with a primary structure or another secondary structure...
GB_ERROR write_int(long aw_int)
#define PFOLD_AWAR_SELECTED_SAI
Selected reference protein secondary structure SAI (i.e. the SAI that is used for structure compariso...
Adds support for protein structure prediction, comparison of two protein secondary structures and of ...
GB_write_int const char s