ARB
|
Implements the functions defined in ed4_protein_2nd_structure.hxx. More...
#include "ed4_protein_2nd_structure.hxx"
#include "ed4_class.hxx"
#include "ed4_awars.hxx"
#include <aw_awar.hxx>
#include <aw_msg.hxx>
#include <aw_root.hxx>
#include "arbdbt.h"
#include <iostream>
#include <awt_config_manager.hxx>
Go to the source code of this file.
Classes | |
struct | pfold_mem_handler |
Macros | |
#define | e4_assert(bed) arb_assert(bed) |
Functions | |
int | ED4_pfold_round_sym (double d) |
Symmetric arithmetic rounding of a double value to an integer value. More... | |
static void | ED4_pfold_init_statics () |
Initializes static variables. More... | |
static void | ED4_pfold_find_nucleation_sites (const unsigned char *sequence, char *structure, int length, const PFOLD_STRUCTURE s) |
Finds nucleation sites that initiate the specified structure. More... | |
static void | ED4_pfold_extend_nucleation_sites (const unsigned char *sequence, char *structure, int length, const PFOLD_STRUCTURE s) |
Extends the found nucleation sites in both directions. More... | |
static void | ED4_pfold_find_turns (const unsigned char *sequence, char *structure, int length) |
Predicts beta-turns from the given amino acid sequence. More... | |
static void | ED4_pfold_resolve_overlaps (const unsigned char *sequence, char *structures[4], int length) |
Resolves overlaps of predicted secondary structures and creates structure summary. More... | |
static GB_ERROR | ED4_pfold_predict_structure (const unsigned char *sequence, char *structures[4], int length) |
Predicts protein secondary structures from the amino acid sequence. More... | |
GB_ERROR | ED4_pfold_calculate_secstruct_match (const unsigned char *structure_sai, const unsigned char *structure_cmp, const int start, const int end, char *result_buffer, PFOLD_MATCH_METHOD match_method) |
Compares a protein secondary structure with a primary structure or another secondary structure. More... | |
GB_ERROR | ED4_pfold_set_SAI (char **protstruct, GBDATA *gb_main, const char *alignment_name, long *protstruct_len) |
Sets the reference protein secondary structure SAI. More... | |
static void | ED4_pfold_select_SAI_and_update_option_menu (AW_window *aww, AW_option_menu_struct *oms, bool set_sai) |
Callback function to select the reference protein structure SAI and to update the SAI option menu. More... | |
static void | setup_pfold_config (AWT_config_definition &cdef) |
AW_window * | ED4_pfold_create_props_window (AW_root *awr, const WindowCallback *refreshCallback) |
Creates the "Protein Match Settings" window. More... | |
Variables | |
name_value_pair | pfold_match_type_awars [] |
Awars for the match type; binds the PFOLD_MATCH_TYPE to the corresponding awar name. More... | |
char * | pfold_pair_chars [PFOLD_PAIRS] |
Symbols for the match quality (defined by PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match(). More... | |
char * | pfold_pairs [PFOLD_PAIRS] |
Match pair definition (see PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match(). More... | |
static struct pfold_mem_handler | pfold_dealloc |
static const char * | amino_acids = "ARDNCEQGHILKMFPSTWYV" |
Specifies the characters used for amino acid one letter code. More... | |
static int * | char2AA = NULp |
Maps character to amino acid one letter code. More... | |
static char | structure_chars [3] = { 'H', 'E', 'T' } |
Characters representing protein secondary structure. More... | |
static const char * | structure_breaker [2] |
Amino acids that break a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites(). More... | |
static const char * | structure_indifferent [2] |
Amino acids that are indifferent for a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites(). More... | |
static name_value_pair | pfold_match_method_awars [4] |
Awars for the match method; binds the PFOLD_MATCH_METHOD to the corresponding name that is used to create the menu in ED4_pfold_create_props_window(). More... | |
static double | max_former_value [3] = { 1.42, 1.62, 156 } |
Maximum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_norm). More... | |
static double | min_former_value [3] = { 0.0, 0.0, 47 } |
Minimum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_norm). More... | |
static double | max_breaker_value [3] = { 1.21, 2.03, 0.0 } |
Maximum breaker value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (no breaker values => 0). More... | |
static double | cf_parameters [20][4] |
Former and breaker values for alpha-helices and beta-sheets (= strands). More... | |
static double | cf_parameters_norm [20][7] |
Normalized former values for alpha-helices, beta-sheets (= strands) and beta-turns as well as beta-turn probabilities. More... | |
Implements the functions defined in ed4_protein_2nd_structure.hxx.
Definition in file ED4_protein_2nd_structure.cxx.
#define e4_assert | ( | bed | ) | arb_assert(bed) |
Definition at line 31 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_calculate_secstruct_match(), ED4_pfold_extend_nucleation_sites(), ED4_pfold_find_nucleation_sites(), ED4_pfold_find_turns(), ED4_pfold_predict_structure(), ED4_pfold_resolve_overlaps(), and ED4_pfold_select_SAI_and_update_option_menu().
|
inline |
Symmetric arithmetic rounding of a double value to an integer value.
[in] | d | Value to be rounded |
Rounds a double value to an integer value using symmetric arithmetic rounding, i.e. a number $x.y$ is rounded to $x$ if $y < 5$ and to $x + 1$ otherwise.
Definition at line 281 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_calculate_secstruct_match().
|
static |
Initializes static variables.
So far, this function only concerns char2AA which gets initialized here. See char2AA for details on the values. It is called by ED4_pfold_predict_structure() and ED4_pfold_calculate_secstruct_match().
Definition at line 295 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_calculate_secstruct_match(), and ED4_pfold_predict_structure().
|
static |
Finds nucleation sites that initiate the specified structure.
[in] | sequence | Amino acid sequence |
[out] | structure | Predicted secondary structure |
[in] | length | Size of sequence and structure |
[in] | s | Secondary structure type (either ALPHA_HELIX or BETA_SHEET) |
This function finds nucleation sites that initiate the specified structure (alpha-helix or beta-sheet). A window of a fixed size is moved over the sequence and former and breaker values (as defined by cf_parameters) for the amino acids in the window are summed up. If the former values in this region reach a certain value and the breaker values do not exceed a certain limit a nucleation site is formed, i.e. the region is assumed to be the corresponding secondary structure. The result is stored in structure.
Definition at line 324 of file ED4_protein_2nd_structure.cxx.
References ALPHA_HELIX, ED4_root::aw_root, AW_root::awar(), BETA_SHEET, cf_parameters, e4_assert, ED4_AWAR_GAP_CHARS, ED4_ROOT, AW_awar::read_string(), and s.
Referenced by ED4_pfold_predict_structure().
|
static |
Extends the found nucleation sites in both directions.
[in] | sequence | Amino acid sequence |
[out] | structure | Predicted secondary structure |
[in] | length | Size of sequence and structure |
[in] | s | Secondary structure type (either ALPHA_HELIX or BETA_SHEET) |
The function extends the nucleation sites found by ED4_pfold_find_nucleation_sites() in both directions. Extension continues until a certain amino acid constellation is found. The amino acid 'P' breaks an alpha-helix and 'P' as well as 'E' break a beta-sheet. Also, two successive breakers or one breaker followed by an indifferent amino acid (as defined by structure_breaker and structure_indifferent) break the structure. The result is stored in structure.
Definition at line 386 of file ED4_protein_2nd_structure.cxx.
References ALPHA_HELIX, ED4_root::aw_root, AW_root::awar(), BETA_SHEET, e4_assert, ED4_AWAR_GAP_CHARS, ED4_ROOT, length, AW_awar::read_string(), s, and start.
Referenced by ED4_pfold_predict_structure().
|
static |
Predicts beta-turns from the given amino acid sequence.
[in] | sequence | Amino acid sequence |
[out] | structure | Predicted secondary structure |
[in] | length | Size of sequence and structure |
A window of a fixed size is moved over the sequence and former values for alpha-helices, beta-sheets and beta-turns are summed up. In addition, beta-turn probabilities are multiplied. The values are specified in cf_parameters_norm. If the former values for beta-turn are greater than the ones for alpha-helix and beta-sheet and the turn probabilities exceed a certain limit the region is assumed to be a beta-turn. The result is stored in structure.
Definition at line 504 of file ED4_protein_2nd_structure.cxx.
References ED4_root::aw_root, AW_root::awar(), BETA_TURN, cf_parameters_norm, e4_assert, ED4_AWAR_GAP_CHARS, ED4_ROOT, and AW_awar::read_string().
Referenced by ED4_pfold_predict_structure().
|
static |
Resolves overlaps of predicted secondary structures and creates structure summary.
[in] | sequence | Amino acid sequence |
[in,out] | structures | Predicted secondary structures (ALPHA_HELIX, BETA_SHEET, BETA_TURN and STRUCTURE_SUMMARY, in this order) |
[in] | length | Size of sequence and structures[i] |
The function takes the given predicted structures (alpha-helix, beta-sheet and beta-turn) and searches for overlapping regions. If a beta-turn is found the structure summary is assumed to be a beta-turn. For overlapping alpha-helices and beta-sheets the former values are summed up for this region and the structure summary is assumed to be the structure type with the higher former value. The result is stored in structures[3] (i.e. structures[STRUCTURE_SUMMARY]).
Definition at line 573 of file ED4_protein_2nd_structure.cxx.
References ALPHA_HELIX, ED4_root::aw_root, AW_root::awar(), BETA_SHEET, BETA_TURN, cf_parameters, e4_assert, ED4_AWAR_GAP_CHARS, ED4_ROOT, length, AW_awar::read_string(), s, start, and STRUCTURE_SUMMARY.
Referenced by ED4_pfold_predict_structure().
|
static |
Predicts protein secondary structures from the amino acid sequence.
[in] | sequence | Amino acid sequence |
[out] | structures | Predicted secondary structures (ALPHA_HELIX, BETA_SHEET, BETA_TURN and STRUCTURE_SUMMARY, in this order) |
[in] | length | Size of sequence and structures[i] |
This function predicts the protein secondary structures from the amino acid sequence according to the Chou-Fasman algorithm. In a first step, nucleation sites for alpha-helices and beta-sheets are found using ED4_pfold_find_nucleation_sites(). In a next step, the found structures are extended obeying certain rules with ED4_pfold_extend_nucleation_sites(). Beta-turns are found with the function ED4_pfold_find_turns(). In a final step, overlapping regions are identified and resolved to create a structure summary with ED4_pfold_resolve_overlaps(). The results are written to structures[i] and can be accessed via the enums ALPHA_HELIX, BETA_SHEET, BETA_TURN and STRUCTURE_SUMMARY.
Definition at line 668 of file ED4_protein_2nd_structure.cxx.
References ALPHA_HELIX, BETA_SHEET, BETA_TURN, e4_assert, ED4_pfold_extend_nucleation_sites(), ED4_pfold_find_nucleation_sites(), ED4_pfold_find_turns(), ED4_pfold_init_statics(), ED4_pfold_resolve_overlaps(), error(), and NULp.
Referenced by ED4_pfold_calculate_secstruct_match().
GB_ERROR ED4_pfold_calculate_secstruct_match(const unsigned char *structure_sai,
                                             const unsigned char *structure_cmp,
                                             int                  start,
                                             int                  end,
                                             char                *result_buffer,
                                             PFOLD_MATCH_METHOD   match_method = SECSTRUCT_SEQUENCE)
Compares a protein secondary structure with a primary structure or another secondary structure.
[in] | structure_sai | Reference protein structure SAI (secondary structure) |
[in] | structure_cmp | Protein structure to compare (primary or secondary structure) |
[in] | start | The start of the match computation (visible area in editor) |
[in] | end | The end of the match computation (visible area in editor) |
[out] | result_buffer | Result buffer for match symbols |
[in] | match_method | Method for structure match computation |
This function compares a protein secondary structure with a primary structure (= amino acid sequence) or another secondary structure depending on match_method.
The match criteria (for SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PREDICT) as well as the match symbols (for all methods) can be adjusted by the user in the "Protein Match Settings" dialog. The result of the match computation (i.e. the match symbols) is written to the result buffer.
Definition at line 690 of file ED4_protein_2nd_structure.cxx.
References ALPHA_HELIX, ARB_strdup(), ED4_root::aw_root, AW_root::awar(), BETA_SHEET, BETA_TURN, cf_breaker, cf_former, e4_assert, ED4_AWAR_GAP_CHARS, ED4_pfold_init_statics(), ED4_pfold_predict_structure(), ED4_pfold_round_sym(), ED4_ROOT, error(), GB_export_error(), length, max_breaker_value, max_former_value, min, min_former_value, name_value_pair::name, NULp, PFOLD_AWAR_PAIR_TEMPLATE, PFOLD_AWAR_SYMBOL_TEMPLATE, PFOLD_AWAR_SYMBOL_TEMPLATE_2, PFOLD_MATCH_METHOD_COUNT, PFOLD_MATCH_TYPE_COUNT, AW_awar::read_string(), SECSTRUCT_SECSTRUCT, SECSTRUCT_SEQUENCE, SECSTRUCT_SEQUENCE_PREDICT, start, STRUCT_NO_MATCH, STRUCT_PERFECT_MATCH, and STRUCT_UNKNOWN.
Referenced by ED4_show_protein_match_on_device().
GB_ERROR ED4_pfold_set_SAI(char       **protstruct,
                           GBDATA      *gb_main,
                           const char  *alignment_name,
                           long        *protstruct_len = NULp)
Sets the reference protein secondary structure SAI.
[out] | protstruct | Pointer to reference protein secondary structure SAI |
[in] | gb_main | Main database |
[in] | alignment_name | Name of the alignment to search for |
[out] | protstruct_len | Length of reference protein secondary structure SAI |
The function searches the database gb_main for the currently selected SAI as defined by PFOLD_AWAR_SELECTED_SAI and assigns the data of the alignment alignment_name to protstruct. If protstruct_len is specified the length of the new reference SAI is stored. The function is used in the editor to initialize the reference protein secondary structure SAI and to update it if the selected SAI is changed in the "Protein Match Settings" dialog. For this purpose it should be called with &ED4_ROOT->protstruct and &ED4_ROOT->protstruct_len.
Definition at line 967 of file ED4_protein_2nd_structure.cxx.
References ED4_root::aw_root, AW_root::awar(), ED4_ROOT, error(), GB_read_string(), GBS_global_string(), GBT_find_SAI(), GBT_find_sequence(), long, NULp, PFOLD_AWAR_ENABLE, PFOLD_AWAR_SELECTED_SAI, AW_awar::read_int(), AW_awar::read_string(), ta, and AW_awar::write_int().
Referenced by ARB_main(), ED4_alignment_length_changed(), ED4_pfold_select_SAI_and_update_option_menu(), and ED4_manager::update_bases_and_rebuild_consensi().
|
static |
Callback function to select the reference protein structure SAI and to update the SAI option menu.
[in] | aww | The calling window |
[in,out] | oms | The SAI option menu |
[in] | set_sai | Specifies if SAI should be updated |
The function is called whenever the selected SAI or the SAI filter is changed in the "Protein Match Settings" dialog (see ED4_pfold_create_props_window()). It can be called with set_sai set to true to update the reference protein secondary structure SAI in the editor via ED4_pfold_set_SAI() and to update the selection in the SAI option menu. If set_sai is false, only the option menu is updated. This is necessary if only the SAI filter changed but not the selected SAI.
Definition at line 1011 of file ED4_protein_2nd_structure.cxx.
References aw_message(), ED4_root::aw_root, AW_root::awar(), AW_window::callback(), AW_window::clear_option_menu(), e4_assert, ED4_pfold_set_SAI(), ED4_ROOT, gb_main, GBT_first_SAI(), GBT_get_name_or_description(), GBT_next_SAI(), ED4_root::get_alignment_name(), ED4_root::get_gb_main(), AW_window::insert_default_option(), AW_window::insert_option(), PFOLD_AWAR_SAI_FILTER, PFOLD_AWAR_SELECTED_SAI, ED4_root::protstruct, ED4_root::protstruct_len, AW_awar::read_string(), ta, and AW_window::update_option_menu().
Referenced by ED4_pfold_create_props_window().
|
static |
Definition at line 1045 of file ED4_protein_2nd_structure.cxx.
References AWT_config_definition::add(), GBS_global_string(), name_value_pair::name, PFOLD_AWAR_ENABLE, PFOLD_AWAR_MATCH_METHOD, PFOLD_AWAR_PAIR_TEMPLATE, PFOLD_AWAR_SAI_FILTER, PFOLD_AWAR_SELECTED_SAI, PFOLD_AWAR_SYMBOL_TEMPLATE, and PFOLD_AWAR_SYMBOL_TEMPLATE_2.
Referenced by ED4_pfold_create_props_window().
Creates the "Protein Match Settings" window.
[in] | awr | Root window |
[in] | refreshCallback | Refresh callback (pointer to a WindowCallback) |
The "Protein Match Settings" window allows the user to configure the properties for protein match computation. These settings include turning the match computation on and off (bound to awar PFOLD_AWAR_ENABLE), selecting the reference protein secondary structure SAI (bound to awar PFOLD_AWAR_SELECTED_SAI), choosing the match method (bound to awar PFOLD_AWAR_MATCH_METHOD, see PFOLD_MATCH_METHOD) and the definition of the match pairs (bound to awar PFOLD_AWAR_PAIR_TEMPLATE and pfold_match_type_awars, see PFOLD_MATCH_TYPE and pfold_pairs) as well as the match symbols (bound to awar PFOLD_AWAR_SYMBOL_TEMPLATE and pfold_match_type_awars or PFOLD_AWAR_SYMBOL_TEMPLATE_2, see PFOLD_MATCH_TYPE and pfold_pair_chars or PFOLD_PAIR_CHARS_2). Via a filter (bound to PFOLD_AWAR_SAI_FILTER) the SAIs shown in the option menu can be narrowed down to a selection of SAIs whose names contain the specified string. The callback function ED4_pfold_select_SAI_and_update_option_menu() is bound to the SAI option menu and the SAI filter to update the selected SAI in the editor or the selection in the SAI option menu.
Definition at line 1062 of file ED4_protein_2nd_structure.cxx.
References AW_window::at(), AW_POPDOWN(), ED4_root::aw_root, AW_ROOT_DEFAULT, AW_root::awar(), AWT_insert_config_manager(), ED4_pfold_select_SAI_and_update_option_menu(), ED4_ROOT, makeHelpCallback(), name_value_pair::name, PFOLD_AWAR_ENABLE, PFOLD_AWAR_MATCH_METHOD, PFOLD_AWAR_PAIR_TEMPLATE, PFOLD_AWAR_SAI_FILTER, PFOLD_AWAR_SELECTED_SAI, PFOLD_AWAR_SYMBOL_TEMPLATE, PFOLD_AWAR_SYMBOL_TEMPLATE_2, AW_awar::read_int(), and setup_pfold_config().
Referenced by ED4_popup_helix_or_pfold_window().
name_value_pair pfold_match_type_awars[] |
Awars for the match type; binds the PFOLD_MATCH_TYPE to the corresponding awar name.
Definition at line 39 of file ED4_protein_2nd_structure.cxx.
Referenced by ed4_create_all_awars().
char* pfold_pair_chars[PFOLD_PAIRS] |
Symbols for the match quality (defined by PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
Definition at line 50 of file ED4_protein_2nd_structure.cxx.
Referenced by ed4_create_all_awars().
char* pfold_pairs[PFOLD_PAIRS] |
Match pair definition (see PFOLD_MATCH_TYPE) as used for match methods SECSTRUCT_SECSTRUCT and SECSTRUCT_SEQUENCE_PREDICT in ED4_pfold_calculate_secstruct_match().
Definition at line 60 of file ED4_protein_2nd_structure.cxx.
Referenced by ed4_create_all_awars().
|
static |
|
static |
Specifies the characters used for amino acid one letter code.
These are the characters that represent amino acids in one letter code. The order is important as the array initializes char2AA which is used to access array elements in the tables cf_parameters and cf_parameters_norm.
Definition at line 86 of file ED4_protein_2nd_structure.cxx.
Maps character to amino acid one letter code.
This array maps a character to an integer value. It is initialized with the function ED4_pfold_init_statics() which creates an array of the size 256 (for ISO/IEC 8859-1 character encoding). Characters that represent an amino acid get values from 0 to 19 (according to their position in amino_acids) and all others get the value -1. That way, it can be used to get parameters from the tables cf_parameters and cf_parameters_norm or to check if a certain character represents an amino acid.
Definition at line 98 of file ED4_protein_2nd_structure.cxx.
Characters representing protein secondary structure.
Defines the characters representing secondary structure as output by the function ED4_pfold_predict_structure(). According to common standards, these are:
H = alpha-helix,
E = beta-sheet,
T = beta-turn.
Definition at line 109 of file ED4_protein_2nd_structure.cxx.
|
static |
Amino acids that break a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
Definition at line 112 of file ED4_protein_2nd_structure.cxx.
|
static |
Amino acids that are indifferent for a certain structure (ALPHA_HELIX or BETA_SHEET) as used in ED4_pfold_extend_nucleation_sites().
Definition at line 118 of file ED4_protein_2nd_structure.cxx.
|
static |
Awars for the match method; binds the PFOLD_MATCH_METHOD to the corresponding name that is used to create the menu in ED4_pfold_create_props_window().
Definition at line 124 of file ED4_protein_2nd_structure.cxx.
|
static |
Maximum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_norm).
Definition at line 131 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_calculate_secstruct_match().
|
static |
Minimum former value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (in cf_parameters_norm).
Definition at line 132 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_calculate_secstruct_match().
|
static |
Maximum breaker value for alpha-helix, beta-sheet (in cf_parameters) and beta-turn (no breaker values => 0).
Definition at line 133 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_calculate_secstruct_match().
|
static |
Former and breaker values for alpha-helices and beta-sheets (= strands).
Initial value:
{
    // Helix Former  Strand Former  Helix Breaker  Strand Breaker    Amino
    //    Value          Value          Value          Value         Acid
    // -----------------------------------------------------------------------
    { 1.34, 0.00, 0.00, 0.00 },   // A
    { 0.00, 0.00, 0.00, 0.00 },   // R
    { 0.50, 0.00, 0.00, 1.39 },   // D
    { 0.00, 0.00, 1.03, 0.00 },   // N
    { 0.00, 1.13, 0.00, 0.00 },   // C
    { 1.42, 0.00, 0.00, 2.03 },   // E
    { 1.05, 1.05, 0.00, 0.00 },   // Q
    { 0.00, 0.00, 1.21, 1.00 },   // G
    { 0.50, 0.00, 0.00, 0.00 },   // H
    { 1.02, 1.52, 0.00, 0.00 },   // I
    { 1.14, 1.24, 0.00, 0.00 },   // L
    { 1.09, 0.00, 0.00, 1.01 },   // K
    { 1.37, 1.00, 0.00, 0.00 },   // M
    { 1.07, 1.31, 0.00, 0.00 },   // F
    { 0.00, 0.00, 1.21, 1.36 },   // P
    { 0.00, 0.00, 0.00, 1.00 },   // S
    { 0.00, 1.13, 0.00, 0.00 },   // T
    { 1.02, 1.30, 0.00, 0.00 },   // W
    { 0.00, 1.40, 1.00, 0.00 },   // Y
    { 1.00, 1.62, 0.00, 0.00 }};  // V
The former and breaker values are used to find alpha-helix and beta-sheet nucleation sites in ED4_pfold_find_nucleation_sites() and to resolve overlaps in ED4_pfold_resolve_overlaps(). Addressing the array with the enums ALPHA_HELIX or BETA_SHEET as second index gives the former values and addressing it with ALPHA_HELIX+2 or BETA_SHEET+2 gives the breaker values. The first index is for the amino acid. Use char2AA to convert an amino acid character to the corresponding index.
Definition at line 181 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_find_nucleation_sites(), and ED4_pfold_resolve_overlaps().
|
static |
Normalized former values for alpha-helices, beta-sheets (= strands) and beta-turns as well as beta-turn probabilities.
Initial value:
{
    // P(a)  P(b)  P(turn)  f(i)   f(i+1)  f(i+2)  f(i+3)     Amino Acid
    // --------------------------------------------------------------------
    { 142,  83,  66, 0.060, 0.076, 0.035, 0.058 },   // A
    {  98,  93,  95, 0.070, 0.106, 0.099, 0.085 },   // R
    { 101,  54, 146, 0.147, 0.110, 0.179, 0.081 },   // D
    {  67,  89, 156, 0.161, 0.083, 0.191, 0.091 },   // N
    {  70, 119, 119, 0.149, 0.050, 0.117, 0.128 },   // C
    { 151,  37,  74, 0.056, 0.060, 0.077, 0.064 },   // E
    { 111, 110,  98, 0.074, 0.098, 0.037, 0.098 },   // Q
    {  57,  75, 156, 0.102, 0.085, 0.190, 0.152 },   // G
    { 100,  87,  95, 0.140, 0.047, 0.093, 0.054 },   // H
    { 108, 160,  47, 0.043, 0.034, 0.013, 0.056 },   // I
    { 121, 130,  59, 0.061, 0.025, 0.036, 0.070 },   // L
    { 116,  74, 101, 0.055, 0.115, 0.072, 0.095 },   // K
    { 145, 105,  60, 0.068, 0.082, 0.014, 0.055 },   // M
    { 113, 138,  60, 0.059, 0.041, 0.065, 0.065 },   // F
    {  57,  55, 152, 0.102, 0.301, 0.034, 0.068 },   // P
    {  77,  75, 143, 0.120, 0.139, 0.125, 0.106 },   // S
    {  83, 119,  96, 0.086, 0.108, 0.065, 0.079 },   // T
    { 108, 137,  96, 0.077, 0.013, 0.064, 0.167 },   // W
    {  69, 147, 114, 0.082, 0.065, 0.114, 0.125 },   // Y
    { 106, 170,  50, 0.062, 0.048, 0.028, 0.053 }};  // V
The normalized former values are used to find beta-turns in an amino acid sequence in ED4_pfold_find_turns(). Addressing the array with the enums ALPHA_HELIX, BETA_SHEET or BETA_TURN as second index gives the former values and addressing it with BETA_TURN+i gives the turn probabilities. The first index is for the amino acid. Use char2AA to convert an amino acid character to the corresponding index.
Definition at line 246 of file ED4_protein_2nd_structure.cxx.
Referenced by ED4_pfold_find_turns().