35 return first.
MI > second.
MI;
46 size_t size = result.size();
47 for (
size_t i = 0; i < size; ++i) {
48 if (result[i] == cluster2) {
65 Cma::Cma(vector<string> & alph,
int seq_num) {
68 num_of_seqs = seq_num;
94 JointEntropy = computeJointEntropy(seq);
96 JointEntropy = JointEntropy + JointEntropy.adjoint() - JointEntropy.diagonal().asDiagonal();
98 entropy = JointEntropy.diagonal();
100 cout <<
"computing Mutual Information... ";
102 clock_t
start = clock();
104 VectorXd Ones = VectorXd::Ones(entropy.size());
105 MatrixXd Hx = (entropy * Ones.transpose()).transpose();
106 MatrixXd Hy = Hx.transpose();
108 MutualInformation = Hx + Hy - JointEntropy;
110 cout <<
"completed in " << ((double) clock() -
start) / CLOCKS_PER_SEC
113 return MutualInformation;
139 MatrixXd Cma::computeMutualInformationP(
VecVecType & seq) {
141 if (MutualInformation.size() == 0) {
142 computeMutualInformation(seq);
145 cout <<
"computing noiseless Mutual Information (MIp)... ";
147 clock_t start = clock();
152 for (
int i = 0; i < MutualInformation.rows(); ++i) {
153 for (
int j = 0; j < MutualInformation.cols(); ++j) {
154 if (MutualInformation(i, j) < 0.) {
155 MutualInformation(i, j) = 0.;
160 VectorXd mMIs = computeMeanMutualInformationVector();
161 double mMI = computeOveralMeanMutualInformation();
163 MutualInformationP = MutualInformation - ((mMIs * mMIs.transpose()) / mMI);
165 cout <<
"completed in " << ((double) clock() -
start) / CLOCKS_PER_SEC
168 return MutualInformationP;
179 list<MITuple> Cma::compute_mituples(MatrixXd mutualInformation) {
180 size_t size = mutualInformation.cols();
182 list<MITuple> mituples;
186 for (
size_t i = 0; i < size; ++i) {
187 for (
size_t j = i + 1; j < size; ++j) {
189 curr.
MI = mutualInformation(i, j);
192 mituples.push_back(curr);
216 VectorXi Cma::computeClusters(list<MITuple> mituples,
size_t size,
219 cout <<
"computing clusters of correlated positions... ";
222 VectorXi
result = VectorXi::Zero(size);
226 int rest =
int(size);
227 for (list<MITuple>::iterator it = mituples.begin(); it != mituples.end(); ++it) {
228 double result1 =
abs(result[it->pos1]);
229 double result2 =
abs(result[it->pos2]);
230 if (it->MI <= threshold || rest == 0) {
234 result[it->pos1] = cluster;
235 result[it->pos2] = cluster++;
239 result[it->pos2] = result[it->pos1];
242 else if (result1 < epsilon and result2 >
epsilon) {
243 result[it->pos1] = result[it->pos2];
246 else if (result1 > epsilon and result2 > epsilon && result1 - result2 > epsilon) {
247 unifyCluster(result[it->pos1], result[it->pos2], result);
253 cout <<
"done." << endl;
266 MatrixXd Cma::getJointEntropy() {
272 MatrixXd Cma::getMI() {
274 return MutualInformation;
278 MatrixXd Cma::getMIp() {
280 return MutualInformationP;
284 VectorXi Cma::getClusters() {
295 void Cma::initAlphabet(vector<string> alph) {
297 alphabet = vector<string> (alph);
300 for (vector<string>::iterator it = alph.begin(); it != alph.end(); ++it) {
301 alphabet_map[*it] = i;
304 for (vector<string>::iterator it = alph.begin(); it != alph.end(); ++it) {
305 for (vector<string>::iterator it2 = alph.begin(); it2 != alph.end(); ++it2) {
306 alphabet_map[*it + *it2] = i;
310 alphabet_map[
"total"] = i;
330 MatrixXd Cma::computeJointEntropy(
const VecVecType & seqs) {
333 return Cma::computeJointEntropy(hist);
349 cout <<
"computing joint entropy (this may take a while)... ";
351 clock_t start = clock();
353 int hist_size =
int(hist.size());
354 MatrixXd
result(hist_size, hist_size);
355 result.setZero(hist_size, hist_size);
356 for (
unsigned int i = 0; i < (
unsigned int) hist_size; ++i) {
359 if (hist_size > 30 and i % (hist_size / 30) == 0) {
360 cout <<
"(" << setw(6) << setiosflags(ios::fixed)
361 << setprecision(2) << i / float(hist_size) * 100 <<
"%)";
365 for (
unsigned int j = i; j < hist[i].size(); ++j) {
366 int total_i_j = hist[i][j][alphabet_map.at(
"total")];
367 int total_i_i = hist[i][i][alphabet_map.at(
"total")];
368 int total_j_j = hist[j][j][alphabet_map.at(
"total")];
369 int mismatches = total_i_i + total_j_j - 2 * total_i_j;
370 if (total_i_j != 0) {
371 double result_i_j = 0.;
372 for (map<int, int>::iterator it = hist[i][j].begin(); it
373 != hist[i][j].end(); ++it) {
375 result_i_j += (pair / total_i_j) * log2(pair / total_i_j);
378 result(i, j) = -result_i_j + double(mismatches) / (total_i_i
379 + total_j_j) * Cma::penalty;
395 if (hist_size > 30 and i % (hist_size / 30) == 0) {
396 cout <<
"\b\b\b\b\b\b\b\b\b";
400 cout <<
"completed in " << ((double) clock() -
start) / CLOCKS_PER_SEC
413 bool Cma::isValid(
string & key) {
415 map<string, int>::iterator it = alphabet_map.find(key);
416 return it != alphabet_map.end();
432 cout <<
"building histogram (this may take a while)... ";
434 clock_t start = clock();
436 if (alignedSequences.empty()) {
437 throw "no (marked?) sequences";
443 alignedSequences[0].size()));
447 for (VecVecType::const_iterator it = alignedSequences.begin(); it
448 != alignedSequences.end(); ++it) {
450 cout <<
"(" << setw(6) << setiosflags(ios::fixed) << setprecision(2)
451 << ii / float(num_of_seqs) * 100 <<
"%)";
455 for (
size_t i = 0; i < it->size(); ++i) {
456 string key1 = it->at(i);
460 for (
size_t j = i; j < it->size(); ++j) {
461 string key2 = it->at(j);
463 result[i][j][alphabet_map.at(key1 + key2)] += 1;
464 result[i][j][alphabet_map.at(
"total")] += 1;
469 cout <<
"\b\b\b\b\b\b\b\b\b";
473 cout <<
"completed in " << ((double) clock() -
start) / CLOCKS_PER_SEC
487 VectorXd Cma::computeMeanMutualInformationVector() {
489 return MutualInformation.rowwise().sum() / num_of_seqs;
501 double Cma::computeOveralMeanMutualInformation() {
503 return 2. * MutualInformation.sum() / num_of_seqs / num_of_seqs;
static bool compareMITuples(MITuple first, MITuple second)
static HelixNrInfo * start
static double getEntropy(double **E, double m, int l)
vector< vector< string > > VecVecType
vector< map< int, int > > MapVecType
void unifyCluster(int cluster1, int cluster2, VectorXi &result)
vector< MapVecType > MapMatrixType