28 vector<string> alphabet(0);
29 alphabet.push_back(
"A");
30 alphabet.push_back(
"C");
31 alphabet.push_back(
"G");
32 alphabet.push_back(
"T");
35 if (!loader->has_error()) {
37 cma =
new Cma(alphabet, seqs->size());
44 Analyser::~Analyser() {
54 Cma* Analyser::getCma() {
64 AlignedSequenceLoader* Analyser::getLoader() {
76 GB_ERROR Analyser::saveSAI(
GBDATA *gb_main, vector<size_t> clusters,
double threshold) {
85 vector<char> clusts = normalizeClusters(clusters);
88 copy(clusts.begin(), clusts.end(), ostream_iterator<char> (ss,
""));
99 options <<
"CMA_CLUSTERING: [threshold: " << threshold <<
"]";
123 vector<char> Analyser::normalizeClusters(vector<size_t> clusters) {
126 map<unsigned int, char> rename_map;
130 for (vector<size_t>::iterator it = clusters.begin(); it != clusters.end(); ++it) {
131 if (rename_map.find(*it) == rename_map.end()) {
132 rename_map[*it] = classes++;
134 result.push_back(rename_map[*it]);
152 error = a.get_error();
155 Cma *cma = a.getCma();
157 cma->computeMutualInformationP(*(a.getLoader()->getSequences()));
159 list<MITuple> mituple = cma->compute_mituples(cma->getMIp());
162 <<
"The highest MI value was: " << mituple.begin()->MI
163 <<
" at position (" << mituple.begin()->pos1 <<
", "
164 << mituple.begin()->pos2 <<
")." << endl
165 <<
"(Note: pairs with MI-values down to the threshold will be joined in one cluster)" << endl;
170 <<
"Press Ctrl-d to quit or" << endl
171 <<
"choose a threshold value for the clustering process: ";
178 cout << endl <<
"quit" << endl;
182 threshold = strtod(input.c_str(),
NULp);
184 cout <<
"Building clusters with threshold = " << threshold << endl;
186 VectorXi cl = cma->computeClusters(mituple,
size_t(cma->getMIp().cols()), threshold);
187 vector<size_t> *clusters =
new vector<size_t> (0, 0);
188 AlignedSequenceLoader *l = a.getLoader();
191 for (vector<size_t>::const_iterator it = l->getPositionMap().begin(); it != l->getPositionMap().end(); ++it) {
193 clusters->push_back(0);
196 clusters->push_back(cl[j]);
201 GB_ERROR e = a.saveSAI(gb_main, *clusters, threshold);
205 cout <<
"Error while saving SAI: " << e << endl;
209 cout <<
"Saved results to SAI '" << Analyser::getSAIname() <<
"'" << endl
210 <<
"(To analyse the results, open the editor and visualise the clusters using the SAI annotations)" << endl;
213 if (!interactive)
break;
217 catch (
const char *err) {
225 <<
"arb_rnacma -- correlated mutation analysis" << endl
226 <<
" computes clusters of correlated positions" << endl
227 <<
"(C) 2010 The ARB-project" << endl
228 <<
"Written 2009/2010 by Breno Faria" << endl
230 <<
"arb_rnacma uses the eigen C++ library (http://eigen.tuxfamily.org/)" << endl
231 <<
"eigen is copyrighted by LGPL3" << endl
251 cout << endl <<
"Error in arb_rnacma: " << error <<
". terminating." << endl;
269 const char *db =
"TEST_nuc_short.arb";
270 const char *aliname =
"ali_16s";
271 const char *sainame =
"cma_clusters";
274 TEST_EXPECT_RESULT__NOERROREXPORTED(gb_main =
GB_open(db,
"rw"));
276 const int NO_OF_MARKED = 8;
281 TEST_EXPECT_NORESULT__NOERROREXPORTED(
GBT_find_SAI(gb_main, sainame));
289 const char *expected_clusters_data;
291 { 0.5,
"--------------------------0-----10-----0---------------------23--041---000--5-55-----000060----------0600005---55575-7----5000140--3----0--0--------------2-6------0--0000--000--------0000------0-00----------0------0-00000-0-" },
292 { 0.666,
"--------------------------0------1-----1----------------------2--13----456--7-88-----9::0;------------;004:8---888<7-<----8494-3---2----5--5----------------;----------16---199---------01---------------------9------:--::99---" },
293 { 0.833,
"----------------------------------------------------------------------------0--1-----2-------------------------111-0---------------------------------------------------------3---------------------------------3------------2---" },
294 { 1.0,
"-------------------------------------------------------------------------------0--------------------------------00-----------------------------------------------------------1---------------------------------1----------------" },
295 { 1.5,
"--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" },
317 if (!data[idx].expected_clusters_data)
break;
323 const int SOME_THRESHOLD = 0.5;
334 for (
int fail = 1; fail<=2; ++fail) {
GB_ERROR run_rnacma(GBDATA *gb_main, bool interactive, double threshold)
GBDATA * GB_open(const char *path, const char *opent)
GBDATA * GBT_first_marked_species(GBDATA *gb_main)
GB_ERROR GB_write_string(GBDATA *gbd, const char *s)
long GBT_mark_all(GBDATA *gb_main, int flag)
GB_ERROR GB_end_transaction(GBDATA *gbd, GB_ERROR error)
const char * GBS_global_string(const char *templat,...)
GBDATA * GB_get_father(GBDATA *gbd)
GB_ERROR GB_push_transaction(GBDATA *gbd)
GB_ERROR GB_delete(GBDATA *&source)
GBDATA * GBT_find_SAI(GBDATA *gb_main, const char *name)
NOT4PERL GBDATA * GBT_add_data(GBDATA *species, const char *ali_name, const char *key, GB_TYPES type) __ATTR__DEPRECATED_TODO("better use GBT_create_sequence_data()")
GB_ERROR GB_await_error()
GB_ERROR GBT_set_default_alignment(GBDATA *gb_main, const char *alignment_name)
#define TEST_REJECT_NULL(n)
static void error(const char *msg)
AlignedSequenceLoader(class GBDATA *gb_main)
vector< vector< string > > VecVecType
GBDATA * GBT_find_sequence(GBDATA *gb_species, const char *aliname)
long GBT_count_marked_species(GBDATA *gb_main)
static void copy(double **i, double **j)
GB_ERROR close(GB_ERROR error)
void GB_write_flag(GBDATA *gbd, long flag)
Cma(vector< string > &alph, int seq_number)
#define TEST_EXPECT_NO_ERROR(call)
int ARB_main(int, char *[])
#define TEST_EXPECT_ERROR_CONTAINS(call, part)
char * GBT_get_default_alignment(GBDATA *gb_main)
GBDATA * GBT_find_or_create_SAI(GBDATA *gb_main, const char *name)
GB_transaction ta(gb_var)
GB_CSTR GB_read_char_pntr(GBDATA *gbd)
#define TEST_EXPECT_EQUAL(expr, want)
void GB_close(GBDATA *gbd)