ARB
probe_collection.hxx
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // probe_collection.hxx
3 // ----------------------------------------------------------------------------
4 // Declarations of classes used to create and manage probe collections in Arb.
5 // ----------------------------------------------------------------------------
6 
7 #ifndef PROBE_COLLECTION_HXX
8 #define PROBE_COLLECTION_HXX
9 
10 #ifndef ARB_ASSERT_H
11 #include "arb_assert.h"
12 #endif
13 #ifndef ARBTOOLS_H
14 #include <arbtools.h>
15 #endif
16 #ifndef _GLIBCXX_STRING
17 #include <string>
18 #endif
19 #ifndef _GLIBCXX_ITERATOR
20 #include <iterator>
21 #endif
22 #ifndef _GLIBCXX_LIST
23 #include <list>
24 #endif
25 #ifndef _GLIBCXX_MAP
26 #include <map>
27 #endif
28 #ifndef _GLIBCXX_CERRNO
29 #include <cerrno>
30 #endif
31 #ifndef _GLIBCXX_CSTDLIB
32 #include <cstdlib>
33 #endif
34 #ifndef _GLIBCXX_CSTDIO
35 #include <cstdio>
36 #endif
37 #ifndef _UNISTD_H
38 #include <unistd.h>
39 #endif
40 #ifndef _SYS_STAT_H
41 #include <sys/stat.h>
42 #endif
43 
44 // ----------------------------------------------------------------------------
45 // struct ArbCachedString
46 // ----------------------------------------------------------------------------
47 // This structure is a descriptor for a string cached on file by ArbStringCache
48 // ----------------------------------------------------------------------------
50  fpos_t Pos; // stdio file position; NOTE(review): presumably where the cached string starts in the cache file - confirm in ArbStringCache
51  int Len; // NOTE(review): presumably the length of the cached string - confirm
52 };
53 
54 // ----------------------------------------------------------------------------
55 // class ArbStringCache
56 // ----------------------------------------------------------------------------
57 // This class is used as a disk based storage of strings (typically result
58 // output) to minimise memory usage. The result string is only used for
59 // display purposes (the Arb results list) so by not storing it in physical
60 // memory in the ArbMatchResult class we can reduce our memory consumption
61 // considerably.
62 // ----------------------------------------------------------------------------
63 class ArbStringCache : virtual Noncopyable { // derives (virtually) from Noncopyable, which is declared outside this file
64  std::string FileName; // NOTE(review): presumably the name of the on-disk cache file - confirm
65  FILE *WriteCacheFile; // stdio stream; by its name the one used for writing
66  mutable FILE *ReadCacheFile; // separate stdio stream for reading; mutable, so const members (e.g. loadString) may modify it
67  mutable char *ReadBuffer; // mutable for the same reason; see allocReadBuffer() (const)
68  mutable int ReadBufferLength; // NOTE(review): presumably the allocated size of ReadBuffer - confirm
69  bool IsOpen;
70 
71  void open();
72  void close();
73  bool allocReadBuffer(int nLength) const; // const, hence ReadBuffer/ReadBufferLength are mutable
74 
75 public:
// NOTE(review): listing line 76 is missing here (lost in extraction) - presumably the constructor; restore from the original header
77  virtual ~ArbStringCache();
78 
79  bool saveString(const char *pString, ArbCachedString& rCachedString); // rCachedString is a non-const reference, i.e. an output descriptor
80  bool saveString(const char *pString, int nLength, ArbCachedString& rCachedString); // overload with explicit length
81  bool loadString(std::string& rString, const ArbCachedString& rCachedString) const; // rString receives the result; the descriptor is only read
82  void flush();
83 };
84 
85 
86 // ----------------------------------------------------------------------------
87 // class ArbRefCount
88 // ----------------------------------------------------------------------------
89 
90 // WARNING: code uses manual reference counting (most likely not 100% correct)
91 // @@@ consider using SmartPtr<myclass> instead of deriving myclass from ArbRefCount
92 // see also: comments about errors supposed in std::string (grep for REFCOUNT_HACK)
93 
// Base class providing a manually managed (intrusive) reference counter.
//
// A freshly constructed object carries exactly one reference.
// - lock()   adds one reference
// - unlock() removes one reference and returns true if none is left
// - free()   removes one reference and deletes the object if it was the last
//
// The counter describes the identity of an object, not its value:
// neither copy construction nor assignment transfers the count.
class ArbRefCount {
    mutable int RefCount; // mutable: counting has to work through const pointers/references

public:
    ArbRefCount() : RefCount(1) {}

    // a copy is a new object with a single reference of its own
    ArbRefCount(const ArbRefCount&) : RefCount(1) {}

    // Assignment intentionally leaves the counter of the target untouched.
    // (The implicitly generated operator would copy the count of the source
    // and thereby corrupt the reference bookkeeping of the assigned object.)
    ArbRefCount& operator=(const ArbRefCount&) { return *this; }

    virtual ~ArbRefCount() {}

    // add one reference
    void lock() const {
        RefCount++;
    }

    // remove one reference; returns true if no reference is left afterwards
    bool unlock() const {
        RefCount--;
        return RefCount == 0;
    }

    // remove one reference and destroy the object if it was the last one
    // (only valid for objects allocated with new)
    void free() const {
        if (unlock()) {
            delete this;
        }
    }
};
115 
116 // ----------------------------------------------------------------------------
117 // class ArbProbeMatchWeighting
118 // ----------------------------------------------------------------------------
119 // This class defines the match weighting algorithm and parameterisation for
120 // the weighting applied to a match.
121 // ----------------------------------------------------------------------------
// NOTE(review): the class header (listing line 122) and listing lines 132, 135 and 138 are missing here (lost in extraction); restore from the original header
123  float PenaltyMatrix[4][4]; // 4x4 = 16 floats, same count as the aValues[16] parameters below
124  float Width;
125  float Bias;
126 
127  int toIndex(char nC) const; // NOTE(review): presumably maps a base character to a PenaltyMatrix index - confirm
128  double positionalWeight(int nPos, int nLength) const;
129  void copy(const ArbProbeMatchWeighting& rCopy);
130 
131 public:
133  ArbProbeMatchWeighting(const float aValues[16], float dWidth, float dBias);
134 
136  virtual ~ArbProbeMatchWeighting();
137 
139 
140  void initialise(const float aValues[16], float dWidth, float dBias);
141  bool initialise(const char *pCSValues, const char *pCSWidth, const char *pCSBias); // string based overload; returns bool (NOTE(review): presumably reports parse success - confirm)
142 
143  void setParameters(const float aValues[16], float dWidth, float dBias) { // plain forwarder to initialise()
144  initialise(aValues, dWidth, dBias);
145  }
146  void getParameters(float aValues[16], float& dWidth, float& dBias) const; // all three parameters are outputs (non-const array / references)
147 
148  void writeXML(FILE *hFile, const char *pPrefix) const;
149 
150  double matchWeight(const char *pSequenceA, const char *pSequenceB) const;
151  double matchWeightResult(const char *pProbeSequence, const char *pMatchResult) const;
152 };
153 
154 // ----------------------------------------------------------------------------
155 // class ArbProbe
156 // ----------------------------------------------------------------------------
157 // This class is an abstraction of a probe used in the probe matching with
158 // specificity feature in Arb.
159 // ----------------------------------------------------------------------------
160 class ArbProbe : public ArbRefCount { // reference counted via ArbRefCount (lock/unlock/free)
161  std::string Name;
// NOTE(review): listing line 162 is missing here (lost in extraction) - presumably the declaration of 'Sequence', which sequence() below returns
163  std::string DisplayName;
164 
165 public:
166  ArbProbe();
167  ArbProbe(const char *pName, const char *pSequence);
168  ArbProbe(const ArbProbe& rCopy);
169  virtual ~ArbProbe();
170 
171  void writeXML(FILE *hFile, const char *pPrefix) const;
172 
173  void nameAndSequence(const char* pName, const char* pSequence); // sets name and sequence together; there are no separate public setters in this class
174 
175  const std::string& name() const { return Name; } // accessors return references to the members (no copy)
176  const std::string& sequence() const { return Sequence; }
177  const std::string& displayName() const { return DisplayName; }
178 
179  int allowedMismatches() const;
180 };
181 
182 // ----------------------------------------------------------------------------
183 // List of ArbProbe* objects
184 // ----------------------------------------------------------------------------
185 typedef std::list<ArbProbe*> ArbProbePtrList; // stores raw pointers; NOTE(review): ownership is presumably handled via ArbRefCount (lock/free) - confirm
186 typedef ArbProbePtrList::iterator ArbProbePtrListIter;
187 typedef ArbProbePtrList::const_iterator ArbProbePtrListConstIter;
188 
189 // ----------------------------------------------------------------------------
190 // class ArbProbeCollection
191 // ----------------------------------------------------------------------------
192 // This class is a collection of probes to match against in the probe
193 // matching with specificity feature in Arb.
194 // ----------------------------------------------------------------------------
// NOTE(review): the class header (listing line 195) and listing lines 205, 207 and 210 are missing here (lost in extraction); restore from the original header
196  std::string Name;
197  ArbProbePtrList ProbeList; // list of raw ArbProbe pointers
198  ArbProbeMatchWeighting MatchWeighting; // held by value
199  mutable bool HasChanged; // mutable: may be modified by const members (e.g. saveXML() is const)
200 
201  void flush();
202  void copy(const ArbProbePtrList& rList);
203 
204 public:
206  ArbProbeCollection(const char *pName);
208  virtual ~ArbProbeCollection();
209 
211 
212  bool openXML(const char *pFileAndPath, std::string& rErrorMessage); // rErrorMessage is an output parameter
213  bool saveXML(const char *pFileAndPath) const;
214 
215  void setParameters(const float aValues[16], float dWidth, float dBias); // same signature as ArbProbeMatchWeighting::setParameters
216  void getParameters(float aValues[16], float& dWidth, float& dBias) const;
217 
218  const ArbProbePtrList& probeList() const { return ProbeList; } // read-only access to the internal list
219  const ArbProbeMatchWeighting& matchWeighting() const { return MatchWeighting; }
220 
221  const ArbProbe *find(const char *pSequence) const; // probes are looked up by sequence; NOTE(review): presumably returns NULp if not found - confirm
222 
223  bool add(const char *pName, const char *pSequence, const ArbProbe **ppProbe = NULp); // ppProbe: optional output pointer (defaults to NULp)
224  bool replace(const char *oldSequence, const char *pName, const char *pSequence, const ArbProbe **ppProbe = NULp);
225  bool remove(const char *pSequence);
226  bool clear();
227 
228  void name(const char *pName); // setter
229  const std::string& name() const { return Name; } // getter
230 
231  bool hasChanged() const { return HasChanged; }
232  bool hasProbes() const { return ProbeList.size() > 0; } // true if the list is not empty
233 };
234 
235 // ----------------------------------------------------------------------------
236 // class ArbMatchResult
237 // ----------------------------------------------------------------------------
238 class ArbMatchResult : public ArbRefCount { // reference counted via ArbRefCount
239  const ArbProbe *Probe;
240  ArbCachedString CachedResultA; // two cache descriptors instead of in-memory strings;
241  ArbCachedString CachedResultB; // NOTE(review): presumably the result text split at nSplitPoint (see constructor) - confirm
242  double Weight;
243  mutable int Index; // mutable: written by the const setter index(int)
244  mutable int Padding; // mutable: written by the const setter padding(int)
245 
246 public:
247  ArbMatchResult();
248  ArbMatchResult(const ArbProbe *pProbe, const char *pResult, int nSplitPoint, double dWeight);
249  ArbMatchResult(const ArbMatchResult& rCopy);
250  virtual ~ArbMatchResult();
251 
// NOTE(review): listing line 252 is missing here (lost in extraction); restore from the original header
253 
254  static void addedHeadline(std::string& rHeadline); // static: does not need an instance; rHeadline is an in/out reference
255  void weightAndResult(std::string& rDest) const; // output goes to rDest
256  void result(std::string& sResult) const; // output goes to sResult
257  double weight() const { return Weight; }
258 
259  void padding(int nPadding) const { Padding = nPadding; } // const setter (Padding is mutable)
260  void index(int nIndex) const { Index = nIndex; } // const setter (Index is mutable)
261  int index() const { return Index; }
262 };
263 
264 // ----------------------------------------------------------------------------
265 // multimap of ArbMatchResult* by string
266 // ----------------------------------------------------------------------------
267 
268 typedef std::multimap<std::string, ArbMatchResult*> ArbMatchResultPtrByStringMultiMap; // multimap: several results may share one string key; values are raw pointers
269 typedef ArbMatchResultPtrByStringMultiMap::iterator ArbMatchResultPtrByStringMultiMapIter;
270 typedef ArbMatchResultPtrByStringMultiMap::const_iterator ArbMatchResultPtrByStringMultiMapConstIter;
271 
272 // ----------------------------------------------------------------------------
273 // multimap of ArbMatchResult* by double
274 // ----------------------------------------------------------------------------
275 // We use this as a means of results sorting by match weight
276 // ----------------------------------------------------------------------------
277 
278 typedef std::multimap<double, ArbMatchResult*> ArbMatchResultPtrByDoubleMultiMap; // keyed by double, so iteration is in ascending key order; equal keys are allowed
279 typedef ArbMatchResultPtrByDoubleMultiMap::iterator ArbMatchResultPtrByDoubleMultiMapIter;
280 
281 
282 // ----------------------------------------------------------------------------
283 // List of strings
284 // ----------------------------------------------------------------------------
285 
286 typedef std::list<std::string> ArbStringList; // list owning its strings by value
287 typedef std::list<std::string>::const_iterator ArbStringListConstIter;
288 
289 // ----------------------------------------------------------------------------
290 // class ArbMatchResultSet
291 // ----------------------------------------------------------------------------
292 class ArbMatchResultSet FINAL_TYPE : public ArbRefCount { // reference counted via ArbRefCount; FINAL_TYPE is a macro defined outside this file
293  const ArbProbe *Probe; // may be null, see hasResults()
294  std::string Headline;
// NOTE(review): listing line 295 is missing here (lost in extraction) - presumably the declaration of 'ResultMap', which resultMap()/hasResults() below use
296  ArbStringList CommentList;
297  int Index;
298  int EndFullName; // only set together with Headline, see headline(const char*, int)
299 
300  void flush();
301  void copy(const ArbMatchResultPtrByStringMultiMap& rMap);
302 
303 public:
304  ArbMatchResultSet();
305  ArbMatchResultSet(const ArbProbe *pProbe);
306 
307  ArbMatchResultSet(const ArbMatchResultSet& rCopy);
308  DECLARE_ASSIGNMENT_OPERATOR(ArbMatchResultSet); // macro from arbtools.h
309  virtual ~ArbMatchResultSet();
310 
311  void initialise(const ArbProbe *pProbe, int nIndex);
312 
313  bool add(const char *pName,
314  const char *pFullName,
315  const char *pMatchPart,
316  const char *pResult,
317  const ArbProbeMatchWeighting& rMatchWeighting);
318 
319  bool addComment(const char *pComment);
320 
321  void findMaximumWeight(double& dMaximumWeight) const; // dMaximumWeight is an in/out reference
322 
323  const ArbMatchResultPtrByStringMultiMap& resultMap() const { return ResultMap; } // read-only access to the internal map
324 
325  bool isMatched(const ArbStringList& rCladeList,
326  bool& bPartialMatch,
327  double dThreshold,
328  double dCladeMarkedThreshold,
329  double dCladePartiallyMarkedThreshold) const; // clade variant; bPartialMatch is an output parameter
330 
331  bool isMatched(const std::string& rName, double dThreshold) const; // single-name variant
332  const ArbStringList& commentList() const { return CommentList; }
333 
334  bool hasResults() const { // requires both a probe and a non-empty result map
335  return Probe && ResultMap.size() > 0;
336  }
337 
338  const ArbProbe *probe() const { return Probe; }
339 
340  void headline(const char *pHeadline, int nEndFullName) { // setter; a null pHeadline leaves Headline AND EndFullName unchanged
341  if (pHeadline) {
342  Headline = pHeadline;
343  EndFullName = nEndFullName;
344  }
345  }
346  const std::string& headline() const { return Headline; }
347  int endFullName() const { return EndFullName; }
348 
349  void enumerateResults(ArbMatchResultPtrByDoubleMultiMap& rMap, int nMaxFullName); // rMap is passed by non-const reference (output)
350 
351  int index() const { return Index; }
352 };
353 
354 // ----------------------------------------------------------------------------
355 // Map of ArbMatchResultSet objects by string
356 // ----------------------------------------------------------------------------
357 typedef std::pair<const std::string, ArbMatchResultSet> ArbMatchResultSetStringPair; // identical to the value_type of the map below
358 typedef std::map<std::string, ArbMatchResultSet> ArbMatchResultSetByStringMap; // stores ArbMatchResultSet by value (unique string keys)
359 typedef std::map<std::string, ArbMatchResultSet>::iterator ArbMatchResultSetByStringMapIter;
360 typedef std::map<std::string, ArbMatchResultSet>::const_iterator ArbMatchResultSetByStringMapConstIter;
361 typedef std::map<std::string, ArbMatchResultSet>::reverse_iterator ArbMatchResultSetByStringMapRIter;
362 typedef std::map<std::string, ArbMatchResultSet>::const_reverse_iterator ArbMatchResultSetByStringMapConstRIter;
363 
364 
365 // ----------------------------------------------------------------------------
366 // Map of string by int (key: int, value: std::string)
367 // ----------------------------------------------------------------------------
368 typedef std::pair<int, const std::string> ArbStringIntPair; // NOTE(review): differs from the map's value_type (std::pair<const int, std::string>); inserting it requires a converting copy
369 typedef std::map<int, std::string> ArbIntByStringMap; // despite the name the key is the int and the mapped value is the string
370 typedef std::map<int, std::string>::iterator ArbIntByStringMapIter;
371 typedef std::map<int, std::string>::const_iterator ArbIntByStringMapConstIter;
372 typedef std::map<int, std::string>::reverse_iterator ArbIntByStringMapRIter;
373 typedef std::map<int, std::string>::const_reverse_iterator ArbIntByStringMapConstRIter;
374 
375 
// Callback type taken by enumerate_results() (see its pCallback parameter).
// NOTE(review): pContext is presumably the pointer passed to enumerate_results(); the meaning of the bool return value is not visible in this header - confirm in the implementation.
376 typedef bool (*ArbMatchResultsEnumCallback)(void *pContext, const char *pResult, bool bIsComment, int nItem, int nItems);
377 
378 
379 // ----------------------------------------------------------------------------
380 // class ArbMatchResultsManager
381 // ----------------------------------------------------------------------------
// NOTE(review): the class header and the declaration of 'ResultsMap' (listing lines 382-383) as well as listing lines 392-393 are missing here (lost in extraction); restore from the original header
384  ArbMatchResultSetByStringMap ResultSetMap; // result sets stored by value under a string key
385  double MaximumWeight;
386  std::string ResultsFileName;
387 
388  void flush();
389  void initFileName();
390 
391 public:
394  virtual ~ArbMatchResultsManager();
395 
396  void reset();
397 
398  ArbMatchResultSet *addResultSet(const ArbProbe *pProbe); // returns a mutable pointer to a result set
399  const ArbMatchResultSet *findResultSet(const char *pProbeSequence) const; // lookup by probe sequence; NOTE(review): presumably NULp if not found - confirm
400  void updateResults();
401 
402  const ArbMatchResultPtrByStringMultiMap& resultsMap() const { return ResultsMap; } // read-only access; 'ResultsMap' is declared on a line missing from this listing
403  const ArbMatchResultSetByStringMap& resultSetMap() const { return ResultSetMap; }
404 
405  double maximumWeight() const { return MaximumWeight; }
406 
407  int enumerate_results(ArbMatchResultsEnumCallback pCallback, void *pContext); // pCallback has the signature of ArbMatchResultsEnumCallback
408 
409  const char *resultsFileName() const;
410  void openResultsFile() const;
411 
412  bool hasResults() const { return ResultSetMap.size() > 0; } // true if at least one result set exists
413 };
414 
415 // ----------------------------------------------------------------------------
416 
419 
420 #else
421 #error probe_collection.hxx included twice
422 #endif // PROBE_COLLECTION_HXX
ArbMatchResultSet * addResultSet(const ArbProbe *pProbe)
bool unlock() const
return string(buffer, length)
std::map< std::string, ArbMatchResultSet > ArbMatchResultSetByStringMap
const ArbProbePtrList & probeList() const
ArbMatchResultsManager & get_results_manager()
virtual ~ArbProbe()
void writeXML(FILE *hFile, const char *pPrefix) const
void add(int v)
Definition: ClustalV.cxx:461
double maximumWeight() const
ArbProbePtrList::const_iterator ArbProbePtrListConstIter
bool hasChanged() const
std::map< std::string, ArbMatchResultSet >::iterator ArbMatchResultSetByStringMapIter
ArbProbeMatchWeighting & operator=(const ArbProbeMatchWeighting &rCopy)
void initialise(const float aValues[16], float dWidth, float dBias)
void writeXML(FILE *hFile, const char *pPrefix) const
int allowedMismatches() const
const ArbMatchResultPtrByStringMultiMap & resultMap() const
std::map< int, std::string >::reverse_iterator ArbIntByStringMapRIter
double matchWeight(const char *pSequenceA, const char *pSequenceB) const
const ArbProbe * probe() const
bool saveString(const char *pString, ArbCachedString &rCachedString)
int index() const
std::map< std::string, ArbMatchResultSet >::reverse_iterator ArbMatchResultSetByStringMapRIter
bool openXML(const char *pFileAndPath, std::string &rErrorMessage)
bool loadString(std::string &rString, const ArbCachedString &rCachedString) const
void weightAndResult(std::string &rDest) const
virtual ~ArbRefCount()
ArbProbeCollection & get_probe_collection()
std::map< int, std::string > ArbIntByStringMap
ArbRefCount(const ArbRefCount &)
void getParameters(float aValues[16], float &dWidth, float &dBias) const
void headline(const char *pHeadline, int nEndFullName)
void setParameters(const float aValues[16], float dWidth, float dBias)
const char * resultsFileName() const
int enumerate_results(ArbMatchResultsEnumCallback pCallback, void *pContext)
void index(int nIndex) const
bool add(const char *pName, const char *pSequence, const ArbProbe **ppProbe=NULp)
std::map< int, std::string >::const_iterator ArbIntByStringMapConstIter
#define DECLARE_ASSIGNMENT_OPERATOR(T)
Definition: arbtools.h:61
std::map< int, std::string >::const_reverse_iterator ArbIntByStringMapConstRIter
bool replace(const char *oldSequence, const char *pName, const char *pSequence, const ArbProbe **ppProbe=NULp)
std::map< int, std::string >::iterator ArbIntByStringMapIter
ArbMatchResultPtrByStringMultiMap::iterator ArbMatchResultPtrByStringMultiMapIter
std::list< std::string > ArbStringList
ArbMatchResultPtrByStringMultiMap::const_iterator ArbMatchResultPtrByStringMultiMapConstIter
void lock() const
const std::string & displayName() const
const ArbMatchResultSet * findResultSet(const char *pProbeSequence) const
void result(std::string &sResult) const
void padding(int nPadding) const
const std::string & name() const
int index() const
const ArbProbeMatchWeighting & matchWeighting() const
std::map< std::string, ArbMatchResultSet >::const_iterator ArbMatchResultSetByStringMapConstIter
std::pair< const std::string, ArbMatchResultSet > ArbMatchResultSetStringPair
std::list< ArbProbe * > ArbProbePtrList
static void copy(double **i, double **j)
Definition: trnsprob.cxx:32
xml element
ArbProbeCollection & operator=(const ArbProbeCollection &rCopy)
ArbProbePtrList::iterator ArbProbePtrListIter
bool hasResults() const
const ArbMatchResultSetByStringMap & resultSetMap() const
const std::string & sequence() const
ArbMatchResultPtrByDoubleMultiMap::iterator ArbMatchResultPtrByDoubleMultiMapIter
ArbMatchResult & operator=(const ArbMatchResult &rCopy)
virtual ~ArbStringCache()
#define NULp
Definition: cxxforward.h:116
static void addedHeadline(std::string &rHeadline)
void nameAndSequence(const char *pName, const char *pSequence)
void getParameters(float aValues[16], float &dWidth, float &dBias) const
const ArbStringList & commentList() const
void setParameters(const float aValues[16], float dWidth, float dBias)
int endFullName() const
std::multimap< double, ArbMatchResult * > ArbMatchResultPtrByDoubleMultiMap
std::pair< int, const std::string > ArbStringIntPair
std::multimap< std::string, ArbMatchResult * > ArbMatchResultPtrByStringMultiMap
const std::string & name() const
const ArbMatchResultPtrByStringMultiMap & resultsMap() const
bool saveXML(const char *pFileAndPath) const
double matchWeightResult(const char *pProbeSequence, const char *pMatchResult) const
double weight() const
void free() const
bool(* ArbMatchResultsEnumCallback)(void *pContext, const char *pResult, bool bIsComment, int nItem, int nItems)
const ArbProbe * find(const char *pSequence) const
const std::string & headline() const
std::list< std::string >::const_iterator ArbStringListConstIter
std::map< std::string, ArbMatchResultSet >::const_reverse_iterator ArbMatchResultSetByStringMapConstRIter