match.h

Go to the documentation of this file.
00001 // match.h
00002 #include <iostream>
00003 #include <string>
00004 #include <vector>
00005 
00006 using namespace std;
00007 
00008 /* a hit is a summary of all the matches between each pair of 
00009  * query-target, only non-overlapping matches will be registered
00010  * for a hit.
00011  * According to NCBI, the blast -m 8 output has the following fields:
00012  * Query id, Subject id, % identity, alignment length, mismatches, gap
00013  * openings, query start, query end, sequence start, sequence end, e-value, bit
00014  * score
00015  *
00016  */
00017 
00018 /* A match does not have an identifier: query-target.  It is all the 
00019  * numerical part of the blast -m 8 output
00020  */
/* Empty tag type used as an exception.  The stream-reading constructors of
 * match and hit name it as an exception they may raise; presumably it marks
 * a record that could not be read -- confirm in the implementation.
 */
class inputException
{
};
00022 
/* One alignment row: the numerical columns of a blast -m 8 line, with no
 * query/target identifier attached.
 */
class match
{
        public:
                /* Construct a match from the stream `in`.
                 * May throw inputException.  This contract used to be written
                 * as the dynamic exception specification
                 * throw(inputException); that syntax was removed in C++17, so
                 * it is kept here as documentation only.  The out-of-line
                 * definition must be declared without the specification too.
                 */
                match(std::istream &in);
                match(const match &m);
                match& operator=(const match &m);
                /* Write `m` to `o`; the format is defined with the implementation. */
                friend std::ostream& operator<<(std::ostream &o, const match &m);
                /* Overlap test against `m`; the criterion is defined with the
                 * implementation. */
                bool overlap(const match &m) const;

                /* Plain accessors for the stored values. */
                float getScore() const { return score; }
                int getLength() const { return length; }
                /* length multiplied by identity (int * float, returned as float) */
                float getLenxid() const { return length*identity; }
                double getE() const { return E; }

        private:
                /* Field names presumably follow the blast -m 8 columns
                 * (% identity, alignment length, mismatches, gap openings,
                 * query start/end, target start/end) -- confirm against the
                 * stream constructor. */
                float identity;
                int length, mismatch, gap, qstart, qend, tstart, tend;
                double E;   // exp from blast
                float score;
};
00042 
00043 /* Contains one or more matches. A summary of all the matches between 
00044  * the query and the target in the database.
00045  */
00046 class hit
00047 {
00048         public:
00049                 class end {};
00050                 /* to read the first line */
00051                 hit() : score(0), matches(), matchCnt(0) {}
00052                 //hit(istream &in);
00053                 // used q, and t to construct and will set the next q,t 
00054                 hit(string &q, string &t, istream &in) throw(end, inputException);
00055 
00056                 /* append the next match to this hit */
00057                 void getNext(istream &in);
00058                 bool sameQuery(const string &nq) const { return query==nq; }
00059                 bool sameTarget(const string &nt) const { return target==nt; }
00060                 bool sameHit(const string &q, const string &t) const;
00061                 bool overlap(const match &m) const;
00062                 /* r < 1.  this hit is 0.9x of h's sumScore */
00063                 bool scoreAsBigAs(const hit &h, float r) const { return score > r*h.score; }
00064                 /* output the hit result for loading into a table
00065                  * query,target,average_identity,sum_match_length,sum_score,selratio,
00066                  * minE, prodE(products of all non-overlapping E)
00067                  * */
00068                 friend ostream& operator<<(ostream &o, const hit &h);
00069                 int getLength() const { return length; }
00070 
00071                 /* non-overlapping matches */
00072                 int getMatchCount() const { return matches.size(); }
00073                 int getTotalMatch() const { return matchCnt; }
00074                 void dumpRaw(ostream &o) const;
00075                 float getIdentity() const { return identity; }
00076                 float getScore() const { return score; }
00077                 // weighted average
00078 
00079                 static char fields[];
00080 
00081         private:
00082                 string query, target;
00083                 vector<match> matches; //non-overlapping matches
00084                 float score;   // sum of score from all non-overlapping matches
00085                 int matchCnt;  // total match inputed
00086                 int length;    // sum of length form all matches
00087                 float identity; // identity averaged over all matches
00088                 double minE;   // minimus of all the E values from all matches
00089                 double prodE;  // product of all non-overlapping E's 
00090 };
00091 

Generated on Wed Aug 10 11:56:57 2011 for Softwares from Orpara by  doxygen 1.5.6