dynaln.h

Go to the documentation of this file.
00001 #ifndef DYNALN_H
00002 #define DYNALN_H
00003 
00004 #include "bioseq.h"
00005 #include "matrix.h"
00006 #include <list>
00007 #include <vector>
00008 
00009 /* experimental, might not be needed 
00010 struct Alignedcol {
00011    Alignedcol() { }
00012    Alignedcol(const char t, const char m, const char b) 
00013       : top(t), middle(m), buttom(b) { }
00014    char top;
00015    char middle;
00016    char buttom;
00017 };
00018 
00019 struct Alnstring {
00020    string top;
00021    string middle;
00022    string buttom;
00023 };
00024 
00025 struct Alnsummary {
00026    int score;
00027    int idencnt;
00028    int simcnt;  // excluding identical residues
00029    int numgaps1;
00030    int numgaps2;
00031    int gaplen1;
00032    int gaplen2;
00033 };
00034 */
00035 
00039 enum ALNTYPE {GLOBAL, LOCAL};
00040 //enum POINTER {diag, top, left}; // for traceback
00041 
00054 class Dynaln {
00055    public:
00056       Dynaln() : ST(), gapi(-8), gape(-2), seq1(0),
00057                  seq2(0),alnidx(), S(0), M(0),
00058                  IX(0), IY(0), Smax(0), idencnt(0), simcnt(0), numgaps1(0),
00059                  numgaps2(0), gaplen1(0), gaplen2(0),
00060                  seq1begin(-1), seq1end(-1), seq2begin(-1), seq2end(-1),
00061                  topaln(), middle(), buttomaln(), topruler(), buttomruler() { }
00079       Dynaln(const bioseq &s1, const bioseq &s2) 
00080          : ST(), gapi(-8), gape(-2), seq1(&s1), seq2(&s2), 
00081             alnidx(), S(0), M(0), IX(0), IY(0),
00082             Smax(0), idencnt(0), simcnt(0), numgaps1(0),
00083             numgaps2(0), gaplen1(0), gaplen2(0),
00084             seq1begin(-1), seq1end(-1), seq2begin(-1), seq2end(-1),
00085             topaln(), middle(), buttomaln(), topruler(),buttomruler()
00086             { if ( (s1.getSequenceType() & NUCLEIC_ACID & s2.getSequenceType()) > 0)
00087                ST.setMatrix("NUC.4.4");
00088                   }
00096       Dynaln(const bioseq &s1, const bioseq &s2, const string &ma) 
00097          : ST(ma), seq1(&s1), seq2(&s2), 
00098             alnidx(), S(0), M(0), IX(0), IY(0),
00099             Smax(0), idencnt(0), simcnt(0), numgaps1(0),
00100             numgaps2(0), gaplen1(0), gaplen2(0),
00101             seq1begin(-1), seq1end(-1), seq2begin(-1), seq2end(-1),
00102             topaln(), middle(), buttomaln(), topruler(), buttomruler()
00103             { 
00104                gapi=2*ST.getMinScore(); 
00105                gape=ST.getMinScore()/3; 
00106                if (gape > -1) gape = -1; 
00107                if (gape < -3) gape = -3; }
00115       int global();
00120       int runglobal() {
00121          int s=global(); buildResult(); return s; }
00124       int local();
00128       int runlocal() {
00129          int s=local(); buildResult(); return s; }
00130       //void globalTraceBack();
00131       ~Dynaln();
00132 
00148       void buildResult();
00156       void buildAlnInfo();
00157 
00159 
00162       void printAlign(ostream &ous, const int w=70) const;
00165       const string& getTopAln() const { return topaln; }
00168       const string& getButtomAln() const { return buttomaln; }
00182       void getNgMatchArray(vector<int> &ngmarr, bool append=false, bool countsim=true) const;
00188       void getMatchArray(vector<int> &marr, bool countsim=true) const;
00205       string toDelimitedString(const string &dl=",", int ibase=0) const;
00206 
00211       void showseq() const {
00212          cout << seq1->toString() << "\n"
00213             << seq2->toString() << endl; }
00214 
00216       int getScore() { return Smax; }
00217       int getAlnlen() const { return alnidx.size(); }
00218       float getIdentity() const { return (float)idencnt/getAlnlen(); }
00219       float getSimilarity() const { return (float)(idencnt+simcnt)/getAlnlen(); }
00224       pair<int,int> numgaps() const { 
00225          return make_pair(numgaps1, numgaps2); }
00226       static string headers(const string &dl);
00227       //const Matrix& getScoringMatrix() const { return ST; }
00228       const Matrix* getScoringMatrix() const { return &ST; }
00231       int topBeginIndex() const { return seq1begin; }
00232       int buttomBeginIndex() const { return seq2begin; }
00233       int topEndIndex() const { return seq1end; }
00234       int buttomEndIndex() const { return seq2end; }
00235 
00237       void setMatrix(const string &mat) { ST.setMatrix(mat); }
00241       void setMatrix(const Matrix &mat) { ST=mat; }
00242 
00246       void setSeq1(const bioseq &sq) { seq1 = &sq;  }
00247       void setSeq2(const bioseq &sq) { seq2 = &sq; }
00248       void setSeq(const bioseq &sq1, const bioseq &sq2) {
00249          seq1=&sq1; seq2=&sq2; }
00261       void setseq(const bioseq &sq1, const bioseq &sq2) {
00262          seq1=&sq1; seq2=&sq2; }
00263       //void setSeq(const string &sq1, const string &sq2) {
00264        //  *seq1=sq1; *seq2=sq2; }
00265 
00266       void setGapInsert(int ins) { gapi=ins; }
00267       void setGapExtend(int ext) { gape=ext; }
00268       void setGapParameter(const int go, const int ge) {
00269          gapi=go; gape=ge; }
00270       void setAllParameter(const Matrix& mat, const int go, const int ge)
00271       { ST=mat; gapi=go; gape=ge; }
00272 
00274       static const char gapchar='-';
00275       static const char idenchar='|';
00276       static const char simchar=':';
00277       static const int simcut=0;
00284       static const int PTRNULL=0;
00285       static const int PTRDIAG=1;
00286       static const int PTRTOP=2; // Ix
00287       static const int PTRLEFT=4; // Iy
00288 
00290       void debug_showmatrix(ostream &ous) const;
00291 
00292    protected:
00294       void countnumgaps();
00307       void clearResult();
00308       //void allocmemLS();
00309       
00310       /* Nr is the number of row (seq1 length + 1)
00311        * Nc is the number of columns (seq2 length + 1)
00312        */
00313       //void allocmem(const int Nr, const int Nc);
00322       void allocmem();
00323       //void traceback(int &i, int &j, int *&Sc, bool local=false);
00324       
00325       /*
00326        * the traceback function produce the alignment result
00327        * and put it into the alnidx list structure.
00328        * It also set the begin,end of each sequence that.
00329        * For global alignment, it will traceback the begining
00330        * gaps. The ending gap will also be removed from the
00331        * end of the sequence.
00332        *
00333        * i, and j are the matrix location index 0-based
00334        */
00335       //void traceback(int &i, int &j, bool local=false);
00336       //void traceback(int &i, int &j);
00340       void tracepointer(int &i, int &j);
00343       void findAlnBoundary();
00344       //void initialize();
00345       int gapi, gape;  // should be negative numbers
00346       //Matrix ST; // scoring table, use protein BLOSUM50 as default.
00347       //We should use pointers
00351       Matrix ST; // scoring table, use protein BLOSUM50 as default.
00352       // for DNA use NUC.4.4
00354       const bioseq *seq1;
00355       const bioseq *seq2;
00356       // the code were assigned when allocating memory
00357       const int* C1; // needs to be updated properly when sequence got updated
00358       const int* C2; // code for the sequence
00361       int* S;    // simulate 2-D array, 
00362                  //size (seq1.length()+1)*(seq2.length()+1)
00364       int Ssize; // for the next round, memorize the last state
00365       int *M, *IX, *IY; // array for computing the scores
00366       //int* Itop;  // for temporary usage
00367       int numcol;  // for the next round
00368       //int* P; // for trace back pointer
00369       //int Ileft;  // for temporary usage
00370       //stack<pair<int, int> > alnidx;
00371       
00379       list<pair<int, int> > alnidx;
00380       //int score;  // assigned in the trace back step.
00381       int Smax;  // assigned before the trace back step.
00382       // in local it is use during the build up phase
00383       // in global it is the last cell m,n
00389       int Smaxi, Smaxj;  // for local dynamic square memory
00395       int seq1begin, seq1end, seq2begin, seq2end;
00396 
00397       // alignment summary and detailed information
00398       // The following members are for holding secondary results
00399       int idencnt;
00400       int simcnt;
00401       int numgaps1;
00402       int numgaps2;
00403       int gaplen1;
00404       int gaplen2;
00405 
00406       //int alnlen;
00407       //vector<Alignedcol> alnresult;
00416       string topaln, middle, buttomaln;  // aligned sequences
00417       string topruler, buttomruler;
00418       ALNTYPE alntype;
00419 };
00429 class LSDynaln : public Dynaln {
00430    public:
00431       LSDynaln() : Dynaln() { }
00432       LSDynaln(const bioseq &s1, const bioseq &s2) 
00433          : Dynaln(s1,s2) { }
00434       LSDynaln(const bioseq &s1, const bioseq &s2, const string &mat) 
00435          : Dynaln(s1,s2,mat) { }
00436       ~LSDynaln();
00442       int global();
00443       int local();
00446       int runlocal() {
00447          int s=local(); buildResult(); return s; }
00448       //void global();
00449       /* globalLS in reverse direction
00450        */
00451       //int globalR(int b1, int e1, int b2, int e2);
00452       
00453       /* b1, e1, b2, and e2 are 0-based index of the 
00454        * first and the second sequences respectively.
00455        * if (b1<e1 and b2<e2)
00456        * then it does forward computation and store
00457        * the result in S.
00458        * when b1>e1 and b2>e2 then it does reverse
00459        * computation and store the result in SR
00460        *
00461        * return a pair of pointers to the top and buttom
00462        *    arrays respectively.
00463        *    This version is incorrect in detail.
00464        */
00465       //pair< int*, int*> computeScore(int b1, int e1, int b2, int e2);
00466       /* return the start index of the top and buttom rows
00467        * in the four matrices
00468        * This can be used to retrieve the top/buttom rows of the
00469        * three scoring matrices: M, IX, IY
00470        */
00471       pair<int, int> computeScoreFW(int b1, int e1, int b2, int e2);
00475       pair<int, int> computeScoreBW(int b1, int e1, int b2, int e2);
00476 
00485       int path(int b1, int e1, int b2, int e2);
00486 
00487       /* Completly different from the full dynamic program traceback.
00488        * Only the last two rows of the result matrix are in memory!
00489        *
00490        * given the two arrays M.first(TOP row), M.second (BUTTOM row)
00491        * and the 0-based index r, c in terms of the matrix
00492        * len1 is the length of the range of sequence 1
00493        * len2 is that of the sequence 2
00494        * if len1 and len2 are given, then it will use the 
00495        * reverse of the sequence for tracing.
00496        * Arguments: 
00497        *    r row index
00498        *    c column index, r and c specify the starting point
00499        *    for traceback and it is the cell with the best score.
00500        */
00501       //list<pair<int,int> > traceback(pair<int, int> mrow, int b1, int &e1, int b2, int &e2, bool rev=false);
00510       list<pair<int,int> > tracebackFW(pair<int,int> mrow, int b1, int &e1, int b2, int &e2);
00520       list<pair<int,int> > tracebackBW(pair<int,int> mrow, int b1, int &e1, int b2, int &e2);
00521       //void showaln();  // testing function
00522 
00527       void buildResult();
00528 
00529       void debug_showmatrixForward(pair<int,int> &rows, const int col);
00530       void debug_showmatrixBackward(pair<int,int> &rows, const int col);
00531 
00532    private:
00538       void allocmem();
00539       int *MR, *IXR, *IYR;
00540       //int *SR;  // for LOCAL this is used to memorize the traceback pointer
00541       //int *ItopR; // not used in local
00542 };
00543 
00544 #endif

Generated on Wed Aug 10 11:56:55 2011 for Softwares from Orpara by  doxygen 1.5.6