00001 #ifndef DYNALN_H
00002 #define DYNALN_H
00003
00004 #include "bioseq.h"
00005 #include "matrix.h"
00006 #include <list>
00007 #include <vector>
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 enum ALNTYPE {GLOBAL, LOCAL};
00036
00037
00046 class Dynaln {
00047 public:
00048 Dynaln() : ST(), gapi(-8), gape(-2), seq1(0),
00049 seq2(0),alnidx(), S(0), M(0),
00050 IX(0), IY(0), Smax(0), idencnt(0), simcnt(0), numgaps1(0),
00051 numgaps2(0), gaplen1(0), gaplen2(0),
00052 seq1begin(-1), seq1end(-1), seq2begin(-1), seq2end(-1),
00053 topaln(), middle(), buttomaln() { }
00071 Dynaln(const bioseq &s1, const bioseq &s2)
00072 : ST(), gapi(-8), gape(-2), seq1(&s1), seq2(&s2),
00073 alnidx(), S(0), M(0), IX(0), IY(0),
00074 Smax(0), idencnt(0), simcnt(0), numgaps1(0),
00075 numgaps2(0), gaplen1(0), gaplen2(0),
00076 seq1begin(-1), seq1end(-1), seq2begin(-1), seq2end(-1),
00077 topaln(), middle(), buttomaln()
00078 { if ( (s1.getSequenceType() & NUCLEIC_ACID & s2.getSequenceType()) > 0)
00079 ST.setMatrix("NUC.4.4");
00080 }
00081 Dynaln(const bioseq &s1, const bioseq &s2, const string &ma)
00082 : ST(ma), seq1(&s1), seq2(&s2),
00083 alnidx(), S(0), M(0), IX(0), IY(0),
00084 Smax(0), idencnt(0), simcnt(0), numgaps1(0),
00085 numgaps2(0), gaplen1(0), gaplen2(0),
00086 seq1begin(-1), seq1end(-1), seq2begin(-1), seq2end(-1),
00087 topaln(), middle(), buttomaln()
00088 {
00089 gapi=2*ST.getMinScore();
00090 gape=ST.getMinScore()/3;
00091 if (gape > -1) gape = -1;
00092 if (gape < -3) gape = -3; }
00100 int global();
00104 int runglobal() {
00105 int s=global(); buildResult(); return s; }
00106 int local();
00107 int runlocal() {
00108 int s=local(); buildResult(); return s; }
00109
00110 ~Dynaln();
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127 void buildResult();
00131 void buildAlnInfo();
00132
00134 void printAlign(ostream &ous, const int w=70) const;
00135 const string& getTopAln() const { return topaln; }
00136 const string& getButtomAln() const { return buttomaln; }
00146 string toDelimitedString(const string &dl=",") const;
00147
00148 void showseq() const {
00149 cout << seq1->toString() << "\n"
00150 << seq2->toString() << endl; }
00151
00153 int getScore() { return Smax; }
00154 int getAlnlen() const { return alnidx.size(); }
00155 float getIdentity() const { return (float)idencnt/getAlnlen(); }
00156 float getSimilarity() const { return (float)(idencnt+simcnt)/getAlnlen(); }
00157
00158
00159 pair<int,int> numgaps() const {
00160 return make_pair(numgaps1, numgaps2); }
00161 static string headers(const string &dl);
00162
00163 const Matrix* getScoringMatrix() const { return &ST; }
00164 int topBeginIndex() const { return seq1begin; }
00165 int buttomBeginIndex() const { return seq2begin; }
00166 int topEndIndex() const { return seq1end; }
00167 int buttomEndIndex() const { return seq2end; }
00168
00170 void setMatrix(const string &mat) { ST.setMatrix(mat); }
00174 void setMatrix(const Matrix &mat) { ST=mat; }
00175
00179 void setSeq1(const bioseq &sq) { seq1 = &sq; }
00180 void setSeq2(const bioseq &sq) { seq2 = &sq; }
00181 void setSeq(const bioseq &sq1, const bioseq &sq2) {
00182 seq1=&sq1; seq2=&sq2; }
00187 void setseq(const bioseq &sq1, const bioseq &sq2) {
00188 seq1=&sq1; seq2=&sq2; }
00189
00190
00191
00192 void setGapInsert(int ins) { gapi=ins; }
00193 void setGapExtend(int ext) { gape=ext; }
00194 void setGapParameter(const int go, const int ge) {
00195 gapi=go; gape=ge; }
00196 void setAllParameter(const Matrix& mat, const int go, const int ge)
00197 { ST=mat; gapi=go; gape=ge; }
00198
00199
00200
00201 static const char gapchar='-';
00202 static const char idenchar='|';
00203 static const char simchar=':';
00204 static const int simcut=0;
00205
00206
00207
00208
00209
00210
00211 static const int PTRNULL=0;
00212 static const int PTRDIAG=1;
00213 static const int PTRTOP=2;
00214 static const int PTRLEFT=4;
00215
00216
00217 void debug_showmatrix(ostream &ous) const;
00218
00219 protected:
00220 void countnumgaps();
00225 void clearResult();
00226
00227
00228
00229
00230
00231
00232 void allocmem();
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250 void tracepointer(int &i, int &j);
00251 void findAlnBoundary();
00252
00253 int gapi, gape;
00254
00255
00259 Matrix ST;
00260
00261 const bioseq *seq1;
00262 const bioseq *seq2;
00263
00264 const int* C1;
00265 const int* C2;
00266 int* S;
00267
00268 int Ssize;
00269 int *M, *IX, *IY;
00270
00271 int numcol;
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281 list<pair<int, int> > alnidx;
00282
00283 int Smax;
00284
00285
00286
00287
00288
00289
00290
00291 int Smaxi, Smaxj;
00292
00293
00294 int idencnt;
00295 int simcnt;
00296 int numgaps1;
00297 int numgaps2;
00298 int gaplen1;
00299 int gaplen2;
00300
00301
00302
00303
00304
00305
00306 int seq1begin, seq1end, seq2begin, seq2end;
00307
00308
00309 string topaln, middle, buttomaln;
00310 ALNTYPE alntype;
00311 };
00312
00313 class LSDynaln : public Dynaln {
00314 public:
00315 LSDynaln() : Dynaln() { }
00316 LSDynaln(const bioseq &s1, const bioseq &s2)
00317 : Dynaln(s1,s2) { }
00318 LSDynaln(const bioseq &s1, const bioseq &s2, const string &mat)
00319 : Dynaln(s1,s2,mat) { }
00320 ~LSDynaln();
00321
00322
00323
00324
00325
00326 int global();
00327 int local();
00328 int runlocal() {
00329 int s=local(); buildResult(); return s; }
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353 pair<int, int> computeScoreFW(int b1, int e1, int b2, int e2);
00354
00355
00356
00357 pair<int, int> computeScoreBW(int b1, int e1, int b2, int e2);
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367 int path(int b1, int e1, int b2, int e2);
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392 list<pair<int,int> > tracebackFW(pair<int,int> mrow, int b1, int &e1, int b2, int &e2);
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402 list<pair<int,int> > tracebackBW(pair<int,int> mrow, int b1, int &e1, int b2, int &e2);
00403
00404
00405
00406
00407
00408
00409 void buildResult();
00410
00411 void debug_showmatrixForward(pair<int,int> &rows, const int col);
00412 void debug_showmatrixBackward(pair<int,int> &rows, const int col);
00413
00414 private:
00415
00416
00417
00418
00419
00420 void allocmem();
00421 int *MR, *IXR, *IYR;
00422
00423
00424 };
00425
00426 #endif