matrix.h

Go to the documentation of this file.
00001 #ifndef MATRIX_H
00002 #define MATRIX_H
00003 
00004 #include <string>
00005 #include <vector>
00006 #include <iostream>
00007 #include <fstream>
00008 #include "bioseq.h"
00009 
00010 using namespace std;
00011 
00012 //int aachar2num(char a);
00013 //declared in bioseq.h
00014 /*
00015 int max(int i1, int i2, int i3) {
00016    return max(max(i1,i2),i3);
00017 }
00018 int max(int i1, int i2) {
00019    if (i1>i2) return i1;
00020    return i2;
00021 }
00022 */
00023 
// Free-function declaration; the definition is not part of this header.
// buff, buffsize and ws are taken by non-const reference, so the callee can
// modify the caller's buffer pointer, its size and the word size.
// alphabet points to read-only characters and alphabetsize is passed by value.
// NOTE(review): presumably extends every word in buff by one symbol of
// alphabet (growing ws by one) -- confirm against the definition.
00024 void expandCombination(char** &buff, int &buffsize, int &ws, const char* alphabet, int alphabetsize);
00025 
00029 class Matrix {
00030    public:
00035       Matrix() : path("/home/kzhou/proj/seqaln/matrix"),
00036             name("blosum50"), match(0), mismatch(0),
00037             words(0), wordSize(0), mat() { for (int i=0; i<26; i++) for (int j=0; j<26; j++) mat[i][j]=default_mat[i][j]; /*read();*/ }
00038       Matrix(const string& matrixName, bool nucMatrix=false) 
00039          : path("/home/kzhou/proj/seqaln/matrix"),
00040             name(matrixName), match(0), mismatch(0),
00041             words(0), wordSize(0) { read(nucMatrix); }
00045       Matrix(const Matrix& mt);
00046 
00047       /* construct a matrix that has only two parameters */
00048       Matrix(int m, int mis) : match(m), mismatch(mis) { }
00049       ~Matrix();
00050       int getMinScore() const { return mins; }
00051       int getMaxScore() const { return maxs; }
00057       Matrix& operator=(const Matrix& mt);
00061       void show();
00062 
00067       const char* getAlphabet() const { return aas; }
00068       int getNumberOfAlphabet() const { return numsymbol; }
00069 
00073       void getWords(vector<string> &words, int ws) const;
00074       void growWord(const vector<string> &in, vector<string> &ou) const;
00075       void expandWord(char** &in, int &insize, int &ws) const;
00080       char** allwords(int ws) const;
00081       int getNumberOfWords() const { return wordsArraySize; }
00082       int getCurrentWordSize() const { return wordSize; }
00083       void showWords() const; // debug function
00094       int similarWord_debug(ostream &ous, const char* w, int ws, int cutoff=10, float fractioncutoff=0.75) const;
00098       int similarWord(vector<char*> &neighbor, const char* w, int ws, float cutoff=11, float fractioncutoff=0.8) const;
00099 
00100       /* return true for success, false for failure.
00101        * if nucMatrix is set, then it will use the 
00102        * hashbase() function defined in codon.h to
00103        * convert Base Char to integer, other than the generic 
00104        * char X-'A' function
00105        * */
00106       bool read(bool nucMatrix=false);
00107       bool read(const string &p, const string &n) { setPath(p); name=n; read(); }
00108 
00111       void setMatrix(const string &n) { name=n; read(); }
00112 
00113       void setPath(const string &p) { path=p; }
00114       int getMatchScore(const char c1, const char c2) {
00115          if (c1==c2) return match; return mismatch; }
00122       int lookup(int row, int col) const { 
00123          if (row > 26 || col > 26) {
00124             cerr << "Matrix index out of bound " << row << ", " << col << endl;
00125             exit(1);
00126          }
00127          return mat[row][col]; }
00133       int lookup(char row, char col) const { return mat[aachar2num(row)][aachar2num(col)]; }
00134 
00139       int score(const int* x, const int* y, const int len) const;
00140 
00141       /* the gap score, this should have something to do 
00142        * with the matrix
00143        * g is the length of the gap.
00144        */
00145       //int gap(int g);
00146       //gap(g)=alpha + (g-1)*beta
00147       //this is used in the resursion
00148       int alpha, beta;
00149 
00150    private:
00151       string path;
00152       string name; // Blosum50, Blosum62, etc
00153       int match;
00154       int mismatch;
00155       int mat[32][32]; // use only 27 of the elements
00156       static int default_mat[32][32];
00157       int maxs, mins;
00158       char aas[32]; // amino acid symbols, at most 26 so we had enough
00159       int numsymbol; // the size of aas array
00160       mutable char **words;
00161       mutable int wordsArraySize;
00162       mutable int wordSize;  // recoreds the size of words
00163 };
00164 
00165 #endif

Generated on Wed Aug 10 11:56:55 2011 for Softwares from Orpara by doxygen 1.5.6