GModel.h

Go to the documentation of this file.
00001 #ifndef GMODEL_H
00002 #define GMODEL_H
00003 
00004 #include <string>  
00005 #include <vector> 
00006 #include <list> 
00007 #include <iostream>
00015 using namespace std; 
00016 
00017 class ExondirectionError : public exception {
00018    public:
00019       ExondirectionError() : message("Different direction of exon inside the same gene") { }
00020       ExondirectionError(const string &msg) : message(msg) { }
00021       const char* what() const throw() { return message.c_str(); }
00022       ~ExondirectionError() throw() { }
00023    private:
00024       string message;
00025 };
00026              
00033 template<typename T> class GModel;
00034 template<typename T> 
00035 ostream& operator<<(ostream& ous, const GModel<T> &mm);
00036 
00037 template<class T> class GModel { 
00038    public: 
00039       GModel() : id(), exons(), intn(), intronsBuilt(false) { }
00040       GModel(const T &name) : id(name), exons(), intn(), intronsBuilt(false) { }
00041       //void addExon(int bb, int ee) { exons.push_back(make_pair(bb,ee)); }
00042       
00043       /* require the exons are sorted from small to large
00044        */
00045       void addExon(int bb, int ee) throw(ExondirectionError);
00046       bool operator==(const GModel &mm) const;
00047       // return +, -, or ' ' if now known
00048       char direction() const;
00049       //pair<int,int> bound() const { return make_pair(exons[0].first, exons[exons.size()-1].second); }
00050       /* return the whole reange of the gene model
00051        */
00052       pair<int,int> bound() const { 
00053          return make_pair(exons.front().first, exons.back().second); }
00054       int size() const {
00055          if (direction() == '+')
00056             return exons.back().second - exons.front().first + 1; 
00057          else
00058             return exons.front().first - exons.back().second + 1; }
00059       /* one model contains another model
00060        * Both ends ousdide of mm, and the intron of 
00061        * mm is a subset of this object.
00062        */
00063       bool contain(const GModel &mm) const;
00064       /* only check for the introns, disregard the ends
00065        * This method is useful where some simple extension
00066        * of the partial models might have been wrong.
00067        */
00068       bool containIntron(const GModel &mm) const;
00072       bool endsContain(const GModel &mm) const;
00073       int numExons() const { return exons.size(); }
00074       //int start() const { return exons[0].first; }
00075       int start() const { return exons.front().first; }
00076       int begin() const { return exons.front().first; }
00077       //int end() const { return exons[exons.size()-1].second; }
00078       int end() const { return exons.back().second; }
00079       int finish() const { return exons.back().second; }
00080       // return all the intron boundaries
00081       const vector<int>& introns() const;
00082       // this is <T> strange <T> must be added after the function name.
00083       // output key, then exons
00084       friend ostream& operator<<<T>(ostream& os, const GModel<T>& mm);
00085 
00086       const T& getId() const { return id; }
00087       /* return all those introns longer than L
00088        */
00089       vector<pair<int,int> > intronLongerThan(const int L) const;
00090       int maxIntronLength() const;
00091       bool inside(pair<int,int> r) const {
00092          return min(start(), end()) > min(r.first, r.second) &&
00093             max(start(), end()) < max(r.first, r.second);
00094       }
00100       int overlap(const GModel<T> &mm) const;
00104       int overlapOpposite(const GModel<T> &mm) const { }
00108       int overlapBoth(const GModel<T> &mm) const { }
00109  
00110    protected: 
00111       T id; 
00112       //vector< pair<int,int> > exons;
00113       // sorted from 5'--to-->3' direction
00114       list< pair<int,int> > exons;
00115       mutable vector<int> intn;
00116       mutable bool intronsBuilt;
00117 };  
00118 
00119 
00120 template<class T>
00121 void GModel<T>::addExon(int bb, int ee) throw(ExondirectionError) { 
00122    if (bb < ee) {
00123       if (numExons() > 0 && direction() == '-') {
00124          cerr << "must add exon to gene of the same direction\n"
00125             << " adding " << bb << "-->" << ee << " to "
00126             << *this << endl;
00127          throw ExondirectionError();
00128          //exit(1);
00129       }
00130       exons.push_back(make_pair(bb,ee)); 
00131    }
00132    else if (bb > ee) {
00133       if (numExons() > 0 && direction() == '+') {
00134          cerr << "must add exon to gene of the same direction\n"
00135             << " adding " << bb << "-->" << ee << " to "
00136             << *this << endl;
00137          throw ExondirectionError();
00138          //exit(1);
00139       }
00140       exons.push_front(make_pair(bb,ee));
00141    }
00142 }
00143 
00144 template<class T>
00145 bool GModel<T>::operator==(const GModel<T> &mm) const {
00146    if (exons.size() != mm.exons.size()) return false;
00147    list<pair<int,int> >::const_iterator i,j;
00148    for (i=exons.begin(), j=mm.exons.begin(); i != exons.end(); i++, j++) {
00149       if (i->first != j->first || i->second != j->second)
00150          return false;
00151    }
00152    return true;
00153 }
00154 
00155 template<class T>
00156 char GModel<T>::direction() const {
00157    if (exons.empty()) return ' ';
00158    if (exons.front().first < exons.back().second)
00159       return '+';
00160    if (exons.front().first > exons.back().second)
00161       return '-';
00162    return ' ';
00163 }
00164 
00165 template<class T>
00166 bool GModel<T>::contain(const GModel<T> &mm) const {
00167    if (mm.numExons() > numExons() 
00168          || direction() != mm.direction()
00169          || !endsContain(mm)) 
00170       return false;
00171    // if the shorter one is single exon, 
00172    // needs special treatment
00173    if (mm.numExons()==1) {
00174       // both are single exon gene
00175       if (numExons() == 1)
00176          return true;
00177       list<pair<int,int> >::const_iterator it;
00178       if (direction() == '+') {
00179          for (it=exons.begin(); it != exons.end(); it++)
00180          {
00181             if (it->first <= mm.start() &&
00182                   it->second >= mm.end())
00183                return true;
00184          }
00185       }
00186       else if (direction() == '-') {
00187          for (it=exons.begin(); it != exons.end(); it++)
00188          {
00189             if (it->first >= mm.start() &&
00190                   it->second <= mm.end())
00191                return true;
00192          }
00193       }
00194       return false;
00195    }
00196 
00197    int b=0, i;
00198    vector<int> Ithis = introns();
00199    vector<int> Ithat = mm.introns();
00200 startSearch:
00201    while (b <= Ithis.size() - Ithat.size()) {
00202       //cout << "searching from " << b << endl;
00203       for (i=0; i<Ithat.size(); i++) {
00204          //cout << Ithat[i] << " x " << Ithis[b+i] << endl;
00205          if (Ithat[i] != Ithis[b+i]) {
00206             ++b;
00207             goto startSearch;
00208          }
00209       }
00210       /* debug code
00211       copy(Ithis.begin(), Ithis.end(), ostream_iterator<int>(cout, " "));
00212       cout << "\n contains intron: \n";
00213       copy(Ithat.begin(), Ithat.end(), ostream_iterator<int>(cout, " "));
00214       cout << endl;
00215       */
00216       return true;
00217    }
00218    /*
00219    cout << "\n+++++++++++++++++++++++\n";
00220    copy(Ithis.begin(), Ithis.end(), ostream_iterator<int>(cout, " "));
00221    cout << "\n does not contain intron: \n";
00222    copy(Ithat.begin(), Ithat.end(), ostream_iterator<int>(cout, " "));
00223    cout << endl;
00224    */
00225    return false;
00226 }
00227 
00228 // both gene must have at least one intron.
00229 template<class T>
00230 bool GModel<T>::containIntron(const GModel<T> &mm) const {
00231    if (numExons() < 2 || mm.numExons()<2
00232          || numExons() < mm.numExons()) return false;
00233    int b=0, i;
00234    vector<int> Ithis = introns();
00235    vector<int> Ithat = mm.introns();
00236 startSearch:
00237    while (b <= Ithis.size() - Ithat.size()) {
00238       for (i=0; i<Ithat.size(); i++) {
00239          if (Ithat[i] != Ithis[b+i]) {
00240             ++b;
00241             goto startSearch;
00242          }
00243       }
00244       return true;
00245    }
00246    return false;
00247 }
00248 
00249 template<class T>
00250 const vector<int>& GModel<T>::introns() const {
00251    if (intronsBuilt) return intn;
00252    if (!intn.empty()) intn.clear();
00253    if (numExons() == 1) return intn;
00254 
00255    //vector<int> tmp;
00256    list<pair<int,int> >::const_iterator i,j;
00257    i=exons.begin();
00258    j=--exons.end();
00259    if (direction() == '+') {
00260       intn.push_back(i->second+1);
00261       ++i;
00262       while (i != j) {
00263          intn.push_back(i->first-1);
00264          intn.push_back(i->second+1);
00265          ++i;
00266       }
00267       intn.push_back(j->first-1);
00268    }
00269    else if (direction() == '-') {
00270       intn.push_back(i->second-1);
00271       ++i;
00272       while (i != j) {
00273          intn.push_back(i->first+1);
00274          intn.push_back(i->second-1);
00275          ++i;
00276       }
00277       intn.push_back(j->first+1);
00278    }
00279    else {
00280       cerr << "direction wrong\n";
00281       exit(1);
00282    }
00283    intronsBuilt=true;
00284    return intn;
00285 }
00286 
00287 template<class T>
00288 bool GModel<T>::endsContain(const GModel<T> &mm) const {
00289    if (direction() != mm.direction()) return false;
00290 
00291    if (direction() == '+') {
00292       if (start() <= mm.start() && end() >= mm.end())
00293          return true;
00294       return false;
00295    }
00296    else if (direction() == '-') {
00297       if (start() >= mm.start() && end() <= mm.end())
00298          return true;
00299       return false;
00300    }
00301    else return false;
00302 }
00303 
00304 // out put the id first, then the exons
00305 template<class T>
00306 ostream& operator<<(ostream& ous, const GModel<T> &mm) {
00307    ous << mm.id << endl;
00308    list<pair<int,int> >::const_iterator i;
00309    i=mm.exons.begin();
00310    ous << i->first << '-' << i->second;
00311    ++i;
00312    while (i != mm.exons.end()) {
00313       ous << "; " << i->first << '-' << i->second;
00314       ++i;
00315    }
00316    return ous;
00317 }
00318 
00319 template<class T>
00320 vector<pair<int,int> > GModel<T>::intronLongerThan(const int L) const
00321 {
00322    vector<pair<int,int> > tmp;
00323    vector<int> ins = introns();
00324    int i;
00325    if (direction() == '+') {
00326       for (i=0; i<ins.size(); i+=2) {
00327          if ((ins[i+1] - ins[i] + 1) > L) {
00328             tmp.push_back(make_pair(ins[i], ins[i+1]));
00329             //return true;
00330          }
00331       }
00332       //return false;
00333    }
00334    else if (direction() == '-') {
00335       for (i=0; i<ins.size(); i += 2) {
00336          if ( (ins[i]-ins[i+1]+1) > L) 
00337             tmp.push_back(make_pair(ins[i], ins[i+1]));
00338             //return true;
00339       }
00340       //return false;
00341    }
00342    //else return false;
00343    return tmp;
00344 }
00345 
00346 template<class T>
00347 int GModel<T>::maxIntronLength() const {
00348    vector<int> ins = introns();
00349    int L=0;
00350    int i;
00351    if (direction() == '+') {
00352       for (i=0; i<ins.size(); i+=2) {
00353          if ((ins[i+1] - ins[i] + 1) > L) {
00354             L= ins[i+1] - ins[i] + 1;
00355          }
00356       }
00357    }
00358    else if (direction() == '-') {
00359       for (i=0; i<ins.size(); i += 2) {
00360          if ( (ins[i]-ins[i+1]+1) > L) 
00361             L= ins[i] - ins[i+1] + 1;
00362       }
00363    }
00364    return L;
00365 }
00366 template<class T>
00367 int GModel<T>::overlap(const GModel<T> &mm) const
00368 {
00369    if (direction() == '+') {
00370       if (mm.direction() == '+') {
00371          return min(end(),mm.end()) - max(begin(), mm.begin());
00372       }
00373       else if (mm.direction() == '-') {
00374          return 0;
00375       }
00376       else {
00377          cerr << "second model has no direction!\n";
00378          exit(1);
00379       }
00380    }
00381    else if (direction() == '-') {
00382       if (mm.direction() == '-') {
00383          return min(begin(), mm.begin()) - max(end(), mm.end());
00384       }
00385       else if (mm.direction() == '+') {
00386          return 0;
00387       }
00388       else {
00389          cerr << "second model has no direction!\n";
00390          exit(1);
00391       }
00392    }
00393    else {
00394       cerr << "first range has no direction()\n";
00395       exit(1);
00396    }
00397 }
00398 
00399 #endif

Generated on Wed Aug 10 11:56:49 2011 for Softwares from Orpara by  doxygen 1.5.6