GModel.h

Go to the documentation of this file.
00001 #ifndef GMODEL_H
00002 #define GMODEL_H
00003 
00004 #include <string>  
00005 #include <vector> 
00006 #include <list> 
00007 #include <iostream>
00008 #include <cstdlib>
00009 
00022 using namespace std; 
00023 
00024 class ExondirectionError : public exception {
00025    public:
00026       ExondirectionError() : message("Different direction of exon inside the same gene") { }
00027       ExondirectionError(const string &msg) : message(msg) { }
00028       const char* what() const throw() { return message.c_str(); }
00029       ~ExondirectionError() throw() { }
00030    private:
00031       string message;
00032 };
00033              
00043 template<typename T> class GModel;
00044 template<typename T> 
00045 ostream& operator<<(ostream& ous, const GModel<T> &mm);
00046 
00047 template<class T> class GModel { 
00048    public: 
00049       GModel() : id(), exons(), intn(), intronsBuilt(false) { }
00050       GModel(const T &name) : id(name), exons(), intn(), intronsBuilt(false) { }
00051       //void addExon(int bb, int ee) { exons.push_back(make_pair(bb,ee)); }
00052       
00053       /* require the exons are sorted from small to large
00054        */
00055       void addExon(int bb, int ee) throw(ExondirectionError);
00056       bool operator==(const GModel &mm) const;
00057       // return +, -, or ' ' if now known
00058       char direction() const;
00059       //pair<int,int> bound() const { return make_pair(exons[0].first, exons[exons.size()-1].second); }
00060       /* return the whole reange of the gene model
00061        */
00062       pair<int,int> bound() const { 
00063          return make_pair(exons.front().first, exons.back().second); }
00064       int size() const {
00065          if (direction() == '+')
00066             return exons.back().second - exons.front().first + 1; 
00067          else
00068             return exons.front().first - exons.back().second + 1; }
00069       /* one model contains another model
00070        * Both ends ousdide of mm, and the intron of 
00071        * mm is a subset of this object.
00072        */
00073       bool contain(const GModel &mm) const;
00074       /* only check for the introns, disregard the ends
00075        * This method is useful where some simple extension
00076        * of the partial models might have been wrong.
00077        */
00078       bool containIntron(const GModel &mm) const;
00082       bool endsContain(const GModel &mm) const;
00083       int numExons() const { return exons.size(); }
00084       //int start() const { return exons[0].first; }
00085       int start() const { return exons.front().first; }
00086       int begin() const { return exons.front().first; }
00087       //int end() const { return exons[exons.size()-1].second; }
00088       int end() const { return exons.back().second; }
00089       int finish() const { return exons.back().second; }
00090       // return all the intron boundaries
00091       const vector<int>& introns() const;
00092       // this is <T> strange <T> must be added after the function name.
00093       // output key, then exons
00094       friend ostream& operator<<<T>(ostream& os, const GModel<T>& mm);
00095 
00096       const T& getId() const { return id; }
00097       /* return all those introns longer than L
00098        */
00099       vector<pair<int,int> > intronLongerThan(const int L) const;
00100       int maxIntronLength() const;
00101       bool inside(pair<int,int> r) const {
00102          return min(start(), end()) > min(r.first, r.second) &&
00103             max(start(), end()) < max(r.first, r.second);
00104       }
00110       int overlap(const GModel<T> &mm) const;
00114       int overlapOpposite(const GModel<T> &mm) const { }
00118       int overlapBoth(const GModel<T> &mm) const { }
00119  
00120    protected: 
00121       T id; 
00122       //vector< pair<int,int> > exons;
00123       // sorted from 5'--to-->3' direction
00124       list< pair<int,int> > exons;
00125       mutable vector<int> intn;
00126       mutable bool intronsBuilt;
00127 };  
00128 
00129 
00130 template<class T>
00131 void GModel<T>::addExon(int bb, int ee) throw(ExondirectionError) { 
00132    if (bb < ee) {
00133       if (numExons() > 0 && direction() == '-') {
00134          cerr << "must add exon to gene of the same direction\n"
00135             << " adding " << bb << "-->" << ee << " to "
00136             << *this << endl;
00137          throw ExondirectionError();
00138          //exit(1);
00139       }
00140       exons.push_back(make_pair(bb,ee)); 
00141    }
00142    else if (bb > ee) {
00143       if (numExons() > 0 && direction() == '+') {
00144          cerr << "must add exon to gene of the same direction\n"
00145             << " adding " << bb << "-->" << ee << " to "
00146             << *this << endl;
00147          throw ExondirectionError();
00148          //exit(1);
00149       }
00150       exons.push_front(make_pair(bb,ee));
00151    }
00152 }
00153 
00154 template<class T>
00155 bool GModel<T>::operator==(const GModel<T> &mm) const {
00156    if (exons.size() != mm.exons.size()) return false;
00157    list<pair<int,int> >::const_iterator i,j;
00158    for (i=exons.begin(), j=mm.exons.begin(); i != exons.end(); i++, j++) {
00159       if (i->first != j->first || i->second != j->second)
00160          return false;
00161    }
00162    return true;
00163 }
00164 
00165 template<class T>
00166 char GModel<T>::direction() const {
00167    if (exons.empty()) return ' ';
00168    if (exons.front().first < exons.back().second)
00169       return '+';
00170    if (exons.front().first > exons.back().second)
00171       return '-';
00172    return ' ';
00173 }
00174 
00175 template<class T>
00176 bool GModel<T>::contain(const GModel<T> &mm) const {
00177    if (mm.numExons() > numExons() 
00178          || direction() != mm.direction()
00179          || !endsContain(mm)) 
00180       return false;
00181    // if the shorter one is single exon, 
00182    // needs special treatment
00183    if (mm.numExons()==1) {
00184       // both are single exon gene
00185       if (numExons() == 1)
00186          return true;
00187       list<pair<int,int> >::const_iterator it;
00188       if (direction() == '+') {
00189          for (it=exons.begin(); it != exons.end(); it++)
00190          {
00191             if (it->first <= mm.start() &&
00192                   it->second >= mm.end())
00193                return true;
00194          }
00195       }
00196       else if (direction() == '-') {
00197          for (it=exons.begin(); it != exons.end(); it++)
00198          {
00199             if (it->first >= mm.start() &&
00200                   it->second <= mm.end())
00201                return true;
00202          }
00203       }
00204       return false;
00205    }
00206 
00207    int b=0, i;
00208    vector<int> Ithis = introns();
00209    vector<int> Ithat = mm.introns();
00210 startSearch:
00211    while (b <= Ithis.size() - Ithat.size()) {
00212       //cout << "searching from " << b << endl;
00213       for (i=0; i<Ithat.size(); i++) {
00214          //cout << Ithat[i] << " x " << Ithis[b+i] << endl;
00215          if (Ithat[i] != Ithis[b+i]) {
00216             ++b;
00217             goto startSearch;
00218          }
00219       }
00220       /* debug code
00221       copy(Ithis.begin(), Ithis.end(), ostream_iterator<int>(cout, " "));
00222       cout << "\n contains intron: \n";
00223       copy(Ithat.begin(), Ithat.end(), ostream_iterator<int>(cout, " "));
00224       cout << endl;
00225       */
00226       return true;
00227    }
00228    /*
00229    cout << "\n+++++++++++++++++++++++\n";
00230    copy(Ithis.begin(), Ithis.end(), ostream_iterator<int>(cout, " "));
00231    cout << "\n does not contain intron: \n";
00232    copy(Ithat.begin(), Ithat.end(), ostream_iterator<int>(cout, " "));
00233    cout << endl;
00234    */
00235    return false;
00236 }
00237 
00238 // both gene must have at least one intron.
00239 template<class T>
00240 bool GModel<T>::containIntron(const GModel<T> &mm) const {
00241    if (numExons() < 2 || mm.numExons()<2
00242          || numExons() < mm.numExons()) return false;
00243    int b=0, i;
00244    vector<int> Ithis = introns();
00245    vector<int> Ithat = mm.introns();
00246 startSearch:
00247    while (b <= Ithis.size() - Ithat.size()) {
00248       for (i=0; i<Ithat.size(); i++) {
00249          if (Ithat[i] != Ithis[b+i]) {
00250             ++b;
00251             goto startSearch;
00252          }
00253       }
00254       return true;
00255    }
00256    return false;
00257 }
00258 
00259 template<class T>
00260 const vector<int>& GModel<T>::introns() const {
00261    if (intronsBuilt) return intn;
00262    if (!intn.empty()) intn.clear();
00263    if (numExons() == 1) return intn;
00264 
00265    //vector<int> tmp;
00266    list<pair<int,int> >::const_iterator i,j;
00267    i=exons.begin();
00268    j=--exons.end();
00269    if (direction() == '+') {
00270       intn.push_back(i->second+1);
00271       ++i;
00272       while (i != j) {
00273          intn.push_back(i->first-1);
00274          intn.push_back(i->second+1);
00275          ++i;
00276       }
00277       intn.push_back(j->first-1);
00278    }
00279    else if (direction() == '-') {
00280       intn.push_back(i->second-1);
00281       ++i;
00282       while (i != j) {
00283          intn.push_back(i->first+1);
00284          intn.push_back(i->second-1);
00285          ++i;
00286       }
00287       intn.push_back(j->first+1);
00288    }
00289    else {
00290       cerr << "direction wrong\n";
00291       exit(1);
00292    }
00293    intronsBuilt=true;
00294    return intn;
00295 }
00296 
00297 template<class T>
00298 bool GModel<T>::endsContain(const GModel<T> &mm) const {
00299    if (direction() != mm.direction()) return false;
00300 
00301    if (direction() == '+') {
00302       if (start() <= mm.start() && end() >= mm.end())
00303          return true;
00304       return false;
00305    }
00306    else if (direction() == '-') {
00307       if (start() >= mm.start() && end() <= mm.end())
00308          return true;
00309       return false;
00310    }
00311    else return false;
00312 }
00313 
00314 // out put the id first, then the exons
00315 template<class T>
00316 ostream& operator<<(ostream& ous, const GModel<T> &mm) {
00317    ous << mm.id << endl;
00318    list<pair<int,int> >::const_iterator i;
00319    i=mm.exons.begin();
00320    ous << i->first << '-' << i->second;
00321    ++i;
00322    while (i != mm.exons.end()) {
00323       ous << "; " << i->first << '-' << i->second;
00324       ++i;
00325    }
00326    return ous;
00327 }
00328 
00329 template<class T>
00330 vector<pair<int,int> > GModel<T>::intronLongerThan(const int L) const
00331 {
00332    vector<pair<int,int> > tmp;
00333    vector<int> ins = introns();
00334    int i;
00335    if (direction() == '+') {
00336       for (i=0; i<ins.size(); i+=2) {
00337          if ((ins[i+1] - ins[i] + 1) > L) {
00338             tmp.push_back(make_pair(ins[i], ins[i+1]));
00339             //return true;
00340          }
00341       }
00342       //return false;
00343    }
00344    else if (direction() == '-') {
00345       for (i=0; i<ins.size(); i += 2) {
00346          if ( (ins[i]-ins[i+1]+1) > L) 
00347             tmp.push_back(make_pair(ins[i], ins[i+1]));
00348             //return true;
00349       }
00350       //return false;
00351    }
00352    //else return false;
00353    return tmp;
00354 }
00355 
00356 template<class T>
00357 int GModel<T>::maxIntronLength() const {
00358    vector<int> ins = introns();
00359    int L=0;
00360    int i;
00361    if (direction() == '+') {
00362       for (i=0; i<ins.size(); i+=2) {
00363          if ((ins[i+1] - ins[i] + 1) > L) {
00364             L= ins[i+1] - ins[i] + 1;
00365          }
00366       }
00367    }
00368    else if (direction() == '-') {
00369       for (i=0; i<ins.size(); i += 2) {
00370          if ( (ins[i]-ins[i+1]+1) > L) 
00371             L= ins[i] - ins[i+1] + 1;
00372       }
00373    }
00374    return L;
00375 }
00376 template<class T>
00377 int GModel<T>::overlap(const GModel<T> &mm) const
00378 {
00379    if (direction() == '+') {
00380       if (mm.direction() == '+') {
00381          return min(end(),mm.end()) - max(begin(), mm.begin());
00382       }
00383       else if (mm.direction() == '-') {
00384          return 0;
00385       }
00386       else {
00387          cerr << "second model has no direction!\n";
00388          exit(1);
00389       }
00390    }
00391    else if (direction() == '-') {
00392       if (mm.direction() == '-') {
00393          return min(begin(), mm.begin()) - max(end(), mm.end());
00394       }
00395       else if (mm.direction() == '+') {
00396          return 0;
00397       }
00398       else {
00399          cerr << "second model has no direction!\n";
00400          exit(1);
00401       }
00402    }
00403    else {
00404       cerr << "first range has no direction()\n";
00405       exit(1);
00406    }
00407 }
00408 
00409 #endif

Generated on Wed Aug 10 11:56:48 2011 for Softwares from Orpara by  doxygen 1.5.6