RNAModel.h

Go to the documentation of this file.
00001 #ifndef RNAMODEL_H
00002 #define RNAMODEL_H
00003 
00004 #include "GenModel.h"
00005 #include <string>
00006 #include "bioseq.h"
00007 #include "Coverdepth.h"
00008 #include "strformat.h"
00009 #include <cassert>
00010 #include <cmath>
00011 
00012 using namespace std;
00013 
00022 class OutsideGenomicSequence : public exception {
00023    private:
00024       string message;
00025    public:
00026       OutsideGenomicSequence(const string& msg) throw() 
00027          : message(msg) { }
00028       OutsideGenomicSequence() throw() : message("Position outside Genomic Length") { }
00029       ~OutsideGenomicSequence() throw() { }
00030       const char* what() const throw() {
00031          return message.c_str();
00032       }
00033 };
00034 
00035 
00036 enum modeltype { RNA, mRNA, EST, JGI};
00037 
00038 /*
00039 class Intronbound {
00040    public:
00041       Intronbound() : seq(), count(0) { }
00042       Intronbound(const char ss[5], int cc) : seq(ss), count(cc) { }
00043       Intronbound(const Intronbound& ib) : seq(ib.seq), count(ib.count) { }
00044 
00045       char seq[5];
00046       int count;
00047 };
00048 */
00049 
00067 class RNAModel : public Noschain {
00068    public:
00072       RNAModel() : Noschain(), genid(0) { oid=nextId(); }
00073 
00079       explicit RNAModel(const Noschain &seg, const string &chrom, const string &gs)
00080          : Noschain(seg), gid(chrom), gseq(&gs), genid(0), oid(0) { 
00081             oid=nextId(); subsequence(rna, *gseq);
00082       }
00089       RNAModel(const string &exstr, const string &gname, const string &gs, 
00090             int oi)
00091          : Noschain(exstr), gid(gname), gseq(&gs), oid(oi), genid(0) {
00092             subsequence(rna, *gseq); }
00094       RNAModel(const string &exstr, const string &gname, const string &gs, 
00095             int oi, int gene_id)
00096          : Noschain(exstr), gid(gname), gseq(&gs), oid(oi), genid(gene_id) {
00097             subsequence(rna, *gseq); }
00103       RNAModel(const string &exstarts, const string &exends,
00104             char strand, const string& gi, const string& genomic)
00105          throw(Badinput, exception)
00106          : Noschain(exstarts, exends, strand), gid(gi), gseq(&genomic),
00107             genid(0), oid(serialid++) {
00108                if (exons[0].isNull() || exons.back().isNull()) 
00109                   throw Badinput("Null end exon");
00110             subsequence(rna, *gseq);
00111       }
00116       RNAModel(const string &exstarts, const string &exends,
00117             char strand, const string& gi, const string& genomic,
00118             int oo)
00119          throw(Badinput)
00120          : Noschain(exstarts, exends, strand), gid(gi), gseq(&genomic),
00121             genid(0), oid(oo) {
00122                if (exons[0].isNull() || exons.back().isNull()) 
00123                   throw Badinput("Null end exon");
00124             subsequence(rna, *gseq);
00125          }
00131       RNAModel(const RNAModel &rm) : Noschain(rm), gid(rm.gid),
00132          gseq(rm.gseq), rna(rm.rna), oid(serialid++), genid(rm.genid) { }
00136       RNAModel& operator=(const RNAModel &rm);
00139       void assignNewId() { oid=nextId(); }
00141       int getOid() const { return oid; }
00142       void setOid(int oi) { oid=oi; }
00146       void setNextOid() { oid=nextId(); }
00155       void assignNewGeneId() { genid=nextGeneId(); }
00156       void modelidAsGeneid() { genid=oid; }
00159       void assignCurrentGeneId() { genid=currentGeneId(); }
00160       int getGeneId() const { return genid; }
00161       int geneId() const { return genid; }
00162       void setGeneId(const int gene_id) { genid=gene_id; }
00169       void setGenomic(const string &gs) { gseq=&gs; }
00170       void setGenomicPointer(const string *gs) { gseq=gs; }
00174       void trimAfterPoint(const int p) throw (PointOutChain);
00178       void trimBeforePoint(const int p) throw (PointOutChain);
00180       const string& RNASequence() const { return rna; }
00186       int genomicIndex(int mrnaidx) const throw (OutsideGenomicSequence);
00187       int RNAIndex(int gidx) const throw (PointOutChain);
00188       bool sameExons(const RNAModel &gm) const { return Noschain::operator==(gm); }
00192       const string* getGenomicPointer() const { return gseq; }
00198       int exonLength() const { return rna.length(); }
00204       virtual bool valid() const;
00212       //bool operator==(const RNAModel &rmd) const { return rna == rmd.rna; }
00213       bool sameRNA(const RNAModel &rmd) const { return rna==rmd.rna; }
00214       const string& genomicId() const { return gid; }
00215       int RNALength() const { return rna.length(); }
00216       const string& RNAString() const { return rna; }
00223       void resetRNA() throw(Badinput) { subsequence(rna,*gseq); }
00224       void setRNA(const string &rseq) { rna=rseq; }
00228       virtual void reset() { subsequence(rna,*gseq); }
00229       void setGenomicId(const string &gg) { gid=gg; }
00233       bool append(Noschain &mod);
00242       RNAModel& reverse();
00243       
00247       string seqGenomic() const;
00248       int genomicLength() const { return gseq->length(); }
00249       virtual modeltype objtype() const { return RNA; }
00253       ostream& writeExon(ostream &ous, char sep='\t') const;
00255       ostream& print(ostream& ous) const;
00260       void intronBound(map<string,int> &spsite) const;
00261 
00262       static const char exonheader[];
00264 
00265       static int currentId() { return serialid; }
00267       static int nextId() { return serialid++; }
00268       static void advanceId() { ++serialid; }
00269       static void rollbackId() { --serialid; }
00270       static void setId(int oi) { serialid=oi; }
00272 
00273       static int nextGeneId() { return geneid++; }
00275       static int currentGeneId() { return geneid; }
00276       static void advanceGeneId() { ++geneid; }
00277 
00278    protected:
00279 
00283       string gid;
00286       const string *gseq;
00290       string rna;
00295       int oid; 
00298       int genid; 
00299 
00302       static int serialid; 
00303       static int exonid;
00304       static int geneid;
00305 };
00306 
00307 
00314 class mRNAModel : public RNAModel {
00315    public:
00316       mRNAModel() : RNAModel(), cdsb(-1), cdse(-1),
00317          gcdsb(-1), gcdse(-1), pep(), frame(0) { }
00323       explicit mRNAModel(const Noschain &seg, const string &chrom, const string &gs);
00331       explicit mRNAModel(const Noschain &seg, int gcdsb_, int gcdse_, const string &chrom, const string &gs, int fr=0);
00339       mRNAModel(const string &exstr, const string &gname, const string &gs,
00340             int oi, int gcb, int gce, int fr=0);
00342       mRNAModel(const string &exstr, const string &gname, const string &gs,
00343             int oi, int gcb, int gce, int gene_id, int fr=0);
00349       mRNAModel(const string &exstarts, const string &exends, char strand, 
00350             int gcb, int gce, const string& gi, const string& genomic)
00351          throw(PointOutChain, Badinput, exception);
00354       mRNAModel(const string &exstarts, const string &exends, char strand, 
00355             int gcb, int gce, const string& gi, const string& genomic, int oo)
00356          throw(PointOutChain, Badinput);
00357 
00359       mRNAModel(const mRNAModel &mm) : RNAModel(mm), cdsb(mm.cdsb),
00360          cdse(mm.cdse), gcdsb(mm.gcdsb), gcdse(mm.gcdse), 
00361          pep(mm.pep), frame(mm.frame) { }
00368       char guessStrand() const;
00369 
00370       mRNAModel& operator=(const mRNAModel &mm);
00373       bool samePeptide(const mRNAModel &mm) const { return pep==mm.pep; }
00377       bool sameGene(const mRNAModel &mod) const;
00380       Noschain FivePrimeUTR() const { return subchainBeforePoint(gcdsb); }
00381       Noschain ThreePrimeUTR() const { return subchainAfterPoint(gcdse); }
00382       int num5NoncodingExons() const { return FivePrimeUTR().numberOfRanges(); }
00383       int num3NoncodingExons() const { return ThreePrimeUTR().numberOfRanges(); }
00385       Noschain CDSChain() const throw (PointOutChain); //{ return subchain(gcdsb, gcdse); }
00389       Range CDSRange() const { return Range(gcdsb, gcdse); }
00390       pair<int,int> genomicCDSBound() const { return pair<int,int>(gcdsb, gcdse); }
00391       Range RNACDSRange() const { return Range(cdsb,cdse); }
00392       pair<int,int> RNACDSBound() const { return pair<int,int>(cdsb, cdse); }
00394       int CDSLength() const { return cdse-cdsb+1; }
00395       int genomicCDSLength() const { return abs(gcdse-gcdsb)+1; }
00396       int genomicCDSEnd() const { return gcdse; }
00397       int genomicCDSBegin() const { return gcdsb; }
00398       const string& proteinSequence() const { return pep; }
00406       int proteinLength() const { return pep.length(); }
00409       int proteinLengthNoTail() const;
00410       int getFrame() const { return frame; }
00412       string CDSSeq() const { return rna.substr(cdsb-1, cdse-cdsb+1); }
00416       string CDSSequence() const;
00417       char CDSDirection() const { if (gcdsb < gcdse) return '+'; return '-'; }
00420       double CDSFraction() const { return static_cast<double>(CDSLength())/length(); }
00424       double CDSFractionRNA() const { return static_cast<double>(CDSLength())/exonLength(); }
00428       double CDSFractionGenomic() const { return static_cast<double>(CDSLength())/length(); }
00432       int FivePrimeUTRLength() const { return cdsb-1; }
00433       int ThreePrimeUTRLength() const { return exonLength() - cdse; }
00434       int UTRLength() const { return FivePrimeUTRLength() + ThreePrimeUTRLength(); }
00437       void UTR3Sequence(string &seq) const { if (cdse==rna.length()) seq=""; else seq=rna.substr(cdse); }
00438       string UTR3Sequence() const { if (cdse==rna.length()) return ""; return rna.substr(cdse); }
00441       bool hasStart() const { return pep[0] == 'M'; }
00442       bool hasStop() const { return pep[pep.length()-1] == '*'; }
00444       bool complete() const { return hasStart() && hasStop(); }
00452       //bool operator==(const mRNAModel &md) const { 
00453        //  return Noschain::operator==(md) && pep==md.pep; }
00455       int numberOfInternalStops() const;
00457       const string& getProtein() const { return pep; }
00463       void trimAfterPoint(const int p) throw (PointOutChain);
00466       void trimBeforePoint(const int p) throw (PointOutChain);
00467       void trimBeforePoint(const int gp, const int rp) throw (PointOutChain);
00469       void setProtein(const string &pseq) { pep=pseq; }
00477       void setRNACDS(int bb, int ee) throw(OutsideGenomicSequence);
00481       void resetRNACDS(int bb, int ee) throw(OutsideGenomicSequence) {
00482          setRNACDS(bb,ee); resetProtein(); }
00487       void setGenomicCDS(int gb, int ge) throw(OutsideGenomicSequence, InvalidModel);
00488       void setGenomicCDS(pair<int, int> gcr) {
00489         //try { setGenomicCDS(gcr.first, gcr.second); } catch (exception &err) { throw err; } }
00490       setGenomicCDS(gcr.first, gcr.second); assert(cdsb<cdse); }
00499       bool growCDS3Prime(int len) throw(OutsideGenomicSequence);
00504       bool trimCDSTail();
00507       bool trimCDSStop();
00511       void reset();
00516       void resetProtein() { translate(pep, rna, cdsb+frame, cdse); }
00520       void setLongestCDSAndProtein();
00525       bool append(mRNAModel &mod, int &comment);
00532       string JGIModelRow(const char sep='\t') const;
00533       ostream& printJGIModelRow(ostream &ous, const char sep='\t') const;
00548       string JGITranscriptRow(const char sep='\t') const;
00549       ostream& printJGITranscriptRow(ostream &ous, const char sep='\t') const;
00550       ostream& printJGITranscriptRowNoId(ostream &ous, const char sep='\t') const;
00554       string JGIProteinRow(char sep='\t') const;
00555       //string JGILinkToAssRow(char sep='\t') const;
00557       string sfCDSGenomic() const;
00559       Noschain CDSOnewayChain() const;
00560       void CDSOnewayChain(Noschain& ch) const;
00561       string sfCDSTranscript() const {
00562          return (CDSChain().onewayChain().exonOnlyChain()+(cdsb-1)).asDelimitedString(':', ','); 
00563       }
00564       string sfExonsProtein() const;
00565       ostream& show(ostream &ous) const;
00574       bool genuine() const
00575          { return pep.length()>50 && complete()
00576                && ThreePrimeUTRLength()>30 && ThreePrimeUTRLength() < 1800 
00577                && FivePrimeUTRLength()>30 && FivePrimeUTRLength()< 1800 
00578                && (CDSFractionRNA() > 0.5 || CDSLength() > 1000) 
00579                && num3NoncodingExons() < 3 && num5NoncodingExons() < 3; }
00580       bool semiGenuine() const { return num3NoncodingExons() < 3 && num5NoncodingExons() < 3 && CDSLength() > 140 && complete() && ThreePrimeUTRLength() < utrlen3max && FivePrimeUTRLength() < utrlen5max; }
00585       bool isStar() const { return (CDSFraction() > 0.6 || (ThreePrimeUTRLength() < utrlen3max && FivePrimeUTRLength() < utrlen5max && CDSLength() > 240)) && num3NoncodingExons()<3 && num5NoncodingExons() < 3 && complete(); }
00586       modeltype objtype() const { return mRNA; }
00592       bool valid() const;
00593       ostream& print(ostream &ous) const;
00599       mRNAModel& reverse(int newRNACDSB, int newRNACDSE);
00600       string name(const string &prefix) { return prefix + itos(getOid()); }
00601 
00602       static void setShortestModel(int len) { shortestmodel=len; }
00608       void writetab(ostream& mod, ostream &ex, ostream &track,
00609             ostream &orna, ostream &oprt, char sep='\t') const;
00620       ostream& writeModelTable(ostream &ous, char sep='\t') const;
00621 
00622       static const char modelheader[];
00623       static const char jgiModelCol[];
00624       static const char jgiTranscriptCol[];
00625       static const char jgiProteinCol[];
00626 
00630       static int shortestpep;
00635       static int shortestmodel;
00640       static int utrlen5max, utrlen3max;
00641       
00642    protected:
00649       int cdsb, cdse;
00655       int gcdsb, gcdse; 
00659       string pep;
00676       int frame; // 0, 1, 2
00677 
00679        static char header[350];
00680 };
00681 
00682 
00683 class ESTAssembly;
00684 
00690 class mRNAModelUpdate : public mRNAModel {
00691    public: 
00696       explicit mRNAModelUpdate(const mRNAModel &mm, int ec) 
00697          : mRNAModel(mm), numest(ec), comment(0) { }
00700       mRNAModelUpdate(const mRNAModel &mm) 
00701          : mRNAModel(mm), numest(0), comment(0) { }
00703       mRNAModelUpdate(const mRNAModelUpdate &um) : mRNAModel(um), numest(um.numest), estcover(um.estcover), comment(um.comment) { }
00704       void addESTCover(const mRNAModel &mm, const char direction);
00708       void addESTCover(const ESTAssembly &mm);
00709       void setESTCount(int ec) { numest=ec; }
00710       void addESTCount(int ec) { numest += ec; }
00714       void writetab(ostream& mod, ostream &ex, ostream &track,
00715             ostream &orna, ostream &oprt, char sep='\t') const;
00724       ostream& writeModelTable(ostream &ous, char sep='\t') const;
00725 
00727       string name() const { return "MIX" + itos(getOid()) + "_" + itos(numest) + "_" + itos(static_cast<int>(floor(ESTCoverage()*100+0.5))); }
00728       string title() const { return itos(numest) + " EST updated Ab initio model"; }
00739       bool append(ESTAssembly &mod);
00740       float ESTCoverage() const { return static_cast<float>(estcover.exonLength())/exonLength(); }
00741       ostream& show(ostream &ous) const;
00742 
00743       static const char modelheader[];
00745       static const char* jgiModelColumns();
00747       static const char* jgiTranscriptColumns();
00748       static const char* jgiProteinColumns();
00749 
00750 
00751    private:
00752       int numest;
00753       RangeChain estcover;
00755       int comment; 
00756 };
00757 
00759 class InvalidJGIModel : public exception {
00760    private:
00761       string message;
00762    public:
00763       InvalidJGIModel() throw() : message("Invalid Model") { }
00764       InvalidJGIModel(const char msg[]) throw() : message(msg) { }
00765       InvalidJGIModel(const string& msg) throw() : message(msg) { }
00766       ~InvalidJGIModel() throw() { }
00767       const char* what() const throw() {
00768          return message.c_str();
00769       }
00770 };
00771 
00777 class JGIModel : public mRNAModel {
00778    public:
00788       JGIModel(const string &exstarts, const string &exends,
00789             char strand, int cdsb, int cdse, const string &gi,
00790             int ii, const string &si, const string &genomic) throw(Badinput, InvalidJGIModel, InvalidModel, PointOutChain, exception);
00792       JGIModel(const string &exstarts, const string &exends,
00793             char strand, int cdsb, int cdse, const string &gi,
00794             const string &si, const string &genomic) throw(Badinput, InvalidJGIModel, InvalidModel, PointOutChain, exception);
00799       bool valid();
00803       string toJGIString(char sep='\t') const;
00804       string toSQLString(char sep=',') const;
00805       string toString(char sep='\t') const;
00811       void growEnd(int icr);
00813       int getId() const { return getOid(); }
00814       const string& getName() const { return name; }
00815       ostream& show(ostream &ous) const;
00816 
00819       ostream& print(ostream &ous) const;
00821       modeltype objtype() const { return JGI; }
00822 
00823    private:
00824 
00825       /* RNAModel class has oid. This id should be
00826        * removed. Eliminating this member, use oid from
00827        * RNAModel.
00828        */
00829       //int id; 
00832       string name;
00833       // using frame from mRNA parent class
00834       // removing cdsphase
00835       //int cdsphase; // is this the same as frame from mRNAModel?
00837       //int numstop;
00838 };
00839 
00843 bool useFullORF(int lenfull, int lenpartial);
00844 
00847 class ESTAssembly : public mRNAModel {
00848    public:
00850       ESTAssembly() : mRNAModel(), congid(-1), numest(0), basecovdep(0), comment(), intronfix(0), relprofh(1) { }
00857       explicit ESTAssembly(const Noschain &seg, int nume, Coverdepth *bc,
00858             const string &chrom, const string &gs, int cngid);
00859 
00861       explicit ESTAssembly(const Noschain &seg, int gcdsb_, int gcdse_, int nume, 
00862             Coverdepth *bc, const string &chrom, const string &gs, int cngid, int fr);
00866       explicit ESTAssembly(const Noschain &seg, int nume, 
00867             const string &chrom, const string &gs, int cngid);
00874       ESTAssembly(const string &exons, const string &genomicid,
00875             const string &genomicseq, int modelid, int gcdsB, int gcdsE, 
00876             int congregationid, int estcount, int gnid, int phase, float prof2star)
00877          : mRNAModel(exons, genomicid, genomicseq, modelid, 
00878                gcdsB, gcdsE, gnid, phase),
00879             congid(congregationid), numest(estcount), basecovdep(0),
00880             comment(), intronfix(0), relprofh(prof2star)
00881             { setNameAndTitle(); }
00885       ESTAssembly(const ESTAssembly &ea) : mRNAModel(ea), 
00886          congid(ea.congid), numest(ea.numest), estname(ea.estname),
00887          esttitle(ea.esttitle), pepname(ea.pepname), peptitle(ea.peptitle), comment(ea.comment), intronfix(ea.intronfix), basecovdep(ea.basecovdep), relprofh(ea.relprofh) { }
00889       ~ESTAssembly() { if (basecovdep != 0) delete basecovdep; }
00893       void setNameAndTitle();
00897       void setName();
00898       void setTitle();
00899       void setTitle(const string &prefix);
00900       void addTitleTag(const string &tag);
00901       ESTAssembly& operator=(const ESTAssembly &ea);
00902       const string& name() const { return estname; }
00907       void write(ostream& mod, ostream &exon, ostream &track,
00908             ostream &ousest, ostream &ouspep, char sep='\t') const;
00930        void writetab(ostream& mod, ostream &exon, ostream &track,
00931             ostream &ousest, ostream &ouspep, char sep='\t') const;
00950        void writeModel(ostream &ous, char sep='\t') const;
00951        float RNACodingFraction() { return CDSLength()/(float)RNALength(); }
00972        //bool breakSuffixModel(ESTAssembly &mod);
00973        ESTAssembly* breakSuffixModel() throw(PointOutChain);
00975        ESTAssembly* breakPrefixModel() throw(PointOutChain);
00986        ESTAssembly* prune3PrimeUTR() throw (PointOutChain);
00987        ESTAssembly* prune5PrimeUTR() throw (PointOutChain);
00988       /* break up current model into smaller models.
00989         * If not successful then the vector will be
00990         * empty. Try to find optimal ORFs with the 
00991         * RNA, then produce new models.
00992         * This method is independent of the break-prefix, suffix methods
00993         * They share very little, and used different algorithms.
00994       vector<ESTAssembly*> breakup();
00995       I have moved it to ESTAssemblyid class since this
00996       one was never used directly.
00997         */
01000       bool shouldBreakSuffix() const;
01001       bool shouldBreakPrefix() const;
01003       bool isChimera() const;
01004       modeltype objtype() const { return EST; }
01006       ostream& show(ostream &ous) const;
01007       ostream& print(ostream &ous) const;
01008       void showBaseCoverInfo(ostream &ous=cerr) const { 
01009          if (basecovdep != 0) ous << *basecovdep << endl; }
01010       int ESTCount() const { return numest; }
01011       int numberOfEST() const { return numest; }
01012       bool validProfile() const;
01013       void addComment(const string &comm);
01014       void writeCommentTab(ostream &ous) const { 
01015          if (!comment.empty()) ous << getOid() << '\t' << comment << endl; }
01016       bool hasComment() const { return !comment.empty(); }
01017       bool noComment() const { return comment.empty(); }
01039       void fixIntronBound();
01040       int intronFixState() const { return intronfix; }
01053       void checkIntronBound();
01055       bool genuine() const {
01056          return mRNAModel::genuine() && intronfix < 2; }
01057       bool semiGenuine() const {
01058          return mRNAModel::semiGenuine() && intronfix<2; }
01059       bool isStar() const {
01060          return mRNAModel::isStar() && intronfix<2; }
01066       bool findDip(int b, int e, Dip &dip) const;
01068       ostream& showProfile(ostream &ous) const { ous << *basecovdep << endl; return ous; }
01071       float averageProfileHeight() const { if (basecovdep == 0) return 1; else return basecovdep->averageHeight(); }
01072       int maxProfileHeight() const { if (basecovdep == 0) return 1; else return basecovdep->maxHeight(); }
01076       pair<float,float> relativeProfileHeight(const ESTAssembly &ea) const { return basecovdep->relativeHeight(*(ea.basecovdep)); }
01083       void assignRPH(const ESTAssembly &star) { relprofh=star.relativeProfileHeight(*this).first; }
01084       float getRelprofh() const { return relprofh; }
01085       void setRelprofh(const float rh) { relprofh=rh; }
01090       void releaseProfile() { basecovdep=0; }
01091       ESTAssembly& reverse(int RNACDSb, int RNACDSe) { mRNAModel::reverse(RNACDSb, RNACDSe); 
01092          if (basecovdep != 0) basecovdep->reverseStrand(); }
01093 
01096        static const char* JGIModelColumns();
01097        static const char* JGITranscriptColumns();
01098        static const char* JGIProteinColumns();
01107        static bool ORFLongEnough(pair<int,int> &bound, int ORFType, int trlen); 
01108 
01109        static const char modelheader[];
01110        //static const char exonheader[]; // move to RNA class
01111        static const char trackheader[];
01112 
01113    protected:
01114 
01115        int congid;
01116        int numest;
01118        Coverdepth *basecovdep; 
01131        //pair<float,float> relprofh;
01132        float relprofh;
01133        string estname, esttitle;
01134        string pepname, peptitle;
01137        string comment;
01149        int intronfix;
01150 
01152        ESTAssembly* budPlusSuffixModel(pair<int,int> gcbts, int rcdsb, int rcdse, int suffix_gcdsb, int suffix_gcdse, const string &pep, int rcut1, int rcut2) throw(PointOutChain);
01154        ESTAssembly* budPlusSuffixModel(pair<int,int> gcuts, int rcdsb, int rcdse, int suffix_gcdsb, int suffix_gcdse, const string &pep) throw (PointOutChain) {
01155           try {
01156           return budPlusSuffixModel(gcuts, rcdsb, rcdse, suffix_gcdsb, suffix_gcdse, pep, RNAIndex(gcuts.first), RNAIndex(gcuts.second)); } 
01157           catch (PointOutChain &pocerr) { cerr << "budPlusSuffixModel() short version problem\n"; throw pocerr; } }
01159        ESTAssembly* budMinusSuffixModel(pair<int,int> gcuts, int suffix_gcdsb, int suffix_gcdse, const string &subRNA, const string &pep, int usefull, pair<int,int> subcds, int rcut1, int rcut2) throw (PointOutChain);
01161        ESTAssembly* budMinusSuffixModel(pair<int,int> gcuts, int suffix_gcdsb, int suffix_gcdse, const string &subRNA, const string &pep, int usefull, pair<int,int> subcds) throw (PointOutChain) {
01162           try {
01163           return budMinusSuffixModel(gcuts, suffix_gcdsb, suffix_gcdse, subRNA, pep, usefull, subcds, RNAIndex(gcuts.first), RNAIndex(gcuts.second));
01164           } catch (PointOutChain &pocerr) {
01165              cerr << "budMinusSuffixModel() short version problem\n";
01166              throw pocerr;
01167           }
01168        }
01169 
01171        ESTAssembly* budPlusPrefixModel(pair<int,int> gcuts, int rcut1, int rcut2,
01172              int prefix_gcdsb, int prefix_gcdse, pair<int,int> subcds,
01173              int usefull, const string &maxPep) throw(PointOutChain);
01178        ESTAssembly* budMinusPrefixModel(pair<int,int> gcuts, int rcut1, int rcut2, 
01179              int prefix_gcdsb, int prefix_gcdse, pair<int,int> subcds, 
01180              const string &pep, const string &subRNA) throw(PointOutChain);
01181 
01185        ESTAssembly* budTemplate() const;
01188        void resetNumest(ESTAssembly *mod);
01192        void resetSuffixProfile(ESTAssembly *mod, pair<int,int> gcuts);
01195        void resetPrefixProfile(ESTAssembly *mod, pair<int,int> gcuts);
01204        void setCDSInfo();
01205 
01206        /* used for construction move this to base class 
01207         * mRNAModel*/
01208        //static char header[250];
01209 
01210        //static int id;
01211        //static int exonid;
01212        static int chimera_numestcut; // set to 100
01218        static int chimera_peplencut;    // set to 100
01219        static int chimera_partial_peplencut; // set 30
01224        static int orfspace; // now set to be short
01225 };
01226 
01238 //template<class T>
01239 class ESTAssemblyid : public ESTAssembly {
01240    public:
01244       explicit ESTAssemblyid(const Alnchainid &seg, const string &chrom, 
01245             const string &gs, int cngid) 
01246          : ESTAssembly(seg,seg.numEST(),chrom,gs,cngid), estids(seg.getESTIds()) { }
01249       explicit ESTAssemblyid(const Noschain &seg, int nume, Coverdepth *bc,
01250             const string &chrom, const string &gs, int cngid)
01251          : ESTAssembly(seg,nume,bc,chrom,gs,cngid), estids() { }
01252 
01258       explicit ESTAssemblyid(ESTAssembly &est) : ESTAssembly(est), estids() { est.releaseProfile(); }
01263       explicit ESTAssemblyid(const Noschain &seg, int gcdsb_, int gcdse_, int nume, 
01264             Coverdepth *bc, const string &chrom, const string &gs, int cngid, int fr) : ESTAssembly(seg,gcdsb_,gcdse_,nume,bc, chrom, gs, cngid,fr), estids() { }
01265       //ESTAssemblyid(ESTAssembly *est) : estmod(est), estids() { }
01266 
01267       void assignESTId(const vector<string> &ids) { estids=ids; }
01268 
01269       void addESTId(const string &eid) { estids.push_back(eid); }
01271       void addESTId(const vector<string> &eids) { estids.insert(estids.end(), eids.begin(), eids.end()); }
01272       vector<string>& getESTIds() { return estids; }
01273       string ESTIdString(const char sep=',') const;
01274       ostream& outputESTIds(ostream &ous, const char* sep=",") const;
01275 
01285       ESTAssemblyid* breakSuffixModel() throw (PointOutChain);
01286       ESTAssemblyid* breakPrefixModel() throw (PointOutChain);
01294       vector<ESTAssemblyid*> breakup();
01298       void writeModel(ostream &ous, char sep='\t') const;
01299       void writetab(ostream& mod, ostream &exon, ostream &track,
01300             ostream &ousest, ostream &ouspep, char sep='\t') const;
01304       //int ESTCount() const { return estids.size(); }
01310       //int numberOfEST() const { return estids.size(); }
01311       // should use parent class header!
01312       //static const char modelheader[];
01313       static const char* getModelheader();
01314 
01315    private:
01316 
01336       vector<string> estids;
01337 };
01338 
01348 void clumpModelIntoGene(set<ESTAssemblyid*, lessByChainDirectionPtr> &mod,
01349         list<list<ESTAssemblyid*> > &loci);
01350 
01351 #endif

Generated on Wed Aug 10 11:56:52 2011 for Softwares from Orpara by  doxygen 1.5.6