00001 #ifndef RNAMODEL_H
00002 #define RNAMODEL_H
00003
00004 #include "GenModel.h"
00005 #include <string>
00006 #include "bioseq.h"
00007 #include "Coverdepth.h"
00008 #include "strformat.h"
00009 #include <cassert>
00010 #include <cmath>
00011
00012 using namespace std;
00013
00022 class OutsideGenomicSequence : public exception {
00023 private:
00024 string message;
00025 public:
00026 OutsideGenomicSequence(const string& msg) throw()
00027 : message(msg) { }
00028 OutsideGenomicSequence() throw() : message("Position outside Genomic Length") { }
00029 ~OutsideGenomicSequence() throw() { }
00030 const char* what() const throw() {
00031 return message.c_str();
00032 }
00033 };
00034
00035
00036 enum modeltype { RNA, mRNA, EST, JGI};
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00067 class RNAModel : public Noschain {
00068 public:
00072 RNAModel() : Noschain(), genid(0) { oid=nextId(); }
00073
00079 explicit RNAModel(const Noschain &seg, const string &chrom, const string &gs)
00080 : Noschain(seg), gid(chrom), gseq(&gs), genid(0), oid(0) {
00081 oid=nextId(); subsequence(rna, *gseq);
00082 }
00089 RNAModel(const string &exstr, const string &gname, const string &gs,
00090 int oi)
00091 : Noschain(exstr), gid(gname), gseq(&gs), oid(oi), genid(0) {
00092 subsequence(rna, *gseq); }
00094 RNAModel(const string &exstr, const string &gname, const string &gs,
00095 int oi, int gene_id)
00096 : Noschain(exstr), gid(gname), gseq(&gs), oid(oi), genid(gene_id) {
00097 subsequence(rna, *gseq); }
00103 RNAModel(const string &exstarts, const string &exends,
00104 char strand, const string& gi, const string& genomic)
00105 throw(Badinput, exception)
00106 : Noschain(exstarts, exends, strand), gid(gi), gseq(&genomic),
00107 genid(0), oid(serialid++) {
00108 if (exons[0].isNull() || exons.back().isNull())
00109 throw Badinput("Null end exon");
00110 subsequence(rna, *gseq);
00111 }
00116 RNAModel(const string &exstarts, const string &exends,
00117 char strand, const string& gi, const string& genomic,
00118 int oo)
00119 throw(Badinput)
00120 : Noschain(exstarts, exends, strand), gid(gi), gseq(&genomic),
00121 genid(0), oid(oo) {
00122 if (exons[0].isNull() || exons.back().isNull())
00123 throw Badinput("Null end exon");
00124 subsequence(rna, *gseq);
00125 }
00131 RNAModel(const RNAModel &rm) : Noschain(rm), gid(rm.gid),
00132 gseq(rm.gseq), rna(rm.rna), oid(serialid++), genid(rm.genid) { }
00136 RNAModel& operator=(const RNAModel &rm);
00139 void assignNewId() { oid=nextId(); }
00141 int getOid() const { return oid; }
00142 void setOid(int oi) { oid=oi; }
00146 void setNextOid() { oid=nextId(); }
00155 void assignNewGeneId() { genid=nextGeneId(); }
00156 void modelidAsGeneid() { genid=oid; }
00159 void assignCurrentGeneId() { genid=currentGeneId(); }
00160 int getGeneId() const { return genid; }
00161 int geneId() const { return genid; }
00162 void setGeneId(const int gene_id) { genid=gene_id; }
00169 void setGenomic(const string &gs) { gseq=&gs; }
00170 void setGenomicPointer(const string *gs) { gseq=gs; }
00174 void trimAfterPoint(const int p) throw (PointOutChain);
00178 void trimBeforePoint(const int p) throw (PointOutChain);
00180 const string& RNASequence() const { return rna; }
00186 int genomicIndex(int mrnaidx) const throw (OutsideGenomicSequence);
00187 int RNAIndex(int gidx) const throw (PointOutChain);
00188 bool sameExons(const RNAModel &gm) const { return Noschain::operator==(gm); }
00192 const string* getGenomicPointer() const { return gseq; }
00198 int exonLength() const { return rna.length(); }
00204 virtual bool valid() const;
00212
00213 bool sameRNA(const RNAModel &rmd) const { return rna==rmd.rna; }
00214 const string& genomicId() const { return gid; }
00215 int RNALength() const { return rna.length(); }
00216 const string& RNAString() const { return rna; }
00223 void resetRNA() throw(Badinput) { subsequence(rna,*gseq); }
00224 void setRNA(const string &rseq) { rna=rseq; }
00228 virtual void reset() { subsequence(rna,*gseq); }
00229 void setGenomicId(const string &gg) { gid=gg; }
00233 bool append(Noschain &mod);
00242 RNAModel& reverse();
00243
00247 string seqGenomic() const;
00248 int genomicLength() const { return gseq->length(); }
00249 virtual modeltype objtype() const { return RNA; }
00253 ostream& writeExon(ostream &ous, char sep='\t') const;
00255 ostream& print(ostream& ous) const;
00260 void intronBound(map<string,int> &spsite) const;
00261
00262 static const char exonheader[];
00264
00265 static int currentId() { return serialid; }
00267 static int nextId() { return serialid++; }
00268 static void advanceId() { ++serialid; }
00269 static void rollbackId() { --serialid; }
00270 static void setId(int oi) { serialid=oi; }
00272
00273 static int nextGeneId() { return geneid++; }
00275 static int currentGeneId() { return geneid; }
00276 static void advanceGeneId() { ++geneid; }
00277
00278 protected:
00279
00283 string gid;
00286 const string *gseq;
00290 string rna;
00295 int oid;
00298 int genid;
00299
00302 static int serialid;
00303 static int exonid;
00304 static int geneid;
00305 };
00306
00307
00314 class mRNAModel : public RNAModel {
00315 public:
00316 mRNAModel() : RNAModel(), cdsb(-1), cdse(-1),
00317 gcdsb(-1), gcdse(-1), pep(), frame(0) { }
00323 explicit mRNAModel(const Noschain &seg, const string &chrom, const string &gs);
00331 explicit mRNAModel(const Noschain &seg, int gcdsb_, int gcdse_, const string &chrom, const string &gs, int fr=0);
00339 mRNAModel(const string &exstr, const string &gname, const string &gs,
00340 int oi, int gcb, int gce, int fr=0);
00342 mRNAModel(const string &exstr, const string &gname, const string &gs,
00343 int oi, int gcb, int gce, int gene_id, int fr=0);
00349 mRNAModel(const string &exstarts, const string &exends, char strand,
00350 int gcb, int gce, const string& gi, const string& genomic)
00351 throw(PointOutChain, Badinput, exception);
00354 mRNAModel(const string &exstarts, const string &exends, char strand,
00355 int gcb, int gce, const string& gi, const string& genomic, int oo)
00356 throw(PointOutChain, Badinput);
00357
00359 mRNAModel(const mRNAModel &mm) : RNAModel(mm), cdsb(mm.cdsb),
00360 cdse(mm.cdse), gcdsb(mm.gcdsb), gcdse(mm.gcdse),
00361 pep(mm.pep), frame(mm.frame) { }
00368 char guessStrand() const;
00369
00370 mRNAModel& operator=(const mRNAModel &mm);
00373 bool samePeptide(const mRNAModel &mm) const { return pep==mm.pep; }
00377 bool sameGene(const mRNAModel &mod) const;
00380 Noschain FivePrimeUTR() const { return subchainBeforePoint(gcdsb); }
00381 Noschain ThreePrimeUTR() const { return subchainAfterPoint(gcdse); }
00382 int num5NoncodingExons() const { return FivePrimeUTR().numberOfRanges(); }
00383 int num3NoncodingExons() const { return ThreePrimeUTR().numberOfRanges(); }
00385 Noschain CDSChain() const throw (PointOutChain);
00389 Range CDSRange() const { return Range(gcdsb, gcdse); }
00390 pair<int,int> genomicCDSBound() const { return pair<int,int>(gcdsb, gcdse); }
00391 Range RNACDSRange() const { return Range(cdsb,cdse); }
00392 pair<int,int> RNACDSBound() const { return pair<int,int>(cdsb, cdse); }
00394 int CDSLength() const { return cdse-cdsb+1; }
00395 int genomicCDSLength() const { return abs(gcdse-gcdsb)+1; }
00396 int genomicCDSEnd() const { return gcdse; }
00397 int genomicCDSBegin() const { return gcdsb; }
00398 const string& proteinSequence() const { return pep; }
00406 int proteinLength() const { return pep.length(); }
00409 int proteinLengthNoTail() const;
00410 int getFrame() const { return frame; }
00412 string CDSSeq() const { return rna.substr(cdsb-1, cdse-cdsb+1); }
00416 string CDSSequence() const;
00417 char CDSDirection() const { if (gcdsb < gcdse) return '+'; return '-'; }
00420 double CDSFraction() const { return static_cast<double>(CDSLength())/length(); }
00424 double CDSFractionRNA() const { return static_cast<double>(CDSLength())/exonLength(); }
00428 double CDSFractionGenomic() const { return static_cast<double>(CDSLength())/length(); }
00432 int FivePrimeUTRLength() const { return cdsb-1; }
00433 int ThreePrimeUTRLength() const { return exonLength() - cdse; }
00434 int UTRLength() const { return FivePrimeUTRLength() + ThreePrimeUTRLength(); }
00437 void UTR3Sequence(string &seq) const { if (cdse==rna.length()) seq=""; else seq=rna.substr(cdse); }
00438 string UTR3Sequence() const { if (cdse==rna.length()) return ""; return rna.substr(cdse); }
00441 bool hasStart() const { return pep[0] == 'M'; }
00442 bool hasStop() const { return pep[pep.length()-1] == '*'; }
00444 bool complete() const { return hasStart() && hasStop(); }
00452
00453
00455 int numberOfInternalStops() const;
00457 const string& getProtein() const { return pep; }
00463 void trimAfterPoint(const int p) throw (PointOutChain);
00466 void trimBeforePoint(const int p) throw (PointOutChain);
00467 void trimBeforePoint(const int gp, const int rp) throw (PointOutChain);
00469 void setProtein(const string &pseq) { pep=pseq; }
00477 void setRNACDS(int bb, int ee) throw(OutsideGenomicSequence);
00481 void resetRNACDS(int bb, int ee) throw(OutsideGenomicSequence) {
00482 setRNACDS(bb,ee); resetProtein(); }
00487 void setGenomicCDS(int gb, int ge) throw(OutsideGenomicSequence, InvalidModel);
00488 void setGenomicCDS(pair<int, int> gcr) {
00489
00490 setGenomicCDS(gcr.first, gcr.second); assert(cdsb<cdse); }
00499 bool growCDS3Prime(int len) throw(OutsideGenomicSequence);
00504 bool trimCDSTail();
00507 bool trimCDSStop();
00511 void reset();
00516 void resetProtein() { translate(pep, rna, cdsb+frame, cdse); }
00520 void setLongestCDSAndProtein();
00525 bool append(mRNAModel &mod, int &comment);
00532 string JGIModelRow(const char sep='\t') const;
00533 ostream& printJGIModelRow(ostream &ous, const char sep='\t') const;
00548 string JGITranscriptRow(const char sep='\t') const;
00549 ostream& printJGITranscriptRow(ostream &ous, const char sep='\t') const;
00550 ostream& printJGITranscriptRowNoId(ostream &ous, const char sep='\t') const;
00554 string JGIProteinRow(char sep='\t') const;
00555
00557 string sfCDSGenomic() const;
00559 Noschain CDSOnewayChain() const;
00560 void CDSOnewayChain(Noschain& ch) const;
00561 string sfCDSTranscript() const {
00562 return (CDSChain().onewayChain().exonOnlyChain()+(cdsb-1)).asDelimitedString(':', ',');
00563 }
00564 string sfExonsProtein() const;
00565 ostream& show(ostream &ous) const;
00574 bool genuine() const
00575 { return pep.length()>50 && complete()
00576 && ThreePrimeUTRLength()>30 && ThreePrimeUTRLength() < 1800
00577 && FivePrimeUTRLength()>30 && FivePrimeUTRLength()< 1800
00578 && (CDSFractionRNA() > 0.5 || CDSLength() > 1000)
00579 && num3NoncodingExons() < 3 && num5NoncodingExons() < 3; }
00580 bool semiGenuine() const { return num3NoncodingExons() < 3 && num5NoncodingExons() < 3 && CDSLength() > 140 && complete() && ThreePrimeUTRLength() < utrlen3max && FivePrimeUTRLength() < utrlen5max; }
00585 bool isStar() const { return (CDSFraction() > 0.6 || (ThreePrimeUTRLength() < utrlen3max && FivePrimeUTRLength() < utrlen5max && CDSLength() > 240)) && num3NoncodingExons()<3 && num5NoncodingExons() < 3 && complete(); }
00586 modeltype objtype() const { return mRNA; }
00592 bool valid() const;
00593 ostream& print(ostream &ous) const;
00599 mRNAModel& reverse(int newRNACDSB, int newRNACDSE);
00600 string name(const string &prefix) { return prefix + itos(getOid()); }
00601
00602 static void setShortestModel(int len) { shortestmodel=len; }
00608 void writetab(ostream& mod, ostream &ex, ostream &track,
00609 ostream &orna, ostream &oprt, char sep='\t') const;
00620 ostream& writeModelTable(ostream &ous, char sep='\t') const;
00621
00622 static const char modelheader[];
00623 static const char jgiModelCol[];
00624 static const char jgiTranscriptCol[];
00625 static const char jgiProteinCol[];
00626
00630 static int shortestpep;
00635 static int shortestmodel;
00640 static int utrlen5max, utrlen3max;
00641
00642 protected:
00649 int cdsb, cdse;
00655 int gcdsb, gcdse;
00659 string pep;
00676 int frame;
00677
00679 static char header[350];
00680 };
00681
00682
00683 class ESTAssembly;
00684
00690 class mRNAModelUpdate : public mRNAModel {
00691 public:
00696 explicit mRNAModelUpdate(const mRNAModel &mm, int ec)
00697 : mRNAModel(mm), numest(ec), comment(0) { }
00700 mRNAModelUpdate(const mRNAModel &mm)
00701 : mRNAModel(mm), numest(0), comment(0) { }
00703 mRNAModelUpdate(const mRNAModelUpdate &um) : mRNAModel(um), numest(um.numest), estcover(um.estcover), comment(um.comment) { }
00704 void addESTCover(const mRNAModel &mm, const char direction);
00708 void addESTCover(const ESTAssembly &mm);
00709 void setESTCount(int ec) { numest=ec; }
00710 void addESTCount(int ec) { numest += ec; }
00714 void writetab(ostream& mod, ostream &ex, ostream &track,
00715 ostream &orna, ostream &oprt, char sep='\t') const;
00724 ostream& writeModelTable(ostream &ous, char sep='\t') const;
00725
00727 string name() const { return "MIX" + itos(getOid()) + "_" + itos(numest) + "_" + itos(static_cast<int>(floor(ESTCoverage()*100+0.5))); }
00728 string title() const { return itos(numest) + " EST updated Ab initio model"; }
00739 bool append(ESTAssembly &mod);
00740 float ESTCoverage() const { return static_cast<float>(estcover.exonLength())/exonLength(); }
00741 ostream& show(ostream &ous) const;
00742
00743 static const char modelheader[];
00745 static const char* jgiModelColumns();
00747 static const char* jgiTranscriptColumns();
00748 static const char* jgiProteinColumns();
00749
00750
00751 private:
00752 int numest;
00753 RangeChain estcover;
00755 int comment;
00756 };
00757
00759 class InvalidJGIModel : public exception {
00760 private:
00761 string message;
00762 public:
00763 InvalidJGIModel() throw() : message("Invalid Model") { }
00764 InvalidJGIModel(const char msg[]) throw() : message(msg) { }
00765 InvalidJGIModel(const string& msg) throw() : message(msg) { }
00766 ~InvalidJGIModel() throw() { }
00767 const char* what() const throw() {
00768 return message.c_str();
00769 }
00770 };
00771
00777 class JGIModel : public mRNAModel {
00778 public:
00788 JGIModel(const string &exstarts, const string &exends,
00789 char strand, int cdsb, int cdse, const string &gi,
00790 int ii, const string &si, const string &genomic) throw(Badinput, InvalidJGIModel, InvalidModel, PointOutChain, exception);
00792 JGIModel(const string &exstarts, const string &exends,
00793 char strand, int cdsb, int cdse, const string &gi,
00794 const string &si, const string &genomic) throw(Badinput, InvalidJGIModel, InvalidModel, PointOutChain, exception);
00799 bool valid();
00803 string toJGIString(char sep='\t') const;
00804 string toSQLString(char sep=',') const;
00805 string toString(char sep='\t') const;
00811 void growEnd(int icr);
00813 int getId() const { return getOid(); }
00814 const string& getName() const { return name; }
00815 ostream& show(ostream &ous) const;
00816
00819 ostream& print(ostream &ous) const;
00821 modeltype objtype() const { return JGI; }
00822
00823 private:
00824
00825
00826
00827
00828
00829
00832 string name;
00833
00834
00835
00837
00838 };
00839
00843 bool useFullORF(int lenfull, int lenpartial);
00844
00847 class ESTAssembly : public mRNAModel {
00848 public:
00850 ESTAssembly() : mRNAModel(), congid(-1), numest(0), basecovdep(0), comment(), intronfix(0), relprofh(1) { }
00857 explicit ESTAssembly(const Noschain &seg, int nume, Coverdepth *bc,
00858 const string &chrom, const string &gs, int cngid);
00859
00861 explicit ESTAssembly(const Noschain &seg, int gcdsb_, int gcdse_, int nume,
00862 Coverdepth *bc, const string &chrom, const string &gs, int cngid, int fr);
00866 explicit ESTAssembly(const Noschain &seg, int nume,
00867 const string &chrom, const string &gs, int cngid);
00874 ESTAssembly(const string &exons, const string &genomicid,
00875 const string &genomicseq, int modelid, int gcdsB, int gcdsE,
00876 int congregationid, int estcount, int gnid, int phase, float prof2star)
00877 : mRNAModel(exons, genomicid, genomicseq, modelid,
00878 gcdsB, gcdsE, gnid, phase),
00879 congid(congregationid), numest(estcount), basecovdep(0),
00880 comment(), intronfix(0), relprofh(prof2star)
00881 { setNameAndTitle(); }
00885 ESTAssembly(const ESTAssembly &ea) : mRNAModel(ea),
00886 congid(ea.congid), numest(ea.numest), estname(ea.estname),
00887 esttitle(ea.esttitle), pepname(ea.pepname), peptitle(ea.peptitle), comment(ea.comment), intronfix(ea.intronfix), basecovdep(ea.basecovdep), relprofh(ea.relprofh) { }
00889 ~ESTAssembly() { if (basecovdep != 0) delete basecovdep; }
00893 void setNameAndTitle();
00897 void setName();
00898 void setTitle();
00899 void setTitle(const string &prefix);
00900 void addTitleTag(const string &tag);
00901 ESTAssembly& operator=(const ESTAssembly &ea);
00902 const string& name() const { return estname; }
00907 void write(ostream& mod, ostream &exon, ostream &track,
00908 ostream &ousest, ostream &ouspep, char sep='\t') const;
00930 void writetab(ostream& mod, ostream &exon, ostream &track,
00931 ostream &ousest, ostream &ouspep, char sep='\t') const;
00950 void writeModel(ostream &ous, char sep='\t') const;
00951 float RNACodingFraction() { return CDSLength()/(float)RNALength(); }
00972
00973 ESTAssembly* breakSuffixModel() throw(PointOutChain);
00975 ESTAssembly* breakPrefixModel() throw(PointOutChain);
00986 ESTAssembly* prune3PrimeUTR() throw (PointOutChain);
00987 ESTAssembly* prune5PrimeUTR() throw (PointOutChain);
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
01000 bool shouldBreakSuffix() const;
01001 bool shouldBreakPrefix() const;
01003 bool isChimera() const;
01004 modeltype objtype() const { return EST; }
01006 ostream& show(ostream &ous) const;
01007 ostream& print(ostream &ous) const;
01008 void showBaseCoverInfo(ostream &ous=cerr) const {
01009 if (basecovdep != 0) ous << *basecovdep << endl; }
01010 int ESTCount() const { return numest; }
01011 int numberOfEST() const { return numest; }
01012 bool validProfile() const;
01013 void addComment(const string &comm);
01014 void writeCommentTab(ostream &ous) const {
01015 if (!comment.empty()) ous << getOid() << '\t' << comment << endl; }
01016 bool hasComment() const { return !comment.empty(); }
01017 bool noComment() const { return comment.empty(); }
01039 void fixIntronBound();
01040 int intronFixState() const { return intronfix; }
01053 void checkIntronBound();
01055 bool genuine() const {
01056 return mRNAModel::genuine() && intronfix < 2; }
01057 bool semiGenuine() const {
01058 return mRNAModel::semiGenuine() && intronfix<2; }
01059 bool isStar() const {
01060 return mRNAModel::isStar() && intronfix<2; }
01066 bool findDip(int b, int e, Dip &dip) const;
01068 ostream& showProfile(ostream &ous) const { ous << *basecovdep << endl; return ous; }
01071 float averageProfileHeight() const { if (basecovdep == 0) return 1; else return basecovdep->averageHeight(); }
01072 int maxProfileHeight() const { if (basecovdep == 0) return 1; else return basecovdep->maxHeight(); }
01076 pair<float,float> relativeProfileHeight(const ESTAssembly &ea) const { return basecovdep->relativeHeight(*(ea.basecovdep)); }
01083 void assignRPH(const ESTAssembly &star) { relprofh=star.relativeProfileHeight(*this).first; }
01084 float getRelprofh() const { return relprofh; }
01085 void setRelprofh(const float rh) { relprofh=rh; }
01090 void releaseProfile() { basecovdep=0; }
01091 ESTAssembly& reverse(int RNACDSb, int RNACDSe) { mRNAModel::reverse(RNACDSb, RNACDSe);
01092 if (basecovdep != 0) basecovdep->reverseStrand(); }
01093
01096 static const char* JGIModelColumns();
01097 static const char* JGITranscriptColumns();
01098 static const char* JGIProteinColumns();
01107 static bool ORFLongEnough(pair<int,int> &bound, int ORFType, int trlen);
01108
01109 static const char modelheader[];
01110
01111 static const char trackheader[];
01112
01113 protected:
01114
01115 int congid;
01116 int numest;
01118 Coverdepth *basecovdep;
01131
01132 float relprofh;
01133 string estname, esttitle;
01134 string pepname, peptitle;
01137 string comment;
01149 int intronfix;
01150
01152 ESTAssembly* budPlusSuffixModel(pair<int,int> gcbts, int rcdsb, int rcdse, int suffix_gcdsb, int suffix_gcdse, const string &pep, int rcut1, int rcut2) throw(PointOutChain);
01154 ESTAssembly* budPlusSuffixModel(pair<int,int> gcuts, int rcdsb, int rcdse, int suffix_gcdsb, int suffix_gcdse, const string &pep) throw (PointOutChain) {
01155 try {
01156 return budPlusSuffixModel(gcuts, rcdsb, rcdse, suffix_gcdsb, suffix_gcdse, pep, RNAIndex(gcuts.first), RNAIndex(gcuts.second)); }
01157 catch (PointOutChain &pocerr) { cerr << "budPlusSuffixModel() short version problem\n"; throw pocerr; } }
01159 ESTAssembly* budMinusSuffixModel(pair<int,int> gcuts, int suffix_gcdsb, int suffix_gcdse, const string &subRNA, const string &pep, int usefull, pair<int,int> subcds, int rcut1, int rcut2) throw (PointOutChain);
01161 ESTAssembly* budMinusSuffixModel(pair<int,int> gcuts, int suffix_gcdsb, int suffix_gcdse, const string &subRNA, const string &pep, int usefull, pair<int,int> subcds) throw (PointOutChain) {
01162 try {
01163 return budMinusSuffixModel(gcuts, suffix_gcdsb, suffix_gcdse, subRNA, pep, usefull, subcds, RNAIndex(gcuts.first), RNAIndex(gcuts.second));
01164 } catch (PointOutChain &pocerr) {
01165 cerr << "budMinusSuffixModel() short version problem\n";
01166 throw pocerr;
01167 }
01168 }
01169
01171 ESTAssembly* budPlusPrefixModel(pair<int,int> gcuts, int rcut1, int rcut2,
01172 int prefix_gcdsb, int prefix_gcdse, pair<int,int> subcds,
01173 int usefull, const string &maxPep) throw(PointOutChain);
01178 ESTAssembly* budMinusPrefixModel(pair<int,int> gcuts, int rcut1, int rcut2,
01179 int prefix_gcdsb, int prefix_gcdse, pair<int,int> subcds,
01180 const string &pep, const string &subRNA) throw(PointOutChain);
01181
01185 ESTAssembly* budTemplate() const;
01188 void resetNumest(ESTAssembly *mod);
01192 void resetSuffixProfile(ESTAssembly *mod, pair<int,int> gcuts);
01195 void resetPrefixProfile(ESTAssembly *mod, pair<int,int> gcuts);
01204 void setCDSInfo();
01205
01206
01207
01208
01209
01210
01211
01212 static int chimera_numestcut;
01218 static int chimera_peplencut;
01219 static int chimera_partial_peplencut;
01224 static int orfspace;
01225 };
01226
01238
01239 class ESTAssemblyid : public ESTAssembly {
01240 public:
01244 explicit ESTAssemblyid(const Alnchainid &seg, const string &chrom,
01245 const string &gs, int cngid)
01246 : ESTAssembly(seg,seg.numEST(),chrom,gs,cngid), estids(seg.getESTIds()) { }
01249 explicit ESTAssemblyid(const Noschain &seg, int nume, Coverdepth *bc,
01250 const string &chrom, const string &gs, int cngid)
01251 : ESTAssembly(seg,nume,bc,chrom,gs,cngid), estids() { }
01252
01258 explicit ESTAssemblyid(ESTAssembly &est) : ESTAssembly(est), estids() { est.releaseProfile(); }
01263 explicit ESTAssemblyid(const Noschain &seg, int gcdsb_, int gcdse_, int nume,
01264 Coverdepth *bc, const string &chrom, const string &gs, int cngid, int fr) : ESTAssembly(seg,gcdsb_,gcdse_,nume,bc, chrom, gs, cngid,fr), estids() { }
01265
01266
01267 void assignESTId(const vector<string> &ids) { estids=ids; }
01268
01269 void addESTId(const string &eid) { estids.push_back(eid); }
01271 void addESTId(const vector<string> &eids) { estids.insert(estids.end(), eids.begin(), eids.end()); }
01272 vector<string>& getESTIds() { return estids; }
01273 string ESTIdString(const char sep=',') const;
01274 ostream& outputESTIds(ostream &ous, const char* sep=",") const;
01275
01285 ESTAssemblyid* breakSuffixModel() throw (PointOutChain);
01286 ESTAssemblyid* breakPrefixModel() throw (PointOutChain);
01294 vector<ESTAssemblyid*> breakup();
01298 void writeModel(ostream &ous, char sep='\t') const;
01299 void writetab(ostream& mod, ostream &exon, ostream &track,
01300 ostream &ousest, ostream &ouspep, char sep='\t') const;
01304
01310
01311
01312
01313 static const char* getModelheader();
01314
01315 private:
01316
01336 vector<string> estids;
01337 };
01338
01348 void clumpModelIntoGene(set<ESTAssemblyid*, lessByChainDirectionPtr> &mod,
01349 list<list<ESTAssemblyid*> > &loci);
01350
01351 #endif