#include <RNAModel.h>

Public Member Functions | |
| ESTAssembly () | |
| ESTAssembly (const Noschain &seg, int nume, Coverdepth *bc, const string &chrom, const string &gs, int cngid) | |
| ESTAssembly (const Noschain &seg, int gcdsb_, int gcdse_, int nume, Coverdepth *bc, const string &chrom, const string &gs, int cngid, int fr) | |
| ESTAssembly (const Noschain &seg, int nume, const string &chrom, const string &gs, int cngid) | |
| ESTAssembly (const string &exons, const string &genomicid, const string &genomicseq, int modelid, int gcdsB, int gcdsE, int congregationid, int estcount, int gnid, int phase, float prof2star) | |
| ESTAssembly (const ESTAssembly &ea) | |
| ~ESTAssembly () | |
| void | setNameAndTitle () |
| void | setName () |
| void | setTitle () |
| void | setTitle (const string &prefix) |
| void | addTitleTag (const string &tag) |
| ESTAssembly & | operator= (const ESTAssembly &ea) |
| const string & | name () const |
| void | write (ostream &mod, ostream &exon, ostream &track, ostream &ousest, ostream &ouspep, char sep='\t') const |
| void | writetab (ostream &mod, ostream &exon, ostream &track, ostream &ousest, ostream &ouspep, char sep='\t') const |
| void | writeModel (ostream &ous, char sep='\t') const |
| float | RNACodingFraction () |
| ESTAssembly * | breakSuffixModel () throw (PointOutChain) |
| ESTAssembly * | breakPrefixModel () throw (PointOutChain) |
| ESTAssembly * | prune3PrimeUTR () throw (PointOutChain) |
| ESTAssembly * | prune5PrimeUTR () throw (PointOutChain) |
| bool | shouldBreakSuffix () const |
| bool | shouldBreakPrefix () const |
| bool | isChimera () const |
| modeltype | objtype () const |
| ostream & | show (ostream &ous) const |
| ostream & | print (ostream &ous) const |
| void | showBaseCoverInfo (ostream &ous=cerr) const |
| int | ESTCount () const |
| int | numberOfEST () const |
| bool | validProfile () const |
| void | addComment (const string &comm) |
| void | writeCommentTab (ostream &ous) const |
| bool | hasComment () const |
| bool | noComment () const |
| void | fixIntronBound () |
| int | intronFixState () const |
| void | checkIntronBound () |
| bool | genuine () const |
| bool | semiGenuine () const |
| bool | isStar () const |
| bool | findDip (int b, int e, Dip &dip) const |
| ostream & | showProfile (ostream &ous) const |
| float | averageProfileHeight () const |
| int | maxProfileHeight () const |
| pair< float, float > | relativeProfileHeight (const ESTAssembly &ea) const |
| void | assignRPH (const ESTAssembly &star) |
| float | getRelprofh () const |
| void | setRelprofh (const float rh) |
| void | releaseProfile () |
| ESTAssembly & | reverse (int RNACDSb, int RNACDSe) |
Static Public Member Functions | |
| static const char * | JGIModelColumns () |
| static const char * | JGITranscriptColumns () |
| static const char * | JGIProteinColumns () |
| static bool | ORFLongEnough (pair< int, int > &bound, int ORFType, int trlen) |
Static Public Attributes | |
| static const char | modelheader [] = "modelid\tgeneid\tcongid\tgenomicid\tbegin\tend\tgCDSb\tgCDSe\tnumest\tnumexon\texonLength\texons\tCDSstart\tCDSend\tRNAseq\tpepseq\tframe\tprofileArea\tprofileMaxHeight\trelProfH\tbasecover" |
| static const char | trackheader [] = "id\tname\tchrom\tstrand\tstart\tend\tsfCount\tsfStarts\tsfEnds" |
Protected Member Functions | |
| ESTAssembly * | budPlusSuffixModel (pair< int, int > gcbts, int rcdsb, int rcdse, int suffix_gcdsb, int suffix_gcdse, const string &pep, int rcut1, int rcut2) throw (PointOutChain) |
| ESTAssembly * | budPlusSuffixModel (pair< int, int > gcuts, int rcdsb, int rcdse, int suffix_gcdsb, int suffix_gcdse, const string &pep) throw (PointOutChain) |
| ESTAssembly * | budMinusSuffixModel (pair< int, int > gcuts, int suffix_gcdsb, int suffix_gcdse, const string &subRNA, const string &pep, int usefull, pair< int, int > subcds, int rcut1, int rcut2) throw (PointOutChain) |
| ESTAssembly * | budMinusSuffixModel (pair< int, int > gcuts, int suffix_gcdsb, int suffix_gcdse, const string &subRNA, const string &pep, int usefull, pair< int, int > subcds) throw (PointOutChain) |
| ESTAssembly * | budPlusPrefixModel (pair< int, int > gcuts, int rcut1, int rcut2, int prefix_gcdsb, int prefix_gcdse, pair< int, int > subcds, int usefull, const string &maxPep) throw (PointOutChain) |
| ESTAssembly * | budMinusPrefixModel (pair< int, int > gcuts, int rcut1, int rcut2, int prefix_gcdsb, int prefix_gcdse, pair< int, int > subcds, const string &pep, const string &subRNA) throw (PointOutChain) |
| ESTAssembly * | budTemplate () const |
| void | resetNumest (ESTAssembly *mod) |
| void | resetSuffixProfile (ESTAssembly *mod, pair< int, int > gcuts) |
| void | resetPrefixProfile (ESTAssembly *mod, pair< int, int > gcuts) |
| void | setCDSInfo () |
Protected Attributes | |
| int | congid |
| int | numest |
| Coverdepth * | basecovdep |
| float | relprofh |
| string | estname |
| string | esttitle |
| string | pepname |
| string | peptitle |
| string | comment |
| int | intronfix |
Static Protected Attributes | |
| static int | chimera_numestcut = 50 |
| static int | chimera_peplencut = 90 |
| static int | chimera_partial_peplencut = 40 |
| static int | orfspace = 100 |
| ESTAssembly::ESTAssembly | ( | ) | [inline] |
default
Referenced by budTemplate().
| ESTAssembly::ESTAssembly | ( | const Noschain & | seg, | |
| int | nume, | |||
| Coverdepth * | bc, | |||
| const string & | chrom, | |||
| const string & | gs, | |||
| int | cngid | |||
| ) | [explicit] |
Used by Esambl to add base cover depth info. This is the version currently used in production.
| bc | is the base coverage profile pointer; it should be allocated on the free store. This object will take care of deallocating the memory. |
References basecovdep, Range::direction(), fixIntronBound(), setNameAndTitle(), Coverdepth::setStrand(), and Coverdepth::strand().
| ESTAssembly::ESTAssembly | ( | const Noschain & | seg, | |
| int | gcdsb_, | |||
| int | gcdse_, | |||
| int | nume, | |||
| Coverdepth * | bc, | |||
| const string & | chrom, | |||
| const string & | gs, | |||
| int | cngid, | |||
| int | fr | |||
| ) | [explicit] |
used by breakup to make new models
References basecovdep, Range::direction(), fixIntronBound(), setNameAndTitle(), Coverdepth::setStrand(), and Coverdepth::strand().
| ESTAssembly::ESTAssembly | ( | const Noschain & | seg, | |
| int | nume, | |||
| const string & | chrom, | |||
| const string & | gs, | |||
| int | cngid | |||
| ) | [explicit] |
Use without assigning the Coverdepth information; this is faster, and is meant for single-EST assemblies where base coverage is not important.
References anyToString(), mRNAModel::cdsb, mRNAModel::cdse, estname, esttitle, RNAModel::genomicId(), RNAModel::getOid(), numest, pepname, peptitle, and Noschain::toString().
| ESTAssembly::ESTAssembly | ( | const string & | exons, | |
| const string & | genomicid, | |||
| const string & | genomicseq, | |||
| int | modelid, | |||
| int | gcdsB, | |||
| int | gcdsE, | |||
| int | congregationid, | |||
| int | estcount, | |||
| int | gnid, | |||
| int | phase, | |||
| float | prof2star | |||
| ) | [inline] |
for reading ESTAssembly stored in files
| modelid | Modelid is oid. | |
| gcdsB | and gcdsE are used to construct cdsb and cdse respectively. gnid is the geneid. |
References setNameAndTitle().
| ESTAssembly::ESTAssembly | ( | const ESTAssembly & | ea | ) | [inline] |
Copy constructor. There is a problem with the profile: the profile is big, so no copy of it is made; only the pointer is copied. The old object should call releaseProfile().
| ESTAssembly::~ESTAssembly | ( | ) | [inline] |
the assembler produced the profile
References basecovdep.
| void ESTAssembly::setNameAndTitle | ( | ) |
Helper function used to make a human-readable name and title from the numbers already stored in the object.
References setName(), and setTitle().
Referenced by ESTAssembly().
| void ESTAssembly::setName | ( | ) |
The name is CE<oid>_<numest> for the model and CEP<oid>_<numest> for the protein.
References anyToString(), estname, RNAModel::getOid(), numest, and pepname.
Referenced by budMinusPrefixModel(), budMinusSuffixModel(), budPlusPrefixModel(), budPlusSuffixModel(), prune3PrimeUTR(), prune5PrimeUTR(), and setNameAndTitle().
| void ESTAssembly::setTitle | ( | ) |
References anyToString(), mRNAModel::cdsb, mRNAModel::cdse, esttitle, RNAModel::genomicId(), numest, and peptitle.
Referenced by budMinusPrefixModel(), budMinusSuffixModel(), budPlusPrefixModel(), budPlusSuffixModel(), prune3PrimeUTR(), prune5PrimeUTR(), and setNameAndTitle().
| void ESTAssembly::setTitle | ( | const string & | prefix | ) |
References anyToString(), mRNAModel::cdsb, mRNAModel::cdse, esttitle, RNAModel::genomicId(), numest, and peptitle.
| void ESTAssembly::addTitleTag | ( | const string & | tag | ) |
| ESTAssembly & ESTAssembly::operator= | ( | const ESTAssembly & | ea | ) |
| const string& ESTAssembly::name | ( | ) | const [inline] |
References estname.
| void ESTAssembly::write | ( | ostream & | mod, | |
| ostream & | exon, | |||
| ostream & | track, | |||
| ostream & | ousest, | |||
| ostream & | ouspep, | |||
| char | sep = '\t' | |||
| ) | const |
When this method is called, the id and exonid will be incremented. Produces output in two different formats.
References Range::begin(), mRNAModel::cdsb, mRNAModel::cdse, congid, Range::end(), estname, esttitle, RNAModel::exonLength(), mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::genomicId(), RNAModel::getOid(), mRNAModel::getProtein(), Noschain::jgiformat(), Noschain::numberOfRanges(), numest, mRNAModel::pep, pepname, peptitle, printFasta(), RNAModel::rna, RNAModel::RNASequence(), Noschain::toString(), and RNAModel::writeExon().
| void ESTAssembly::writetab | ( | ostream & | mod, | |
| ostream & | exon, | |||
| ostream & | track, | |||
| ostream & | ousest, | |||
| ostream & | ouspep, | |||
| char | sep = '\t' | |||
| ) | const |
| mod | combest tabular format with the following columns: objectid, geneid, congregationid, genomicid, begin, end, genomic_cdsb, genomic_cdse, numest, numexon, sumexonlength, exon_structure, mRNAcdsb, mRNAcdse, RNAseq, pepseq, frame, area_of_profile, maxheight_profile, relprofh2star, basecover. This output uses writeModel(). Not all columns are calculated: for a single-EST model we don't calculate the area and don't generate the base profile. | |
| exon | exontable columns: exonid, model_objectid, exb, exe. Sequential from first to last exon. | |
| track. | JGI model row | |
| ousest | JGI transcript table row | |
| ouspep | JGI protein table row |
Reimplemented from mRNAModel.
Reimplemented in ESTAssemblyid.
References comment, estname, esttitle, mRNAModel::JGIModelRow(), mRNAModel::JGIProteinRow(), mRNAModel::JGITranscriptRow(), pepname, peptitle, RNAModel::writeExon(), and writeModel().
| void ESTAssembly::writeModel | ( | ostream & | ous, | |
| char | sep = '\t' | |||
| ) | const |
Write the model in tabular format for storage. This method does not inherit from the parent classes. This method stores all essential information and some summary information for quick SQL programming. For relative profile height to star, only the minimum is output. For 99.9% of models, the maximum should be 1. The columns: ================================================= oid, GeneId, congid, genomicId(), begin, end, gcdsb, gcdse, numest, numberOfRanges, sumExonLength, exonsStringFormat[b1-e1,b2-e2,...], cdsb, cdse, rnaSeq, pep, frame, BCParea, BCPmaxh, BCPrelheight, actualBCP =================================================
exonStringFormat uses the toString() method of Noschain.
Reimplemented in ESTAssemblyid.
References basecovdep, Range::begin(), mRNAModel::cdsb, mRNAModel::cdse, congid, Range::end(), RNAModel::exonLength(), mRNAModel::frame, mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::genomicId(), RNAModel::getGeneId(), RNAModel::getOid(), Coverdepth::maxAndSum(), Noschain::numberOfRanges(), numest, mRNAModel::pep, relprofh, RNAModel::rna, RNAModel::RNALength(), and Noschain::toString().
Referenced by ESTAssemblyid::writeModel(), and writetab().
| float ESTAssembly::RNACodingFraction | ( | ) | [inline] |
References mRNAModel::CDSLength(), and RNAModel::RNALength().
| ESTAssembly * ESTAssembly::breakSuffixModel | ( | ) | throw (PointOutChain) |
Break off the model contained inside the 3'-UTR if: 1. the protein is complete, 2. number of EST > 100, 3. 5'-UTR or 3'-UTR exonLength() > 700, 4. the UTR has at least one intron; then make the UTR a new Model. These new models may be identical from different genes, so some set operation needs to be performed. How to break off the new UTR from the main gene? This is a very expensive operation; it slows down the program by a factor of 2.
When the suffix model has no intron, the algorithm should try ORF in both directions. Right now it is only testing in one direction. This is apparently not sufficient.
Reimplemented in ESTAssemblyid.
References addComment(), Dip::breakPoint(), budMinusSuffixModel(), budPlusSuffixModel(), mRNAModel::cdse, Range::direction(), Range::end(), esttitle, findDip(), mRNAModel::gcdse, RNAModel::genomicIndex(), RNAModel::getOid(), itos(), longestNoStartORFPlus(), longestNoStopORFPlus(), maxFullORF(), Noschain::numberOfRanges(), ORFLongEnough(), prune3PrimeUTR(), reverseComplement(), RNAModel::rna, shouldBreakSuffix(), show(), mRNAModel::ThreePrimeUTR(), mRNAModel::ThreePrimeUTRLength(), and OutsideGenomicSequence::what().
Referenced by breakExtraModel(), and ESTAssemblyid::breakSuffixModel().
| ESTAssembly * ESTAssembly::breakPrefixModel | ( | ) | throw (PointOutChain) |
analogous to breakSuffixModel()
Reimplemented in ESTAssemblyid.
References addComment(), Range::begin(), Dip::breakPoint(), budMinusPrefixModel(), budPlusPrefixModel(), mRNAModel::cdsb, comment, Range::direction(), findDip(), mRNAModel::FivePrimeUTR(), mRNAModel::gcdsb, RNAModel::genomicIndex(), RNAModel::getOid(), itos(), longestNoStartORFPlus(), longestNoStopORFPlus(), maxFullORF(), Noschain::numberOfRanges(), ORFLongEnough(), prune5PrimeUTR(), reverseComplement(), RNAModel::rna, RNAModel::RNAIndex(), shouldBreakPrefix(), show(), and OutsideGenomicSequence::what().
Referenced by breakExtraModel(), and ESTAssemblyid::breakPrefixModel().
| ESTAssembly * ESTAssembly::prune3PrimeUTR | ( | ) | throw (PointOutChain) |
This function does not compute the ORF first, but simply looks for a Dip between orfspace nt away from either end of the ORF and the limits of the genomic DNA. If one is found, it will prune off the extra Assembly, then it will try to set up coding info for this new Assembly. This is as opposed to the breakPrefix or breakSuffix methods, which first compute the ORF and then try to cut the assembly between the ORFs.
parent
References addComment(), Noschain::advancePosOnExon(), Dip::breakPoint(), budTemplate(), Range::direction(), Range::end(), findDip(), mRNAModel::gcdse, Range::length(), mRNAModel::num3NoncodingExons(), orfspace, resetNumest(), resetSuffixProfile(), RNAModel::rna, RNAModel::RNAIndex(), setCDSInfo(), Noschain::setChain(), setName(), setTitle(), show(), Noschain::subchain(), mRNAModel::ThreePrimeUTRLength(), mRNAModel::trimAfterPoint(), and mRNAModel::valid().
Referenced by breakSuffixModel().
| ESTAssembly * ESTAssembly::prune5PrimeUTR | ( | ) | throw (PointOutChain) |
References addComment(), Range::begin(), Dip::breakPoint(), budTemplate(), Range::direction(), findDip(), mRNAModel::FivePrimeUTRLength(), mRNAModel::gcdsb, Range::length(), mRNAModel::num5NoncodingExons(), orfspace, resetNumest(), resetPrefixProfile(), Noschain::retreatPosOnExon(), RNAModel::rna, RNAModel::RNAIndex(), setCDSInfo(), Noschain::setChain(), setName(), setTitle(), show(), Noschain::subchain(), mRNAModel::trimBeforePoint(), and mRNAModel::valid().
Referenced by breakPrefixModel().
| bool ESTAssembly::shouldBreakSuffix | ( | ) | const |
if UTR length > 1500 and has stop then break
References chimera_numestcut, mRNAModel::hasStop(), Noschain::numberOfRanges(), numest, mRNAModel::ThreePrimeUTR(), and mRNAModel::ThreePrimeUTRLength().
Referenced by breakSuffixModel().
| bool ESTAssembly::shouldBreakPrefix | ( | ) | const |
References chimera_numestcut, mRNAModel::FivePrimeUTR(), mRNAModel::FivePrimeUTRLength(), mRNAModel::hasStart(), Noschain::numberOfRanges(), and numest.
Referenced by breakPrefixModel().
| bool ESTAssembly::isChimera | ( | ) | const |
This model is more likely to be Chimera
fungal UTRs could be short so we are setting a 500 nt limit
References mRNAModel::CDSFractionRNA(), chimera_numestcut, mRNAModel::FivePrimeUTR(), mRNAModel::FivePrimeUTRLength(), Range::length(), Noschain::numberOfRanges(), numest, mRNAModel::ThreePrimeUTR(), and mRNAModel::ThreePrimeUTRLength().
Referenced by Graphid::assemble(), and testBreakup().
| modeltype ESTAssembly::objtype | ( | ) | const [inline, virtual] |
| ostream & ESTAssembly::show | ( | ostream & | ous | ) | const [virtual] |
ESTAssembly for debug
Reimplemented from mRNAModel.
References congid, esttitle, numest, peptitle, and mRNAModel::show().
Referenced by breakPrefixModel(), breakSuffixModel(), findDip(), prune3PrimeUTR(), prune5PrimeUTR(), readESTModel(), and updateOneUpdated().
| ostream & ESTAssembly::print | ( | ostream & | ous | ) | const [virtual] |
| void ESTAssembly::showBaseCoverInfo | ( | ostream & | ous = cerr |
) | const [inline] |
References basecovdep.
| int ESTAssembly::ESTCount | ( | ) | const [inline] |
References numest.
| int ESTAssembly::numberOfEST | ( | ) | const [inline] |
| bool ESTAssembly::validProfile | ( | ) | const |
References basecovdep, Range::direction(), Coverdepth::exons(), Noschain::reverse(), and Coverdepth::strand().
| void ESTAssembly::addComment | ( | const string & | comm | ) |
References comment.
Referenced by breakPrefixModel(), breakSuffixModel(), prune3PrimeUTR(), and prune5PrimeUTR().
| void ESTAssembly::writeCommentTab | ( | ostream & | ous | ) | const [inline] |
References comment, and RNAModel::getOid().
| bool ESTAssembly::hasComment | ( | ) | const [inline] |
References comment.
| bool ESTAssembly::noComment | ( | ) | const [inline] |
References comment.
| void ESTAssembly::fixIntronBound | ( | ) |
NNN is considered good.
Only fix introns longer than 32 nt. I left short ones intact in case there are RNA-editing enzymes. Also make sure exons are long enough so that they will not be changed into different directions. So far I am using 4 down, 5 up: 9 nt around the splice sites (4 exon bases, 5 intron bases).
References basecovdep, Range::begin(), Range::direction(), Range::end(), Coverdepth::erase(), Noschain::exons, Coverdepth::extendLeft(), Coverdepth::extendRight(), RNAModel::gseq, intronfix, Noschain::numberOfRanges(), and mRNAModel::reset().
Referenced by ESTAssembly().
| int ESTAssembly::intronFixState | ( | ) | const [inline] |
| void ESTAssembly::checkIntronBound | ( | ) |
This function will be used by the breaking family of methods. If the model has been broken, I will have to recheck to see if there is a bad intron. In this case it will only know two states, good (0) and bad (2); it does not know whether it got fixed or not. Because only a tiny fraction of models are like this, this is fine.
This method will set the internal state variable intronfix. This method could potentially produce bad exons of inverted direction!
References Range::begin(), Range::direction(), Range::end(), Noschain::exons, RNAModel::gseq, intronfix, and Noschain::numberOfRanges().
Referenced by breakExtraModel().
| bool ESTAssembly::genuine | ( | ) | const [inline] |
use mRNAModel genuine method
Reimplemented from mRNAModel.
References mRNAModel::genuine(), and intronfix.
Referenced by assembleChain().
| bool ESTAssembly::semiGenuine | ( | ) | const [inline] |
| bool ESTAssembly::isStar | ( | ) | const [inline] |
good quality model: UTR < 600 nt, CDS length > 330 nt, numUTR exon < 2 or CDS fraction > 65%
Reimplemented from mRNAModel.
References intronfix, and mRNAModel::isStar().
| bool ESTAssembly::findDip | ( | int | b, | |
| int | e, | |||
| Dip & | dip | |||
| ) | const |
[b,e] genomic position Given a range [b,e], this function will try to find a Dip
| dip | the value of dip will be set to the result. |
References basecovdep, Range::begin(), Range::end(), Range::length(), Noschain::numberOfRanges(), Coverdepth::searchDip(), show(), Noschain::subchain(), and PointOutChain::what().
Referenced by breakPrefixModel(), breakSuffixModel(), ESTAssemblyid::breakup(), prune3PrimeUTR(), and prune5PrimeUTR().
| ostream& ESTAssembly::showProfile | ( | ostream & | ous | ) | const [inline] |
display the profile
References basecovdep.
| float ESTAssembly::averageProfileHeight | ( | ) | const [inline] |
This value is not very useful
References Coverdepth::averageHeight(), and basecovdep.
| int ESTAssembly::maxProfileHeight | ( | ) | const [inline] |
References basecovdep, and Coverdepth::maxHeight().
| pair<float,float> ESTAssembly::relativeProfileHeight | ( | const ESTAssembly & | ea | ) | const [inline] |
this is a simple wrapper for Coverdepth object relativeHeight()
References basecovdep, and Coverdepth::relativeHeight().
Referenced by assignRPH().
| void ESTAssembly::assignRPH | ( | const ESTAssembly & | star | ) | [inline] |
This method looks at things from the opposite point of view to relativeProfileHeight: this/star, where star is used as the reference. A relative profile height of -1 means the star model and this model have no common profile.
References relativeProfileHeight(), and relprofh.
| float ESTAssembly::getRelprofh | ( | ) | const [inline] |
References relprofh.
| void ESTAssembly::setRelprofh | ( | const float | rh | ) | [inline] |
References relprofh.
| void ESTAssembly::releaseProfile | ( | ) | [inline] |
will not destroy the profile, simply transfer ownership. This is called after giving away the profile.
References basecovdep.
Referenced by ESTAssemblyid::ESTAssemblyid().
| ESTAssembly& ESTAssembly::reverse | ( | int | newRNACDSB, | |
| int | newRNACDSE | |||
| ) | [inline] |
Prevents reversing of multiexon models; it will crash if you try to. This will prevent the programmer from making mistakes. The cds range, pep, and frame will all need to be reset.
Reimplemented from mRNAModel.
References basecovdep, RNAModel::reverse(), and Coverdepth::reverseStrand().
| const char * ESTAssembly::JGIModelColumns | ( | ) | [static] |
use header[] to construct these headers from base class information
References mRNAModel::header, and mRNAModel::jgiModelCol.
Referenced by makeModels().
| const char * ESTAssembly::JGITranscriptColumns | ( | ) | [static] |
| const char * ESTAssembly::JGIProteinColumns | ( | ) | [static] |
| bool ESTAssembly::ORFLongEnough | ( | pair< int, int > & | bound, | |
| int | ORFType, | |||
| int | trlen | |||
| ) | [static] |
| ORFType | [0,1,2,-1] | |
| trlen | length of the parent transcript. ORFType values: 1 = nostart or nostop (M--- or ----*); 2 = full (M---*); 0 = nostart and nostop (----); -1 = nothing, which is not possible as long as the RNA is longer than 3 nt: you should get something. |
References chimera_partial_peplencut, and chimera_peplencut.
Referenced by breakPrefixModel(), and breakSuffixModel().
| ESTAssembly * ESTAssembly::budPlusSuffixModel | ( | pair< int, int > | gcbts, | |
| int | rcdsb, | |||
| int | rcdse, | |||
| int | suffix_gcdsb, | |||
| int | suffix_gcdse, | |||
| const string & | pep, | |||
| int | rcut1, | |||
| int | rcut2 | |||
| ) | throw (PointOutChain) [protected] |
helper function for Break Suffix Model
References budTemplate(), mRNAModel::cdsb, mRNAModel::cdse, Range::end(), mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::getOid(), itos(), mRNAModel::pep, resetNumest(), resetSuffixProfile(), RNAModel::rna, Noschain::setChain(), setName(), mRNAModel::setProtein(), setTitle(), Noschain::subchain(), and mRNAModel::trimAfterPoint().
Referenced by breakSuffixModel(), and budPlusSuffixModel().
| ESTAssembly* ESTAssembly::budPlusSuffixModel | ( | pair< int, int > | gcuts, | |
| int | rcdsb, | |||
| int | rcdse, | |||
| int | suffix_gcdsb, | |||
| int | suffix_gcdse, | |||
| const string & | pep | |||
| ) | throw (PointOutChain) [inline, protected] |
shorter version of above function
References budPlusSuffixModel(), mRNAModel::pep, and RNAModel::RNAIndex().
| ESTAssembly * ESTAssembly::budMinusSuffixModel | ( | pair< int, int > | gcuts, | |
| int | suffix_gcdsb, | |||
| int | suffix_gcdse, | |||
| const string & | subRNA, | |||
| const string & | pep, | |||
| int | usefull, | |||
| pair< int, int > | subcds, | |||
| int | rcut1, | |||
| int | rcut2 | |||
| ) | throw (PointOutChain) [protected] |
full parameter version
References budTemplate(), mRNAModel::cdsb, mRNAModel::cdse, Range::end(), mRNAModel::frame, mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::getOid(), itos(), mRNAModel::pep, resetNumest(), resetSuffixProfile(), RNAModel::rna, Noschain::setChain(), setName(), mRNAModel::setProtein(), RNAModel::setRNA(), setTitle(), Noschain::subchain(), and mRNAModel::trimAfterPoint().
Referenced by breakSuffixModel(), and budMinusSuffixModel().
| ESTAssembly* ESTAssembly::budMinusSuffixModel | ( | pair< int, int > | gcuts, | |
| int | suffix_gcdsb, | |||
| int | suffix_gcdse, | |||
| const string & | subRNA, | |||
| const string & | pep, | |||
| int | usefull, | |||
| pair< int, int > | subcds | |||
| ) | throw (PointOutChain) [inline, protected] |
shorter version
References budMinusSuffixModel(), mRNAModel::pep, and RNAModel::RNAIndex().
| ESTAssembly * ESTAssembly::budPlusPrefixModel | ( | pair< int, int > | gcuts, | |
| int | rcut1, | |||
| int | rcut2, | |||
| int | prefix_gcdsb, | |||
| int | prefix_gcdse, | |||
| pair< int, int > | subcds, | |||
| int | usefull, | |||
| const string & | maxPep | |||
| ) | throw (PointOutChain) [protected] |
helper function
References Range::begin(), budTemplate(), mRNAModel::cdsb, mRNAModel::cdse, mRNAModel::frame, mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::getOid(), itos(), resetNumest(), resetPrefixProfile(), RNAModel::rna, Noschain::setChain(), setName(), mRNAModel::setProtein(), setTitle(), Noschain::subchain(), and mRNAModel::trimBeforePoint().
Referenced by breakPrefixModel().
| ESTAssembly * ESTAssembly::budMinusPrefixModel | ( | pair< int, int > | gcuts, | |
| int | rcut1, | |||
| int | rcut2, | |||
| int | prefix_gcdsb, | |||
| int | prefix_gcdse, | |||
| pair< int, int > | subcds, | |||
| const string & | pep, | |||
| const string & | subRNA | |||
| ) | throw (PointOutChain) [protected] |
| subRNA | is the parent rna.substr(0,cdsb-1) reverse complemented |
References Range::begin(), budTemplate(), mRNAModel::cdsb, mRNAModel::cdse, mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::getOid(), itos(), mRNAModel::pep, resetNumest(), resetPrefixProfile(), Noschain::setChain(), setName(), mRNAModel::setProtein(), RNAModel::setRNA(), setTitle(), Noschain::subchain(), and mRNAModel::trimBeforePoint().
Referenced by breakPrefixModel().
| ESTAssembly * ESTAssembly::budTemplate | ( | ) | const [protected] |
produce a model with the invariant information: congid, gid, gseq
References congid, ESTAssembly(), RNAModel::gid, and RNAModel::gseq.
Referenced by budMinusPrefixModel(), budMinusSuffixModel(), budPlusPrefixModel(), budPlusSuffixModel(), prune3PrimeUTR(), and prune5PrimeUTR().
| void ESTAssembly::resetNumest | ( | ESTAssembly * | mod | ) | [protected] |
assume profile exists for this and mod
References Coverdepth::area(), basecovdep, max, and numest.
Referenced by budMinusPrefixModel(), budMinusSuffixModel(), budPlusPrefixModel(), budPlusSuffixModel(), prune3PrimeUTR(), and prune5PrimeUTR().
| void ESTAssembly::resetSuffixProfile | ( | ESTAssembly * | mod, | |
| pair< int, int > | gcuts | |||
| ) | [protected] |
gcuts.first is on the parent side, gcuts.second is on the suffix side
References basecovdep, Range::direction(), Range::end(), Coverdepth::subprofile(), Coverdepth::truncateHead(), and Coverdepth::truncateTail().
Referenced by budMinusSuffixModel(), budPlusSuffixModel(), and prune3PrimeUTR().
| void ESTAssembly::resetPrefixProfile | ( | ESTAssembly * | mod, | |
| pair< int, int > | gcuts | |||
| ) | [protected] |
For prefix it is different: gcuts.first is on the prefix side
References basecovdep, Range::begin(), Range::direction(), Coverdepth::subprofile(), Coverdepth::truncateHead(), and Coverdepth::truncateTail().
Referenced by budMinusPrefixModel(), budPlusPrefixModel(), and prune5PrimeUTR().
| void ESTAssembly::setCDSInfo | ( | ) | [protected] |
This function is a helper for breaking chimera EST assemblies. It requires that both RNA sequences have just been set and that protein and CDS range information is missing. If the gene is single exon, it will try both directions to find the max ORF. At the end it uses the CDS info at RNA level to set CDS info at genomic level.
References mRNAModel::cdsb, mRNAModel::cdse, mRNAModel::frame, mRNAModel::gcdsb, mRNAModel::gcdse, RNAModel::genomicIndex(), longestORFPlus(), Noschain::numberOfRanges(), mRNAModel::pep, Noschain::reverse(), reverseComplementInPlace(), RNAModel::rna, and Noschain::show().
Referenced by prune3PrimeUTR(), and prune5PrimeUTR().
const char ESTAssembly::modelheader = "modelid\tgeneid\tcongid\tgenomicid\tbegin\tend\tgCDSb\tgCDSe\tnumest\tnumexon\texonLength\texons\tCDSstart\tCDSend\tRNAseq\tpepseq\tframe\tprofileArea\tprofileMaxHeight\trelProfH\tbasecover" [static] |
Reimplemented from mRNAModel.
const char ESTAssembly::trackheader = "id\tname\tchrom\tstrand\tstart\tend\tsfCount\tsfStarts\tsfEnds" [static] |
int ESTAssembly::congid [protected] |
Referenced by ESTAssemblyid::breakup(), budTemplate(), operator=(), print(), show(), write(), and writeModel().
int ESTAssembly::numest [protected] |
Referenced by ESTAssemblyid::breakup(), ESTAssembly(), ESTCount(), isChimera(), numberOfEST(), operator=(), print(), resetNumest(), setName(), setTitle(), shouldBreakPrefix(), shouldBreakSuffix(), show(), write(), and writeModel().
Coverdepth* ESTAssembly::basecovdep [protected] |
base coverage depth, set to zero if no profile.
Referenced by averageProfileHeight(), ESTAssemblyid::breakup(), ESTAssembly(), findDip(), fixIntronBound(), maxProfileHeight(), relativeProfileHeight(), releaseProfile(), resetNumest(), resetPrefixProfile(), resetSuffixProfile(), reverse(), showBaseCoverInfo(), showProfile(), validProfile(), writeModel(), and ~ESTAssembly().
float ESTAssembly::relprofh [protected] |
relative profile height. A convenient number for (min,max) profile height relative to the star model that is defined as the model with the most EST. If gene has only one model, then the value will be (1,1). Or if the model is star model, the value is (1,1). Actually storing two numbers is not needed. We will only store one number min(this/star) here star model is defined simply as model with max number of EST. If more than one, simply pick one randomly.
Referenced by assignRPH(), getRelprofh(), operator=(), setRelprofh(), and writeModel().
string ESTAssembly::estname [protected] |
Referenced by ESTAssembly(), name(), operator=(), setName(), write(), ESTAssemblyid::writetab(), and writetab().
string ESTAssembly::esttitle [protected] |
Referenced by addTitleTag(), breakSuffixModel(), ESTAssembly(), operator=(), print(), setTitle(), show(), write(), ESTAssemblyid::writetab(), and writetab().
string ESTAssembly::pepname [protected] |
Referenced by ESTAssembly(), operator=(), setName(), write(), ESTAssemblyid::writetab(), and writetab().
string ESTAssembly::peptitle [protected] |
Referenced by addTitleTag(), ESTAssembly(), operator=(), print(), setTitle(), show(), write(), ESTAssemblyid::writetab(), and writetab().
string ESTAssembly::comment [protected] |
use to store information about frame-shift etc
Referenced by addComment(), breakPrefixModel(), ESTAssemblyid::breakup(), hasComment(), noComment(), operator=(), writeCommentTab(), and writetab().
int ESTAssembly::intronfix [protected] |
A state variable to remember whether the intron bound has been fixed or not. Fewer than 0.5% of the models have bad intron bounds, which derive from either bad sequence, artifacts, or true biological processes. 0: good, no fix needed, one of the three known GT(C)..AG, AT..AC; 1: fixed at least one intron; 2: at least one bad bound; 3: fixed at least one, and still has bad intron bounds. The fixIntronBound() function sets this variable.
Referenced by checkIntronBound(), fixIntronBound(), genuine(), intronFixState(), isStar(), operator=(), and semiGenuine().
int ESTAssembly::chimera_numestcut = 50 [static, protected] |
Referenced by isChimera(), shouldBreakPrefix(), and shouldBreakSuffix().
int ESTAssembly::chimera_peplencut = 90 [static, protected] |
When an EST has joined two models like the following ---====--=======----- and we are trying to break the chimera EST into individual models, we have a cutoff for the shortest peptide.
Referenced by ORFLongEnough().
int ESTAssembly::chimera_partial_peplencut = 40 [static, protected] |
Referenced by ORFLongEnough().
int ESTAssembly::orfspace = 100 [static, protected] |
Now I use it to trim long 3' or 5' UTRs. Now I am setting it to 100 nt. It looks for a cutting point only 100 nt away from either end of a UTR
Referenced by prune3PrimeUTR(), and prune5PrimeUTR().
1.5.6