#include <RNAModel.h>

Public Member Functions | |
| mRNAModel () | |
| mRNAModel (const Noschain &seg, const string &chrom, const string &gs) | |
| mRNAModel (const Noschain &seg, int gcdsb_, int gcdse_, const string &chrom, const string &gs, int fr=0) | |
| mRNAModel (const string &exstr, const string &gname, const string &gs, int oi, int gcb, int gce, int fr=0) | |
| mRNAModel (const string &exstr, const string &gname, const string &gs, int oi, int gcb, int gce, int gene_id, int fr=0) | |
| mRNAModel (const string &exstarts, const string &exends, char strand, int gcb, int gce, const string &gi, const string &genomic) throw (PointOutChain, Badinput, exception) | |
| mRNAModel (const string &exstarts, const string &exends, char strand, int gcb, int gce, const string &gi, const string &genomic, int oo) throw (PointOutChain, Badinput) | |
| mRNAModel (const mRNAModel &mm) | |
| char | guessStrand () const |
| mRNAModel & | operator= (const mRNAModel &mm) |
| bool | samePeptide (const mRNAModel &mm) const |
| bool | sameGene (const mRNAModel &mod) const |
| Noschain | FivePrimeUTR () const |
| Noschain | ThreePrimeUTR () const |
| int | num5NoncodingExons () const |
| int | num3NoncodingExons () const |
| Noschain | CDSChain () const throw (PointOutChain) |
| Range | CDSRange () const |
| pair< int, int > | genomicCDSBound () const |
| Range | RNACDSRange () const |
| pair< int, int > | RNACDSBound () const |
| int | CDSLength () const |
| int | genomicCDSLength () const |
| int | genomicCDSEnd () const |
| int | genomicCDSBegin () const |
| const string & | proteinSequence () const |
| int | proteinLength () const |
| int | proteinLengthNoTail () const |
| int | getFrame () const |
| string | CDSSeq () const |
| string | CDSSequence () const |
| char | CDSDirection () const |
| double | CDSFraction () const |
| double | CDSFractionRNA () const |
| double | CDSFractionGenomic () const |
| int | FivePrimeUTRLength () const |
| int | ThreePrimeUTRLength () const |
| int | UTRLength () const |
| void | UTR3Sequence (string &seq) const |
| string | UTR3Sequence () const |
| bool | hasStart () const |
| bool | hasStop () const |
| bool | complete () const |
| int | numberOfInternalStops () const |
| const string & | getProtein () const |
| void | trimAfterPoint (const int p) throw (PointOutChain) |
| void | trimBeforePoint (const int p) throw (PointOutChain) |
| void | trimBeforePoint (const int gp, const int rp) throw (PointOutChain) |
| void | setProtein (const string &pseq) |
| void | setRNACDS (int bb, int ee) throw (OutsideGenomicSequence) |
| void | resetRNACDS (int bb, int ee) throw (OutsideGenomicSequence) |
| void | setGenomicCDS (int gb, int ge) throw (OutsideGenomicSequence, InvalidModel) |
| void | setGenomicCDS (pair< int, int > gcr) |
| bool | growCDS3Prime (int len) throw (OutsideGenomicSequence) |
| bool | trimCDSTail () |
| bool | trimCDSStop () |
| void | reset () |
| void | resetProtein () |
| void | setLongestCDSAndProtein () |
| bool | append (mRNAModel &mod, int &comment) |
| string | JGIModelRow (const char sep='\t') const |
| ostream & | printJGIModelRow (ostream &ous, const char sep='\t') const |
| string | JGITranscriptRow (const char sep='\t') const |
| ostream & | printJGITranscriptRow (ostream &ous, const char sep='\t') const |
| ostream & | printJGITranscriptRowNoId (ostream &ous, const char sep='\t') const |
| string | JGIProteinRow (char sep='\t') const |
| string | sfCDSGenomic () const |
| Noschain | CDSOnewayChain () const |
| void | CDSOnewayChain (Noschain &ch) const |
| string | sfCDSTranscript () const |
| string | sfExonsProtein () const |
| ostream & | show (ostream &ous) const |
| bool | genuine () const |
| bool | semiGenuine () const |
| bool | isStar () const |
| modeltype | objtype () const |
| bool | valid () const |
| ostream & | print (ostream &ous) const |
| mRNAModel & | reverse (int newRNACDSB, int newRNACDSE) |
| string | name (const string &prefix) |
| void | writetab (ostream &mod, ostream &ex, ostream &track, ostream &orna, ostream &oprt, char sep='\t') const |
| ostream & | writeModelTable (ostream &ous, char sep='\t') const |
Static Public Member Functions | |
| static void | setShortestModel (int len) |
Static Public Attributes | |
| static const char | modelheader [] = "modid\tgeneid\tgenomicId\tbegin\tend\tgenomicCDSb\tgenomicCDSe\tnumberOfExons\tsumexonLength\texonstring\tCDSb\tCDSe\tRNAseq\tpepSeq\tframe" |
| static const char | jgiModelCol [] = "id\tchrom\tstrand\tstart\tend\tcdsStart\tcdsEnd\tsfCount\tsfStarts\tsfEnds" |
| static const char | jgiTranscriptCol [] = "transcriptId\tlengthGenomic\tlengthTranscript\tlengthCDS\tsfExonsGenomic\tsfCDSGenomic\tsfExonsTranscript\tsfCDSTranscript\tseqGenomic\tseqTranscript\tseqCDS" |
| static const char | jgiProteinCol [] = "proteinId\ttranscriptId\tlengtht\tsfExons\tseq" |
| static int | shortestpep = 30 |
| static int | shortestmodel = 90 |
| static int | utrlen5max = 2000 |
| static int | utrlen3max = 1900 |
Protected Attributes | |
| int | cdsb |
| int | cdse |
| int | gcdsb |
| int | gcdse |
| string | pep |
| int | frame |
Static Protected Attributes | |
| static char | header [350] = "" |
| mRNAModel::mRNAModel | ( | ) | [inline] |
| mRNAModel::mRNAModel | ( | const Noschain & | seg, | |
| const string & | chrom, | |||
| const string & | gs | |||
| ) | [explicit] |
Guess the CDS range as the longest ORF. More or less an EST model; done mainly for the work of ESTModel. Sets the frame to 0. May not be the correct ORF. There are about 10 such models in one fungal genome.
References RNAModel::reset(), and setLongestCDSAndProtein().
| mRNAModel::mRNAModel | ( | const Noschain & | seg, | |
| int | gcdsb_, | |||
| int | gcdse_, | |||
| const string & | chrom, | |||
| const string & | gs, | |||
| int | fr = 0 | |||
| ) | [explicit] |
| gcdsb_ | genomic CDS Start position. | |
| gcdse_ | genomic CDS end position. Last base of stop codon. | |
| fr | CDS frame. 0,1, or 2. | |
| gs. | Genomic sequence. | |
| chrom | genomic id. This is mainly used by breakup method in the ESTAssembly class. |
References cdsb, cdse, frame, gcdsb, gcdse, pep, RNAModel::rna, RNAModel::RNAIndex(), Noschain::show(), and translate().
| mRNAModel::mRNAModel | ( | const string & | exstr, | |
| const string & | gname, | |||
| const string & | gs, | |||
| int | oi, | |||
| int | gcb, | |||
| int | gce, | |||
| int | fr = 0 | |||
| ) |
Used to read models stored in files. All model information is given; from the given genomic CDS range, the RNA CDS range is computed. Pepseq is also computed. This is used to read stored objects. The default is 0 for frame; this is useful for JGI models where frame is not recorded.
References cdsb, cdse, frame, gcdsb, gcdse, pep, RNAModel::rna, RNAModel::RNAIndex(), and translate().
| mRNAModel::mRNAModel | ( | const string & | exstr, | |
| const string & | gname, | |||
| const string & | gs, | |||
| int | oi, | |||
| int | gcb, | |||
| int | gce, | |||
| int | gene_id, | |||
| int | fr = 0 | |||
| ) |
has gene id
References cdsb, cdse, frame, gcdsb, gcdse, pep, RNAModel::rna, RNAModel::RNAIndex(), and translate().
| mRNAModel::mRNAModel | ( | const string & | exstarts, | |
| const string & | exends, | |||
| char | strand, | |||
| int | gcb, | |||
| int | gce, | |||
| const string & | gi, | |||
| const string & | genomic | |||
| ) | throw (PointOutChain, Badinput, exception) |
This is the JGI input format, where strand indicates the direction; all numbers are ordered from small to large.
| gi. | Genomic identifier, such as scaffold_1. | |
| genomic. | Genomic sequence. |
References cdsb, cdse, gcdsb, gcdse, guessStrand(), pep, RNAModel::reverse(), RNAModel::rna, RNAModel::RNAIndex(), and translate().
| mRNAModel::mRNAModel | ( | const string & | exstarts, | |
| const string & | exends, | |||
| char | strand, | |||
| int | gcb, | |||
| int | gce, | |||
| const string & | gi, | |||
| const string & | genomic, | |||
| int | oo | |||
| ) | throw (PointOutChain, Badinput) |
with extra oid assigned from external key.
References cdsb, cdse, gcdsb, gcdse, guessStrand(), pep, RNAModel::reverse(), RNAModel::rna, RNAModel::RNAIndex(), and translate().
| mRNAModel::mRNAModel | ( | const mRNAModel & | mm | ) | [inline] |
copy constructor of the same type
| char mRNAModel::guessStrand | ( | ) | const |
Use the intron bounds to determine the strand of the model if it is not given or is given in the wrong format (other than +,-). Cannot guess single-exon models.
References Noschain::exons, RNAModel::gseq, Noschain::numberOfRanges(), and str2upper().
Referenced by mRNAModel().
References cdsb, cdse, frame, gcdsb, gcdse, RNAModel::operator=(), and pep.
Referenced by ESTAssembly::operator=().
| bool mRNAModel::samePeptide | ( | const mRNAModel & | mm | ) | const [inline] |
no overloading of the base ==
References pep.
| bool mRNAModel::sameGene | ( | const mRNAModel & | mod | ) | const |
to test that this model and mod belong to the same gene or not
References Noschain::commonIntrons(), Range::direction(), Noschain::exonIntersectFraction(), Noschain::exonLength(), gcdsb, gcdse, genomicCDSLength(), min, Noschain::numberOfRanges(), and Range::Range().
| Noschain mRNAModel::FivePrimeUTR | ( | ) | const [inline] |
return genomic 5'-UTR as chain. contains introns.
References gcdsb, and Noschain::subchainBeforePoint().
Referenced by bestORF(), ESTAssembly::breakPrefixModel(), ESTAssembly::isChimera(), num5NoncodingExons(), and ESTAssembly::shouldBreakPrefix().
| Noschain mRNAModel::ThreePrimeUTR | ( | ) | const [inline] |
References gcdse, and Noschain::subchainAfterPoint().
Referenced by ESTAssembly::breakSuffixModel(), ESTAssembly::isChimera(), num3NoncodingExons(), and ESTAssembly::shouldBreakSuffix().
| int mRNAModel::num5NoncodingExons | ( | ) | const [inline] |
References FivePrimeUTR(), and Noschain::numberOfRanges().
Referenced by genuine(), isStar(), ESTAssembly::prune5PrimeUTR(), semiGenuine(), and updatedWorse().
| int mRNAModel::num3NoncodingExons | ( | ) | const [inline] |
References Noschain::numberOfRanges(), and ThreePrimeUTR().
Referenced by genuine(), isStar(), ESTAssembly::prune3PrimeUTR(), semiGenuine(), and updatedWorse().
| Noschain mRNAModel::CDSChain | ( | ) | const throw (PointOutChain) |
CDS exons in genomic coordinates
References Range::begin(), Range::direction(), Range::end(), gcdsb, gcdse, show(), and Noschain::subchain().
Referenced by CDSOnewayChain(), sfCDSGenomic(), sfCDSTranscript(), and sfExonsProtein().
| Range mRNAModel::CDSRange | ( | ) | const [inline] |
The genomic Range of the CDS, as opposed to RNACDSRange(), which is not very useful.
References gcdsb, gcdse, and Range::Range().
Referenced by append(), incompatible(), updateCompatible(), and JGIModel::valid().
| Range mRNAModel::RNACDSRange | ( | ) | const [inline] |
References cdsb, cdse, and Range::Range().
Referenced by ESTAssemblyid::breakup(), and readESTModel().
| int mRNAModel::CDSLength | ( | ) | const [inline] |
the CDS length in RNA space
Referenced by append(), ESTAssemblyid::breakup(), CDSFraction(), CDSFractionGenomic(), CDSFractionRNA(), checkstop(), genuine(), isStar(), JGITranscriptRow(), print(), printJGITranscriptRow(), printJGITranscriptRowNoId(), ESTAssembly::RNACodingFraction(), semiGenuine(), and JGIModel::valid().
| int mRNAModel::genomicCDSLength | ( | ) | const [inline] |
| int mRNAModel::genomicCDSEnd | ( | ) | const [inline] |
| int mRNAModel::genomicCDSBegin | ( | ) | const [inline] |
References gcdsb.
| const string& mRNAModel::proteinSequence | ( | ) | const [inline] |
| int mRNAModel::proteinLength | ( | ) | const [inline] |
This method will return a protein length one longer than the known amino acid sequence if the CDS is partial at the 3' end. For example, if you only know 2 of the 3 bases of the last AA's codon, the protein will be ...GADETAL2. If you don't want the tail, use another function (presumably proteinLengthNoTail()).
References pep.
Referenced by checkBadStopIndex(), incompatible(), and updateCompatible().
| int mRNAModel::proteinLengthNoTail | ( | ) | const |
| int mRNAModel::getFrame | ( | ) | const [inline] |
| string mRNAModel::CDSSeq | ( | ) | const [inline] |
shorter name for CDSSequence()
References cdsb, cdse, and RNAModel::rna.
Referenced by JGIModel::valid().
| string mRNAModel::CDSSequence | ( | ) | const |
return the underlying CDS ORF Do error checking for debugging stage.
References cdsb, cdse, RNAModel::rna, show(), and string().
Referenced by checkstop(), JGITranscriptRow(), printJGITranscriptRow(), printJGITranscriptRowNoId(), trimCDSStop(), and trimCDSTail().
| char mRNAModel::CDSDirection | ( | ) | const [inline] |
| double mRNAModel::CDSFraction | ( | ) | const [inline] |
Fraction of the CDS in the genomic sequence. See also CDSFractionRNA() and CDSFractionGenomic().
References CDSLength(), and Range::length().
Referenced by isStar().
| double mRNAModel::CDSFractionRNA | ( | ) | const [inline] |
Fraction of CDS in RNA; this is useful in judging the quality of a model.
References CDSLength(), and RNAModel::exonLength().
Referenced by append(), genuine(), ESTAssembly::isChimera(), and updatedWorse().
| double mRNAModel::CDSFractionGenomic | ( | ) | const [inline] |
CDS compared to genomic fraction, not very useful because it depends on the average size of introns
References CDSLength(), and Range::length().
| int mRNAModel::FivePrimeUTRLength | ( | ) | const [inline] |
Non coding length excluding introns. This is the part in the RNA before CDS begin
References cdsb.
Referenced by genuine(), ESTAssembly::isChimera(), isStar(), ESTAssembly::prune5PrimeUTR(), semiGenuine(), ESTAssembly::shouldBreakPrefix(), updatedWorse(), and UTRLength().
| int mRNAModel::ThreePrimeUTRLength | ( | ) | const [inline] |
References cdse, and RNAModel::exonLength().
Referenced by bestORF(), ESTAssembly::breakSuffixModel(), genuine(), ESTAssembly::isChimera(), isStar(), ESTAssembly::prune3PrimeUTR(), semiGenuine(), ESTAssembly::shouldBreakSuffix(), testBreakup(), updatedWorse(), and UTRLength().
| int mRNAModel::UTRLength | ( | ) | const [inline] |
References FivePrimeUTRLength(), and ThreePrimeUTRLength().
| void mRNAModel::UTR3Sequence | ( | string & | seq | ) | const [inline] |
set seq to the 3'-UTR sequence of the mature mRNA
References cdse, and RNAModel::rna.
| string mRNAModel::UTR3Sequence | ( | ) | const [inline] |
References cdse, and RNAModel::rna.
| bool mRNAModel::hasStart | ( | ) | const [inline] |
CDS divided by the genomic Ranges
References pep.
Referenced by complete(), ESTAssembly::shouldBreakPrefix(), and JGIModel::toSQLString().
| bool mRNAModel::hasStop | ( | ) | const [inline] |
References pep.
Referenced by checkBadStopIndex(), complete(), ESTAssembly::shouldBreakSuffix(), and JGIModel::toSQLString().
| bool mRNAModel::complete | ( | ) | const [inline] |
the ORF is complete
References hasStart(), and hasStop().
Referenced by append(), checkBadStopIndex(), genuine(), isStar(), and semiGenuine().
| int mRNAModel::numberOfInternalStops | ( | ) | const |
Compare the translated product, which is 3x faster than comparing RNA sequences, but they may have different exon structure! So we have to compare both exon structure and translation product; translation product alone is not sufficient. Should not overload! Count the * inside the protein sequence.
References pep.
Referenced by append(), readJGIModel(), updatedWorse(), and JGIModel::valid().
| const string& mRNAModel::getProtein | ( | ) | const [inline] |
| void mRNAModel::trimAfterPoint | ( | const int | p | ) | throw (PointOutChain) |
this should not be defined, I am defining it here to check exceptions. After the debug stage this function should be removed. RNA::trimAfterPoint is sufficient.
Reimplemented from RNAModel.
References cdse, RNAModel::RNAIndex(), and RNAModel::trimAfterPoint().
Referenced by ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusSuffixModel(), and ESTAssembly::prune3PrimeUTR().
| void mRNAModel::trimBeforePoint | ( | const int | p | ) | throw (PointOutChain) |
this one is needed to reset the CDS range
Reimplemented from RNAModel.
References cdsb, cdse, RNAModel::rna, RNAModel::RNAIndex(), show(), and Noschain::trimBeforePoint().
Referenced by ESTAssembly::budMinusPrefixModel(), ESTAssembly::budPlusPrefixModel(), and ESTAssembly::prune5PrimeUTR().
| void mRNAModel::trimBeforePoint | ( | const int | gp, | |
| const int | rp | |||
| ) | throw (PointOutChain) |
References cdsb, cdse, RNAModel::rna, show(), and Noschain::trimBeforePoint().
| void mRNAModel::setProtein | ( | const string & | pseq | ) | [inline] |
use getProtein() to obtain protein sequence string
References pep.
Referenced by ESTAssembly::budMinusPrefixModel(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusPrefixModel(), and ESTAssembly::budPlusSuffixModel().
| void mRNAModel::setRNACDS | ( | int | bb, | |
| int | ee | |||
| ) | throw (OutsideGenomicSequence) |
Will set the RNA CDS range and the genomic CDS range. Will not set the peptide sequence; the caller must call resetProtein() to update the protein sequence. This operation will only change the bounds and will not be able to reverse the direction of the CDS. The frame info will be derived from bb.
References cdsb, cdse, frame, gcdsb, gcdse, RNAModel::genomicIndex(), itos(), RNAModel::rna, and OutsideGenomicSequence::what().
Referenced by resetRNACDS().
| void mRNAModel::resetRNACDS | ( | int | bb, | |
| int | ee | |||
| ) | throw (OutsideGenomicSequence) [inline] |
Only change the range of the RNA CDS; bb should be < ee, otherwise it will crash.
References resetProtein(), and setRNACDS().
Referenced by ESTAssemblyid::breakup(), and reverse().
| void mRNAModel::setGenomicCDS | ( | int | gb, | |
| int | ge | |||
| ) | throw (OutsideGenomicSequence, InvalidModel) |
Set the genomic CDS Range, then set the RNA CDS range. Will not reset the peptide sequence. This is to make the operation more automatic.
References Range::begin(), cdsb, cdse, Range::direction(), Range::end(), gcdsb, gcdse, RNAModel::gseq, RNAModel::RNAIndex(), and show().
Referenced by setGenomicCDS().
| void mRNAModel::setGenomicCDS | ( | pair< int, int > | gcr | ) | [inline] |
References cdsb, cdse, and setGenomicCDS().
| bool mRNAModel::growCDS3Prime | ( | int | len | ) | throw (OutsideGenomicSequence) |
this method is more efficient than set*CDS() methods since it only deals with one end, We should assume len is > 0. If grow outside genomic sequence then it will throw an exception.
References cdse, delta(), Range::direction(), Range::end(), gcdse, RNAModel::genomicIndex(), RNAModel::genomicLength(), Noschain::growEnd(), itos(), resetProtein(), RNAModel::resetRNA(), RNAModel::rna, RNAModel::RNALength(), and OutsideGenomicSequence::what().
Referenced by checkBadStopIndex().
| bool mRNAModel::trimCDSTail | ( | ) |
return true for success false for failure. When protein has AA*L situation use this method to remove the aa after the stop codon.
References cdse, CDSSequence(), delta(), frame, gcdse, RNAModel::genomicIndex(), pep, resetProtein(), RNAModel::resetRNA(), and subseqIsStop().
Referenced by checkBadStopIndex().
| bool mRNAModel::trimCDSStop | ( | ) |
AKPNE*** fgenesh1_kg makes such models!
References cdse, CDSSequence(), gcdse, RNAModel::genomicIndex(), resetProtein(), RNAModel::resetRNA(), and subseqIsStop().
Referenced by checkBadStopIndex().
| void mRNAModel::reset | ( | ) | [virtual] |
reset RNA sequence and recompute peptide sequence This is an expensive operation.
Reimplemented from RNAModel.
References cdsb, cdse, frame, gcdsb, gcdse, RNAModel::genomicIndex(), longestORFPlus(), Noschain::numberOfRanges(), pep, RNAModel::resetRNA(), Noschain::reverse(), reverseComplement(), RNAModel::rna, and Noschain::show().
Referenced by ESTAssembly::fixIntronBound().
| void mRNAModel::resetProtein | ( | ) | [inline] |
Simply regenerate the protein sequence from this object's RNA sequence according to the [cdsb+frame, cdse] information. Does not find the optimal ORF.
References cdsb, cdse, frame, pep, RNAModel::rna, and translate().
Referenced by growCDS3Prime(), resetRNACDS(), trimCDSStop(), and trimCDSTail().
| void mRNAModel::setLongestCDSAndProtein | ( | ) |
find longest ORF, if single exon model, will also find in reverse strand. Then set CDS range, and protein.
assume RNA has been reset
References cdsb, cdse, frame, gcdsb, gcdse, RNAModel::genomicIndex(), longestORFPlus(), Noschain::numberOfRanges(), pep, Noschain::reverse(), reverseComplement(), RNAModel::rna, and Noschain::show().
Referenced by append(), mRNAModel(), and readJGIModel().
| bool mRNAModel::append | ( | mRNAModel & | mod, | |
| int & | comment | |||
| ) |
mRNAModel version, use RNAModel append. then reset protein.
| comment | 0 for regular, 1 for frame-shift |
References RNAModel::append(), CDSFractionRNA(), CDSLength(), CDSRange(), complete(), RNAModel::geneId(), genomicCDSLength(), Range::length(), max, numberOfInternalStops(), Noschain::numberOfRanges(), Range::overlap(), Range::sameDirection(), RNAModel::setGeneId(), setLongestCDSAndProtein(), and show().
Referenced by mRNAModelUpdate::append().
| string mRNAModel::JGIModelRow | ( | const char | sep = '\t' |
) | const |
Output one row of the JGI Model Schema data dump, excluding the useless columns. Columns produced: (id, chrom, strand, start, end, cdsStart, cdsEnd, sfCount, sfStarts, sfEnds). Note: the name column will be produced by the derived class, as it is usually class-specific.
References Range::begin(), Range::direction(), Range::end(), gcdsb, gcdse, RNAModel::getOid(), RNAModel::gid, Noschain::numberOfRanges(), and Noschain::startEnd().
Referenced by ESTAssemblyid::writetab(), ESTAssembly::writetab(), mRNAModelUpdate::writetab(), and writetab().
| ostream & mRNAModel::printJGIModelRow | ( | ostream & | ous, | |
| const char | sep = '\t' | |||
| ) | const |
References Range::begin(), Range::direction(), Range::end(), gcdsb, gcdse, RNAModel::getOid(), RNAModel::gid, Noschain::numberOfRanges(), and Noschain::startEnd().
Referenced by writeResult().
| string mRNAModel::JGITranscriptRow | ( | const char | sep = '\t' |
) | const |
NP = not produced. Columns: transcriptId, locusId (NP), name (NP), description (NP), status (NP), type (NP), lengthGenomic, lengthTranscript, lengthCDS, sfExonsGenomic, sfCDSGenomic, sfExonsTranscript, sfCDSTranscript, seqGenomic, seqTranscript, seqCDS, annotatorId (NP), annotatable (NP), creationDate (NP). transcriptId will be the same as modelid. This is the worst creation (crime) by any programmer!
name, and description will be delayed by derived classes. (transcriptId, lengthGenomic, lengthTranscript, lengthCDS, sfExonsGenomic, sfCDSGenomic, sfExonsTranscript, sfCDSTranscript, seqGenomic, seqTranscript, seqCDS)
References CDSLength(), CDSSequence(), RNAModel::getOid(), Range::length(), RNAModel::rna, RNAModel::RNASequence(), RNAModel::seqGenomic(), sfCDSGenomic(), sfCDSTranscript(), Noschain::sfExonsGenomic(), and Noschain::sfExonsTranscript().
Referenced by ESTAssemblyid::writetab(), ESTAssembly::writetab(), mRNAModelUpdate::writetab(), and writetab().
| ostream & mRNAModel::printJGITranscriptRow | ( | ostream & | ous, | |
| const char | sep = '\t' | |||
| ) | const |
| ostream & mRNAModel::printJGITranscriptRowNoId | ( | ostream & | ous, | |
| const char | sep = '\t' | |||
| ) | const |
References CDSLength(), CDSSequence(), Range::length(), RNAModel::rna, RNAModel::RNASequence(), RNAModel::seqGenomic(), sfCDSGenomic(), sfCDSTranscript(), Noschain::sfExonsGenomic(), and Noschain::sfExonsTranscript().
Referenced by writeResult().
| string mRNAModel::JGIProteinRow | ( | char | sep = '\t' |
) | const |
proteinId, transcriptId, name (NP), description (NP), length, sfExons, seq, annotatable (NP)
References RNAModel::getOid(), pep, and sfExonsProtein().
Referenced by ESTAssemblyid::writetab(), ESTAssembly::writetab(), mRNAModelUpdate::writetab(), and writetab().
| string mRNAModel::sfCDSGenomic | ( | ) | const |
helper function used by JGITranscriptRow()
References Noschain::asDelimitedString(), Range::begin(), CDSChain(), Range::direction(), and Noschain::numberOfRanges().
Referenced by JGITranscriptRow(), printJGITranscriptRow(), and printJGITranscriptRowNoId().
| Noschain mRNAModel::CDSOnewayChain | ( | ) | const |
one way chain in transcript coordinate system
References cdsb, CDSChain(), Noschain::exonOnlyChain(), and Noschain::onewayChain().
| void mRNAModel::CDSOnewayChain | ( | Noschain & | ch | ) | const |
References cdsb, CDSChain(), Noschain::exonOnlyChain(), and Noschain::onewayChain().
| string mRNAModel::sfCDSTranscript | ( | ) | const [inline] |
References cdsb, CDSChain(), Noschain::exonOnlyChain(), and Noschain::onewayChain().
Referenced by JGITranscriptRow(), printJGITranscriptRow(), and printJGITranscriptRowNoId().
| string mRNAModel::sfExonsProtein | ( | ) | const |
References Noschain::asDelimitedString(), Range::begin(), CDSChain(), Range::end(), Noschain::exonOnlyChain(), n2p(), Noschain::numberOfRanges(), Noschain::onewayChain(), and Range::Range().
Referenced by JGIProteinRow().
| ostream & mRNAModel::show | ( | ostream & | ous | ) | const [virtual] |
debug function
Reimplemented from Noschain.
Reimplemented in mRNAModelUpdate, JGIModel, and ESTAssembly.
References cdsb, cdse, frame, gcdsb, gcdse, RNAModel::getOid(), RNAModel::gid, RNAModel::gseq, pep, printFasta(), RNAModel::rna, and Noschain::show().
Referenced by append(), CDSChain(), CDSSequence(), setGenomicCDS(), ESTAssembly::show(), mRNAModelUpdate::show(), testmodel(), trimBeforePoint(), updatedWorse(), and updateOnePredicted().
| bool mRNAModel::genuine | ( | ) | const [inline] |
reasonable full length model, no restriction on the number of UTR exons. This will promote models that are likely to have sequence error in the genomic sequences. This is for testing protein models. There could be non-coding RNAs that should be tested with Genuine Non-coding models.
Reimplemented in ESTAssembly.
References CDSFractionRNA(), CDSLength(), complete(), FivePrimeUTRLength(), num3NoncodingExons(), num5NoncodingExons(), pep, and ThreePrimeUTRLength().
Referenced by ESTAssembly::genuine().
| bool mRNAModel::semiGenuine | ( | ) | const [inline] |
Reimplemented in ESTAssembly.
References CDSLength(), complete(), FivePrimeUTRLength(), num3NoncodingExons(), num5NoncodingExons(), ThreePrimeUTRLength(), utrlen3max, and utrlen5max.
Referenced by ESTAssembly::semiGenuine().
| bool mRNAModel::isStar | ( | ) | const [inline] |
good quality model: UTR < 600 nt, CDS length > 330 nt, numUTR exon < 2 or CDS fraction > 65%
Reimplemented in ESTAssembly.
References CDSFraction(), CDSLength(), complete(), FivePrimeUTRLength(), num3NoncodingExons(), num5NoncodingExons(), ThreePrimeUTRLength(), utrlen3max, and utrlen5max.
Referenced by ESTAssembly::isStar().
| modeltype mRNAModel::objtype | ( | ) | const [inline, virtual] |
| bool mRNAModel::valid | ( | ) | const [virtual] |
check the following 1. CDS genomic range outside model 2. CDS length < 5 3. RNA length should agree with exonLength
Reimplemented from RNAModel.
References Range::begin(), cdsb, cdse, Range::direction(), Range::end(), gcdsb, gcdse, RNAModel::rna, and RNAModel::valid().
Referenced by ESTAssembly::prune3PrimeUTR(), and ESTAssembly::prune5PrimeUTR().
| ostream & mRNAModel::print | ( | ostream & | ous | ) | const [virtual] |
for operator<<
Reimplemented from RNAModel.
Reimplemented in JGIModel, and ESTAssembly.
References cdsb, cdse, CDSLength(), pep, and RNAModel::print().
Referenced by ESTAssembly::print(), and JGIModel::print().
| mRNAModel & mRNAModel::reverse | ( | int | newRNACDSB, | |
| int | newRNACDSE | |||
| ) |
Prevents reversing of multi-exon models; it will crash if you try to. This will prevent the programmer from making mistakes. The CDS range, pep, and frame will all need to be reset.
Reimplemented in ESTAssembly.
References Noschain::numberOfRanges(), resetRNACDS(), and RNAModel::reverse().
| string mRNAModel::name | ( | const string & | prefix | ) | [inline] |
References RNAModel::getOid(), and itos().
| static void mRNAModel::setShortestModel | ( | int | len | ) | [inline, static] |
| void mRNAModel::writetab | ( | ostream & | mod, | |
| ostream & | ex, | |||
| ostream & | track, | |||
| ostream & | orna, | |||
| ostream & | oprt, | |||
| char | sep = '\t' | |||
| ) | const |
Simply calls the five methods to produce default rows: writeModelTable(), writeExon(), JGIModelRow(), JGITranscriptRow(), JGIProteinRow().
Reimplemented in mRNAModelUpdate, ESTAssembly, and ESTAssemblyid.
References JGIModelRow(), JGIProteinRow(), JGITranscriptRow(), RNAModel::writeExon(), and writeModelTable().
| ostream & mRNAModel::writeModelTable | ( | ostream & | ous, | |
| char | sep = '\t' | |||
| ) | const |
following columns: (modid geneid genomicId begin end genomicCDSb genomicCDSe numberOfExons sumexonLength exonstring CDSb CDSe RNAseq pepSeq frame)
Produces one row of tab-delimited text for loading into database. Should make derived class use this method. But will need to change the order of the columns. So will just use copy-pasting at this point.
Reimplemented in mRNAModelUpdate.
References Range::begin(), cdsb, cdse, Range::end(), RNAModel::exonLength(), frame, gcdsb, gcdse, RNAModel::geneId(), RNAModel::genomicId(), RNAModel::getOid(), Noschain::numberOfRanges(), pep, RNAModel::rna, and Noschain::toString().
Referenced by writetab().
const char mRNAModel::modelheader = "modid\tgeneid\tgenomicId\tbegin\tend\tgenomicCDSb\tgenomicCDSe\tnumberOfExons\tsumexonLength\texonstring\tCDSb\tCDSe\tRNAseq\tpepSeq\tframe" [static] |
const char mRNAModel::jgiModelCol = "id\tchrom\tstrand\tstart\tend\tcdsStart\tcdsEnd\tsfCount\tsfStarts\tsfEnds" [static] |
Referenced by checkBadStopIndex(), ESTAssembly::JGIModelColumns(), and main().
const char mRNAModel::jgiTranscriptCol = "transcriptId\tlengthGenomic\tlengthTranscript\tlengthCDS\tsfExonsGenomic\tsfCDSGenomic\tsfExonsTranscript\tsfCDSTranscript\tseqGenomic\tseqTranscript\tseqCDS" [static] |
Referenced by checkBadStopIndex(), ESTAssembly::JGITranscriptColumns(), and main().
const char mRNAModel::jgiProteinCol = "proteinId\ttranscriptId\tlengtht\tsfExons\tseq" [static] |
Referenced by ESTAssembly::JGIProteinColumns(), and main().
int mRNAModel::shortestpep = 30 [static] |
currently my default is 30. This is the size of smallest proteins identified in Fungi.
Referenced by JGIModel::valid().
int mRNAModel::shortestmodel = 90 [static] |
currently set to 90. This is smaller than 120 nt for smallest mRNA known in fungi. So we are not discarding any useful information.
Referenced by JGIModel::JGIModel(), setShortestModel(), and JGIModel::valid().
int mRNAModel::utrlen5max = 2000 [static] |
Upper limit on UTR length (95% of the genes' UTR lengths), for both 3' and 5'. Defaults: 5' 2000, 3' 1900.
Referenced by isStar(), and semiGenuine().
int mRNAModel::utrlen3max = 1900 [static] |
Referenced by isStar(), and semiGenuine().
int mRNAModel::cdsb [protected] |
These are indices inside the spliced RNA product; for genomic positions you need to use the parent class's genomicIndex() method. 1-based index, inclusive: [cdsb, cdse].
Referenced by ESTAssembly::breakPrefixModel(), ESTAssembly::budMinusPrefixModel(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusPrefixModel(), ESTAssembly::budPlusSuffixModel(), CDSLength(), CDSOnewayChain(), CDSSeq(), CDSSequence(), ESTAssembly::ESTAssembly(), FivePrimeUTRLength(), mRNAModel(), operator=(), print(), reset(), resetProtein(), RNACDSBound(), RNACDSRange(), ESTAssembly::setCDSInfo(), setGenomicCDS(), setLongestCDSAndProtein(), setRNACDS(), ESTAssembly::setTitle(), sfCDSTranscript(), show(), trimBeforePoint(), valid(), ESTAssembly::write(), ESTAssembly::writeModel(), mRNAModelUpdate::writeModelTable(), and writeModelTable().
int mRNAModel::cdse [protected] |
Referenced by ESTAssembly::breakSuffixModel(), ESTAssembly::budMinusPrefixModel(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusPrefixModel(), ESTAssembly::budPlusSuffixModel(), CDSLength(), CDSSeq(), CDSSequence(), ESTAssembly::ESTAssembly(), growCDS3Prime(), mRNAModel(), operator=(), print(), reset(), resetProtein(), RNACDSBound(), RNACDSRange(), ESTAssembly::setCDSInfo(), setGenomicCDS(), setLongestCDSAndProtein(), setRNACDS(), ESTAssembly::setTitle(), show(), ThreePrimeUTRLength(), trimAfterPoint(), trimBeforePoint(), trimCDSStop(), trimCDSTail(), UTR3Sequence(), valid(), ESTAssembly::write(), ESTAssembly::writeModel(), mRNAModelUpdate::writeModelTable(), and writeModelTable().
int mRNAModel::gcdsb [protected] |
For performance we store the computed result. These are the genomic indices of the CDS location; they represent the range of the CDS. On the - strand, gcdsb > gcdse.
Referenced by ESTAssembly::breakPrefixModel(), ESTAssembly::budMinusPrefixModel(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusPrefixModel(), ESTAssembly::budPlusSuffixModel(), CDSChain(), CDSDirection(), CDSRange(), FivePrimeUTR(), genomicCDSBegin(), genomicCDSBound(), genomicCDSLength(), JGIModel::JGIModel(), JGIModelRow(), mRNAModel(), operator=(), printJGIModelRow(), ESTAssembly::prune5PrimeUTR(), reset(), sameGene(), ESTAssembly::setCDSInfo(), setGenomicCDS(), setLongestCDSAndProtein(), setRNACDS(), JGIModel::show(), show(), JGIModel::toJGIString(), JGIModel::toSQLString(), JGIModel::toString(), JGIModel::valid(), valid(), ESTAssembly::write(), ESTAssembly::writeModel(), mRNAModelUpdate::writeModelTable(), and writeModelTable().
int mRNAModel::gcdse [protected] |
Referenced by ESTAssembly::breakSuffixModel(), ESTAssembly::budMinusPrefixModel(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusPrefixModel(), ESTAssembly::budPlusSuffixModel(), CDSChain(), CDSDirection(), CDSRange(), genomicCDSBound(), genomicCDSEnd(), genomicCDSLength(), growCDS3Prime(), JGIModel::growEnd(), JGIModel::JGIModel(), JGIModelRow(), mRNAModel(), operator=(), printJGIModelRow(), ESTAssembly::prune3PrimeUTR(), reset(), sameGene(), ESTAssembly::setCDSInfo(), setGenomicCDS(), setLongestCDSAndProtein(), setRNACDS(), JGIModel::show(), show(), ThreePrimeUTR(), JGIModel::toJGIString(), JGIModel::toSQLString(), JGIModel::toString(), trimCDSStop(), trimCDSTail(), JGIModel::valid(), valid(), ESTAssembly::write(), ESTAssembly::writeModel(), mRNAModelUpdate::writeModelTable(), and writeModelTable().
string mRNAModel::pep [protected] |
Creation of pep uses the standard codon table. Flexibility for different codon tables still needs to be added.
Referenced by ESTAssembly::budMinusPrefixModel(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusSuffixModel(), genuine(), getProtein(), hasStart(), hasStop(), JGIProteinRow(), mRNAModel(), numberOfInternalStops(), operator=(), print(), proteinLength(), proteinLengthNoTail(), proteinSequence(), reset(), resetProtein(), samePeptide(), ESTAssembly::setCDSInfo(), setLongestCDSAndProtein(), setProtein(), show(), JGIModel::toSQLString(), trimCDSTail(), JGIModel::valid(), ESTAssembly::write(), ESTAssembly::writeModel(), mRNAModelUpdate::writeModelTable(), and writeModelTable().
int mRNAModel::frame [protected] |
Marks which translation frame the peptide is derived from. There are three frames: 0, 1, and 2. This is only useful when the 5'-end of the RNA is partial. In this case, CDS starts from 1, but translation could start from 0, 1, or 2. In other cases, when CDS begin > 2, the frame is 0. This frame is the frame in which to start translation after taking the subsequence [cdsb,cdse]; that is different from the reading frame that cdsb is in, which can be computed by (cdsb-1)%3.
In other words, this is only a translation instruction, not the absolute frame of the CDS with regard to the whole mRNA.
Referenced by ESTAssemblyid::breakup(), ESTAssembly::budMinusSuffixModel(), ESTAssembly::budPlusPrefixModel(), getFrame(), mRNAModel(), operator=(), reset(), resetProtein(), ESTAssembly::setCDSInfo(), setLongestCDSAndProtein(), setRNACDS(), show(), JGIModel::toSQLString(), trimCDSTail(), JGIModel::valid(), ESTAssembly::writeModel(), mRNAModelUpdate::writeModelTable(), and writeModelTable().
char mRNAModel::header = "" [static, protected] |
used for constructing new headers
Referenced by ESTAssemblyid::getModelheader(), ESTAssembly::JGIModelColumns(), mRNAModelUpdate::jgiModelColumns(), ESTAssembly::JGIProteinColumns(), mRNAModelUpdate::jgiProteinColumns(), ESTAssembly::JGITranscriptColumns(), and mRNAModelUpdate::jgiTranscriptColumns().
1.5.6