#include <GenModel.h>

Public Member Functions | |
| GenModel () | |
| GenModel (const string &exstarts, const string &exends, char strand, int cdsb, int cdse, const string &gi, int ii, const string &si) throw (InvalidModel) | |
| GenModel (const string &exstarts, const string &exends, char strand, int cdsb, int cdse, const string &gi, int ii, const string &si, const string &track, int trackid) throw (InvalidModel) | |
| ~GenModel () | |
| ostream & | show (ostream &ous) const |
| Noschain | FivePrimeUTR () const |
| Noschain | ThreePrimeUTR () const |
| int | num5NoncodingExons () const |
| int | num3NoncodingExons () const |
| Noschain | CDS () const throw (PointOutChain) |
| Range | CDSRange () const |
| int | FivePrimeUTRLength () const |
| int | ThreePrimeUTRLength () const |
| int | UTRLength () const |
| int | CDSLength () const throw (PointOutChain) |
| double | CDSFraction () const |
| int | valid () |
| bool | valid (const string &gs) |
| const string & | genomicId () const |
| const string & | modelName () const |
| const string & | getName () const |
| int | getId () const |
| string | CDSSeq (const string &gs) const throw (PointOutChain) |
| void | growEnd (int icr) |
| void | setEnd (int ne) |
| string | toString (char sep='\t') const |
| string | toSQLString (char sep=',') const |
| string | toJGIModel (char sep='\t') const |
| ostream & | printAllmod (ostream &ous, char sep='\t') const |
| bool | operator== (const GenModel &gm) const |
| bool | sameExons (const GenModel &gm) const |
| void | makeProtein (const string &gs) |
| const string & | pepstr () const |
| bool | hasBegin () const |
| bool | hasEnd () const |
Private Attributes | |
| string | gid |
| int | id |
| string | name |
| Range | cds |
| int | cdsphase |
| int | numstop |
| bool | hasstart |
| bool | hasstop |
| Protein * | pep |
| string | track |
| int | trackid |
Static Private Attributes | |
| static const int | minModelLen = 100 |
| static const int | minCDSLen = 80 |
| GenModel::GenModel | ( | ) | [inline] |
| GenModel::GenModel | ( | const string & | exstarts, | |
| const string & | exends, | |||
| char | strand, | |||
| int | cdsb, | |||
| int | cdse, | |||
| const string & | gi, | |||
| int | ii, | |||
| const string & | si | |||
| ) | throw (InvalidModel) |
The input structure comes from two columns of the JGI model track: sfStarts and sfEnds.
| gi | genomic identifier. | |
| ii | unique integer id | |
| si | unique string id, usually name from feature table | |
| cdsb,cdse. | cds genomic bound. |
References Range::begin(), cds, Range::direction(), Range::end(), Noschain::insideExon(), Range::length(), Range::reverse(), and Noschain::setEnd().
| GenModel::GenModel | ( | const string & | exstarts, | |
| const string & | exends, | |||
| char | strand, | |||
| int | cdsb, | |||
| int | cdse, | |||
| const string & | gi, | |||
| int | ii, | |||
| const string & | si, | |||
| const string & | track, | |||
| int | trackid | |||
| ) | throw (InvalidModel) |
add more identifier for tracking purposes
| track. | method of allmodel or featureTable in linkToAss. | |
| trackid. | id in allmodel or featureId in linkToAss. | |
| ii. | Has no equivalent in the JGI schema; a counter must be used instead. |
References Range::begin(), cds, Range::direction(), Range::end(), Noschain::insideExon(), Range::length(), Range::reverse(), and Noschain::setEnd().
| GenModel::~GenModel | ( | ) |
References pep.
| ostream & GenModel::show | ( | ostream & | ous | ) | const [virtual] |
debug function
Reimplemented from Noschain.
References cds, gid, name, and Noschain::show().
Referenced by valid(), and writeResultToDatabase().
| Noschain GenModel::FivePrimeUTR | ( | ) | const [inline] |
References Range::begin(), cds, and Noschain::subchainBeforePoint().
Referenced by checkUTRChimera(), and num5NoncodingExons().
| Noschain GenModel::ThreePrimeUTR | ( | ) | const [inline] |
This may contain introns.
References cds, Range::end(), and Noschain::subchainAfterPoint().
Referenced by checkUTRChimera(), and num3NoncodingExons().
| int GenModel::num5NoncodingExons | ( | ) | const [inline] |
References FivePrimeUTR(), and Noschain::numberOfRanges().
| int GenModel::num3NoncodingExons | ( | ) | const [inline] |
References Noschain::numberOfRanges(), and ThreePrimeUTR().
| Noschain GenModel::CDS | ( | ) | const throw (PointOutChain) [inline] |
| int GenModel::FivePrimeUTRLength | ( | ) | const [inline] |
References Range::begin(), cds, Noschain::exonLength(), and Noschain::subchainBeforePoint().
Referenced by UTRLength().
| int GenModel::ThreePrimeUTRLength | ( | ) | const [inline] |
References cds, Range::end(), Noschain::exonLength(), and Noschain::subchainAfterPoint().
Referenced by UTRLength().
| int GenModel::UTRLength | ( | ) | const [inline] |
References FivePrimeUTRLength(), and ThreePrimeUTRLength().
| int GenModel::CDSLength | ( | ) | const throw (PointOutChain) [inline] |
| double GenModel::CDSFraction | ( | ) | const [inline] |
The CDS length divided by the model length.
References CDSLength(), and Range::length().
| int GenModel::valid | ( | ) |
Calling this method will attempt to fix the model if it is bad. If it cannot be fixed, then return false.
Not sure whether to throw an exception or return a boolean.
This operation is sequence independent.
Checks performed: CDS too short; model too short; CDS and model in the same direction; CDS inside the model. If the CDS is outside the model by less than 4 nt, this function will fix the error; otherwise it will return an invalid model.
Should not use in the constructor. This version is for when you don't have sequence information.
References Range::begin(), cds, Range::contain(), Range::direction(), Range::end(), Range::length(), minCDSLen, minModelLen, and setEnd().
Referenced by main(), ModelFactory::next(), and valid().
| bool GenModel::valid | ( | const string & | gs | ) |
when you supply genomic sequence, this function does further checking
CDS is multiple of 3
References cds, CDSLength(), cdsphase, CDSSeq(), Protein::countInternalStops(), Range::direction(), Range::end(), Noschain::exonIndex(), Noschain::exons, growEnd(), hasstart, hasstop, Protein::hasStop(), Range::length(), makeProtein(), minCDSLen, numstop, pep, show(), DNA::translate(), and valid().
| const string& GenModel::genomicId | ( | ) | const [inline] |
same as valid
References gid.
Referenced by toJGIModel(), toSQLString(), toString(), writeResultToDatabase(), and writeResultToFile().
| const string& GenModel::modelName | ( | ) | const [inline] |
should not modify this return name
References name.
Referenced by writeResultToDatabase(), and writeResultToFile().
| const string& GenModel::getName | ( | ) | const [inline] |
| int GenModel::getId | ( | ) | const [inline] |
| string GenModel::CDSSeq | ( | const string & | gs | ) | const throw (PointOutChain) |
extract the CDS sequence given a genomic sequence as input
References CDS(), Range::direction(), Noschain::getExons(), and reverseComplement().
Referenced by main(), makeProtein(), and valid().
| void GenModel::growEnd | ( | int | icr | ) |
this will grow both the CDS and the exon chain ends
Reimplemented from Noschain.
References cds, Range::growEnd(), and Noschain::growEnd().
Referenced by valid().
| void GenModel::setEnd | ( | int | ne | ) | [inline] |
not sure how to manage the CDS end
Reimplemented from Noschain.
References cds, Range::setEnd(), and Noschain::setEnd().
Referenced by valid().
| string GenModel::toString | ( | char | sep = '\t' |
) | const |
Overrides the parent method to produce the same columns as the toSQLString method. Produces a tab-delimited row for bulk upload with the columns: id, genomicid, name, exons, mb, me, gcdsb, gcdse, cdsphase, hasstart, hasstop, pep, track, trackid.
References Range::begin(), cds, cdsphase, Range::end(), genomicId(), getId(), getName(), bioseq::getSequence(), hasstart, hasstop, pep, Noschain::toString(), track, and trackid.
| string GenModel::toSQLString | ( | char | sep = ',' |
) | const |
References Range::begin(), cds, cdsphase, Range::end(), genomicId(), getId(), getName(), bioseq::getSequence(), hasstart, hasstop, pep, Noschain::toString(), track, and trackid.
| string GenModel::toJGIModel | ( | char | sep = '\t' |
) | const |
Returns a text row of strings separated by sep (default TAB), to be written to a file that can be uploaded.
References cds, Range::direction(), genomicId(), getId(), getName(), Range::largerEnd(), Noschain::numberOfRanges(), Range::smallerEnd(), and Noschain::startEnd().
| ostream & GenModel::printAllmod | ( | ostream & | ous, | |
| char | sep = '\t' | |||
| ) | const |
Produces a row in the allmodels schema format (columns shown as Field | Type | Null | Key):
| method | varchar(90) | NO | PRI |
| id | int(11) | NO | PRI |
| name | varchar(250) | NO | MUL |
| chrom | varchar(50) | YES | MUL |
| strand | char(1) | YES | |
| fstart | int(11) | YES | |
| fend | int(11) | YES | |
| cdsstart | int(11) | YES | |
| cdsend | int(11) | YES | |
| sfcount | int(11) | YES | |
| sfstarts | text | YES | |
| sfends | text | YES | |
Not this one: | proteinid | int(11) | YES | MUL | The object does not store this entry!
References cds, Range::direction(), gid, Range::largerEnd(), name, Noschain::numberOfRanges(), Range::smallerEnd(), Noschain::startEnd(), track, and trackid.
| bool GenModel::operator== | ( | const GenModel & | gm | ) | const [inline] |
References cds, and Noschain::operator==().
| bool GenModel::sameExons | ( | const GenModel & | gm | ) | const [inline] |
References Noschain::operator==().
| void GenModel::makeProtein | ( | const string & | gs | ) |
| const string& GenModel::pepstr | ( | ) | const [inline] |
| bool GenModel::hasBegin | ( | ) | const [inline] |
References hasstart.
| bool GenModel::hasEnd | ( | ) | const [inline] |
References hasstop.
string GenModel::gid [private] |
Genomic identifier, usually a string for convenience; a template would be better. Identifies which genomic DNA owns this model. For a complete genome, this is usually the chromosome number, such as Chrom1 or chrom_2, with some variations.
Referenced by genomicId(), printAllmod(), and show().
int GenModel::id [private] |
A unique identifier for this object in some database table, usually a primary key. We use an integer type. This is the id column of the allmodels table.
Referenced by getId().
string GenModel::name [private] |
Usually a human-readable name that is also unique in some database table, but not unique in the JGI database!
Referenced by getName(), modelName(), printAllmod(), and show().
Range GenModel::cds [private] |
Where the CDS starts and ends. This should be in the same direction as the model. This information is redundant; it is kept for convenience. This is the genomic position, not the RNA position.
Referenced by CDS(), CDSRange(), FivePrimeUTR(), FivePrimeUTRLength(), GenModel(), growEnd(), operator==(), printAllmod(), setEnd(), show(), ThreePrimeUTR(), ThreePrimeUTRLength(), toJGIModel(), toSQLString(), toString(), and valid().
int GenModel::cdsphase [private] |
0, 1, or 2 for start of translation within the range of CDS
Referenced by makeProtein(), toSQLString(), toString(), and valid().
int GenModel::numstop [private] |
record the number of internal stop codons
Referenced by valid().
bool GenModel::hasstart [private] |
Referenced by hasBegin(), toSQLString(), toString(), and valid().
bool GenModel::hasstop [private] |
Referenced by hasEnd(), toSQLString(), toString(), and valid().
Protein* GenModel::pep [private] |
Referenced by makeProtein(), pepstr(), toSQLString(), toString(), valid(), and ~GenModel().
string GenModel::track [private] |
For tracking the source; from the JGI database it is featureTable.
Referenced by printAllmod(), toSQLString(), and toString().
int GenModel::trackid [private] |
same as featureId
Referenced by printAllmod(), toSQLString(), and toString().
const int GenModel::minModelLen = 100 [static, private] |
const int GenModel::minCDSLen = 80 [static, private] |
shortest CDS to be considered to be valid model
Referenced by valid().
1.5.6