#include <iostream>
#include <fstream>
#include "RNAModel.h"
#include "bioseq.h"
#include <cstring>
#include <queue>
#include "stddev.h"

Classes | |
| class | Progparam |
| class | compfirst |
Functions | |
| void | readAbinitioModel (map< string, vector< mRNAModel * > * > &mod, const string &file, const map< string, string > &gstore) |
| double | readESTModel (map< string, vector< ESTAssembly * > * > &mod, const string &file, const map< string, string > &gstore, bool usepartial=true) |
| void | releaseESTModel (map< string, vector< ESTAssembly * > * > &mod) |
| void | readJGIModel (map< string, vector< mRNAModel * > * > &mod, const string &file, const map< string, string > &gstore) |
| void | releaseJGIModel (map< string, vector< JGIModel * > * > &mod) |
| void | releaseAbinitioModel (map< string, vector< mRNAModel * > * > &mod) |
| void | combineModels (const vector< mRNAModel * > &v1, const vector< ESTAssembly * > &v2, vector< ESTAssembly * > &goodEST, vector< ESTAssembly * > &leftoverEST, vector< mRNAModel * > &leftoverPredict, set< mRNAModelUpdate *, lessChainPtr > &updated, vector< ESTAssembly * > &absorbed, ostream &oulog, const Progparam &par) |
| void | writeGenuineESTModel (const vector< ESTAssembly * > &mod, ostream &ous, ostream &bad) |
| void | mixAbwithEST (vector< mRNAModel * > &abi, vector< ESTAssembly * > &est, vector< ESTAssembly * > &goodest, vector< ESTAssembly * > &badest, set< mRNAModelUpdate *, lessChainPtr > &updated, vector< ESTAssembly * > &absorbed, ostream &log, const Progparam &par) |
| void | writeModelId (const vector< ESTAssembly * > &mod, ostream &ous) |
| void | writeToFiles (const set< mRNAModelUpdate *, lessChainPtr > &mixedmod, ostream &ousmod, ostream &ousex, ostream &oustrack, ostream &ousrna, ostream &ousprt) |
| void | readId (set< int > &ids, const string &file) |
| void | usage () |
| template<class T> | |
| void | collect3UTRPattern (map< string, int > &patcnt, const vector< T * > &mod) |
| template<class Iterator> | |
| void | collect3UTRPattern (map< string, int > &patcnt, Iterator beg, Iterator end, set< Noschain > &uniq) |
| void | displayUTRPattern (const map< string, int > &patcnt) |
| int | main (int argc, char *argv[]) |
| void | sortESTIntoGoodBad (const vector< ESTAssembly * > &input, vector< ESTAssembly * > &good, vector< ESTAssembly * > &bad, const Progparam &par) |
| bool | updateCompatible (const mRNAModel *m1, const mRNAModel *m2) |
| bool | incompatible (const mRNAModel *m1, const mRNAModel *m2) |
| bool | updatedWorse (const mRNAModel *old, const mRNAModel *upd, ostream &log) |
| mRNAModelUpdate * | updateOnePredicted (mRNAModel *&predic, ESTAssembly *est, ostream &log) |
| bool | updateOneUpdated (mRNAModelUpdate *&updm, ESTAssembly *est, ostream &log) |
| void | updatePredictWithEST (vector< mRNAModel * > &predic, vector< ESTAssembly * > &est, vector< ESTAssembly * > &good, vector< ESTAssembly * > &bad, vector< ESTAssembly * > &absorb, set< mRNAModelUpdate *, lessChainPtr > &updated, ostream &log) |
| int | maxDistanceOfModels (const vector< mRNAModel * > &mods) |
| void | addfiletag (string &str, const string &tag) |
| void addfiletag | ( | string & | str, | |
| const string & | tag | |||
| ) |
Referenced by Progparam::tagFileNames().
| void collect3UTRPattern | ( | map< string, int > & | patcnt, | |
| Iterator | beg, | |||
| Iterator | end, | |||
| set< Noschain > & | uniq | |||
| ) | [inline] |
| void collect3UTRPattern | ( | map< string, int > & | patcnt, | |
| const vector< T * > & | mod | |||
| ) | [inline] |
Referenced by main().
| void combineModels | ( | const vector< mRNAModel * > & | v1, | |
| const vector< ESTAssembly * > & | v2, | |||
| vector< ESTAssembly * > & | goodEST, | |||
| vector< ESTAssembly * > & | leftoverEST, | |||
| vector< mRNAModel * > & | leftoverPredict, | |||
| set< mRNAModelUpdate *, lessChainPtr > & | updated, | |||
| vector< ESTAssembly * > & | absorbed, | |||
| ostream & | oulog, | |||
| const Progparam & | par | |||
| ) |
This is the key method. Both input vectors are already sorted; they need to be merged, after which the result will be completely sorted. This is a flattened merge sort.
| v1 | Vector of pointers to predicted models. | |
| v2 | Vector of pointers to EST assemblies. Produces three outputs: | |
| goodEST | ESTs that either contain ab initio models or don't overlap with any ab initio models and are good models by themselves. | |
| leftoverEST | ESTs that are not good models by themselves and don't support any existing ab initio models. | |
| leftoverPredict | AIH models not overlapping any EST models. | |
| updated | Ab initio models that were merged with EST assemblies. |
References EST, Range::fuse(), mixAbwithEST(), and sortESTIntoGoodBad().
Referenced by main().
| void displayUTRPattern | ( | const map< string, int > & | patcnt | ) |
Referenced by main().
References mRNAModel::CDSRange(), Range::distance(), Noschain::numberOfRanges(), mRNAModel::proteinLength(), and Range::sameDirection().
Referenced by updateOnePredicted(), and updateOneUpdated().
| int main | ( | int | argc, | |
| char * | argv[] | |||
| ) |
References Progparam::absorbedf, Progparam::badf, Progparam::codontable, collect3UTRPattern(), combineModels(), displayUTRPattern(), Progparam::estmodf, RNAModel::exonheader, Progparam::genomicf, Progparam::genuinef, mRNAModel::jgiModelCol, mRNAModelUpdate::jgiModelColumns(), mRNAModel::jgiProteinCol, mRNAModelUpdate::jgiProteinColumns(), mRNAModel::jgiTranscriptCol, mRNAModelUpdate::jgiTranscriptColumns(), loadFastaIntoMap(), Progparam::meanProfmaxh, mRNAModel::modelheader, mRNAModelUpdate::modelheader, Progparam::noestf, Progparam::predictmodf, Progparam::predictType, readAbinitioModel(), readESTModel(), readJGIModel(), releaseAbinitioModel(), releaseESTModel(), DNA::setCodonTable(), Progparam::tagFileNames(), Progparam::updatedexonf, Progparam::updatedJGIproteinf, Progparam::updatedJGItrackf, Progparam::updatedJGItranscriptf, Progparam::updatedmodelf, usage, writeModelId(), and writeToFiles().
| int maxDistanceOfModels | ( | const vector< mRNAModel * > & | mods | ) |
Referenced by mixAbwithEST().
| void mixAbwithEST | ( | vector< mRNAModel * > & | abi, | |
| vector< ESTAssembly * > & | est, | |||
| vector< ESTAssembly * > & | goodest, | |||
| vector< ESTAssembly * > & | badest, | |||
| set< mRNAModelUpdate *, lessChainPtr > & | updated, | |||
| vector< ESTAssembly * > & | absorbed, | |||
| ostream & | log, | |||
| const Progparam & | par | |||
| ) |
This function updates ab initio models with EST assemblies. Different predicted models may become identical after the update, so we need to check for identity with a set container.
| updated | is the result. There could be more updated models than input AIH models if there is branching in the updates (multiple ESTs giving incompatible updated models). |
References maxDistanceOfModels(), sortESTIntoGoodBad(), and updatePredictWithEST().
Referenced by combineModels().
| void readAbinitioModel | ( | map< string, vector< mRNAModel * > * > & | mod, | |
| const string & | file, | |||
| const map< string, string > & | gstore | |||
| ) |
I cannot make const map<>; why? This function reads a text dump of a table created by the nrep program. It has the following columns: id, genomic, exons, cdsb, cdse. I should make a function that can read the JGI format, so that this program can update any track. Simple reading, without any optimization of the CDS; that is done only at the mixing stage.
References ifstream().
Referenced by main().
| double readESTModel | ( | map< string, vector< ESTAssembly * > * > & | mod, | |
| const string & | file, | |||
| const map< string, string > & | gstore, | |||
| bool | usepartial = true | |||
| ) |
Assumes that the models are sorted according to coordinates regardless of direction. This requires that the output function of combest does the sorting.
| mod | is a map containing chromosome or genomic id => [ESTAssembly*]. All of the models from the same chromosome are stored in a vector of pointers. | |
| gstore | is the genomic store for constructing the sequence part of the Model object. The file contains ESTAssemblyid models. This reader will ignore the idlist part and instead will only take the numbest column. | |
| usepartial | The default is to use relative partial combest models to update predicted models. Returns: mean maxprofh. |
References mRNAModel::getFrame(), stddev::getMean(), RNAModel::getOid(), mRNAModel::getProtein(), ifstream(), readId(), mRNAModel::RNACDSRange(), RNAModel::RNAString(), and ESTAssembly::show().
Referenced by main().
| void readId | ( | set< int > & | ids, | |
| const string & | file | |||
| ) |
Reads ids from a one-column file into a container for fast lookup later.
References ifstream().
Referenced by readESTModel().
| void readJGIModel | ( | map< string, vector< mRNAModel * > * > & | mod, | |
| const string & | file, | |||
| const map< string, string > & | gstore | |||
| ) |
Adds the ability to check for models with internal stops. These are usually genewise models with frame-shifts. The objects are constructed as JGIModel*, but upcast to mRNAModel*.
References ifstream(), name, mRNAModel::numberOfInternalStops(), mRNAModel::setLongestCDSAndProtein(), and JGIModel::valid().
Referenced by main().
| void releaseESTModel | ( | map< string, vector< ESTAssembly * > * > & | mod | ) |
Referenced by main().
| void releaseJGIModel | ( | map< string, vector< JGIModel * > * > & | mod | ) |
This program does not seem to use this function.
| void sortESTIntoGoodBad | ( | const vector< ESTAssembly * > & | input, | |
| vector< ESTAssembly * > & | good, | |||
| vector< ESTAssembly * > & | bad, | |||
| const Progparam & | par | |||
| ) |
Separates the input into good and bad vectors. Will add to both good and bad. Good models are defined as genuine() or with an average EST profile > 100. This is a very high standard. I am not sure what a good number is. There are possibly non-coding RNA models or very short proteins that are encoded by highly expressed transcripts. If we use the longest ORF, we may guess it wrong. In the future I need to use some sort of gene predictor.
References Progparam::meanProfmaxh.
Referenced by combineModels(), and mixAbwithEST().
| mRNAModelUpdate* updateOnePredicted | ( | mRNAModel *& | predic, | |
| ESTAssembly * | est, | |||
| ostream & | log | |||
| ) |
predic could be set to NULL if it is contained inside the EST model.
References mRNAModelUpdate::addESTCover(), mRNAModelUpdate::append(), Noschain::exonContain(), Noschain::exonOverlapLength(), RNAModel::geneId(), incompatible(), Range::overlay(), RNAModel::setGeneId(), mRNAModel::show(), mRNAModelUpdate::show(), and updatedWorse().
Referenced by updatePredictWithEST().
| bool updateOneUpdated | ( | mRNAModelUpdate *& | updm, | |
| ESTAssembly * | est, | |||
| ostream & | log | |||
| ) |
| updm | updated model pointer, should not be zero |
References mRNAModelUpdate::addESTCover(), mRNAModelUpdate::append(), Noschain::exonContain(), Noschain::exonOverlapLength(), RNAModel::geneId(), incompatible(), Range::overlay(), RNAModel::setGeneId(), ESTAssembly::show(), mRNAModelUpdate::show(), and updatedWorse().
Referenced by updatePredictWithEST().
| void updatePredictWithEST | ( | vector< mRNAModel * > & | predic, | |
| vector< ESTAssembly * > & | est, | |||
| vector< ESTAssembly * > & | good, | |||
| vector< ESTAssembly * > & | bad, | |||
| vector< ESTAssembly * > & | absorb, | |||
| set< mRNAModelUpdate *, lessChainPtr > & | updated, | |||
| ostream & | log | |||
| ) |
This is the core model update part. The compatibility check could add a fuzzy margin in the future. Inputs are predic and est.
| predic | predicted models | |
| est | ESTAssembly pointer vector. EST models will be divided into three categories: | |
| good | Good by itself, not used to update any predicted model | |
| bad | Failed the genuine test, and not used up in update | |
| absorb | Used to update predicted models. Produces the result of mixed models: predicted+est, EST-updated predicted models. log is a log file stream for debugging this function. This should be removed from the production version. |
References mRNAModelUpdate::show(), updateOnePredicted(), and updateOneUpdated().
Referenced by mixAbwithEST().
| void usage | ( | ) |
Given the gmap summary format, this program converts it into the combest archive format (*.car).
It can be used as a pipe, or given specific file names.
This is a helper program to count distinct estids in the ESTId column of the combest result. So it is the actual number of ESTs mapped. This could be lower for a deeply covered genome because of the coverage depth-dependent filtering.
Given the gmap summary format, this program converts it into the combest archive format (*.car).
It can be used as a pipe, or given specific file names.
This is a helper program to count distinct estids in the ESTId column of the combest result. So it is the actual number of ESTs mapped. This could be lower for a deeply covered genome because of the coverage depth-dependent filtering.
| void writeGenuineESTModel | ( | const vector< ESTAssembly * > & | mod, | |
| ostream & | ous, | |||
| ostream & | bad | |||
| ) |
| void writeModelId | ( | const vector< ESTAssembly * > & | mod, | |
| ostream & | ous | |||
| ) |
Referenced by main().
| void writeToFiles | ( | const set< mRNAModelUpdate *, lessChainPtr > & | mixedmod, | |
| ostream & | ousmod, | |||
| ostream & | ousex, | |||
| ostream & | oustrack, | |||
| ostream & | ousrna, | |||
| ostream & | ousprt | |||
| ) |
1.5.6