caiwe.cpp File Reference

#include <iostream>
#include <fstream>
#include "RNAModel.h"
#include "bioseq.h"
#include <cstring>
#include <queue>
#include "stddev.h"

Classes

class  Progparam
class  compfirst

Functions

void readAbinitioModel (map< string, vector< mRNAModel * > * > &mod, const string &file, const map< string, string > &gstore)
double readESTModel (map< string, vector< ESTAssembly * > * > &mod, const string &file, const map< string, string > &gstore, bool usepartial=true)
void releaseESTModel (map< string, vector< ESTAssembly * > * > &mod)
void readJGIModel (map< string, vector< mRNAModel * > * > &mod, const string &file, const map< string, string > &gstore)
void releaseJGIModel (map< string, vector< JGIModel * > * > &mod)
void releaseAbinitioModel (map< string, vector< mRNAModel * > * > &mod)
void combineModels (const vector< mRNAModel * > &v1, const vector< ESTAssembly * > &v2, vector< ESTAssembly * > &goodEST, vector< ESTAssembly * > &leftoverEST, vector< mRNAModel * > &leftoverPredict, set< mRNAModelUpdate *, lessChainPtr > &updated, vector< ESTAssembly * > &absorbed, ostream &oulog, const Progparam &par)
void writeGenuineESTModel (const vector< ESTAssembly * > &mod, ostream &ous, ostream &bad)
void mixAbwithEST (vector< mRNAModel * > &abi, vector< ESTAssembly * > &est, vector< ESTAssembly * > &goodest, vector< ESTAssembly * > &badest, set< mRNAModelUpdate *, lessChainPtr > &updated, vector< ESTAssembly * > &absorbed, ostream &log, const Progparam &par)
void writeModelId (const vector< ESTAssembly * > &mod, ostream &ous)
void writeToFiles (const set< mRNAModelUpdate *, lessChainPtr > &mixedmod, ostream &ousmod, ostream &ousex, ostream &oustrack, ostream &ousrna, ostream &ousprt)
void readId (set< int > &ids, const string &file)
void usage ()
template<class T>
void collect3UTRPattern (map< string, int > &patcnt, const vector< T * > &mod)
template<class Iterator>
void collect3UTRPattern (map< string, int > &patcnt, Iterator beg, Iterator end, set< Noschain > &uniq)
void displayUTRPattern (const map< string, int > &patcnt)
int main (int argc, char *argv[])
void sortESTIntoGoodBad (const vector< ESTAssembly * > &input, vector< ESTAssembly * > &good, vector< ESTAssembly * > &bad, const Progparam &par)
bool updateCompatible (const mRNAModel *m1, const mRNAModel *m2)
bool incompatible (const mRNAModel *m1, const mRNAModel *m2)
bool updatedWorse (const mRNAModel *old, const mRNAModel *upd, ostream &log)
mRNAModelUpdate * updateOnePredicted (mRNAModel *&predic, ESTAssembly *est, ostream &log)
bool updateOneUpdated (mRNAModelUpdate *&updm, ESTAssembly *est, ostream &log)
void updatePredictWithEST (vector< mRNAModel * > &predic, vector< ESTAssembly * > &est, vector< ESTAssembly * > &good, vector< ESTAssembly * > &bad, vector< ESTAssembly * > &absorb, set< mRNAModelUpdate *, lessChainPtr > &updated, ostream &log)
int maxDistanceOfModels (const vector< mRNAModel * > &mods)
void addfiletag (string &str, const string &tag)

Function Documentation

void addfiletag ( string &  str,
const string &  tag 
)

Referenced by Progparam::tagFileNames().

template<class Iterator>
void collect3UTRPattern ( map< string, int > &  patcnt,
Iterator  beg,
Iterator  end,
set< Noschain > &  uniq 
) [inline]

template<class T>
void collect3UTRPattern ( map< string, int > &  patcnt,
const vector< T * > &  mod 
) [inline]

Referenced by main().

void combineModels ( const vector< mRNAModel * > &  v1,
const vector< ESTAssembly * > &  v2,
vector< ESTAssembly * > &  goodEST,
vector< ESTAssembly * > &  leftoverEST,
vector< mRNAModel * > &  leftoverPredict,
set< mRNAModelUpdate *, lessChainPtr > &  updated,
vector< ESTAssembly * > &  absorbed,
ostream &  oulog,
const Progparam &  par 
)

This is the key method. Both input vectors are already sorted; this function needs to merge them so that the result is completely sorted. This is a flattened merge sort.

Parameters:
v1 Vector of pointers to predicted models.
v2 Vector of pointers to EST assemblies. Produces three outputs:
goodEST ESTs that either contain ab initio models, or don't overlap with any ab initio models and are good models by themselves.
leftoverEST ESTs that are not good models by themselves and don't support any existing ab initio models.
leftoverPredict AIH models not overlapping any EST models.
updated Ab initio models that were merged with EST assemblies.
This function uses pointer operations to improve performance.

References EST, Range::fuse(), mixAbwithEST(), and sortESTIntoGoodBad().

Referenced by main().

void displayUTRPattern ( const map< string, int > &  patcnt  ) 

Referenced by main().

bool incompatible ( const mRNAModel *  m1,
const mRNAModel *  m2 
)

int main ( int  argc,
char *  argv[] 
)

int maxDistanceOfModels ( const vector< mRNAModel * > &  mods  ) 

Referenced by mixAbwithEST().

void mixAbwithEST ( vector< mRNAModel * > &  abi,
vector< ESTAssembly * > &  est,
vector< ESTAssembly * > &  goodest,
vector< ESTAssembly * > &  badest,
set< mRNAModelUpdate *, lessChainPtr > &  updated,
vector< ESTAssembly * > &  absorbed,
ostream &  log,
const Progparam &  par 
)

This function updates ab initio models with EST assemblies. Different predicted models may become identical after the update, so we need to check for identity with a set container.

Parameters:
updated is the result. There could be more updated models than input AIH models if there is branching in the updates (multiple ESTs give incompatible updated models).
This function calls updatePredictWithEST() after doing some further filtering of overlapping EST/predicted models.

References maxDistanceOfModels(), sortESTIntoGoodBad(), and updatePredictWithEST().

Referenced by combineModels().

void readAbinitioModel ( map< string, vector< mRNAModel * > * > &  mod,
const string &  file,
const map< string, string > &  gstore 
)

I cannot make const map<> why? This function reads a text dump of a table created by the nrep program. It has the following columns: id, genomic, exons, cdsb, cdse. I should make a function that can read the JGI format, so that this program can update any track. Simple reading, without any optimization of the CDS; that is done only at the mixing stage.

References ifstream().

Referenced by main().

double readESTModel ( map< string, vector< ESTAssembly * > * > &  mod,
const string &  file,
const map< string, string > &  gstore,
bool  usepartial = true 
)

Assumes that the models are sorted according to coordinates regardless of direction. This requires that the output function of combest does the sorting.

Parameters:
mod is a map containing chromosome or genomicid => [ESTAssembly*] All of the models from the same chromosome are stored in a vector of pointers.
gstore is the genomic store for constructing the sequence part of the Model Object. file contains ESTAssemblyid models. This reader will ignore the idlist part, and instead will only take the numbest column.
usepartial The default is to use relative partial combest models to update predicted models. Returns the mean maxprofh.
Uses combest_models.tab, which has the following columns: modelid congid genomicid begin end gCDSsb gCDSe numest numexon exonLength exons CDSstart CDSend RNAseq pepseq. It uses "combest_partial.tab" to filter out relative partial models. This should become a parameter.

References mRNAModel::getFrame(), stddev::getMean(), RNAModel::getOid(), mRNAModel::getProtein(), ifstream(), readId(), mRNAModel::RNACDSRange(), RNAModel::RNAString(), and ESTAssembly::show().

Referenced by main().

void readId ( set< int > &  ids,
const string &  file 
)

Reads ids from a one-column file into a container for fast lookup later.

References ifstream().

Referenced by readESTModel().

void readJGIModel ( map< string, vector< mRNAModel * > * > &  mod,
const string &  file,
const map< string, string > &  gstore 
)

Adds the ability to check for models with internal stops. These are usually genewise models with frame-shifts. The objects are constructed as JGIModel*, but upcast to mRNAModel*.

References ifstream(), name, mRNAModel::numberOfInternalStops(), mRNAModel::setLongestCDSAndProtein(), and JGIModel::valid().

Referenced by main().

void releaseAbinitioModel ( map< string, vector< mRNAModel * > * > &  mod  ) 

Referenced by main().

void releaseESTModel ( map< string, vector< ESTAssembly * > * > &  mod  ) 

Referenced by main().

void releaseJGIModel ( map< string, vector< JGIModel * > * > &  mod  ) 

This program does not seem to use this function.

void sortESTIntoGoodBad ( const vector< ESTAssembly * > &  input,
vector< ESTAssembly * > &  good,
vector< ESTAssembly * > &  bad,
const Progparam &  par 
)

Separates input into good and bad vectors. Will add to both good and bad. Good models are defined as genuine() or with average EST profile > 100. This is a very high standard. I am not sure what a good number is. There are possible non-coding RNA models or very short proteins that are encoded by highly expressed transcripts. If we use the longest ORF, we may guess it wrong. In the future I need to use some sort of gene predictor.

References Progparam::meanProfmaxh.

Referenced by combineModels(), and mixAbwithEST().

bool updateCompatible ( const mRNAModel *  m1,
const mRNAModel *  m2 
)

bool updatedWorse ( const mRNAModel *  old,
const mRNAModel *  upd,
ostream &  log 
)

mRNAModelUpdate* updateOnePredicted ( mRNAModel *&  predic,
ESTAssembly *  est,
ostream &  log 
)

bool updateOneUpdated ( mRNAModelUpdate *&  updm,
ESTAssembly *  est,
ostream &  log 
)

void updatePredictWithEST ( vector< mRNAModel * > &  predic,
vector< ESTAssembly * > &  est,
vector< ESTAssembly * > &  good,
vector< ESTAssembly * > &  bad,
vector< ESTAssembly * > &  absorb,
set< mRNAModelUpdate *, lessChainPtr > &  updated,
ostream &  log 
)

This is the core model update part. The compatibility check could add a fuzzy margin in the future. Inputs are predic and est.

Parameters:
predic predicted models
est ESTAssembly pointer vector. EST models will be divided into three categories:
good Good by itself, not used to update any predicted model
bad Failed the genuine test, and not used up in update
absorb Used to update predicted models. Produces the result of mixed models (predicted+est): EST-updated predicted models. log is a log file stream for debugging this function. This should be removed from the production version.

References mRNAModelUpdate::show(), updateOnePredicted(), and updateOneUpdated().

Referenced by mixAbwithEST().

void usage (  ) 

given gmap summary format, this program converts it into combest archive format (*.car)

It can be used as a pipe, or given specific file names.

this is a helper program to count distinct estids in the ESTId column of the combest result. So it is the actual number of ESTs mapped. This could be lower for deeply covered genome because of the coverage depth-dependent filtering.

given gmap summary format, this program converts it into combest archive format (*.car)

It can be used as a pipe, or given specific file names.

this is a helper program to count distinct estids in the ESTId column of the combest result. So it is the actual number of ESTs mapped. This could be lower for deeply covered genome because of the coverage depth-dependent filtering.

void writeGenuineESTModel ( const vector< ESTAssembly * > &  mod,
ostream &  ous,
ostream &  bad 
)

void writeModelId ( const vector< ESTAssembly * > &  mod,
ostream &  ous 
)

Referenced by main().

void writeToFiles ( const set< mRNAModelUpdate *, lessChainPtr > &  mixedmod,
ostream &  ousmod,
ostream &  ousex,
ostream &  oustrack,
ostream &  ousrna,
ostream &  ousprt 
)

Only writes a set data structure. It uses the writetab() method from the RNAModel series of objects. This method is not generic.

Referenced by main().


Generated on Wed Aug 10 11:57:02 2011 for Softwares from Orpara by  doxygen 1.5.6