#include "GenModel.h"
#include "TranscriptExon.h"
#include <mysql++.h>
#include <dbinfo.h>
#include <fstream>
#include <map>
#include <set>
#include "hatrees.h"
#include "ModelFactory.h"
#include "strformat.h"
#include <cstdio>
Classes | |
| class | Progparam |
Functions | |
| vector< GenModel * > | removePregnantModels (vector< GenModel * > &mds, ostream &ous, vector< GenModel * > &preg) |
| vector< GenModel * > | removeChimeraModels (vector< GenModel * > &mds, ostream &ous, vector< GenModel * > &chim, const Progparam &par) |
| vector< GenModel * > | removeLowCDSModels (vector< GenModel * > &mds, ostream &ous, vector< GenModel * > &lwcds) |
| vector< GenModel * > | pickGoodModel (vector< GenModel * > &mds, ostream &ouslog, vector< GenModel * > &bad, const Progparam &par) |
| vector< GenModel * > | removeIdentical (const vector< GenModel * > &mds) |
| void | showRow (const Row &row, ostream &ous) |
| vector< GenModel * > | buildGeneCluster (const vector< GenModel * > &mds, map< int, int > &memrep, ostream &logos) |
| void | checkUTRChimera (const GenModel *i, const GenModel *j, RangeChain &uc) |
| void | memoryRelease (vector< GenModel * > &m) |
| void | writeModel (vector< GenModel * > &srcmod, ostream &model, ostream &exon) |
| void | storeModel (const vector< GenModel * > &mod, Query &mq, const string &modtab, const string &extab) |
| void | createModelTable (Query &q, const string &tab) |
| void | createGenericTable (Query &q, const string &tab) |
| void | createExonTable (Query &q, const string &tab) |
| void | constructJGIModel (const string &jmod, const string &mod, const string &ex, Connection &conn, const string &label, const string &allmod, int version, map< string, string > &trackfeat) |
| void | storeModelCluster (const string &modcltab, Connection &conn, const map< int, int > &modcl, const string &modtab) |
| void | colorRepModels (Connection &conn, const string &mod, const string &gen) |
| void | constructJGIGene (Connection &conn, const string &tab, const string &jgimodel, const string &modcltab, int version) |
| void | checkAndBuildInput (const Progparam &par, Connection &conn) |
| void | writeResultToFile (Progparam &par, ModelFactory &modfac, ostream &ouslog) |
| void | writeResultToDatabase (Progparam &par, ModelFactory &modfac, Connection &conn2, ostream &OU) |
| void | createModelClusterTable (Connection &conn, const string &tabname) |
| void | createModelAndExonTables (Connection &conn, Progparam &par) |
| void | constructDerivedTables (Connection &conn, Progparam &par) |
| void | loadPrimaryTables (Connection &conn, const Progparam &par) |
| void | file2table (const string &file, const string &table, const Progparam &par) |
| void | addSingletonGene (Connection &conn, const Progparam &par) |
| void | removeDoneFile () |
| void | usage (const Progparam &par) |
| string | getlinktab (Connection &conn) |
| int | main (int argc, char *argv[]) |
Variables | |
| const int | chimera_intron_len = 500 |
| const int | minutr_len = 120 |
| const int | min_CDS_len = 1200 |
| const int | commonex_len = 170 |
| int | exonid = 1 |
| void addSingletonGene | ( | Connection & | conn, | |
| const Progparam & | par | |||
| ) |
References Progparam::getGoodModelTable(), and Progparam::getModelClusterTable().
Referenced by main().
| vector< GenModel * > buildGeneCluster | ( | const vector< GenModel * > & | mds, | |
| map< int, int > & | memrep, | |||
| ostream & | logos | |||
| ) |
member => rep, where rep is the member with the longest CDS; if the CDS lengths are identical, the longest exon length is used instead.
Only genes with more than one model are output. The rest are single exon genes. The user needs to figure them out with set operations.
| mds | input vector of model pointers | |
| memrep | output. map of member -> rep. Rep has the longest CDS. |
| memrep | is the output or result of this function. It contains the member => rep relationship. |
References Noschain::exonLength(), hatrees< T >::getCluster(), good, Noschain::numberOfRanges(), and hatrees< T >::readFromMap().
Referenced by writeResultToDatabase(), and writeResultToFile().
| void checkAndBuildInput | ( | const Progparam & | par, | |
| Connection & | conn | |||
| ) |
Ideally, the input table should be made beforehand. If it is not available, this function will generate a table from all existing tracks.
References Progparam::database, Progparam::getAllmodelTable(), and Progparam::host.
Referenced by main().
| void checkUTRChimera | ( | const GenModel * | i, | |
| const GenModel * | j, | |||
| RangeChain & | uc | |||
| ) |
References RangeChain::add(), chimera_intron_len, Range::contain(), Range::copyShrinkBothEnds(), Noschain::exonLength(), GenModel::FivePrimeUTR(), Noschain::intronsInside(), length, Range::length(), max, Noschain::maxIntron(), minutr_len, Noschain::numberOfRanges(), Range::overlay(), and GenModel::ThreePrimeUTR().
Referenced by removeChimeraModels().
| void colorRepModels | ( | Connection & | conn, | |
| const string & | mod, | |||
| const string & | gen | |||
| ) |
Referenced by constructDerivedTables().
| void constructDerivedTables | ( | Connection & | conn, | |
| Progparam & | par | |||
| ) |
use primary tables produced by filtering to build JGI tracks: Good Model, Bad Model, and Gene for Good Model
References Progparam::buildgene, colorRepModels(), constructJGIGene(), constructJGIModel(), Progparam::getAllmodelTable(), Progparam::getBadExonTable(), Progparam::getBadModelTable(), Progparam::getGoodExonTable(), Progparam::getGoodModelTable(), Progparam::getJGIBadModelTable(), Progparam::getJGIGoodModelTable(), Progparam::getModelClusterTable(), and Progparam::version.
Referenced by main().
| void constructJGIGene | ( | Connection & | conn, | |
| const string & | tab, | |||
| const string & | jgimodel, | |||
| const string & | modcltab, | |||
| int | version | |||
| ) |
| void constructJGIModel | ( | const string & | jmod, | |
| const string & | mod, | |||
| const string & | ex, | |||
| Connection & | conn, | |||
| const string & | label, | |||
| const string & | allmod, | |||
| int | version, | |||
| map< string, string > & | trackfeat | |||
| ) |
Right now label is good or bad
References getlinktab(), and itos().
Referenced by constructDerivedTables().
| void createExonTable | ( | Query & | q, | |
| const string & | tab | |||
| ) |
Referenced by createModelAndExonTables().
| void createGenericTable | ( | Query & | q, | |
| const string & | tab | |||
| ) |
Referenced by constructJGIGene().
| void createModelAndExonTables | ( | Connection & | conn, | |
| Progparam & | par | |||
| ) |
References createExonTable(), createModelTable(), Progparam::getBadExonTable(), Progparam::getBadModelTable(), Progparam::getGoodExonTable(), and Progparam::getGoodModelTable().
Referenced by main().
| void createModelClusterTable | ( | Connection & | conn, | |
| const string & | tabname | |||
| ) |
modelid => geneid. Creates a table of two columns; the first one is a primary key.
Referenced by loadPrimaryTables(), and storeModelCluster().
| void createModelTable | ( | Query & | q, | |
| const string & | tab | |||
| ) |
create an empty table tab with the following schema:
id serial primary key,
genomic varchar(48),
name varchar(200),
exons text,
mb integer, me integer, -- model boundary
cdsb integer, cdse integer, -- CDS boundary
cdsphase integer -- 0,1,2
hasstart boolean
hasstop boolean
Referenced by createModelAndExonTables().
| void file2table | ( | const string & | file, | |
| const string & | table, | |||
| const Progparam & | par | |||
| ) |
References Progparam::database, and Progparam::host.
| string getlinktab | ( | Connection & | conn | ) |
This is also defined in gathercaiwegene. In the future this function should be put into a MySQL helper header; for now it is copied and pasted.
| void loadPrimaryTables | ( | Connection & | conn, | |
| const Progparam & | par | |||
| ) |
load Good Model, Bad Model, Gene (model cluster)
References Progparam::buildgene, createModelClusterTable(), Progparam::database, file2table(), Progparam::getAllmodelTable(), Progparam::getBadExonFile(), Progparam::getBadExonTable(), Progparam::getBadModelFile(), Progparam::getBadModelTable(), Progparam::getGoodExonFile(), Progparam::getGoodExonTable(), Progparam::getGoodModelFile(), Progparam::getGoodModelTable(), Progparam::getModelClusterFile(), Progparam::getModelClusterTable(), and Progparam::host.
Referenced by main().
| int main | ( | int | argc, | |
| char * | argv[] | |||
| ) |
References addSingletonGene(), Progparam::allmod, Progparam::buildgene, checkAndBuildInput(), constructDerivedTables(), createModelAndExonTables(), Progparam::database, Progparam::getAllmodelTable(), MysqlDBInfo::getAuthenInfo(), Progparam::getGoodModelFile(), MysqlDBInfo::getPassword(), MysqlDBInfo::getUser(), Progparam::goodmod, Progparam::host, loadPrimaryTables(), Progparam::maxintronlen, removeDoneFile(), Progparam::setVersion(), Progparam::showevery, usage, writeResultToDatabase(), and writeResultToFile().
| void memoryRelease | ( | vector< GenModel * > & | m | ) |
Referenced by writeResultToDatabase(), and writeResultToFile().
| vector< GenModel * > pickGoodModel | ( | vector< GenModel * > & | mds, | |
| ostream & | ouslog, | |||
| vector< GenModel * > & | bad, | |||
| const Progparam & | par | |||
| ) |
Returns a vector of pointers to good models. Note that some models may be contained in other models.
| mds | vector of input model pointers | |
| bad | vector for containing the bad models. This function simply appends to the end of this container without any clearing action. Output for detailed information on why a model is bad (presumably the ouslog stream); this is mainly for debugging purposes. |
References commonex_len, Noschain::exonLength(), good, length, Progparam::maxintronlen, min_CDS_len, Noschain::numberOfRanges(), removeChimeraModels(), removeLowCDSModels(), and removePregnantModels().
Referenced by writeResultToDatabase(), and writeResultToFile().
| vector< GenModel * > removeChimeraModels | ( | vector< GenModel * > & | mds, | |
| ostream & | ous, | |||
| vector< GenModel * > & | chim, | |||
| const Progparam & | par | |||
| ) |
| ous | debug output stream | |
| chim | candidate chimera |
References checkUTRChimera(), Progparam::chimera_contained_cdslen, commonex_len, Noschain::exonLength(), good, Progparam::maxintronlen, min_CDS_len, and Noschain::numberOfRanges().
Referenced by pickGoodModel().
| void removeDoneFile | ( | ) |
Referenced by main().
| vector< GenModel * > removePregnantModels | ( | vector< GenModel * > & | mds, | |
| ostream & | ous, | |||
| vector< GenModel * > & | preg | |||
| ) |
Always returns a valid model pointer, unless it runs out of input from res.
| mds | input models as a vector of pointers. Details for logged information, for debug stage usage (presumably the ous stream). The output vector of pointers to pregnant models (presumably preg); this function will simply append to it. |
References Range::contain(), good, and Range::length().
Referenced by pickGoodModel().
| void showRow | ( | const Row & | row, | |
| ostream & | ous | |||
| ) |
| void storeModel | ( | const vector< GenModel * > & | mod, | |
| Query & | mq, | |||
| const string & | modtab, | |||
| const string & | extab | |||
| ) |
This is equivalent to writeModel(), but stores the information directly into database tables. This method is 100x slower than writeModel().
| mod | input gene model as vector of pointers | |
| mq | query object used to insert result | |
| modtab | model table name | |
| extab | exon table name |
Referenced by writeResultToDatabase().
| void storeModelCluster | ( | const string & | modcltab, | |
| Connection & | conn, | |||
| const map< int, int > & | modcl, | |||
| const string & | modtab | |||
| ) |
| modcl | is the input of member -> rep table. | |
| modcltab | is the output table name. | |
| modtab | was used to generate singleton clusters by set difference operation: modtab - modcltab (clusters with more than one member) |
| modcltab | model cluster table holding genes with more than one member; no singletons. This is the database table name. | |
| modcl | model cluster information, member => rep; rep is the id of one of the members. | |
| modtab | model table name used to extract singleton genes by set operation. Input: modcl; output: modcltab. |
References createModelClusterTable().
Referenced by writeResultToDatabase().
| void usage | ( | const Progparam & | par | ) |
| void writeModel | ( | vector< GenModel * > & | srcmod, | |
| ostream & | model, | |||
| ostream & | exon | |||
| ) |
this is much faster than the database interactive method storeModel()
| srcmod | Input models as a vector of pointers to GenModel. | |
| model | The output stream for models with columns: (id, genomic, name, exons, mb, me, cdsb, cdse, cdsphase, hasstart, hasstop, pep). Should have the same columns as the storeModel() method. | |
| exon | Exon output stream. |
References exonid.
Referenced by writeResultToFile().
| void writeResultToDatabase | ( | Progparam & | par, | |
| ModelFactory & | modfac, | |||
| Connection & | conn2, | |||
| ostream & | OU | |||
| ) |
References buildGeneCluster(), Range::combine(), GenModel::genomicId(), Progparam::getBadExonTable(), Progparam::getBadModelTable(), Progparam::getGoodExonTable(), Progparam::getGoodModelTable(), Progparam::getModelClusterTable(), Progparam::goodmod, memoryRelease(), GenModel::modelName(), ModelFactory::next(), Range::overlay(), pickGoodModel(), removeIdentical(), GenModel::show(), Progparam::showevery, storeModel(), and storeModelCluster().
Referenced by main().
| void writeResultToFile | ( | Progparam & | par, | |
| ModelFactory & | modfac, | |||
| ostream & | ouslog | |||
| ) |
This is the main method, which works through the file mechanism. It is much faster than inserting directly into the database, because the buffering of the file system is a lot faster than that of the database system.
| ouslog | is the log file stream. | |
| modfac | is the model stream as input. This method first collects overlapping models (50nt or more) into a buffer. Then it calls pickGoodModel() to get good models. |
References buildGeneCluster(), Range::combine(), GenModel::genomicId(), Progparam::getBadExonFile(), Progparam::getBadModelFile(), Progparam::getGoodExonFile(), Progparam::getGoodModelFile(), Progparam::getModelClusterFile(), Progparam::goodmod, memoryRelease(), GenModel::modelName(), ModelFactory::next(), Range::overlay(), pickGoodModel(), removeIdentical(), Progparam::showevery, and writeModel().
Referenced by main().
| const int chimera_intron_len = 500 |
This is the cutoff value for checking whether an intron is likely to bridge two models into one chimeric model.
Referenced by checkUTRChimera().
| const int commonex_len = 170 |
Referenced by pickGoodModel(), and removeChimeraModels().
| int exonid = 1 |
Referenced by writeModel().
| const int min_CDS_len = 1200 |
Referenced by pickGoodModel(), and removeChimeraModels().
| const int minutr_len = 120 |
Minimal UTR length: UTRs at least this long are more likely to be true. This is dependent on the organism. Some organisms have longer UTRs than others. Small genomes tend to have short UTRs.
Referenced by checkUTRChimera().
1.5.6