#include <iostream>
#include <mysql++.h>
#include "dbinfo.h"
#include <cstring>
#include <string>
#include <set>
#include <map>
#include <vector>
#include "bioseq.h"
#include <cmath>
#include <boost/math/distributions.hpp>
#include <RNAModel.h>
Classes | |
| class | Progparam |
| class | Conpos |
| class | Conseq |
Functions | |
| bool | loadGenomic (map< string, string > &gstore, Connection &conn, const string &host, const string &database) |
| bool | checkstop (const string &host, const string &dbname, const MysqlDBInfo &mydb, ostream &ous, ostream &txou, const Progparam &par) |
| bool | isStop (const string &codon) |
| int | stops (const string &seq) |
| bool | hasTable (Connection &conn, const string &tab) |
| void | readconf (const string &file, map< string, string > &dbtab, set< string > &ignore, map< string, int > &maxintron) |
| double | distanceBaseFreq (double bf1[4], double bf2[4]) |
| void | checkBadStopIndex (const string &host, const string &dbname, const MysqlDBInfo &mydb, ostream &ous, ostream &osl, map< string, int > &maxintron, const Progparam &par) |
| void | analyzeStop (const string &host, const vector< string > &dbs, const MysqlDBInfo &mydb, const Progparam &par, bool appendres=false) |
| void | fixStopIndex (const string &host, vector< string > &dbs, const MysqlDBInfo &mydb, map< string, int > &maxintron, const Progparam &par) |
| void | refineAllmodels (Connection &conn, const string &fixedfile, const string &badfile, const string &refinedallmod) |
| void | getCandidateDatabase (vector< string > &dbnames, Connection &conn) |
| void | discardIgnored (vector< string > &dbnames, const set< string > &ignored) |
| void | getDbsFromFile (vector< string > &dbnames, const string &dblf) |
| void | usage () |
| int | main (int argc, char *argv[]) |
| ostream & | operator<< (ostream &ous, const Conpos &pos) |
| ostream & | operator<< (ostream &ous, const Conseq &cons) |
| void | countbase (int bs[4], const string &seq) |
| double | expectedStop (double bf[4], int n) |
| void | stoplocation (const string &seq, vector< int > &sloc) |
| int | count2freq (int bc[4], double bf[4]) |
| template<class T> | |
| ostream & | writeBase (T bf[4], ostream &ous, const char *sep="\t") |
| bool | loadGenomicFromScaffoldSeq (map< string, string > &gstore, Connection &conn) |
| ostream & | writeRow (Row row, ostream &ous) |
| ostream & | writeResult (ostream &ous, const Row &row, const JGIModel &mod) |
| void analyzeStop | ( | const string & | host, | |
| const vector< string > & | dbs, | |||
| const MysqlDBInfo & | mydb, | |||
| const Progparam & | par, | |||
| bool | appendres = false | |||
| ) |
| void checkBadStopIndex | ( | const string & | host, | |
| const string & | dbname, | |||
| const MysqlDBInfo & | mydb, | |||
| ostream & | ous, | |||
| ostream & | osl, | |||
| map< string, int > & | maxintron, | |||
| const Progparam & | par | |||
| ) |
| ous. | For output of all databases. | |
| osl. | is the log stream for debugging purposes. fix stop, left or right use all models. This method calls buildAllModels to build the allmodels table. |
References at, mRNAModel::complete(), Range::direction(), MysqlDBInfo::getPassword(), MysqlDBInfo::getUser(), mRNAModel::growCDS3Prime(), mRNAModel::hasStop(), itos(), mRNAModel::jgiModelCol, mRNAModel::jgiTranscriptCol, length, loadGenomic(), Progparam::nreptab, mRNAModel::proteinLength(), mRNAModel::proteinLengthNoTail(), mRNAModel::proteinSequence(), refineAllmodels(), Progparam::replaceallmod, string(), mRNAModel::trimCDSStop(), mRNAModel::trimCDSTail(), Badinput::what(), PointOutChain::what(), InvalidModel::what(), InvalidJGIModel::what(), and writeResult().
Referenced by fixStopIndex().
| bool checkstop | ( | const string & | host, | |
| const string & | dbname, | |||
| const MysqlDBInfo & | mydb, | |||
| ostream & | ous, | |||
| ostream & | txou, | |||
| const Progparam & | par | |||
| ) |
Analyze the stop situation for one database.
| ous | contains the summary result for the stop region |
References mRNAModel::CDSLength(), mRNAModel::CDSSequence(), count2freq(), countbase(), Range::direction(), distanceBaseFreq(), expectedStop(), mRNAModel::genomicCDSEnd(), RNAModel::getOid(), MysqlDBInfo::getPassword(), MysqlDBInfo::getUser(), hasTable(), L, loadGenomic(), Progparam::nreptab, reverseComplement(), stoplocation(), string(), and writeBase().
Referenced by analyzeStop().
| int count2freq | ( | int | bc[4], | |
| double | bf[4] | |||
| ) |
Referenced by checkstop().
| void countbase | ( | int | bs[4], | |
| const string & | seq | |||
| ) |
| void discardIgnored | ( | vector< string > & | dbnames, | |
| const set< string > & | ignored | |||
| ) |
| double distanceBaseFreq | ( | double | bf1[4], | |
| double | bf2[4] | |||
| ) |
Euclidean distance between two base coverage profiles
Referenced by checkstop().
| double expectedStop | ( | double | bf[4], | |
| int | n | |||
| ) |
Since stop codons are not independent in different frames, we need to do a small correction even for random sequences. If frame 1 has a stop, then the other two frames will certainly not be a stop codon. So the absolute frequency of a stop codon given any random codon is the sum of the frequencies of the three stop codons. In a long sequence this needs to be corrected by the following equation: Pcorrected = p - (4np^2)/(3n-2)
| bf | base frequency of A, C, G, and T | |
| n | number of codons of sequence. seqlen/3. |
Referenced by checkstop().
| void fixStopIndex | ( | const string & | host, | |
| vector< string > & | dbs, | |||
| const MysqlDBInfo & | mydb, | |||
| map< string, int > & | maxintron, | |||
| const Progparam & | par | |||
| ) |
This is the top-level method; it can iterate through multiple databases. It uses checkBadStopIndex to work on each individual database.
References checkBadStopIndex().
Referenced by main().
| void getCandidateDatabase | ( | vector< string > & | dbnames, | |
| Connection & | conn | |||
| ) |
| void getDbsFromFile | ( | vector< string > & | dbnames, | |
| const string & | dblf | |||
| ) |
| bool hasTable | ( | Connection & | conn, | |
| const string & | tab | |||
| ) |
Referenced by checkstop(), getCandidateDatabase(), and loadGenomicFromScaffoldSeq().
| bool isStop | ( | const string & | codon | ) |
Referenced by stoplocation(), and stops().
| bool loadGenomic | ( | map< string, string > & | gstore, | |
| Connection & | conn, | |||
| const string & | host, | |||
| const string & | database | |||
| ) |
References length, loadGenomicFromScaffoldSeq(), and string().
Referenced by checkBadStopIndex(), and checkstop().
| bool loadGenomicFromScaffoldSeq | ( | map< string, string > & | gstore, | |
| Connection & | conn | |||
| ) |
| int main | ( | int | argc, | |
| char * | argv[] | |||
| ) |
References analyzeStop(), discardIgnored(), Progparam::fixstop, fixStopIndex(), MysqlDBInfo::getAuthenInfo(), getCandidateDatabase(), getDbsFromFile(), MysqlDBInfo::getPassword(), MysqlDBInfo::getUser(), Progparam::nreptab, readconf(), Progparam::replaceallmod, mRNAModel::setShortestModel(), and usage.
| ostream& operator<< | ( | ostream & | ous, | |
| const Conseq & | cons | |||
| ) |
References Conseq::cseq.
| ostream& operator<< | ( | ostream & | ous, | |
| const Conpos & | pos | |||
| ) |
output frequency like the following: {A0.347526,C0.107838,G0.328182,T0.216454}
References Conpos::bases, Conpos::bc, H, and Conpos::height().
| void readconf | ( | const string & | file, | |
| map< string, string > & | dbtab, | |||
| set< string > & | ignore, | |||
| map< string, int > & | maxintron | |||
| ) |
| void refineAllmodels | ( | Connection & | conn, | |
| const string & | fixedfile, | |||
| const string & | badfile, | |||
| const string & | refinedallmod | |||
| ) |
allmodels {remove bad, update stop-fixed } => allfixed
| refinedallmod | the result table containing allmodels: 1. without bad models. 2. stop fixed. This table will not store the actual protein sequence, only the proteinid. But the pepseq in the protein table may be wrong. | |
| fixedfile | models whose coordinates were fixed and are good. This is a subset of the allmodels. This file is loaded into a table. Its schema combines the schemas of model|transcript|protein, so it has a lot of columns. | |
| badfile | contains all bad models; they will be removed from the final model set (refinedallmod) |
load bad models into table
Referenced by checkBadStopIndex().
| void stoplocation | ( | const string & | seq, | |
| vector< int > & | sloc | |||
| ) |
| int stops | ( | const string & | seq | ) |
References isStop().
| void usage | ( | ) |
given gmap summary format, this program converts it into combest archive format (*.car)
It can be used as a pipe, or given specific file names.
this is a helper program to count distinct estids in the ESTId column of the combest result. So it is the actual number of ESTs mapped. This could be lower for deeply covered genome because of the coverage depth-dependent filtering.
given gmap summary format, this program converts it into combest archive format (*.car)
It can be used as a pipe, or given specific file names.
this is a helper program to count distinct estids in the ESTId column of the combest result. So it is the actual number of ESTs mapped. This could be lower for deeply covered genome because of the coverage depth-dependent filtering.
| ostream& writeBase | ( | T | bf[4], | |
| ostream & | ous, | |||
| const char * | sep = "\t" | |||
| ) | [inline] |
| ostream& writeResult | ( | ostream & | ous, | |
| const Row & | row, | |||
| const JGIModel & | mod | |||
| ) |
References mRNAModel::printJGIModelRow(), mRNAModel::printJGITranscriptRowNoId(), and mRNAModel::proteinSequence().
Referenced by checkBadStopIndex(), and main().
| ostream& writeRow | ( | Row | row, | |
| ostream & | ous | |||
| ) |
1.5.6