#include <Match.h>

Public Member Functions | |
| Tblastn () | |
| Tblastn (const string &query, const string &target, int queryLen, int targetLen) | |
| Tblastn (const Tblastn &tbn) | |
| Tblastn & | operator= (const Tblastn &tbn) |
| ~Tblastn () | |
| void | clear () |
| void | addMatch (float identity, int alnlen, int mismatches, int gaps, int qbegin, int qend, int tbegin, int tend, double expect, double score) |
| void | addMatch (M8Match *mptr) |
| void | add (const Tblastn &tbn) |
| void | setSortedByTarget (bool yes=true) |
| bool | isSortedByTarget () const |
| ostream & | output (ostream &ous, char delimiter[]="\t") const |
| void | outputModel (const M8MatchChain &amodel, int &id, ostream &ouss, ostream &ousd, char delimiter[]="\t") const |
| void | outputModel (const Boxchain &amodel, int &id, ostream &ouss, ostream &ousd, char delimiter[]="\t") const |
| void | show (ostream &ous) const |
| int | removeJunk (const Protein &qseq, const DNA &tseq, float factor=0.8) |
| void | findFootprint (ostream &SUMM, ostream &DETA, float covcut=0.4) |
| vector< Match > | findSections () |
| bool | empty () |
| const string & | query () const |
| const string & | target () const |
| int | numMatches () const |
| const vector< M8Match * > & | getMatches () const |
| int | queryLength () const |
| int | targetLength () const |
| const Range & | firstQueryRange () const |
| const Range & | lastQueryRange () const |
| float | qcov () const |
| bool | isFirstQueryMatch (const M8Match *m) const |
| bool | overlapFirstQueryMatch (const M8Match *m, int cutoff=9) const |
| bool | isLastQueryMatch (const M8Match *m) const |
| bool | overlapLastQueryMatch (const M8Match *m, int cutoff=9) const |
| int | checkContain (ostream &ous, float factor=0.8) const |
| bool | sameQTPair (const Tblastn &tbn) |
| void | sortMatchByTarget () |
| void | directionalSortMatchByTarget () |
Static Public Member Functions | |
| static void | readConfig (const string &conf) |
| static double | getIntronProbility (int len) |
Private Member Functions | |
| void | erase (int b, int e) |
| int | eraseMatch (const Range &r) |
| void | removeNullMatch () |
| void | sortMatchByQuery () const |
| int | removeAll (list< string > &rmlist, ostream &ous) |
| int | removeAll () |
Private Attributes | |
| string | qid |
| string | tid |
| int | qlen |
| int | tlen |
| vector< M8Match * > | matches |
| bool | sortedByTarget |
| vector< M8Match * > | matchesQS |
| bool | sortedByQuery |
Static Private Attributes | |
| static float | qcovCutoff = 0.3 |
| static vector< double > | intronProb = vector<double>() |
Friends | |
| ostream & | operator<< (ostream &ous, const Tblastn &b) |
| Tblastn::Tblastn | ( | ) | [inline] |
| Tblastn::Tblastn | ( | const string & | query, | |
| const string & | target, | |||
| int | queryLen, | |||
| int | targetLen | |||
| ) | [inline] |
| Tblastn::Tblastn | ( | const Tblastn & | tbn | ) |
Copy constructor. It is needed to solve the boundary problem between SQL LIMIT commands: the result of one chunk must be saved so that it can be compared to the next chunk.
References matches.
| Tblastn::~Tblastn | ( | ) |
References matches.
References matches, qid, qlen, sortedByQuery, sortedByTarget, tid, and tlen.
| void Tblastn::clear | ( | ) | [inline] |
| void Tblastn::addMatch | ( | float | identity, | |
| int | alnlen, | |||
| int | mismatches, | |||
| int | gaps, | |||
| int | qbegin, | |||
| int | qend, | |||
| int | tbegin, | |||
| int | tend, | |||
| double | expect, | |||
| double | score | |||
| ) |
Add a new M8 match to this query x target pair. The input field order is the same as in the BLAST m8 output.
References matches.
Referenced by add(), Tblastnmy::addMatch(), and main().
| void Tblastn::add | ( | const Tblastn & | tbn | ) |
References addMatch(), matches, sortedByQuery, and sortedByTarget.
| void Tblastn::setSortedByTarget | ( | bool | yes = true |
) | [inline] |
This method tells this object that the input data has already been sorted. If the input data came from a relational database, it can easily be sorted by the database engine. When taking data directly from the input file, which is usually sorted by query,target, you should not set this to true.
References sortedByTarget.
| bool Tblastn::isSortedByTarget | ( | ) | const [inline] |
References sortedByTarget.
| ostream & Tblastn::output | ( | ostream & | ous, | |
| char | delimiter[] = "\t" | |||
| ) | const |
| void Tblastn::outputModel | ( | const M8MatchChain & | amodel, | |
| int & | id, | |||
| ostream & | ouss, | |||
| ostream & | ousd, | |||
| char | delimiter[] = "\t" | |||
| ) | const |
ouss is the summary stream, ousd is the detailed stream
References M8MatchChain::_first, M8MatchChain::_last, M8MatchChain::avgIdentity(), M8MatchChain::chain, M8MatchChain::direction(), M8MatchChain::getQueryBegin(), M8MatchChain::getQueryEnd(), M8MatchChain::getTargetBegin(), M8MatchChain::getTargetEnd(), qid, qlen, M8MatchChain::queryLength(), M8MatchChain::queryOverlap(), M8MatchChain::size(), M8MatchChain::sumScore(), tid, and tlen.
Referenced by findFootprint(), and storeBestModels().
| void Tblastn::outputModel | ( | const Boxchain & | amodel, | |
| int & | id, | |||
| ostream & | ouss, | |||
| ostream & | ousd, | |||
| char | delimiter[] = "\t" | |||
| ) | const |
the Boxchain version to replace the old version
Summary output: fields: qid, tid, fpnum, qlen, tlen, qbegin, qend, tbegin, tend, num_exon, sumscore, avgiden, qcov, sumoverlap_fraction
fpnum is an id that counts how many models are produced from the same q x t pair; this makes (q,t,fpnum) unique.
Note: two fields fewer than the previous version. qcov <= 1, different from definition before.
| ousd | Detail of footprint output stream |
The exnum is a counter from 1 to the number of exons of the footprint.
References Boxchain::first(), M8MatchEX::next(), M8Match::output(), qid, qlen, Boxchain::queryOverlap(), Boxchain::querySeqCovered(), Boxchain::summaryOutput(), tid, and tlen.
this method will modify the matches vector. Junk rows will be removed.
Arguments: qseq query sequence (protein) tseq target sequence (genomic). This is passed to this function, but not used yet at this point. Could be useful for better algorithms. factor: the argument to check containment. ous: the stream to store the bad rows.
The removed results will be added to rows. The format is quoted comma separated string suitable for SQL insert statement.
This function uses the static qcovCutoff member for judgement.
return the number of matches removed. BAD features: (1) qcov < qcovCutoff
Not doing simplicity check. It is supposed to be done by previous steps. New version, not recording removed entries!
References matches, Range::merge(), Range::overlapFraction(), qcov(), qcovCutoff, removeAll(), removeNullMatch(), and sortMatchByTarget().
| void Tblastn::findFootprint | ( | ostream & | SUMM, | |
| ostream & | DETA, | |||
| float | covcut = 0.4 | |||
| ) |
Locate all the matches of each protein on the genomic DNA. Write output to the SUMM stream and the DETA stream. Only good footprints will be selected. covcut determines the coverage of the query.
| covcut | cutoff for query coverage, default 0.4 |
References M8MatchChain::add(), directionalSortMatchByTarget(), firstQueryRange(), isFirstQueryMatch(), isLastQueryMatch(), matches, M8MatchChain::nofirst(), M8MatchChain::nolast(), outputModel(), overlapFirstQueryMatch(), overlapLastQueryMatch(), queryLength(), and M8MatchChain::queryLength().
Referenced by main().
| vector< Match > Tblastn::findSections | ( | ) |
divide the matches into sections and remove bad sections
References directionalSortMatchByTarget(), eraseMatch(), getIntronProbility(), matches, queryLength(), sortedByTarget, and targetLength().
Referenced by Linkmatch::Linkmatch().
| bool Tblastn::empty | ( | ) | [inline] |
| const string& Tblastn::query | ( | ) | const [inline] |
References qid.
| const string& Tblastn::target | ( | ) | const [inline] |
References tid.
| int Tblastn::numMatches | ( | ) | const [inline] |
number of match segments in this pair of query x target
References matches.
Referenced by removeAll(), and storeBestModels().
| const vector<M8Match*>& Tblastn::getMatches | ( | ) | const [inline] |
| int Tblastn::queryLength | ( | ) | const [inline] |
| int Tblastn::targetLength | ( | ) | const [inline] |
| const Range & Tblastn::firstQueryRange | ( | ) | const |
| const Range & Tblastn::lastQueryRange | ( | ) | const |
References matchesQS, and sortMatchByQuery().
| float Tblastn::qcov | ( | ) | const |
merge all the matches on the query into a RangeChain then compute the sum of the Range over the length of the query
References RangeChain::add(), RangeChain::length(), matches, and qlen.
Referenced by removeJunk().
| bool Tblastn::isFirstQueryMatch | ( | const M8Match * | m | ) | const |
Check whether a particular match is at the start of the query or at the end of the query.
References matchesQS, Match::queryRange(), and sortMatchByQuery().
Referenced by findFootprint().
| bool Tblastn::overlapFirstQueryMatch | ( | const M8Match * | m, | |
| int | cutoff = 9 | |||
| ) | const |
cutoff is the smallest overlap. Tests whether M8Match* m overlaps with the first query match. It uses the matchesQS vector to do the job.
References matchesQS, Range::overlap(), Match::queryRange(), and sortMatchByQuery().
Referenced by findFootprint().
| bool Tblastn::isLastQueryMatch | ( | const M8Match * | m | ) | const |
| bool Tblastn::overlapLastQueryMatch | ( | const M8Match * | m, | |
| int | cutoff = 9 | |||
| ) | const |
References matchesQS, Range::overlap(), Match::queryRange(), and sortMatchByQuery().
Referenced by findFootprint().
| int Tblastn::checkContain | ( | ostream & | ous, | |
| float | factor = 0.8 | |||
| ) | const |
Check to see which match is contained in another. The entries that can be removed are written to the output stream ous. The factor is used to decide if the contained entry should be replaced, by the formula: identity > factor*identity_of_larger_match or score > factor*score_of_larger_match
| void Tblastn::sortMatchByTarget | ( | ) |
This will sort the matches in the order target,query,tbegin,tend,qbegin,qend. The sorting operation is only carried out if sortedByTarget is false. This sorting is different when two overlapping matches are in different directions. For our operation this ordering is more correct, so we should not have used the SQL order. |--A--> |--A-> <-B-| <-B---| In the left case, SQL orders A before B, while our sorting places B before A, which is what we want. In the right case, SQL and our sorting give the same order.
This method uses the cmpTargetPtr function object, which in turn uses the < operator of Range.
References matches, and sortedByTarget.
Referenced by removeJunk().
| void Tblastn::directionalSortMatchByTarget | ( | ) |
this sorting is better for sections
References matches, and sortedByTarget.
Referenced by findFootprint(), and findSections().
| void Tblastn::readConfig | ( | const string & | conf | ) | [static] |
read the config from file with path conf
References ifstream(), and intronProb.
Referenced by main().
| double Tblastn::getIntronProbility | ( | int | len | ) | [static] |
| void Tblastn::erase | ( | int | b, | |
| int | e | |||
| ) | [private] |
Erase elements in matches from b to e, [b, e] inclusive. Uses delete to deallocate memory, then sets the pointer to 0.
References matches, and removeNullMatch().
| int Tblastn::eraseMatch | ( | const Range & | r | ) | [private] |
Erase all matches inside r. use the Range.inside() function. return the number of matches removed
References matches, and removeNullMatch().
Referenced by findSections().
| void Tblastn::removeNullMatch | ( | ) | [private] |
remove elements in matches whose value is 0 (null pointer)
References matches, and sortedByQuery.
Referenced by erase(), eraseMatch(), and removeJunk().
| void Tblastn::sortMatchByQuery | ( | ) | const [private] |
References matches, matchesQS, and sortedByQuery.
Referenced by firstQueryRange(), isFirstQueryMatch(), isLastQueryMatch(), lastQueryRange(), overlapFirstQueryMatch(), and overlapLastQueryMatch().
| int Tblastn::removeAll | ( | list< string > & | rmlist, | |
| ostream & | ous | |||
| ) | [private] |
This version is for debugging.
References matches, numMatches(), output(), qid, sortedByQuery, and tid.
| int Tblastn::removeAll | ( | ) | [private] |
For production, we want to go faster. Remove all matches, deallocate memory, and clear the matches vector.
References matches, numMatches(), and sortedByQuery.
Referenced by clear(), and removeJunk().
| ostream& operator<< | ( | ostream & | ous, | |
| const Tblastn & | b | |||
| ) | [friend] |
string Tblastn::qid [private] |
Referenced by checkContain(), operator<<(), operator=(), output(), outputModel(), query(), removeAll(), sameQTPair(), and show().
string Tblastn::tid [private] |
Referenced by checkContain(), operator<<(), operator=(), output(), outputModel(), removeAll(), sameQTPair(), show(), and target().
int Tblastn::qlen [private] |
Referenced by operator<<(), operator=(), outputModel(), qcov(), queryLength(), and show().
int Tblastn::tlen [private] |
Referenced by operator<<(), operator=(), outputModel(), show(), and targetLength().
vector<M8Match*> Tblastn::matches [private] |
For sorting we need a vector; for deletion we need a list. The matches are not sorted in any particular order. However, the pfog program sorted the input as follows: target,query,tbegin,tend,qbegin,qend. We should keep this information with sortedByTarget.
Referenced by add(), addMatch(), checkContain(), directionalSortMatchByTarget(), empty(), erase(), eraseMatch(), findFootprint(), findSections(), getMatches(), numMatches(), operator<<(), operator=(), output(), qcov(), removeAll(), removeJunk(), removeNullMatch(), show(), sortMatchByQuery(), sortMatchByTarget(), Tblastn(), and ~Tblastn().
bool Tblastn::sortedByTarget [private] |
Referenced by add(), directionalSortMatchByTarget(), findSections(), isSortedByTarget(), operator=(), setSortedByTarget(), and sortMatchByTarget().
vector<M8Match*> Tblastn::matchesQS [mutable, private] |
This is a copy of the matches, sorted by query. If any modification is made to matches, then sortedByQuery is set to false. Before using this vector, you must check the sortedByQuery value.
If the matches change then this should be re-created.
Referenced by firstQueryRange(), isFirstQueryMatch(), isLastQueryMatch(), lastQueryRange(), overlapFirstQueryMatch(), overlapLastQueryMatch(), and sortMatchByQuery().
bool Tblastn::sortedByQuery [mutable, private] |
when false, then matchesQS needs to be recreated and sorted from matches
Referenced by add(), operator=(), removeAll(), removeNullMatch(), and sortMatchByQuery().
float Tblastn::qcovCutoff = 0.3 [static, private] |
it stores the pointers to all the models created by the findFootPrint() method
Referenced by removeJunk().
vector< double > Tblastn::intronProb = vector<double>() [static, private] |
Stores the intron probability read from the Match.conf file.
Referenced by getIntronProbility(), and readConfig().
1.5.6