#include "boost/filesystem.hpp"#include "GenModel.h"#include <iostream>#include <fstream>#include "Range.h"#include "Gmapres.h"#include <map>#include <iomanip>#include "Esambl.h"#include <cstring>#include <cmath>#include <algorithm>#include <list>#include "RNAModel.h"#include "strformat.h"Classes | |
| class | Progparam |
Defines | |
| #define | NDEBUG 2 |
Typedefs | |
| typedef set< Alnchainid *, lessChainPtr > | alnset |
| typedef set< Alnchainid *, lessChainPtr >::iterator | alnset_iterator |
Functions | |
| void | usage () |
| void | readConfig (Progparam &par, const string &file) |
| bool | existFile (const string &file) |
| void | makeModels (map< string, set< Alnchainid *, lessChainPtr > > &chains, map< string, string > &gstore, Progparam &par) |
| void | compressPath (alnset &alnseg, alnset_iterator &sb, set< Alnchainid *, lessChainPtr >::const_iterator &se) |
| void | compressPathHeterogeneous (alnset &alnseg, alnset_iterator &sb, set< Alnchainid *, lessChainPtr >::const_iterator &se, const float cutoff=0.8) |
| void | releaseVectorMemory (vector< Alnchainid * > &seg) |
| void | assembleChain (const vector< Alnchainid * > &modfrag, const string &gid, const string &gseq, ostream &estmod, ostream &estexon, ostream &jgitrack, ostream &ousest, ostream &ouspep, ostream &partial, ostream &oucom, ostream &good, map< string, int > &sps, map< int, int > &infix) throw (PointOutChain) |
| void | filterAln (alnset &alnseg, alnset_iterator &sb, alnset_iterator &se, float &nalnpernt, int &numaln, Progparam &par) |
| void | filterAln (vector< Alnchain * > &seg, vector< int > &count, Progparam &par) |
| void | filterAln (vector< Alnchainid * > &seg, Progparam &par) |
| template<class T> | |
| int | writePartialId (list< T * > &mods, ostream &ous, ostream &good) |
| template<class T> | |
| int | removeSimilarModels (list< T * > &mods) |
| template<class T> | |
| void | removeSingleExonModel (set< T *, ltModptr > &mod, set< T * > &removed) |
| template<class T> | |
| void | assignGeneId (list< T * > &locus) |
| bool | guessGmapOutputFile (vector< string > &gmaps) |
| bool | guessSAMOutputFile (vector< string > &sams) |
| bool | searchFiles (vector< string > &files, const string &ext) |
| void | assignStringArray (const char *str, vector< string > &arr) |
| void | readandstoreSam (const string &samfile, map< string, set< Alnchainid *, lessChainPtr > > &alnstore, Progparam &par) |
| void | writeAlnchainidsToStream (const string &gid, const vector< Alnchainid * > &alns, ostream &ous) |
| void | getInputFromRawSam (vector< string > &files, map< string, set< Alnchainid *, lessChainPtr > > &alnseg, Progparam &par) |
| void | getInputFromRawGmap (vector< string > &files, map< string, set< Alnchainid *, lessChainPtr > > &alnseg, Progparam &par) |
| void | readAlnFromOneArchive (map< string, set< Alnchainid *, lessChainPtr > > &alnseg, const string &file) |
| void | readAlnFromArchive (map< string, set< Alnchainid *, lessChainPtr > > &alnseg, const vector< string > &files) |
| void | refineLoci (list< list< ESTAssemblyid * > > &loci, const set< ESTAssemblyid * > &removed) |
| void | destroyRemoved (set< ESTAssemblyid * > &removed) |
| void | computeRelativeProfileHeight (list< ESTAssemblyid * > &locus, list< ESTAssemblyid * > &newlocus) |
| int | main (int argc, char *argv[]) |
Variables | |
| char | version [] = "1.2.5" |
| #define NDEBUG 2 |
| typedef set<Alnchainid*, lessChainPtr> alnset |
to save typing
| typedef set<Alnchainid*, lessChainPtr>::iterator alnset_iterator |
| void assembleChain | ( | const vector< Alnchainid * > & | modfrag, | |
| const string & | gid, | |||
| const string & | gseq, | |||
| ostream & | estmod, | |||
| ostream & | estexon, | |||
| ostream & | jgitrack, | |||
| ostream & | ousest, | |||
| ostream & | ouspep, | |||
| ostream & | partial, | |||
| ostream & | oucom, | |||
| ostream & | good, | |||
| map< string, int > & | sps, | |||
| map< int, int > & | infix | |||
| ) | throw (PointOutChain) |
this is the version that I am using now.
does two things: 1. find start model, this modle's id will used as geneid. 2. compute relative profile height against this star model void computeRelativeProfileHeight(set<ESTAssemblyid*, ltModptr> &locus) { if (locus.size() == 1) return; int maxest=0; set<ESTAssemblyid*, ltModptr>::iterator it, maxit; first pass to get star model for (it=locus.begin(); it != locus.end(); ++it) { if ((*it)->numberOfEST() > maxest) { maxest=(*it)->numberOfEST(); maxit=it; } else if ((*it)->numberOfEST() == maxest) { if ((*it)->maxProfileHeight() > (*maxit)->maxProfileHeight() || (*it)->CDSLength() > (*maxit)->CDSLength()) { maxit=it; } } } ESTAssemblyid* star=*maxit; cout << "star model old geneid " << star->getGeneId() << endl; star->modelidAsGeneid(); cout << "star model new geneid " << star->getGeneId() << endl; second pass to calculate relative profile height for (it=locus.begin(); it != locus.end(); ++it) { if (it != maxit) { cout << "member old geneid " << (*it)->getGeneId() << endl; (*it)->assignRPH(*star); (*it)->setGeneId(star->getGeneId()); } } cout << endl; } latest production version using Alnchainid* if the final model length is less than 36 nt, then discard.
References Graphid::assemble(), clumpModelIntoGene(), destroyRemoved(), ESTAssembly::genuine(), Graph::getCongregationId(), RNAModel::getOid(), good, RNAModel::intronBound(), ESTAssembly::intronFixState(), length, RNAModel::modelidAsGeneid(), refineLoci(), removeSingleExonModel(), PointOutChain::what(), writePartialId(), and ESTAssemblyid::writetab().
Referenced by makeModels().
| void assignGeneId | ( | list< T * > & | locus | ) | [inline] |
pair version not used void assembleChain(const vector<pair<Alnchain*, int> > &modfrag, const string &gid, const string &gseq, ostream &estmod, ostream &estexon, ostream &jgitrack, ostream &ousest, ostream &ouspep) { if (modfrag.size()<2) { ESTAssembly est(*modfrag[0].first, modfrag[0].second, gid, gseq, Graph::getCongregationId()); est.writetab(estmod, estexon, jgitrack, ousest, ouspep); return; } Graph asmgraph(modfrag); set<ESTAssembly*, ltModptr> mods; set<ESTAssembly*, ltModptr>::iterator si; set<ESTAssembly*, lessByChainDirectionPtr> mods; set<ESTAssembly*, lessByChainDirectionPtr>::iterator si; asmgraph.assemble(gid, gseq, mods); for (si = mods.begin(); si != mods.end(); si++) { (*si)->writetab(estmod, estexon, jgitrack, ousest, ouspep); cerr << "writting ESTMod #" << (*si)->getOid() << endl; delete *si; } } version with count of EST. Very few ESTs are identical exactly. But this provides the best count
This version is the debug version, it outputs the result to a file for further reading. This function should do the filtering to remove more input from large modfrag containers.
This function needs refinement: use more precice gene clustering algorithm such as those implemented in SimpleRNAModle.h .cpp. void assembleChain(const vector<Alnchain*> &modfrag, const vector<int> &modfrag_count, const string &gid, const string &gseq, ostream &estmod, ostream &estexon, ostream &jgitrack, ostream &ousest, ostream &ouspep, ostream &partial, ostream &oucom, ostream &good, map<string,int> &sps, map<int,int> &infix) { if (modfrag.size()<2) { NO need to construct profile ESTAssembly est(*modfrag[0], modfrag_count[0], new Coverdepth(*modfrag[0]), gid, gseq, Graph::getCongregationId()); ESTAssembly est(*modfrag[0], modfrag_count[0], gid, gseq, Graph::getCongregationId()); est.assignNewGeneId(); est.writetab(estmod, estexon, jgitrack, ousest, ouspep); est.intronBound(sps); if (est.genuine()) good << est.getOid() << endl; if (est.intronFixState()>0) infix[est.getOid()]=est.intronFixState(); return; } Graph asmgraph(modfrag, modfrag_count); set<ESTAssembly*, lessByChainDirectionPtr> mods; set<ESTAssembly*, lessByChainDirectionPtr>::iterator si, previousis, maxsi; set<ESTAssembly*, lessByChainDirectionPtr>::iterator si, maxsi; asmgraph.assemble(gid, gseq, mods); cerr << "Assembler produced " << mods.size() << " models\n"; separate the models into loci si=mods.begin(); for single model we should lower the standard if (mods.size() == 1) { (*si)->assignNewGeneId(); (*si)->writetab(estmod, estexon, jgitrack, ousest, ouspep); (*si)->intronBound(sps); if ((*si)->genuine() || (*si)->semiGenuine()) good << (*si)->getOid() << endl; if ((*si)->intronFixState()>0) infix[(*si)->getOid()]=(*si)->intronFixState(); delete *si; return; } Range sofar; list<ESTAssembly*> locus; list<ESTAssembly*>::iterator it, lastit; sorted regardless of direction for output and caiwe input set<ESTAssembly*, ltModptr> sortedmods; set<ESTAssembly*, ltModptr>::iterator ssi;
int olp, olp2; while (si != mods.end()) { locus.push_back(*si); sofar=**si++; while (si != mods.end() && (float)(olp=sofar.overlap(**si))/min((*si)->length(),sofar.length())> 0.5) { sofar.merge(**si); locus.push_back(*si); ++si; } writePartialId(locus, partial, good); assignGeneId(locus); sortedmods.insert(locus.begin(), locus.end()); locus.clear(); } removeSingleExonModel(sortedmods); sorted regardles of direction for model update operation in caiwe now output result to stram cerr << sortedmods.size() << " sorted models\n"; for (ssi=sortedmods.begin(); ssi != sortedmods.end(); ssi++) { (*ssi)->writetab(estmod, estexon, jgitrack, ousest, ouspep); (*ssi)->writeCommentTab(oucom); (*ssi)->intronBound(sps); if ((*ssi)->intronFixState()>0) infix[(*ssi)->getOid()]=(*ssi)->intronFixState(); delete *ssi; } } this method use overlap concept, should not use it, the sameGene concept is much better Discard this method.
References RNAModel::advanceGeneId(), Range::length(), Range::merge(), min, and Range::overlap().
| void assignStringArray | ( | const char * | str, | |
| vector< string > & | arr | |||
| ) |
Referenced by main().
| void compressPath | ( | alnset & | alnseg, | |
| alnset_iterator & | sb, | |||
| set< Alnchainid *, lessChainPtr >::const_iterator & | se | |||
| ) |
Only compresses those whose depth is above average. Will update segvec if some alnseg are removed.
References Range::forceFuse().
| void compressPathHeterogeneous | ( | alnset & | alnseg, | |
| alnset_iterator & | sb, | |||
| set< Alnchainid *, lessChainPtr >::const_iterator & | se, | |||
| const float | cutoff = 0.8 | |||
| ) |
This version should work better with 454+Illumina data of different lengths.
| cutoff | controls the extent of compression. It is a number from 0.1 to 0.99. If equal to 1 then there is no compression. |
References Range::forceFuse(), and Range::merge().
Referenced by makeModels().
| void computeRelativeProfileHeight | ( | list< ESTAssemblyid * > & | locus, | |
| list< ESTAssemblyid * > & | newlocus | |||
| ) |
The star model is the one with the most ESTs. Its RPH will be 1; other models will have relatively smaller values. The default value of RPH at construction is 1, so singleton clusters also have value 1. The selection of the star model is based on a simple algorithm: order by numest, maxprofh, and CDS length. The best algorithm should be figured out in the future.
References del(), RNAModel::getGeneId(), Range::length(), min, and RNAModel::modelidAsGeneid().
Referenced by refineLoci().
| void destroyRemoved | ( | set< ESTAssemblyid * > & | removed | ) |
Referenced by assembleChain().
| bool existFile | ( | const string & | file | ) |
| void filterAln | ( | vector< Alnchainid * > & | seg, | |
| Progparam & | par | |||
| ) |
this is the latest version
References bad, Progparam::computeCutoff(), Progparam::dyncovcut, Progparam::dynidencut, Progparam::dynlencut, good, and Progparam::lowerCutoff().
| void filterAln | ( | alnset & | alnseg, | |
| alnset_iterator & | sb, | |||
| alnset_iterator & | se, | |||
| float & | nalnpernt, | |||
| int & | numaln, | |||
| Progparam & | par | |||
| ) |
| congSize | congregation size | |
| sb,se | the range of the congregation on the aln set. Filtering should be depth-dependent, not size-dependent: it is mainly the number of ESTs per base that causes the graph algorithm to be slow. End result: [sb,se) is a continuous range. |
References Progparam::computeCutoff(), Progparam::dyncovcut, Progparam::dynidencut, Progparam::dynlencut, Range::forceFuse(), Range::length(), Progparam::lowerCutoff(), and Range::overlay().
Referenced by makeModels().
| void getInputFromRawGmap | ( | vector< string > & | files, | |
| map< string, set< Alnchainid *, lessChainPtr > > & | alnseg, | |||
| Progparam & | par | |||
| ) |
The summary format is different; it is not clear whether this is because of the version or because it treats chromosome and genome differently. Path 1: query 1--117 (117 bp) => chr scaffold_11:1,276,030--1,275,914 (-117 bp) Path 1: query 1..42 (42 bp) => genome chromosome_2:727,701..727,660 (-42 bp)
References readandstoreGmap(), and searchFiles().
Referenced by main().
| void getInputFromRawSam | ( | vector< string > & | files, | |
| map< string, set< Alnchainid *, lessChainPtr > > & | alnseg, | |||
| Progparam & | par | |||
| ) |
| bool guessGmapOutputFile | ( | vector< string > & | gmaps | ) |
Returns true if a gmap output file (*gmap.out) is found.
| bool guessSAMOutputFile | ( | vector< string > & | sams | ) |
| int main | ( | int | argc, | |
| char * | argv[] | |||
| ) |
Now uses sameGene() to build the gene concept. There is no need to run gathergene after this program.
References Progparam::archive, archiveAln(), assignStringArray(), Progparam::codontable, Progparam::danglex, Progparam::dyncut_topcov, Progparam::dyncut_topiden, Progparam::dyncut_toplen, existFile(), FUZZYMARGIN, getInputFromRawGmap(), getInputFromRawSam(), Progparam::identitycut, Progparam::L, loadFastaIntoMap(), makeModels(), Progparam::maxintronlen, Progparam::minalnlength, Progparam::mincov, Progparam::minintronlen, Progparam::pathcompress, readAlnFromArchive(), readConfig(), searchFiles(), DNA::setCodonTable(), Gmapres::setDangleExon(), Gmapres::setMaximumIntron(), Gmapath::setMinimumCoverage(), Gmapath::setMinimumIdentity(), Gmapres::setMinimumIntron(), Gmapath::setMinimumLength(), Gmapath::setTrimCutoff(), Progparam::show(), Progparam::trimcut, and usage.
| void makeModels | ( | map< string, set< Alnchainid *, lessChainPtr > > & | chains, | |
| map< string, string > & | gstore, | |||
| Progparam & | par | |||
| ) |
There are two different ways to pass the EST count information: two vectors or one vector of pairs. I don't know which one is better. Version with count of EST. Latest production version storing actual EST ids. This one should be a little bit slower than the previous one.
| chains | This is the input for the assembly algorithm | |
| gstore | a table of genomic sequence name => seq mapping |
map<string, map<Alnchain,int> >const_iterator it; map<Alnchain, int>::const_iterator si, sb, se; map<string,int> spliceSite; map<int,int> intronFixInfo; // oid => fixstate for (it=chains.begin(); it != chains.end(); it++) { string genomicSeq=gstore[it->first]; si=it->second.begin(); // iteration over map<Noschains,int> while (si != it->second.end()) { if (si->first.length() <= modelenCutoff) { ++si; continue; } Range rr(si->first); vector<Alnchain*> seg; vector<int> seg_count; while (si != it->second.end() && rr.overlay(si->first) > 0) { rr.fuse(si->first); seg.push_back(new Alnchain(si->first)); seg_count.push_back(si->second); ++si; } congsize1=seg.size(); filterAln(seg, seg_count, par); numestMapped += seg.size(); if (congsize1 - seg.size() > 10 && seg.size()>300) { int i=0; while (i<seg.size()) { vector<Alnchain*> tmpch; vector<int> tmpch_count; Range outer(*seg[i]); while (i<seg.size() && outer.overlay(*seg[i]) > 10) { outer.fuse(*seg[i]); tmpch.push_back(seg[i]); tmpch_count.push_back(seg_count[i]); ++i; } assembleChain(tmpch, tmpch_count, it->first, genomicSeq, oumod, ouexo, outra, ouest, oupep, oupartial, oucomment, ougood, spliceSite, intronFixInfo); } } else { assembleChain(seg, seg_count, it->first, genomicSeq, oumod, ouexo, outra, ouest, oupep, oupartial, oucomment, ougood, spliceSite, intronFixInfo); } } } oumod.close(); ouexo.close(); outra.close(); ouest.close(); oupep.close(); oupartial.close(); oucomment.close(); ougood.close(); map<int,int>::const_iterator mit; map<string,int>::const_iterator spit; ofstream SPLS("intron_bound_stat.tab"); SPLS << "Splice site counts in this database of all models\n"; for (spit=spliceSite.begin(); spit != spliceSite.end(); spit++) { SPLS << spit->first << '' << spit->second << endl; } SPLS.close(); cerr << "Intron bound stat info written to intron_bound_stat.tab\n";
ofstream SZ("intronfix.tab"); for (mit=intronFixInfo.begin(); mit != intronFixInfo.end(); mit++) SZ << mit->first << '' << mit->second << endl; SZ.close(); cerr << "Intron Bound Fix information written to intronfix.tab\n"; SZ.open("numuniqueest_mapped.txt"); SZ << numestMapped << " unique EST mapped to genome\n"; SZ.close(); } Latest production version with Alncainid void processChains(map<string, set<Alnchainid> > &chains, map<string,string> &gstore, Progparam &par) { cerr << "Process all EST segments ...\n"; static const int modelenCutoff=33; ofstream oumod("combest_models.tab"); ofstream ouexo("combest_exons.tab"); ofstream outra("jgimodel.tab"); ofstream ouest("jgitranscript.tab"); ofstream oupep("jgiprotein.tab"); ofstream oupartial("combest_partial.tab"); ofstream oucomment("combest_comment.tab"); ofstream ougood("combest_genuine.tab"); record the congregation size before and after filtering very little effect, for intila phase of study map<int,int> before, after; int congsize1, numestMapped=0; // num unique EST mapped oumod << ESTAssemblyid::modelheader << endl; ouexo << RNAModel::exonheader << endl; outra << ESTAssembly::JGIModelColumns() << endl; ouest << ESTAssembly::JGITranscriptColumns() << endl; oupep << ESTAssembly::JGIProteinColumns() << endl;
map<string, set<Alnchainid> >const_iterator it; set<Alnchainid>::const_iterator si, sb, se; set<Alnchainid>::const_iterator si; map<string,int> spliceSite; // intronBound => count map<int,int> intronFixInfo; // oid => fixstate for (it=chains.begin(); it != chains.end(); it++) { string genomicSeq=gstore[it->first]; si=it->second.begin(); // iteration over set<Alnchainid> while (si != it->second.end()) { if (si->length() <= modelenCutoff) { ++si; continue; } Range rr(*si); sb=si; vector<Alnchainid*> seg; vector<int> seg_count; while (si != it->second.end() && rr.overlay(*si) > 0) { rr.fuse(*si); seg.push_back(new Alnchainid(*si)); seg_count.push_back(si->second); ++si; } congsize1=seg.size(); filterAln(seg, seg_count, par); filterAln(seg, par); numestMapped += seg.size(); if (congsize1 - seg.size() > 10 && seg.size()>300) { int i=0; while (i<seg.size()) { vector<Alnchainid*> tmpch; vector<int> tmpch_count; Range outer(*seg[i]); while (i<seg.size() && outer.overlay(*seg[i]) > 10) { outer.fuse(*seg[i]); tmpch.push_back(seg[i]); tmpch_count.push_back(seg_count[i]); ++i; } assembleChain(tmpch, it->first, genomicSeq, oumod, ouexo, outra, ouest, oupep, oupartial, oucomment, ougood, spliceSite, intronFixInfo); } } else { assembleChain(seg, it->first, genomicSeq, oumod, ouexo, outra, ouest, oupep, oupartial, oucomment, ougood, spliceSite, intronFixInfo); } } } oumod.close(); ouexo.close(); outra.close(); ouest.close(); oupep.close(); oupartial.close(); oucomment.close(); ougood.close(); map<int,int>::const_iterator mit; map<string,int>::const_iterator spit; ofstream SPLS("intron_bound_stat.tab"); SPLS << "Splice site counts in this database of all models\n"; for (spit=spliceSite.begin(); spit != spliceSite.end(); spit++) { SPLS << spit->first << '' << spit->second << endl; } SPLS.close(); cerr << "Intron bound stat info written to intron_bound_stat.tab\n";
ofstream SZ("intronfix.tab"); for (mit=intronFixInfo.begin(); mit != intronFixInfo.end(); mit++) SZ << mit->first << '' << mit->second << endl; SZ.close(); cerr << "Intron Bound Fix information written to intronfix.tab\n"; SZ.open("numuniqueest_mapped.txt"); SZ << numestMapped << " unique EST mapped to genome\n"; SZ.close(); } Latest production version with Alnchainid and using pointers. Should be faster. Rewriting into an iterator version should make this function a little bit faster. For now we should be fine. All alignments stored in chains are good. Replaces processChains(). This one should release the memory of chains.
Output files: combest_genuine.tab: modelid of good models.
References assembleChain(), compressPathHeterogeneous(), RNAModel::exonheader, filterAln(), Range::fuse(), ESTAssemblyid::getModelheader(), ESTAssembly::JGIModelColumns(), ESTAssembly::JGIProteinColumns(), ESTAssembly::JGITranscriptColumns(), Range::length(), Range::overlay(), Progparam::pathcompress, Range::setMargin(), and writeAlnchainidsToStream().
Referenced by main().
| void readAlnFromArchive | ( | map< string, set< Alnchainid *, lessChainPtr > > & | alnseg, | |
| const vector< string > & | files | |||
| ) |
| void readAlnFromOneArchive | ( | map< string, set< Alnchainid *, lessChainPtr > > & | alnseg, | |
| const string & | file | |||
| ) |
| void readandstoreSam | ( | const string & | samfile, | |
| map< string, set< Alnchainid *, lessChainPtr > > & | alnstore, | |||
| Progparam & | par | |||
| ) |
References Alnchain::coverage(), Progparam::danglex, Noschain::exonLength(), Alnchain::identity(), Progparam::identitycut, ifstream(), Noschain::isDangle(), Progparam::maxintronlen, Noschain::maxIntronLength(), Progparam::minalnlength, Progparam::mincov, Progparam::minintronlen, Noschain::minIntronLength(), Noschain::numberOfRanges(), split(), and substr().
Referenced by getInputFromRawSam().
| void readConfig | ( | Progparam & | par, | |
| const string & | file | |||
| ) |
in the future I should use some generic class to deal with configuration files
References Progparam::avgESTLen, Progparam::danglex, Progparam::dyncut_topcov, Progparam::dyncut_topiden, Progparam::dyncut_toplen, Progparam::identitycut, ifstream(), Progparam::L, Progparam::maxintronlen, Progparam::minalnlength, Progparam::mincov, Progparam::minintronlen, Progparam::pathcompress, and Progparam::trimcut.
Referenced by main().
| void refineLoci | ( | list< list< ESTAssemblyid * > > & | loci, | |
| const set< ESTAssemblyid * > & | removed | |||
| ) |
| void releaseVectorMemory | ( | vector< Alnchainid * > & | seg | ) |
| int removeSimilarModels | ( | list< T * > & | mods | ) | [inline] |
| void removeSingleExonModel | ( | set< T *, ltModptr > & | mod, | |
| set< T * > & | removed | |||
| ) | [inline] |
1. A model (regardless of number of exons) is contained inside another long model, then it will be removed. 2. if a single exon model is very similar to an exon of another model then it will be removed.
| mod | is the input. In case of removal, elements will be moved to the removed container. | |
| removed | is the container for holding removed elements. |
References del().
Referenced by assembleChain().
| bool searchFiles | ( | vector< string > & | files, | |
| const string & | ext | |||
| ) |
A helper function to find all files with a certain file extension.
| files | is the result of the search. this function first clears any existing result in files. | |
| ext | file extension, such as *.sam or gmap.out |
Referenced by getInputFromRawGmap(), getInputFromRawSam(), and main().
| void usage | ( | ) |
given gmap summary format, this program converts it into combest archive format (*.car)
It can be used as a pipe, or given specific file names.
| void writeAlnchainidsToStream | ( | const string & | gid, | |
| const vector< Alnchainid * > & | alns, | |||
| ostream & | ous | |||
| ) |
Referenced by makeModels().
| int writePartialId | ( | list< T * > & | mods, | |
| ostream & | ous, | |||
| ostream & | good | |||
| ) | [inline] |
return number of models removed
This template is modeled after the ESTAssemblyid version.
| mods | is the input. Must have more than one model. | |
| good | output for ids of good models. Good models don't have any comment. Also gets rid of nearly identical models. | |
| ous | output for relative partial models. Only oids are written out. |
References removeSimilarModels().
Referenced by assembleChain().
| char version[] = "1.2.5" |
1.5.6