Esambl.h

Go to the documentation of this file.
00001 #ifndef EST_ASSEMBLE_H
00002 #define EST_ASSEMBLE_H
00003 
00004 #include <vector>
00005 #include "GenModel.h"
00006 #include <iostream>
00007 #include <queue>
00008 #include "RNAModel.h"
00009 #include "Coverdepth.h"
00010 
00011 //#define NDEBUG
00012 
00013 using namespace std;
00014 
00015 enum Color { white, black, gray, red };
00016 
00017 class Invariant { };
00018 
00023 class Valence {
00024    public:
00025       Valence() : overlap(0), contain(0) { }
00026       Valence(int o, int c) : overlap(o), contain(c) { }
00027       Valence(const Valence &v) : overlap(v.overlap), contain(v.contain) { }
00028       Valence& operator=(const Valence &v);
00033       bool operator<(const Valence &v) const;
00034       bool operator==(const Valence &v) const { 
00035          return overlap==v.overlap && contain == v.contain; }
00036       bool operator>(const Valence &v) const;
00037       void set(int o, int c) { overlap=o; contain=c; }
00038       friend ostream& operator<<(ostream &ous, const Valence &v) {
00039          ous << v.overlap << ", " << v.contain; return ous; }
00040       ostream& show(ostream &ous) const;
00041 
00042       int overlap;
00043       int contain;
00044 };
00045 
00049 class HeadNode : public Valence {
00050    public:
00051       HeadNode() : Valence(), idx(0) { }
00052       HeadNode(int id, int olp, int cont) : Valence(olp, cont), idx(id) { }
00053       HeadNode(const Valence &v, int id) : Valence(v), idx(id) { }
00054       HeadNode(const HeadNode &hn) : Valence(hn), idx(hn.idx) { }
00055       friend ostream& operator<<(ostream &ous, const HeadNode &hn);
00056       int idx;
00057 };
00058 
00059 class GraphNode {
00060    public:
00062       GraphNode() 
00063          : idx(-1), c(white), parent(0), distance(-1),
00064            val(), inassembly(0) { }
00065       GraphNode(int id) 
00066          : idx(id), c(white), parent(0), distance(-1),
00067            val(), inassembly(0) { }
00068       GraphNode(int id, int olp, int cont)
00069          : c(white), parent(0), distance(-1), idx(id), 
00070            val(olp,cont), inassembly(0) { }
00071       GraphNode(const GraphNode &gn);
00072       GraphNode& operator=(const GraphNode &gn);
00073       bool operator<(const GraphNode &gn) const { return val < gn.val; }
00074       void setId(int id) { idx=id; }
00075       void setValence(int o, int c) { val.set(o,c); }
00076       void setIdValence(int id, int olp, int cont) {
00077          idx=id; val.set(olp,cont); }
00078       ostream& show(ostream &ous) const;
00079       friend ostream& operator<<(ostream &ous, const GraphNode &gn) {
00080          gn.show(ous); return ous; }
00081       void setNull() { c=white; parent=0; distance=-1; }
00086       void setHead();
00092       void setChildOf(GraphNode* p);
00093       void setParent(GraphNode* p);
00094       bool canBeHead() const { return inassembly == 0 && c == white; }
00095 
00096    public:
00097       Color c;
00098       GraphNode *parent;
00099       int distance;
00100       int idx; // index into the vector where it stored the Noschanins
00101       Valence val;
00102       int inassembly; // included in different assemblies
00103 };
00104 
00107 class lessGraphNodePtr {
00108    public:
00109       bool operator()(const GraphNode* p1, const GraphNode* p2) const {
00110          return p1->val < p1->val;
00111       }
00112 };
00113 
00114 class ltModptr {
00115    public:
00116       bool operator()(const ESTAssembly *m1, const ESTAssembly *m2) const {
00117          return *m1 < *m2;
00118       }
00119 };
00120 
00121 class Graph {
00122    public:
00123       Graph() : vertices(), adjm(), NVQ(), input(), input_count() { }
00124       Graph(int N) : adjm(N, vector<int>(N)), vertices(N), NVQ(),
00125          input(), input_count() { }
00130       Graph(const vector<Alnchain*> &modfrag);
00136       Graph(const vector<Alnchain*> &modfrag, const vector<int> &modfrag_count)
00137          : adjm(modfrag.size(), vector<int>(modfrag.size())),
00138            vertices(modfrag.size()), NVQ(), input(modfrag),
00139            input_count(modfrag_count)
00140          { init(modfrag); }
00141 
00146       Graph(const vector<pair<Alnchain*,int> > &modfrag);
00148       ~Graph();
00149 
00150       int numberOfVertices() const { return adjm.size(); }
00158       void prepareAssembly();
00166       int assembleOneRound(Noschain &res, int &count, ostream &ous);
00175       //int assembleOneRound(Noschain &res, int &count);
00176       int assembleOneRound(Noschain &res, int &count, Coverdepth* &covd) throw(Invariant);
00177       /* write result to three output strams to be 
00178        * bulk-uploaded into the database. this version
00179        * is not good for debug. I will use a set as output
00180        * mechanism. */
00181       //void assemble(const string &gid, const string &gseq,
00182       //      ostream &ous, ostream &estmod, ostream &estexon, ostream &jgitrack, 
00183       //      ostream &ousest, ostream &ouspep);
00184       /* production version, no debug output, and produce 
00185        * JGI three tables: model, transcript, protein */
00186       //void assemble(const string &gid, const string &gseq,
00187       //      ostream &estmod, ostream &estexon, ostream &jgitrack, 
00188       //      ostream &ousest, ostream &ouspep);
00196       void assemble(const string &gid, const string &gseq, 
00197             set<ESTAssembly*, lessByChainDirectionPtr> &allest);
00198             //set<ESTAssembly*, ltModptr> &allest);
00204       bool allAssembled() const;
00205       void showMatrix(ostream &ous) const;
00206       void showInput(ostream &ous) const;
00207       friend ostream& operator<<(ostream& ous, const Graph &gr);
00211       static int getCongregationId() { return congid++; }
00212 
00213    private:
00215       void init(const vector<Alnchain*> &modfrag);
00216 
00217    public:
00218       static int congid;
00220       vector<Alnchain*> input;
00222       vector<int> input_count;
00225       vector<GraphNode> vertices;
00228       vector<vector<int> > adjm; // adjancy matrix
00233       priority_queue<HeadNode> NVQ;
00234 };
00235 
00241 class Graphid {
00242    public:
00249       Graphid(const vector<Alnchainid*> &modfrag)
00250          : input(modfrag), vertices(modfrag.size()), 
00251             adjm(modfrag.size(), vector<int>(modfrag.size())),
00252            NVQ() 
00253          { init(modfrag); }
00254          //{ Graph::init(static_cast< vector<Alnchain*> >(modfrag)); }
00259       ~Graphid();
00260       bool allAssembled() const;
00261       void prepareAssembly();
00262 
00280       void assemble(const string &gid, const string &gseq, 
00281             set<ESTAssemblyid*, lessByChainDirectionPtr> &allest) throw (PointOutChain);
00295       int assembleOneRound(Noschain &res, int &count, Coverdepth* &covd) throw(PointOutChain, Invariant);
00296       void showMatrix(ostream &ous) const;
00300       void showInput(ostream &ous) const;
00304       int numberOfVertices() const { return adjm.size(); }
00307       friend ostream& operator<<(ostream& ous, const Graph &gr);
00308 
00312       static int getCongregationId() { return congid++; }
00313 
00314 
00315    private:
00318       void init(const vector<Alnchainid*> &modfrag);
00326       //void prepareAssembly();
00328       string allInputESTIdsString(const char sep=',') const;
00334       vector<string> allInputESTIds() const;
00339       void appendAllESTIds(vector<string> &idv) const;
00340 
00347       void assignESTIds(const set<ESTAssemblyid*, lessByChainDirectionPtr> &estmod) const;
00348 
00349    public:
00352       static int congid;
00356       static int genomic_lengthcut;
00358       vector<Alnchainid*> input;
00361       vector<GraphNode> vertices;
00364       vector<vector<int> > adjm; // adjancy matrix
00369       priority_queue<HeadNode> NVQ;
00370 };
00371 
00372 #endif

Generated on Wed Aug 10 11:56:51 2011 for Softwares from Orpara by  doxygen 1.5.6