feature.h

Go to the documentation of this file.
00001 
00002 #ifndef FEATURE_H
00003 #define FEATURE_H
00004 //#include <map>
00005 #include <vector>
00006 #include <string>
00007 #include "seqinfo.h"
00008 #include "dseq.h"
00009 #include "gberr.h"
00010 #include "strformat.h"
00014 class featerr // error object that carries one message string (info)
00015 { //this class is still in use; keeping it saves changes to existing code
00016  public:
00017   featerr();
00018         featerr &pick(const string &m); //returns a featerr reference; presumably records m in info and returns *this -- confirm in the implementation
00019         featerr &pick(const char *m); //C-string overload of pick()
00020         void print(ostream &ous) const; //writes to ous without modifying this object; presumably prints info -- confirm
00021 
00022  private:
00023         string info; //the only data member; presumably the message text set through pick() -- confirm
00024 };
00025 
00029 struct qualif { // one feature qualifier stored as a key/value pair
00030         char qkey[20];   //qualifier key; fixed 20-byte buffer compared with strcmp, so it must be NUL-terminated (at most 19 characters)
00031         string qvalue;   // qualifier value
00032 
00033         qualif();
00034         qualif(const qualif &qu);
00035         qualif& operator=(const qualif &qu);
00036         bool keyis(const char* kk) const {return !strcmp(qkey, kk);} //true when qkey equals kk exactly. NOTE(review): strcmp needs <cstring>/<string.h>, which this header does not include directly -- presumably pulled in through another include; confirm
00037 };
00038 
00039 
00040 class feature // one feature: feature key text (curFeat), location data, and qualifiers
00041 {
00042  public:
00043         feature();
00044         feature(const feature &fe);
00045         ~feature();
00046         feature &operator=(const feature &fe);
00047  
00049         //
00050         /* read one feature
00051          * return 0 for normal,
00052          * 1 for location parsing error
00053          * 2 for location format error
00054          *
00055          * calls readloc()
00056          */
00057         int read(istream &ins, char nxt[]); 
00058 
00063         int outSeg(ostream &ous) const; 
00064 
00070         ostream& outRange(ostream &ous) const;  
00073         ostream& outRange(ostream &ous, const char ed) const { //calls outRange(ous), then writes the single character ed; returns ous
00074                 outRange(ous); ous << ed; return ous;
00075         }  
00084         ostream& endInfo(ostream &ous, const bool emark=true) const; 
00085 
00089         void output(ostream &seq, ostream& sub, seqinfo &sinf) const;
00090 
00091   /* write out this feature in text format to bd.  The object is not changed.
00092         * Mainly useful for debugging.
00093          */
00094         void dumpFeat(ostream &bd) const; 
00095 
00096         void writeCDS(ostream& seq, ostream& sub, seqinfo &sinf) const;
00097         //if cDNA is the source, there is no need for subsequence
00098         void writemRNA(ostream &seq, ostream& sub, seqinfo &sinf) const;
00099         void writeSource(ostream &seqout, ostream &subsout, seqinfo &sinf) const;
00100         void writeGene(ostream &seq, ostream &sub, seqinfo &sinf) const;
00101         void writeRNA(ostream &sout, ostream &subout, seqinfo &sinf) const;
00102         void writerpt(ostream &seq, ostream &sub, seqinfo &sinf) const;
00103         void writeExon(ostream &ous) const;
00104         void writeToken(ostream &seq, ostream &sub, seqinfo &sinf, 
00105                                         const dseq &nt) const;
00106 
00110         void writeAllele(ostream &seq, ostream &sub, seqinfo &sinf) const;
00111 
00112         /* write joint Segmented sequence; for mRNA and CDS only */
00113         void wjoinseg(ostream &sub, seqinfo &sinf) const;
00114 
00118         void writeUTR(ostream &seq, ostream &sub, seqinfo &sinf) const;
00119 
00126         void wFeature(ostream &ous, const seqinfo &sinf) const;         
00127 
00129         //
00133         bool isAbnormal() const;
00134 
00135         bool complement() const;
00136         bool newfeat() const {return (findex() == -1);} //true when findex() returns -1, i.e. the current feature text has no feature table index
00137         //bool sameloc(const feature &fe);
00138         /* whether the current feature text equals ff (exact strcmp match) */
00139         bool is(const char *ff) const {return !strcmp(curFeat, ff);}
00140 
00143         bool isjoinseg() const;
00144 
00145         /* location contains more than one region, e.g. join(6..2498,2542..2739) */
00146         bool locIsJoin() const { return n>0; }
00147 
00149         const char *feat() const { return curFeat; } //returns a pointer to the internal curFeat buffer
00150 
00151         /* To implement the (23.45)..(204.221) notation of the gb format
00152         we use the following notation:
00153         (begin.vb)..(ve.end) represents the one_of notation;
00154         the default number for vb and ve is 0, which indicates that 
00155         there is no one_of notation.  
00156         (begin.end) is indicated by vr. Default value of vr is 0; 1 indicates
00157         vaguer.  begin^end is simply represented by begin and end.  No flag is set.
00158         However, this info is not output into the file.
00159         NOTE(review): no member named vr is declared in this class -- confirm whether the vr sentence is stale. */
00160 
00162         int findex() const; 
00163         //returns the feature table index of current feature text
00164         
00167         bool findQualif(const char* qk, string &vv) const;
00168 
00173         string qvmap(const char *k) const; 
00174 
00179         int qfind(const char *kq) const;
00180 
00181         friend const string orgtag(const seqinfo &sinf);
00182         //makes a unique short string from full organism name
00183         //for identifying gene names from different organisms
00184         //static map<string, string> qual2ace;
00185         //static void loadQual2ace();
00186 
00187  private:
00188         int nq;     // length of qv = number of valued qualifiers
00189         qualif *qv; //qualifier array, may contain duplicated qualifiers; presumably allocated and owned by this object -- confirm in the implementation
00190         vector<string> dbxref; //one or more
00191         vector<string> qnv; //qualifiers without values
00192         //string bFeat;  //for all qualifiers from bad feature
00193         int begin;
00194         int vb; 
00195         int end;
00196         int ve;
00197         bool noLeft, noRight;
00198         string replseq; //store allele sequence 
00199         
00200         int *loc; //the actual integer data for location info
00201         int n; //number of location elements. Only set >0 if join
00202         char *locstr; //location string, for exception handling
00203         int l, maxl; //l=number of chars and maxl=maximum allocated chars for locstr
00204         char curFeat[16]; //current feature text; compared with strcmp, so it must be NUL-terminated (at most 15 characters)
00205         
00206         void copy(const feature &lo); //helper function
00207 
00217         int readloc(istream &ins, char ln[]);
00218 
00219         static const char *ft[64]; //presumably the feature table that findex() searches -- confirm
00220         static const char *qt[74]; //the qt (qualifier) table that qindex() searches
00221 
00223         int qindex(const char qual[]) const; 
00224         //returns the qualifier index of this qualifier in qt table
00225         //both functions (presumably findex and qindex) return -1 to indicate that entry does not exist
00226         bool mktitle(string &tt) const;
00227 
00228         /* make a key and put result in sk; return status of key making */
00229         int subkey(string &sk) const;  
00230         //makes subkey from information in feature and set sk as the key
00231         //if nothing available, return 0; 1 for finding a key; 2 for 
00232         //finding protein_id as key necessary for joint objects
00233         //such as mRNA, CDS etc.
00234 
00238         bool tokey(string &tk) const;  
00239         
00240         void rptdump(ostream &ous, const seqinfo &sinf) const;
00241         void RNAdump(ostream &ous, const seqinfo &sinf) const;
00242         /* tlnIdx is the index number of translation in the qualifier 
00243          * vector.  -1 indicates not present */
00244         void subCDS(ostream &seq, ostream &sub, const string &cdsKey, const string &title, int tlnIdx, const seqinfo &sqinf) const;
00245 };
00246 #endif

Generated on Wed Aug 10 11:56:57 2011 for Softwares from Orpara by  doxygen 1.5.6