00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef PFEATURE_H
00010 #define PFEATURE_H
00011
00012 #include <string>
00013 #include <vector>
00014 #include <iostream>
00015 #include <map>
00016 #include <set>
00017 #include "strformat.h"
00018 #include "gbfn.h"
00019 #include "featureError.h"
00020 #include "gbseq.h"
00021
00022 #define NO_CLONE
00023
00024
00025
00026 using namespace std;
00027
00028 class gbdnaseq;
00029 class gbprtseq;
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 class locseg {
00040 public:
00041 friend class feature;
00042 locseg() : begin(0), end(0), rangeType('-'), fuzzy_begin(0), fuzzy_end(0), complement(false) { }
00043 locseg(int bb, int ee) : begin(bb), end(ee), rangeType('-'), fuzzy_begin(0), fuzzy_end(0), complement(false) { }
00044 locseg(int bb, int ee, char rt) : begin(bb), end(ee), rangeType(rt), fuzzy_begin(0), fuzzy_end(0), complement(false) { }
00045 locseg(const locseg &ls);
00046
00051 locseg(const string &lstr) throw(featLocErr);
00052
00053 virtual locseg& operator=(const locseg &ls);
00054
00055
00056
00057
00058
00059 string getOperation() const;
00060
00061 bool isAbnormal() const {
00062 return rangeType != '-' || fuzzy_begin != 0 || fuzzy_end != 0 || complement; }
00063
00064 virtual ostream& writeRange(ostream &ous) const {
00065 ous << begin << "\t" << end; return ous; }
00066
00071 virtual ostream& outRange(ostream &ous) const;
00072
00073 bool isFuzzy() const {
00074 return fuzzy_begin != 0 || fuzzy_end != 0 || rangeType != '-'; }
00075
00076 int getFuzzyBegin() const { return complement ? fuzzy_end : fuzzy_begin; }
00077 int getFuzzyEnd() const { return complement ? fuzzy_begin : fuzzy_end; }
00078 int getBegin() const { return complement ? end : begin; }
00079 int getEnd() const { return complement ? begin : end; }
00080 char getRangeType() const { return rangeType; }
00081
00082 protected:
00083 void parse(string &str) throw(featLocErr);
00084 int begin, end;
00085
00093 char rangeType;
00094
00095
00096
00097
00098
00099
00100
00101
00102 int fuzzy_begin;
00103
00104
00109 int fuzzy_end;
00110
00111 bool complement;
00112
00113
00114 };
00115 class namedlocseg : public locseg {
00116 public:
00117 friend class feature;
00118 namedlocseg() : locseg(), name() {}
00119 namedlocseg(const string &nn, int bb, int ee) : locseg(bb,ee), name(nn) {}
00120 namedlocseg(const namedlocseg& loc) : locseg(loc) { name=loc.name; }
00121 namedlocseg(const locseg& loc) : locseg(loc) { }
00122 namedlocseg(const string &lstr) throw(featLocErr);
00123 namedlocseg& operator=(const namedlocseg& loc);
00124 const string& getName() const { return name; }
00125
00130 ostream& writeRange(ostream &ous) const {
00131 ous << name << "\t"; return locseg::writeRange(ous); }
00132
00133 protected:
00134 string name;
00135 };
00136
00137
00138
00139
00140
00141
00142 class feature {
00143 public:
00144 enum Segrelation {none=0, join, order};
00145
00146 feature() : complement(false), segop(none), onone(true) { }
00147 ~feature() { clear(); }
00148
00149
00150
00151
00152
00153 bool next(string &ln, istream &ins);
00154 feature(const feature &feat);
00155 feature& operator=(const feature &feat);
00156 void writeAceProtein(ostream &ous, gbprtseq &prt);
00157 void writeAceDNA(ostream &ous, ostream &sub, ostream &snp, gbdnaseq& seq) throw(featErr);
00158 string getName() const { return name; }
00159
00160
00161
00162 friend ostream& operator<<(ostream &ous, const feature &feat);
00163
00164
00165 string getDbxrefString() const;
00166
00167
00168
00169 string getAllQualifiers() const;
00170
00171
00175 void clear();
00176
00180 int getBegin() const { return locs[0]->begin; }
00181 int getEnd() const { return locs[locs.size()-1]->end; }
00182
00183 int getNumSeg() const { return locs.size(); }
00184
00185
00186
00187 const string& getOperation() const { return locop; }
00188 Segrelation getSegop() const { return segop; }
00189
00190
00191
00192
00193
00194 ostream& writeRange(ostream &ous) const {
00195 ous << getBegin() << " " << getEnd(); return ous; }
00196
00197 ostream& outRange(ostream &ous) const;
00198
00199
00204 int outSeg(ostream &ous) const;
00206 void outSegMultiple(ostream &ous, const gbdnaseq &seq) const;
00207
00211 string getLocationString() const { return locstr; }
00212
00217 string getTaxid() const;
00218 bool hasDbxref() const { return !dbxref.empty(); }
00219
00220 bool isComplement() const { return complement; }
00221
00223 int getBeginEndinfo() const;
00224 int getEndEndinfo() const;
00225
00226 ostream& endInfo(ostream &ous) const;
00227
00228 bool nostart() const;
00229 bool noend() const;
00230 bool hasQualifier(const string &key) const {
00231 return qualifiers.find(key) != qualifiers.end(); }
00232
00233 string getQualifierValue(const string &qkey) const;
00234
00235
00236 static void readValidQualifier(const string &file);
00237
00243 static void loadSubseq(const string &file);
00244 static void dumpSubseq(const string &file);
00245
00246 static void addSubseq(const string &seq) { subseq.insert(make_pair(seq,0)); }
00247 static bool existSubseq(const string &seq) { return subseq.find(seq) != subseq.end(); }
00248
00254 static void loadGeneName(const string &file);
00255 static void dumpGeneName(const string &file);
00256
00261 static string getGeneSymbol(const string &gene, string &allele);
00262
00263 static pair<string,string> getGeneSymbol(const string &gene);
00264
00265 static const int QUAL = 5;
00266 static const int QUAL_VAL = 21;
00267 static const string QUAL_SPACE;
00268 static bool PRTOUT;
00269
00270
00271 private:
00279 static string addGene(const string &gene, string &allele);
00280
00281
00282 static pair<string,string> separateAllele(const string &gene);
00283
00284 static pair<string,string> makeGeneSymbol(const string &gene);
00285
00286 static pair<string,string> insertGeneSymbol(const string& gene, const string &symbol);
00287
00288
00289 static pair<string,string> insertGeneAsSymbol(const string& gene);
00290
00294 static string geneOfSymbol(const string& sym);
00295 static string cleanGene(const string& gene, const string wd);
00296 static string nojunkGene(const string &gene);
00297
00298 static string nextGeneSymbol();
00299 static void goodGeneSymbol(const string &gene, string &symbol);
00300 static bool isGoodGeneSymbol(const string &gene, const string &symbol);
00301 static string acronymWithAllDigits(const string& str, const int n=1);
00302
00303
00304 static string xHiddenGeneSymbol(const string &str, const string &sep);
00305
00306 static string shortIsGeneSymbol(const string &str, string::size_type idx, const int seplen);
00307
00308
00309 static bool isRomanNumber(const string &str);
00310
00311 static bool isGreek(const string &str);
00312
00313
00314
00315
00316 void nextQualifier(istream &ins, string &ln);
00317
00318
00319 pair<string, string> geneallele() const;
00320
00325 void outgeneline(ostream &ous, const gbdnaseq &seq, const string &tag) const;
00326
00327
00328
00329 map<string,string>::iterator composeTitle();
00330
00331
00332 void insertQualifier(const string& key, string& value);
00333 void writeSource(ostream &ous, ostream &sub, gbdnaseq &seq) throw(featErr);
00334 void writemRNA(ostream &ous, ostream &sub, const gbdnaseq &seq);
00335 void writeSegmRNA(ostream &sub, const gbdnaseq &seq);
00336 void writeCDS(ostream &ous, ostream &sub, const gbdnaseq &seq);
00337 void writeSegCDS(ostream &sub, const gbdnaseq &seq);
00344 void subCDS(ostream &ous, ostream &sub, const gbdnaseq &seq, const string &cdsKey, const string &prtKey, const string &title) const;
00345 void writeGene(ostream &ous, ostream &sub, gbdnaseq &seq) const;
00346
00348 void writeProtein(ostream &sub, const gbdnaseq &seq, const string &key, const string &title) const;
00349
00350 void writeRNA(ostream &sout, ostream &subout, gbdnaseq &seq);
00351 void RNAdump(ostream &ous, const gbdnaseq &seq) const;
00352 void writeFeature(ostream &ous, const gbdnaseq &seq) const throw(featErr);
00353 bool writeFeatureOfWholeSeq(ostream &ous, const gbdnaseq &seq) const;
00354 void writeImmuno(ostream &ous, ostream &sub, const gbdnaseq &seq) const;
00355 void writeSNP(ostream &snp, gbdnaseq &seq) const;
00356
00357
00358 void writePrtSite(ostream &ous, gbprtseq &prt) const;
00359 void writePrtRegion(ostream &ous, gbprtseq &prt) const;
00360
00361 string name;
00362
00363 string locstr;
00364
00368 vector<locseg*> locs;
00369 bool onone;
00370 bool complement;
00371 Segrelation segop;
00372 string locop;
00373
00374 void parseLoc() throw(featLocErr);
00375
00376 map<string, string> qualifiers;
00377 vector<string> dbxref;
00378
00379 static set<string> validqual;
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395 static map<string, int> subseq;
00396
00403 static map<string,string> genen2s;
00404 static set<string> genesbl;
00405 static int genecnt;
00406
00407 };
00408
00409 #endif