group.h
Go to the documentation of this file.
00001 #ifndef GROUP_H
00002 #define GROUP_H
00003
00005
00006
00007 #include <iostream>
00008 #include <cstring>
00009 #include <utility>
00010
00011 #include <pqxx>
00012
00013 #include <string>
00014 #include <vector>
00015 #include <fstream>
00016 #include "gconst.h"
00017
00018 using namespace pqxx;
00019
00020
00021
/// Number of features used to characterize each division; it is the length
/// of divstat::feat and a dimension of the per-feature tables in this header.
/// Kept as a preprocessor macro so existing uses as an array bound (and any
/// preprocessor tests elsewhere) continue to work unchanged.
#define FEAT 8
00023
/// Empty tag type. It is the type named in the exception specification of
/// group::group(int&, istream&).
/// NOTE(review): presumably thrown when the input stream is exhausted
/// (inferred from the name only) -- confirm in the implementation file.
class inputend {};
00025
00045 class divstat
00046 {
00047 public:
00048 divstat();
00049
00050
00051
00052
00053
00054
00055
00056 divstat(istream &in);
00057 void read(istream &in);
00058
00062
00063 divstat(result &qres, int row);
00064
00065
00066 divstat(const divstat &d);
00067
00068 string getid() { return string(id); }
00069
00070
00071 bool operator==(const char s[]) { return !strcmp(s, id); }
00072 friend ostream& operator<<(ostream &o, const divstat &ds);
00073 divstat& operator=(const divstat &d);
00074 bool operator>(const divstat &ds) const;
00075 bool operator<(const divstat &ds) const;
00076
00077
00078
00079 bool smallerThan(const divstat &d, double k) const;
00080 bool smallerThan(const divstat *const d, const double k) const;
00081 void scale(double f);
00082
00083 double getQcov() const { return feat[0]; }
00084 double getTcov() const { return feat[1]; }
00085 double getScore() const { return feat[2]; }
00086 double getIdentity() const { return feat[3]; }
00087 double getNgidentity() const { return feat[4]; }
00088 double getSimilarity() const { return feat[5]; }
00089 double getMatchlen() const { return feat[6]; }
00090 double getNogaplen() const { return feat[7]; }
00091 ostream& dump(ostream &ou);
00092
00093
00094
00095 static int idxqcov() { return 0; }
00096 static int idxtcov() { return 1; }
00097 static int idxscore() { return 2; }
00098 static int idxidentity() { return 3; }
00099 static int idxngidentity() { return 4; }
00100 static int idxsimilarity() { return 5; }
00101 static int idxmatchlen() { return 6; }
00102 static int idxnogaplen() { return 7; }
00103
00104
00105 char id[5];
00106 double feat[FEAT];
00107 static const char* features[FEAT];
00108 };
00109
00110
00111
00112
00113
00114
00115
00116 class group : public gconst
00117 {
00118 public:
00119 friend class gstat;
00120
00121
00122
00123 group();
00124 group(const group &g);
00125 group& operator=(const group &g);
00126 ~group() { for(int i=0; i<divisions.size(); i++) delete divm[i]; }
00127
00128
00129 group(int &q, istream &in) throw(inputend);
00130
00131
00132 bool next(int &q, istream &in);
00133
00135
00136
00137 friend ostream& operator<<(ostream &o, const group &g);
00138
00139
00140 void dumpAsTable(ostream &ou);
00141
00146 void dumpKey(ostream &ou);
00147
00148
00149 ostream& dump(ostream &ou);
00150
00151 void dumpStat(ostream &ou);
00152 void dumpAllSMratio(ostream &ou);
00153
00155
00156 pair<int, double> maxqcov() const { return max(divstat::idxqcov()); }
00157 pair<int, double> maxtcov() const { return max(divstat::idxtcov()); }
00158 pair<int, double> mintcov() const;
00159 pair<int, double> maxidentity() const { return max(divstat::idxidentity()); }
00160 pair<int, double> maxngidentity() const { return max(divstat::idxngidentity()); }
00161 pair<int, double> maxsimilarity() const { return max(divstat::idxsimilarity()); }
00162 pair<int, double> maxmatchlen() const { return max(divstat::idxmatchlen()); }
00163 pair<int, double> maxnogaplen() const { return max(divstat::idxnogaplen()); }
00164 pair<int, double> maxscore() const { return max(divstat::idxscore()); }
00165 pair<int, double> max(int f) const;
00166
00167
00168
00169 double getMinTcov(int &div) const;
00170
00171 int getDivCnt() const { return divCnt; }
00172
00173 int getAnchor() const { return anchor; }
00174
00175
00176
00177
00178
00179 int nextAnchor();
00180
00181
00182 double getIdenSMratio() const { return stat[divstat::idxidentity()][1]/stat[divstat::idxidentity()][0]; }
00183 double getMlenSMratio() const { return stat[divstat::idxmatchlen()][1]/stat[divstat::idxmatchlen()][0]; }
00184 double getQcovSMratio() const { return stat[divstat::idxqcov()][1]/stat[divstat::idxqcov()][0]; }
00185 double getTcovSMratio() const { return stat[divstat::idxtcov()][1]/stat[divstat::idxtcov()][0]; }
00186 double getScoreSMratio() const { return stat[divstat::idxscore()][1]/stat[divstat::idxscore()][0]; }
00187
00188
00189
00190
00191
00192
00193 bool isConserved() const;
00194
00196
00197
00198
00199
00200
00201 void rmbranch(int i, ostream &ou);
00202 void rmbranch(int i);
00203
00204 protected:
00205 bool highVarDivPresent() const;
00206
00207
00208
00209
00210 bool checkAndRemove(int i, int b, int e, ostream &ou);
00211
00212
00213
00214
00215
00216
00217
00218 void dostat();
00219
00220 int query;
00221
00222
00223
00224 vector<divstat*> divm;
00225 int divCnt;
00226 int anchor;
00227
00228 double stat[FEAT][2];
00229 };
00230
00235 class gdiagnosis : public group {
00236 public:
00237
00238 gdiagnosis();
00239 gdiagnosis(double cut);
00240 gdiagnosis(const gdiagnosis &gd);
00241
00242
00243
00244
00245 ostream& dumpWithZval(ostream &ou) const;
00246
00247 bool next(int &q, istream &in);
00248
00249 void setzcut(double c) { zcut=c; }
00250 double getzcut() const { return zcut; }
00251
00253
00254 gdiagnosis& operator=(const gdiagnosis& g);
00255 friend ostream& operator<<(ostream &ou, const gdiagnosis &g);
00256
00257
00259
00260
00261
00262
00263 bool qcovpass() { return hgrm[0][divstat::idxqcov()] == 0 && hgrm[2][divstat::idxqcov()] != hgrm[3][divstat::idxqcov()]; }
00264 bool scorepass() { return hgrm[0][divstat::idxscore()] == 0 && hgrm[2][divstat::idxscore()] != hgrm[3][divstat::idxscore()]; }
00265
00266 bool qualitypass();
00267
00268 bool passed(const double zcut) const;
00269
00270
00271
00272 bool goodQuality(double zcut) const;
00273
00274 bool passedAvg(const double zcut) const;
00275 bool passedQcov(const double zcut) const;
00276
00277
00278 bool passedIdentity(const double zcut) const;
00279
00280 bool passedLength(const double zcut) const;
00281
00282 bool passedall(const double zcut) const {
00283 return (passed(zcut) || goodQuality(zcut) || passedAvg(zcut) ||
00284 passedQcov(zcut) || passedIdentity(zcut) || passedLength(zcut));
00285 }
00286
00287
00288
00289 bool coverageDefect(double zcut) const;
00290 bool lastTest() const;
00291
00293 void calzval();
00294
00295
00296
00297 vector<int> rmlow(double lowercut=-3, double avgzc=3);
00298
00299
00300
00301 double getAvgZval() const { return zval[divisions.size()][FEAT]/(FEAT*(divCnt-1));}
00302
00303 double avgzval() const { return zval[divisions.size()][FEAT]/(FEAT*(divCnt-1));}
00304
00305 bool targetPartial(int i) const;
00306 bool anchorPartial() const;
00307
00308
00309 bool trimAndTest(ostream &ou, double zcut);
00310
00311 void trimByIden();
00312
00313
00314
00315 bool fixScoreAndTest(double szcut, gdiagnosis &d) const;
00316
00318 static void readGuid(istream &in);
00319
00320
00321
00322 private:
00323 static vector< vector< vector<double> > > guid;
00324
00325
00326
00327
00328
00329
00330
00331 vector< vector<double> > zval;
00332
00333
00334
00335 vector< vector<double> > norm;
00336
00337 int hgrm[4][FEAT];
00338 double zcut;
00339
00340
00341 void getspace();
00342
00343 };
00344
00349 class gstat : public gconst {
00350 public:
00351 gstat();
00352
00353
00354
00355
00356
00357
00358 void accumulate(const group &g);
00359
00360
00361
00362
00363 void accumulateWithGuid(const group &g);
00364
00365
00366
00367
00368
00369
00370
00371 double calguid();
00372
00373
00374 void zerosum();
00375
00376 void zeroguid();
00377
00378
00379
00380
00381 friend ostream& operator<<(ostream &o, gstat &gs);
00382 bool guid_produced;
00383
00384
00385 static int getNumdiv() { return divisions.size(); }
00386
00387
00388 static const int pivotvalue = 10;
00389
00390 private:
00391
00392
00393 vector<int> cnt;
00394
00395
00396
00397
00398
00399
00400 vector< vector< vector<double> > > sum;
00401
00402
00403 vector< vector< vector<double> > > guid;
00404 int groupCount;
00405
00407
00408
00409
00410
00411 };
00412
00413
00414
00415
00416
00417
00418
00419 class trainer {
00420 public:
00421
00422
00423
00424
00425
00426
00427 trainer();
00428
00429
00430
00431
00432
00433
00434 bool train(const string &inf);
00435 private:
00436 gstat model, csvdmodel;
00437 };
00438
00439 #endif