#include <iostream>
#include <iomanip>
#include <fstream>
#include "mysql++.h"
#include <map>
#include "DBStat.h"
#include <gsl/gsl_cdf.h>
#include <cmath>
Classes | |
| class | FitNgidentity |
| struct | Resrow |
| struct | Intabcol |
Functions | |
| void | readStat (Connection &conn, const string &table, map< string, map< string, SNavgstd > > &dbindex) |
| Dbstat * | readStatInt (Connection &conn, const string &table) |
| int | removeFalsePositiveHits (Connection &conn, const string &stattab, Query &query, const string &outfile, vector< Resrow > &blk) |
| void | storeBadInTable (Connection &conn, const vector< Resrow > &lk, const string &tab, const float lowestng) |
| int | main (int argc, char *argv[]) |
| double | chisquare (const double *obsv, const double *expt, const int n, double *&zv) |
| double chisquare | ( | const double * | obsv, | |
| const double * | expt, | |||
| const int | n, | |||
| double *& | zv | |||
| ) |
Check the two arrays, ignoring zero values in obsv. obsv has n+1 elements; element[0] is not used. The index number is the dbid, which starts from 1.
| expt | is an array containing pairs of elements (mean, std); the pair for index i is encoded at positions (2i-1, 2i), with i = 1, 2, ..., n. The array has 2n+1 elements; the element at index zero is not used, so that the index can start from 1. |
| int main | ( | int | argc, | |
| char * | argv[] | |||
| ) |
References Intabcol::db1id, Intabcol::db1seqid, Intabcol::db2id, Intabcol::db2seqid, removeFalsePositiveHits(), storeBadInTable(), and user.
| void readStat | ( | Connection & | conn, | |
| const string & | table, | |||
| map< string, map< string, SNavgstd > > & | dbindex | |||
| ) |
The table structure for stat:
+-----------+---------------+------+-----+---------+-------+
| Field     | Type          | Null | Key | Default | Extra |
+-----------+---------------+------+-----+---------+-------+
| db1       | varchar(30)   | NO   |     | NULL    |       |
| db2       | varchar(30)   | NO   |     | NULL    |       |
| avgscore  | decimal(14,4) | YES  |     | NULL    |       |
| stdscore  | double(26,4)  | YES  |     | NULL    |       |
| avgiden   | decimal(18,8) | YES  |     | NULL    |       |
| stdiden   | double(34,8)  | YES  |     | NULL    |       |
| avgngiden | decimal(18,8) | YES  |     | NULL    |       |
| stdngiden | double(34,8)  | YES  |     | NULL    |       |
+-----------+---------------+------+-----+---------+-------+
This is the string version. For better performance I am implementing the array version.
References string().
| Dbstat * readStatInt | ( | Connection & | conn, | |
| const string & | table | |||
| ) |
use integer index instead of string map
References Dbstat::add(), and FitNgidentity::numdb.
Referenced by removeFalsePositiveHits().
| int removeFalsePositiveHits | ( | Connection & | conn, | |
| const string & | stattab, | |||
| Query & | query, | |||
| const string & | outfile, | |||
| vector< Resrow > & | blk | |||
| ) |
The stat table should have the following definition:
mysql:kemin:shake>desc db2dbngstat95;
+-----------+------------------+------+-----+---------+-------+
| Field     | Type             | Null | Key | Default | Extra |
+-----------+------------------+------+-----+---------+-------+
| db1       | int(10) unsigned | NO   | PRI | NULL    |       |
| db2       | int(10) unsigned | NO   | PRI | NULL    |       |
| avgscore  | decimal(14,4)    | YES  |     | NULL    |       |
| stdscore  | double(26,4)     | YES  |     | NULL    |       |
| avgiden   | decimal(18,8)    | YES  |     | NULL    |       |
| stdiden   | double(34,8)     | YES  |     | NULL    |       |
| avgngiden | decimal(18,8)    | YES  |     | NULL    |       |
| stdngiden | double(34,8)     | YES  |     | NULL    |       |
| count     | bigint(21)       | NO   |     | 0       |       |
+-----------+------------------+------+-----+---------+-------+
The sqlstr should pull the following columns: query << "select db1, seq1name, db2, seq2name, score, ngidentity from " << inputtable << " order by db1, seq1name, db2 ";
This function is able to work in both the forward and backward directions. The backward direction has multiple hits from the forward direction, from which we picked only one hit. So the backward direction should use the max ngidentity. We are eliminating the hits if they are found to be lower than in other databases. Since we have done a 95% cutoff, this should not be a problem.
Takes information from the raw data and does some tests.
References Dbstat::getNumberOfDb(), Dbstat::getStatRow(), and readStatInt().
Referenced by main().
| void storeBadInTable | ( | Connection & | conn, | |
| const vector< Resrow > & | lk, | |||
| const string & | tab, | |||
| const float | lowestng | |||
| ) |
1.5.6