#include <head.h>
Public Types | |
| enum | Segrelation { none = 0, join, order } |
Public Member Functions | |
| void | read (ifstream &ins, char ln[]) |
| void | write (ostream &ous) const |
| void | clear () |
| feature () | |
| feature (const feature &fe) | |
| ~feature () | |
| feature & | operator= (const feature &fe) |
| int | read (istream &ins, char nxt[]) |
| int | outSeg (ostream &ous) const |
| ostream & | outRange (ostream &ous) const |
| ostream & | outRange (ostream &ous, const char ed) const |
| ostream & | endInfo (ostream &ous, const bool emark=true) const |
| void | output (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | dumpFeat (ostream &bd) const |
| void | writeCDS (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | writemRNA (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | writeSource (ostream &seqout, ostream &subsout, seqinfo &sinf) const |
| void | writeGene (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | writeRNA (ostream &sout, ostream &subout, seqinfo &sinf) const |
| void | writerpt (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | writeExon (ostream &ous) const |
| void | writeToken (ostream &seq, ostream &sub, seqinfo &sinf, const dseq &nt) const |
| void | writeAllele (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | wjoinseg (ostream &sub, seqinfo &sinf) const |
| void | writeUTR (ostream &seq, ostream &sub, seqinfo &sinf) const |
| void | wFeature (ostream &ous, const seqinfo &sinf) const |
| bool | isAbnormal () const |
| bool | complement () const |
| bool | newfeat () const |
| bool | is (const char *ff) const |
| bool | isjoinseg () const |
| bool | locIsJoin () const |
| const char * | feat () const |
| int | findex () const |
| bool | findQualif (const char *qk, string &vv) const |
| string | qvmap (const char *k) const |
| int | qfind (const char *kq) const |
| feature () | |
| ~feature () | |
| bool | next (string &ln, istream &ins) |
| feature (const feature &feat) | |
| feature & | operator= (const feature &feat) |
| void | writeAceProtein (ostream &ous, gbprtseq &prt) |
| void | writeAceDNA (ostream &ous, ostream &sub, ostream &snp, gbdnaseq &seq) throw (featErr) |
| string | getName () const |
| string | getDbxrefString () const |
| string | getAllQualifiers () const |
| void | clear () |
| int | getBegin () const |
| int | getEnd () const |
| int | getNumSeg () const |
| const string & | getOperation () const |
| Segrelation | getSegop () const |
| ostream & | writeRange (ostream &ous) const |
| ostream & | outRange (ostream &ous) const |
| int | outSeg (ostream &ous) const |
| void | outSegMultiple (ostream &ous, const gbdnaseq &seq) const |
| string | getLocationString () const |
| string | getTaxid () const |
| bool | hasDbxref () const |
| bool | isComplement () const |
| int | getBeginEndinfo () const |
| int | getEndEndinfo () const |
| ostream & | endInfo (ostream &ous) const |
| bool | nostart () const |
| bool | noend () const |
| bool | hasQualifier (const string &key) const |
| string | getQualifierValue (const string &qkey) const |
Static Public Member Functions | |
| static void | readValidQualifier (const string &file) |
| static void | loadSubseq (const string &file) |
| static void | dumpSubseq (const string &file) |
| static void | addSubseq (const string &seq) |
| static bool | existSubseq (const string &seq) |
| static void | loadGeneName (const string &file) |
| static void | dumpGeneName (const string &file) |
| static string | getGeneSymbol (const string &gene, string &allele) |
| static pair< string, string > | getGeneSymbol (const string &gene) |
Static Public Attributes | |
| static const int | QUAL = 5 |
| static const int | QUAL_VAL = 21 |
| static const string | QUAL_SPACE = string(QUAL_VAL, ' ') |
| static bool | PRTOUT = false |
Private Member Functions | |
| void | copy (const feature &lo) |
| int | readloc (istream &ins, char ln[]) |
| int | qindex (const char qual[]) const |
| bool | mktitle (string &tt) const |
| int | subkey (string &sk) const |
| bool | tokey (string &tk) const |
| void | rptdump (ostream &ous, const seqinfo &sinf) const |
| void | RNAdump (ostream &ous, const seqinfo &sinf) const |
| void | subCDS (ostream &seq, ostream &sub, const string &cdsKey, const string &title, int tlnIdx, const seqinfo &sqinf) const |
| void | nextQualifier (istream &ins, string &ln) |
| pair< string, string > | geneallele () const |
| void | outgeneline (ostream &ous, const gbdnaseq &seq, const string &tag) const |
| map< string, string >::iterator | composeTitle () |
| void | insertQualifier (const string &key, string &value) |
| void | writeSource (ostream &ous, ostream &sub, gbdnaseq &seq) throw (featErr) |
| void | writemRNA (ostream &ous, ostream &sub, const gbdnaseq &seq) |
| void | writeSegmRNA (ostream &sub, const gbdnaseq &seq) |
| void | writeCDS (ostream &ous, ostream &sub, const gbdnaseq &seq) |
| void | writeSegCDS (ostream &sub, const gbdnaseq &seq) |
| void | subCDS (ostream &ous, ostream &sub, const gbdnaseq &seq, const string &cdsKey, const string &prtKey, const string &title) const |
| void | writeGene (ostream &ous, ostream &sub, gbdnaseq &seq) const |
| void | writeProtein (ostream &sub, const gbdnaseq &seq, const string &key, const string &title) const |
| void | writeRNA (ostream &sout, ostream &subout, gbdnaseq &seq) |
| void | RNAdump (ostream &ous, const gbdnaseq &seq) const |
| void | writeFeature (ostream &ous, const gbdnaseq &seq) const throw (featErr) |
| bool | writeFeatureOfWholeSeq (ostream &ous, const gbdnaseq &seq) const |
| void | writeImmuno (ostream &ous, ostream &sub, const gbdnaseq &seq) const |
| void | writeSNP (ostream &snp, gbdnaseq &seq) const |
| void | writePrtSite (ostream &ous, gbprtseq &prt) const |
| void | writePrtRegion (ostream &ous, gbprtseq &prt) const |
| void | parseLoc () throw (featLocErr) |
Static Private Member Functions | |
| static string | addGene (const string &gene, string &allele) |
| static pair< string, string > | separateAllele (const string &gene) |
| static pair< string, string > | makeGeneSymbol (const string &gene) |
| static pair< string, string > | insertGeneSymbol (const string &gene, const string &symbol) |
| static pair< string, string > | insertGeneAsSymbol (const string &gene) |
| static string | geneOfSymbol (const string &sym) |
| static string | cleanGene (const string &gene, const string wd) |
| static string | nojunkGene (const string &gene) |
| static string | nextGeneSymbol () |
| static void | goodGeneSymbol (const string &gene, string &symbol) |
| static bool | isGoodGeneSymbol (const string &gene, const string &symbol) |
| static string | acronymWithAllDigits (const string &str, const int n=1) |
| static string | xHiddenGeneSymbol (const string &str, const string &sep) |
| static string | shortIsGeneSymbol (const string &str, string::size_type idx, const int seplen) |
| static bool | isRomanNumber (const string &str) |
| static bool | isGreek (const string &str) |
Private Attributes | |
| string | loc |
| string | feat |
| int | nq |
| qualif * | qv |
| vector< string > | dbxref |
| vector< string > | qnv |
| int | begin |
| int | vb |
| int | end |
| int | ve |
| bool | noLeft |
| bool | noRight |
| string | replseq |
| int * | loc |
| int | n |
| char * | locstr |
| int | l |
| int | maxl |
| char | curFeat [16] |
| string | name |
| string | locstr |
| vector< locseg * > | locs |
| bool | onone |
| bool | complement |
| Segrelation | segop |
| string | locop |
| map< string, string > | qualifiers |
Static Private Attributes | |
| static const char * | ft [64] |
| static const char * | qt [74] |
| static set< string > | validqual = set<string>() |
| static map< string, int > | subseq = map<string, int>() |
| static map< string, string > | genen2s = map<string,string>() |
| static set< string > | genesbl = set<string>() |
| static int | genecnt = 0 |
Friends | |
| const string | orgtag (const seqinfo &sinf) |
| ostream & | operator<< (ostream &ous, const feature &feat) |
| enum feature::Segrelation |
| feature::feature | ( | ) |
References curFeat.
| feature::feature | ( | ) | [inline] |
| feature::~feature | ( | ) | [inline] |
References clear().
| feature::feature | ( | const feature & | feat | ) |
| void feature::read | ( | ifstream & | ins, | |
| char | ln[] | |||
| ) |
| void feature::clear | ( | ) |
| int feature::read | ( | istream & | ins, | |
| char | nxt[] | |||
| ) |
| int feature::outSeg | ( | ostream & | ous | ) | const |
Outputs Source_Exons num1 num2, including the newline. Returns the sum of the exon lengths.
References begin, complement(), end, loc, and n.
Referenced by subCDS(), writeImmuno(), writemRNA(), and writeRNA().
| ostream & feature::outRange | ( | ostream & | ous | ) | const |
Outputs the range of this feature (begin end), according to whether the sequence is complement or not. If complement, outputs end--begin. Does not output the newline.
References begin, complement(), and end.
Referenced by outRange(), subCDS(), wFeature(), writeAllele(), writeCDS(), writeExon(), writeFeature(), writeFeatureOfWholeSeq(), writeGene(), writeImmuno(), writemRNA(), writeRNA(), writerpt(), writeSource(), and writeToken().
| ostream& feature::outRange | ( | ostream & | ous, | |
| const char | ed | |||
| ) | const [inline] |
add an termination mark ed, such as
References outRange().
| ostream & feature::endInfo | ( | ostream & | ous, | |
| const bool | emark = true | |||
| ) | const |
output start_not_found end_not_found to ous if emark = true,
is output at the end. emark defaults to true. Outputs sequence end information to ous; if the sequence is not marked as missing its start or end, nothing will be output to ous.
codon_start will be output if it is not 1; for CDS entries only.
References complement(), noLeft, noRight, and qvmap().
Referenced by RNAdump(), subCDS(), wFeature(), writeCDS(), writeExon(), and writemRNA().
| void feature::output | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
Master feature output method; automatically selects the right output function.
References curFeat, is(), wFeature(), writeAllele(), writeCDS(), writeExon(), writeGene(), writemRNA(), writeRNA(), writerpt(), writeSource(), and writeUTR().
Referenced by main().
| void feature::dumpFeat | ( | ostream & | bd | ) | const |
References curFeat, end, locstr, nq, qnv, and qv.
Referenced by main(), writeAllele(), and writeGene().
| void feature::writeCDS | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
Outputs the CDS feature to the sequence and subsequence files. Some changes in ACEDB 4.9: Start_not_found 3 indicates a frame starting at 3, and the CDS is changed from 3 300 to 1 300.
References begin, complement(), dbxref, end, endInfo(), seqinfo::getcntstr(), is(), l, locIsJoin(), mktitle(), seqinfo::name(), noLeft, noRight, seqinfo::org, outRange(), qfind(), qnv, qv, qualif::qvalue, qvmap(), seqinfo::sk, subCDS(), subkey(), and seqinfo::type.
Referenced by output(), and writeAceDNA().
| void feature::writemRNA | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
References dbxref, endInfo(), findQualif(), seqinfo::getcntstr(), qualif::keyis(), mktitle(), seqinfo::name(), nq, orgtag, outRange(), outSeg(), featerr::pick(), qualif::qkey, qnv, qv, qualif::qvalue, seqinfo::sk, subkey(), and seqinfo::type.
Referenced by output(), and writeAceDNA().
| void feature::writeSource | ( | ostream & | seqout, | |
| ostream & | subsout, | |||
| seqinfo & | sinf | |||
| ) | const |
References begin, end, itoa(), qualif::keyis(), seqinfo::len, seqinfo::name(), nq, outRange(), qfind(), qualif::qkey, qnv, qv, qualif::qvalue, and seqinfo::sourCount.
Referenced by output(), and writeAceDNA().
| void feature::writeGene | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
References dbxref, dumpFeat(), seqinfo::name(), nq, orgtag, outRange(), featerr::pick(), qfind(), qualif::qkey, qnv, qv, qualif::qvalue, split(), and seqinfo::type.
Referenced by output(), and writeAceDNA().
| void feature::writeRNA | ( | ostream & | sout, | |
| ostream & | subout, | |||
| seqinfo & | sinf | |||
| ) | const |
References begin, curFeat, end, itoa(), seqinfo::len, seqinfo::name(), outRange(), qfind(), qv, qualif::qvalue, seqinfo::rnaCount, RNAdump(), seqinfo::sk, and subkey().
Referenced by output(), and writeAceDNA().
| void feature::writerpt | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
Make a unique key for the repeat sequence.
make a title for repeat_region
References curFeat, seqinfo::getcntstr(), seqinfo::name(), outRange(), qfind(), qv, qualif::qvalue, seqinfo::sk, and subkey().
Referenced by output().
| void feature::writeExon | ( | ostream & | ous | ) | const |
References curFeat, endInfo(), noLeft, noRight, nq, outRange(), qualif::qkey, qv, and qualif::qvalue.
Referenced by output().
References begin, curFeat, end, is(), itoa(), seqinfo::name(), nq, orgtag, outRange(), featerr::pick(), qualif::qkey, qnv, qv, qualif::qvalue, qvmap(), dseq::subseq(), seqinfo::tokCount, and tokey().
| void feature::writeAllele | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
Deals with allele and variation features; may create an allele object if there is enough information.
can make a title, or frequency creating an allele obj
References dumpFeat(), itoa(), mktitle(), seqinfo::name(), nq, outRange(), qfind(), qualif::qkey, qnv, qv, qualif::qvalue, qvmap(), replseq, seqinfo::tokCount, and tokey().
Referenced by output().
| void feature::wjoinseg | ( | ostream & | sub, | |
| seqinfo & | sinf | |||
| ) | const |
References seqinfo::cdsCount, curFeat, find(), is(), seqinfo::isseg(), itoa(), locstr, mktitle(), seqinfo::mrnaCount, seqinfo::name(), seqinfo::nameseg(), orgtag, qfind(), qualif::qkey, qv, qualif::qvalue, split(), and subkey().
Referenced by main().
| void feature::writeUTR | ( | ostream & | seq, | |
| ostream & | sub, | |||
| seqinfo & | sinf | |||
| ) | const |
Most 5'UTRs in the genomic annotation are useless; I will discard these.
References locIsJoin(), and wFeature().
Referenced by output().
| void feature::wFeature | ( | ostream & | ous, | |
| const seqinfo & | sinf | |||
| ) | const |
All features other than source, CDS, mRNA, gene, RNA, repeat_region, satellite, exon, intron, allele, and variation go here and will be attached to EMBL_feature. ous is the main sequence output stream. Maps to qualifier int int text EMBL_info.
generic method for left over features
References begin, curFeat, dbxref, end, endInfo(), is(), qualif::keyis(), seqinfo::len, noLeft, noRight, nq, outRange(), gberr::pick(), qualif::qkey, qv, and qualif::qvalue.
Referenced by output(), and writeUTR().
| bool feature::isAbnormal | ( | ) | const |
if location string contains num,letter gi, order, group, one-of, then it is considered abnormal
References locstr.
Referenced by readloc(), writeAceProtein(), writePrtRegion(), and writePrtSite().
| bool feature::complement | ( | ) | const |
Referenced by endInfo(), getBeginEndinfo(), getEndEndinfo(), isComplement(), noend(), nostart(), outRange(), outSeg(), parseLoc(), writeCDS(), writemRNA(), and writeProtein().
| bool feature::is | ( | const char * | ff | ) | const [inline] |
References curFeat.
Referenced by main(), output(), wFeature(), wjoinseg(), writeCDS(), and writeToken().
| bool feature::isjoinseg | ( | ) | const |
| bool feature::locIsJoin | ( | ) | const [inline] |
| const char* feature::feat | ( | ) | const [inline] |
| bool feature::findQualif | ( | const char * | qk, | |
| string & | vv | |||
| ) | const |
Puts the qualifier value pointed to by key qk into the string vv.
References qfind(), qv, and qualif::qvalue.
Referenced by mktitle(), subkey(), and writemRNA().
| string feature::qvmap | ( | const char * | k | ) | const |
returns qvalue pointed to by qkey k. qkey is one of those in qualifier table. If not found, returns empty string.
References qfind(), qv, qualif::qvalue, and string().
Referenced by endInfo(), writeAllele(), writeCDS(), and writeToken().
| int feature::qfind | ( | const char * | kq | ) | const |
look in the qv array for qkey, returns index of kq, returns -1 if not found
Referenced by findQualif(), qvmap(), subCDS(), subkey(), tokey(), wjoinseg(), writeAllele(), writeCDS(), writeGene(), writeRNA(), writerpt(), and writeSource().
| void feature::copy | ( | const feature & | lo | ) | [private] |
| int feature::readloc | ( | istream & | ins, | |
| char | ln[] | |||
| ) | [private] |
Before the function call, ln must contain the first line of a feature. ln is the input line; at the end of the function call, it will contain the next line following this feature. This function also does the parsing of the location.
Returns an error code: 0 for normal, 1 for parsing error.
used by read()
joint segments
Bad stuff: 5'UTR join(2243..2291,2761) We now want to say this is ok. Represented with two pairs of integers
References append(), begin, curFeat, end, getNumber(), isAbnormal(), l, LINE, loc, locstr, maxl, n, noLeft, noRight, replseq, substr(), vb, and ve.
Referenced by read().
| int feature::qindex | ( | const char | qual[] | ) | const [private] |
References qt.
| bool feature::mktitle | ( | string & | tt | ) | const [private] |
| int feature::subkey | ( | string & | sk | ) | const [private] |
References findQualif(), KEY_LIMIT, qfind(), qv, and qualif::qvalue.
Referenced by wjoinseg(), writeCDS(), writemRNA(), writeRNA(), writerpt(), and writeSource().
| bool feature::tokey | ( | string & | tk | ) | const [private] |
Makes a token key. Returns false if it cannot make a key.
References dbxref, qfind(), qv, and qualif::qvalue.
Referenced by writeAllele(), and writeToken().
| void feature::rptdump | ( | ostream & | ous, | |
| const seqinfo & | sinf | |||
| ) | const [private] |
References begin, end, qualif::keyis(), seqinfo::len, nq, orgtag, featerr::pick(), qnv, qv, and qualif::qvalue.
| void feature::RNAdump | ( | ostream & | ous, | |
| const seqinfo & | sinf | |||
| ) | const [private] |
References begin, curFeat, endInfo(), nq, orgtag, featerr::pick(), qnv, qv, and qualif::qvalue.
Referenced by writeRNA().
| void feature::subCDS | ( | ostream & | seq, | |
| ostream & | sub, | |||
| const string & | cdsKey, | |||
| const string & | title, | |||
| int | tlnIdx, | |||
| const seqinfo & | sqinf | |||
| ) | const [private] |
References endInfo(), nq, orgtag, outRange(), outSeg(), featerr::pick(), qfind(), qualif::qkey, qnv, qv, and qualif::qvalue.
Referenced by writeCDS().
| bool feature::next | ( | string & | ln, | |
| istream & | ins | |||
| ) |
References locstr, name, nextQualifier(), parseLoc(), QUAL, QUAL_SPACE, QUAL_VAL, and trim().
Referenced by gbseq::read().
| void feature::writeAceProtein | ( | ostream & | ous, | |
| gbprtseq & | prt | |||
| ) |
This method deals with region and site; the rest goes to Swiss_feature.
References dbxref, getAllQualifiers(), getBegin(), getDbxrefString(), getEnd(), gbseq::getKey(), gbseq::getLength(), getLower(), getName(), gbseq::getOrgAcronym(), isAbnormal(), locs, name, qualifiers, writePrtRegion(), writePrtSite(), and writeRange().
| void feature::writeAceDNA | ( | ostream & | ous, | |
| ostream & | sub, | |||
| ostream & | snp, | |||
| gbdnaseq & | seq | |||
| ) | throw (featErr) |
References getNumSeg(), getQualifierValue(), name, featErr::print(), substr(), writeCDS(), writeFeature(), writeGene(), writeImmuno(), writemRNA(), writeRNA(), writeSNP(), and writeSource().
| string feature::getName | ( | ) | const [inline] |
| string feature::getDbxrefString | ( | ) | const |
| string feature::getAllQualifiers | ( | ) | const |
References qualifiers, and while().
Referenced by writeAceProtein(), writeFeatureOfWholeSeq(), and writePrtRegion().
| void feature::clear | ( | ) |
mainly set optional members to defaults and clear up locs vector because it is a pointer array.
| int feature::getBegin | ( | ) | const [inline] |
No complement information is included. For protein objects this is enough.
References locs.
Referenced by RNAdump(), writeAceProtein(), writeFeature(), writemRNA(), writeRange(), writeRNA(), writeSNP(), and writeSource().
| int feature::getEnd | ( | ) | const [inline] |
References locs.
Referenced by writeAceProtein(), writeFeature(), writemRNA(), writeRange(), writeRNA(), writeSNP(), and writeSource().
| int feature::getNumSeg | ( | ) | const [inline] |
| const string& feature::getOperation | ( | ) | const [inline] |
| Segrelation feature::getSegop | ( | ) | const [inline] |
| ostream& feature::writeRange | ( | ostream & | ous | ) | const [inline] |
| ostream& feature::outRange | ( | ostream & | ous | ) | const |
| int feature::outSeg | ( | ostream & | ous | ) | const |
output the subsequence ranges for CDS and mRNA only, Format: one or more {Source_Exon begin end}
| void feature::outSegMultiple | ( | ostream & | ous, | |
| const gbdnaseq & | seq | |||
| ) | const |
output coding region (exons) from multiple sequences
References gbseq::getKey(), and locs.
Referenced by writeSegCDS(), and writeSegmRNA().
| string feature::getLocationString | ( | ) | const [inline] |
Return the GenBank style location exactly as read from input.
References locstr.
Referenced by writeFeature().
| string feature::getTaxid | ( | ) | const |
| bool feature::hasDbxref | ( | ) | const [inline] |
References dbxref.
| bool feature::isComplement | ( | ) | const [inline] |
References complement().
| int feature::getBeginEndinfo | ( | ) | const |
| int feature::getEndEndinfo | ( | ) | const |
| ostream & feature::endInfo | ( | ostream & | ous | ) | const |
References complement(), and locs.
| bool feature::nostart | ( | ) | const |
References complement(), and locs.
Referenced by RNAdump(), subCDS(), writeCDS(), and writeProtein().
| bool feature::noend | ( | ) | const |
References complement(), and locs.
Referenced by RNAdump(), subCDS(), writeCDS(), and writeProtein().
| bool feature::hasQualifier | ( | const string & | key | ) | const [inline] |
References qualifiers.
Referenced by subCDS(), writeCDS(), writeFeatureOfWholeSeq(), and writeSegCDS().
| string feature::getQualifierValue | ( | const string & | qkey | ) | const |
References qualifiers, and string().
Referenced by geneallele(), outgeneline(), writeAceDNA(), writeGene(), and writeSegCDS().
| void feature::readValidQualifier | ( | const string & | file | ) | [static] |
References ifstream(), string(), and validqual.
| void feature::loadSubseq | ( | const string & | file | ) | [static] |
Load subsequence keys from a file containing all relevant subsequence keys, so that the parser knows what subsequences have been parsed. Keeps track of globally named subsequences.
References ifstream(), and subseq.
Referenced by main().
| static void feature::addSubseq | ( | const string & | seq | ) | [inline, static] |
| static bool feature::existSubseq | ( | const string & | seq | ) | [inline, static] |
References subseq.
| void feature::loadGeneName | ( | const string & | file | ) | [static] |
functions working with gene names
GeneName file use tab delimited file geneName
References genen2s, genesbl, ifstream(), and split().
Referenced by main().
| void feature::dumpGeneName | ( | const string & | file | ) | [static] |
| string feature::getGeneSymbol | ( | const string & | gene, | |
| string & | allele | |||
| ) | [static] |
Checks whether gene is a symbol or not; if not, inserts it into genesbl and adds it to genen2s. This is the interface that will be used by the public.
References genen2s, makeGeneSymbol(), and wc().
Referenced by geneallele(), and outgeneline().
| pair< string, string > feature::getGeneSymbol | ( | const string & | gene | ) | [static] |
References genen2s, makeGeneSymbol(), and wc().
| static string feature::addGene | ( | const string & | gene, | |
| string & | allele | |||
| ) | [static, private] |
If getGeneSymbol() gets nothing, it calls this method. Adds a new gene into the genen2s map<string,string>. Returns the symbol for gene. Some genes are actually alleles, and these will be separated. It looks like * separates the gene name from the allele: gene*allele.
| pair< string, string > feature::separateAllele | ( | const string & | gene | ) | [static, private] |
Call this function only if you are sure this is a legal Gene*symbol structure.
References wc().
Referenced by insertGeneSymbol().
| pair< string, string > feature::makeGeneSymbol | ( | const string & | gene | ) | [static, private] |
References acronymWithAllDigits(), delall(), dissect(), firstword(), genen2s, genesbl, goodGeneSymbol(), insertGeneSymbol(), isGreek(), isnumber(), isRomanNumber(), isupper(), lastword(), length, nextGeneSymbol(), nojunkGene(), split(), toupper(), tr(), wc(), and xHiddenGeneSymbol().
Referenced by getGeneSymbol().
| pair< string, string > feature::insertGeneSymbol | ( | const string & | gene, | |
| const string & | symbol | |||
| ) | [static, private] |
References genen2s, genesbl, separateAllele(), and tr().
Referenced by insertGeneAsSymbol(), and makeGeneSymbol().
| pair< string, string > feature::insertGeneAsSymbol | ( | const string & | gene | ) | [static, private] |
References insertGeneSymbol().
| string feature::geneOfSymbol | ( | const string & | sym | ) | [static, private] |
More than one gene may actually point to the same symbol; this function returns the first one found.
References genen2s, and string().
Referenced by isGoodGeneSymbol().
| string feature::cleanGene | ( | const string & | gene, | |
| const string | wd | |||
| ) | [static, private] |
| string feature::nojunkGene | ( | const string & | gene | ) | [static, private] |
References cleanGene(), isGreek(), L, lastword(), and singleSpace().
Referenced by goodGeneSymbol(), isGoodGeneSymbol(), and makeGeneSymbol().
| string feature::nextGeneSymbol | ( | ) | [static, private] |
| void feature::goodGeneSymbol | ( | const string & | gene, | |
| string & | symbol | |||
| ) | [static, private] |
References acronymWithAllDigits(), genesbl, isGoodGeneSymbol(), itos(), l, and nojunkGene().
Referenced by makeGeneSymbol().
| bool feature::isGoodGeneSymbol | ( | const string & | gene, | |
| const string & | symbol | |||
| ) | [static, private] |
References cmp_nocase(), firstword(), geneOfSymbol(), genesbl, lastword(), and nojunkGene().
Referenced by goodGeneSymbol().
| string feature::acronymWithAllDigits | ( | const string & | str, | |
| const int | n = 1 | |||
| ) | [static, private] |
References dissect(), isGreek(), isnumber(), isRomanNumber(), isupper(), and length.
Referenced by goodGeneSymbol(), and makeGeneSymbol().
| string feature::xHiddenGeneSymbol | ( | const string & | str, | |
| const string & | sep | |||
| ) | [static, private] |
| string feature::shortIsGeneSymbol | ( | const string & | str, | |
| string::size_type | idx, | |||
| const int | seplen | |||
| ) | [static, private] |
| bool feature::isRomanNumber | ( | const string & | str | ) | [static, private] |
Referenced by acronymWithAllDigits(), and makeGeneSymbol().
| bool feature::isGreek | ( | const string & | str | ) | [static, private] |
Referenced by acronymWithAllDigits(), makeGeneSymbol(), and nojunkGene().
| void feature::nextQualifier | ( | istream & | ins, | |
| string & | ln | |||
| ) | [private] |
| pair< string, string > feature::geneallele | ( | ) | const [private] |
Uses the gene and allele, or locus_tag, qualifiers if they exist; otherwise returns a pair of empty strings.
References getGeneSymbol(), getQualifierValue(), name, and qualifiers.
Referenced by writeGene().
| void feature::outgeneline | ( | ostream & | ous, | |
| const gbdnaseq & | seq, | |||
| const string & | tag | |||
| ) | const [private] |
Outputs the gene line; tag can be: geneof_mRNA, geneof_CDS, geneof_model, geneof_transcript or geneof_protein.
References getGeneSymbol(), gbseq::getOrgAcronym(), getQualifierValue(), and qualifiers.
Referenced by RNAdump(), subCDS(), writeCDS(), writeFeatureOfWholeSeq(), writeImmuno(), writemRNA(), writeProtein(), writeRNA(), writeSegCDS(), and writeSegmRNA().
| map< string, string >::iterator feature::composeTitle | ( | ) | [private] |
References l, length, maxl, and qualifiers.
Referenced by writeCDS(), writemRNA(), writeSegCDS(), and writeSegmRNA().
| void feature::insertQualifier | ( | const string & | key, | |
| string & | value | |||
| ) | [private] |
only needs to check the double quote at the end in case someone forgot to add the beginning quote if (value[0] == '"' && value[value.length()-1] == '"') value = value.substr(1, value.length()-2); else if (value[value.length()-1] == '"') value = value.substr(0, value.length()-1); else if (value[0] == '"') value = value.substr(1);
References dbxref, qualifiers, and tr().
Referenced by nextQualifier().
| void feature::writeSource | ( | ostream & | ous, | |
| ostream & | sub, | |||
| gbdnaseq & | seq | |||
| ) | throw (featErr) [private] |
References getBegin(), getEnd(), getLower(), itos(), length, outRange(), qualifiers, split(), and subkey().
| void feature::writemRNA | ( | ostream & | ous, | |
| ostream & | sub, | |||
| const gbdnaseq & | seq | |||
| ) | [private] |
When parsing genomic DNA and mRNAs, redundancy sometimes causes trouble. In the human genome annotation, the genome annotation may have an mRNA annotated on the genomic DNA. At the same time there is an mRNA dump that sometimes may have more than one segment. The policy of this program is to make another subsequence as CDS and link it to the protein sequence. There should be a way to link without the subsequence.
References addSubseq(), complement(), composeTitle(), dbxref, locseg::getBegin(), getBegin(), getBeginEndinfo(), locseg::getEnd(), getEnd(), getEndEndinfo(), locseg::getFuzzyBegin(), locseg::getFuzzyEnd(), gbseq::getKey(), gbseq::getLength(), getName(), gbseq::getType(), itos(), join, locs, gbdnaseq::nextsub(), onone, outgeneline(), outRange(), outSeg(), qualifiers, segop, subseq, and writeSegmRNA().
| void feature::writeSegmRNA | ( | ostream & | sub, | |
| const gbdnaseq & | seq | |||
| ) | [private] |
References composeTitle(), gbseq::getKey(), itos(), gbdnaseq::nextsub(), outgeneline(), outSegMultiple(), and qualifiers.
Referenced by writemRNA().
| void feature::writeCDS | ( | ostream & | ous, | |
| ostream & | sub, | |||
| const gbdnaseq & | seq | |||
| ) | [private] |
If the CDS is from an mRNA and has more than one segment, it is more likely specifying a translational exception. In this case the distance between segments is usually < 3 and there are usually two segments. I will create a subsequence for this mRNA and should not name it according to the corresponding protein.
References addSubseq(), complement(), composeTitle(), gbseq::getKey(), gbseq::getLocusName(), gbseq::getMolType(), getSegop(), hasQualifier(), itos(), join, gbdnaseq::nextsub(), noend(), nostart(), onone, outgeneline(), outRange(), PRTOUT, qualifiers, segop, subCDS(), subkey(), subseq, writeProtein(), writeRange(), and writeSegCDS().
| void feature::writeSegCDS | ( | ostream & | sub, | |
| const gbdnaseq & | seq | |||
| ) | [private] |
References composeTitle(), gbseq::getKey(), getQualifierValue(), hasQualifier(), itos(), gbdnaseq::nextsub(), outgeneline(), outSegMultiple(), PRTOUT, qualifiers, and writeProtein().
Referenced by writeCDS().
| void feature::subCDS | ( | ostream & | ous, | |
| ostream & | sub, | |||
| const gbdnaseq & | seq, | |||
| const string & | cdsKey, | |||
| const string & | prtKey, | |||
| const string & | title | |||
| ) | const [private] |
A helper function to write a CDS object. Adds one more parameter to handle the case where the CDS and protein have different names. This is useful when dealing with translation frame-shifts, where the CDS for splicing is different from the CDS for translation, but these two different CDS objects point to the same protein.
References dbxref, gbseq::getKey(), gbseq::getOrgAcronym(), hasQualifier(), noend(), nostart(), outgeneline(), outRange(), outSeg(), qualifiers, and split().
| void feature::writeGene | ( | ostream & | ous, | |
| ostream & | sub, | |||
| gbdnaseq & | seq | |||
| ) | const [private] |
| void feature::writeProtein | ( | ostream & | sub, | |
| const gbdnaseq & | seq, | |||
| const string & | key, | |||
| const string & | title | |||
| ) | const [private] |
Helper function.
Uses the CDS feature to construct a protein object, which may or may not have a Peptide object associated with it. The title is borrowed from the CDS object.
References complement(), dbxref, gbseq::getKey(), gbseq::getOrgAcronym(), gbseq::getOrganism(), noend(), nostart(), outgeneline(), qualifiers, split(), and writeSequence().
Referenced by writeCDS(), and writeSegCDS().
| void feature::writeRNA | ( | ostream & | sout, | |
| ostream & | subout, | |||
| gbdnaseq & | seq | |||
| ) | [private] |
References dbxref, getBegin(), getEnd(), gbseq::getKey(), gbseq::getLength(), gbseq::getType(), gbseq::hasFeature(), itos(), name, gbdnaseq::nextsub(), outgeneline(), outRange(), outSeg(), qualifiers, and RNAdump().
| void feature::RNAdump | ( | ostream & | ous, | |
| const gbdnaseq & | seq | |||
| ) | const [private] |
References dbxref, getBegin(), name, noend(), nostart(), outgeneline(), qualifiers, split(), and substr().
References dbxref, getBegin(), getEnd(), getLocationString(), getOperation(), locs, name, outRange(), qualifiers, tr(), and writeFeatureOfWholeSeq().
Referenced by writeAceDNA().
| bool feature::writeFeatureOfWholeSeq | ( | ostream & | ous, | |
| const gbdnaseq & | seq | |||
| ) | const [private] |
References getAllQualifiers(), gbseq::getKey(), hasQualifier(), name, outgeneline(), outRange(), and qualifiers.
Referenced by writeFeature().
| void feature::writeImmuno | ( | ostream & | ous, | |
| ostream & | sub, | |||
| const gbdnaseq & | seq | |||
| ) | const [private] |
References gbseq::getKey(), itos(), name, gbdnaseq::nextsub(), outgeneline(), outRange(), outSeg(), and qualifiers.
Referenced by writeAceDNA().
| void feature::writeSNP | ( | ostream & | snp, | |
| gbdnaseq & | seq | |||
| ) | const [private] |
References dbxref, getBegin(), getEnd(), gbseq::getKey(), and qualifiers.
Referenced by writeAceDNA().
| void feature::writePrtSite | ( | ostream & | ous, | |
| gbprtseq & | prt | |||
| ) | const [private] |
| void feature::writePrtRegion | ( | ostream & | ous, | |
| gbprtseq & | prt | |||
| ) | const [private] |
References dbxref, getAllQualifiers(), getDbxrefString(), getNumSeg(), isAbnormal(), locs, and qualifiers.
Referenced by writeAceProtein().
| void feature::parseLoc | ( | ) | throw (featLocErr) [private] |
| const string orgtag | ( | const seqinfo & | sinf | ) | [friend] |
Referenced by RNAdump(), rptdump(), subCDS(), wjoinseg(), writeGene(), writemRNA(), and writeToken().
| ostream& operator<< | ( | ostream & | ous, | |
| const feature & | feat | |||
| ) | [friend] |
ous << feat.loc;
string feature::loc [private] |
Referenced by clear(), copy(), operator=(), outSeg(), read(), readloc(), write(), and ~feature().
string feature::feat [private] |
int feature::nq [private] |
Referenced by copy(), dumpFeat(), qfind(), read(), RNAdump(), rptdump(), subCDS(), wFeature(), writeAllele(), writeExon(), writeGene(), writemRNA(), writeSource(), and writeToken().
qualif* feature::qv [private] |
Referenced by copy(), dumpFeat(), findQualif(), operator=(), qfind(), qvmap(), read(), RNAdump(), rptdump(), subCDS(), subkey(), tokey(), wFeature(), wjoinseg(), writeAllele(), writeCDS(), writeExon(), writeGene(), writemRNA(), writeRNA(), writerpt(), writeSource(), writeToken(), and ~feature().
vector< string > feature::dbxref [private] |
vector<string> feature::qnv [private] |
Referenced by copy(), dumpFeat(), read(), RNAdump(), rptdump(), subCDS(), writeAllele(), writeCDS(), writeGene(), writemRNA(), writeSource(), and writeToken().
int feature::begin [private] |
Referenced by copy(), outRange(), outSeg(), readloc(), RNAdump(), rptdump(), wFeature(), writeCDS(), writeRNA(), writeSource(), and writeToken().
int feature::vb [private] |
int feature::end [private] |
Referenced by copy(), dumpFeat(), outRange(), outSeg(), readloc(), rptdump(), wFeature(), writeCDS(), writeRNA(), writeSource(), and writeToken().
int feature::ve [private] |
bool feature::noLeft [private] |
Referenced by copy(), endInfo(), read(), readloc(), wFeature(), writeCDS(), and writeExon().
bool feature::noRight [private] |
Referenced by copy(), endInfo(), read(), readloc(), wFeature(), writeCDS(), and writeExon().
string feature::replseq [private] |
Referenced by readloc(), and writeAllele().
int* feature::loc [private] |
int feature::n [private] |
Referenced by copy(), locIsJoin(), outSeg(), and readloc().
char* feature::locstr [private] |
Referenced by copy(), dumpFeat(), getLocationString(), isAbnormal(), isjoinseg(), next(), operator=(), parseLoc(), readloc(), wjoinseg(), and ~feature().
int feature::l [private] |
Referenced by composeTitle(), copy(), goodGeneSymbol(), readloc(), and writeCDS().
int feature::maxl [private] |
Referenced by composeTitle(), copy(), and readloc().
char feature::curFeat[16] [private] |
Referenced by copy(), dumpFeat(), feat(), feature(), findex(), is(), output(), readloc(), RNAdump(), wFeature(), wjoinseg(), writeExon(), writeRNA(), writerpt(), and writeToken().
const char * feature::ft [static, private] |
Initial value:
{ "3'UTR", "3'clip", "5'UTR",
"5'clip", "CAAT_signal", "CDS", "C_region", "D-loop",
"D_segment", "GC_signal", "J_region", "J_segment", "LTR",
"N_region", "RBS", "STS", "S_region",
"TATA_signal", "V_region", "V_segment", "allele",
"attenuator", "conflict", "enhancer", "exon",
"gene", "iDNA", "intron", "mRNA",
"mat_peptide", "misc_RNA", "misc_binding", "misc_difference",
"misc_feature", "misc_recomb", "misc_signal", "misc_structure",
"modified_base", "mutation", "old_sequence", "polyA_signal",
"polyA_site", "precursor_RNA", "prim_transcript", "primer",
"primer_bind", "promoter", "protein_bind", "rRNA",
"rep_origin", "repeat_region", "repeat_unit", "satellite",
"scRNA", "sig_peptide", "snRNA", "source",
"stem_loop", "tRNA", "terminator", "transit_peptide",
"transposon", "unsure", "variation" }
Referenced by findex().
const char * feature::qt [static, private] |
Initial value:
{"EC_number",
"PCR_conditions", "allele", "anticodon",
"bound_moiety", "cell_line", "cell_type", "chloroplast",
"chromoplast", "chromosome", "citation", "clone",
"clone_lib", "codon", "codon_start", "cons_splice",
"cultivar", "cyanelle", "db_xref", "dev_stage",
"direction", "evidence", "exception", "focus",
"frequency", "function", "gdb_xref", "gene",
"germline", "haplotype", "insertion_seq", "isolate",
"kinetoplast", "lab_host", "label", "macronuclear",
"map", "mitochondrion", "mod_base", "note",
"number", "organism", "partial", "phenotype",
"plasmid", "pop_variant", "product", "protein_id",
"proviral", "pseudo", "rearranged", "replace",
"rpt_family", "rpt_type", "rpt_unit", "sequenced_mol",
"serotype", "sex", "specific_host", "specimen_voucher",
"standard_name", "strain", "sub_clone", "sub_species",
"sub_strain", "tissue_lib", "tissue_type", "transl_except",
"transl_table", "translation", "transposon", "usedin",
"variety", "virion"}
Referenced by qindex().
const int feature::QUAL = 5 [static] |
Referenced by next().
const int feature::QUAL_VAL = 21 [static] |
Referenced by next(), and nextQualifier().
const string feature::QUAL_SPACE = string(QUAL_VAL, ' ') [static] |
Referenced by next(), and nextQualifier().
PRTOUT is a boolean variable P set feature::PRTOUT = false [static] |
Referenced by main(), writeCDS(), and writeSegCDS().
string feature::name [private] |
Referenced by geneallele(), getName(), getTaxid(), next(), operator<<(), RNAdump(), writeAceDNA(), writeAceProtein(), writeFeature(), writeFeatureOfWholeSeq(), writeGene(), writeImmuno(), and writeRNA().
string feature::locstr [private] |
vector<locseg*> feature::locs [private] |
To provide polymorphism, I must use pointers for the location. The derived class namedlocseg is rarely used.
Referenced by endInfo(), getBegin(), getBeginEndinfo(), getEnd(), getEndEndinfo(), getNumSeg(), noend(), nostart(), operator<<(), outSegMultiple(), parseLoc(), writeAceProtein(), writeFeature(), writeGene(), writemRNA(), writePrtRegion(), and writePrtSite().
bool feature::onone [private] |
Referenced by parseLoc(), writeCDS(), writeGene(), and writemRNA().
bool feature::complement [private] |
Segrelation feature::segop [private] |
Referenced by getSegop(), parseLoc(), writeCDS(), and writemRNA().
string feature::locop [private] |
Referenced by getOperation(), and operator<<().
map<string, string> feature::qualifiers [private] |
Referenced by composeTitle(), geneallele(), getAllQualifiers(), getQualifierValue(), hasQualifier(), insertQualifier(), nextQualifier(), operator<<(), outgeneline(), RNAdump(), subCDS(), writeAceProtein(), writeCDS(), writeFeature(), writeFeatureOfWholeSeq(), writeGene(), writeImmuno(), writemRNA(), writeProtein(), writePrtRegion(), writePrtSite(), writeRNA(), writeSegCDS(), writeSegmRNA(), writeSNP(), and writeSource().
set< string > feature::validqual = set<string>() [static, private] |
Referenced by readValidQualifier().
map< string, int > feature::subseq = map<string, int>() [static, private] |
Referenced by addSubseq(), dumpSubseq(), existSubseq(), loadSubseq(), writeCDS(), and writemRNA().
map< string, string > feature::genen2s = map<string,string>() [static, private] |
Gene name to acronym mapping for long gene names. If first is already a gene symbol, then second is "". If the gene name has more than one word, the first letter of each word is used to make up the gene symbol.
Referenced by dumpGeneName(), geneOfSymbol(), getGeneSymbol(), insertGeneSymbol(), loadGeneName(), and makeGeneSymbol().
set< string > feature::genesbl = set<string>() [static, private] |
Referenced by goodGeneSymbol(), insertGeneSymbol(), isGoodGeneSymbol(), loadGeneName(), makeGeneSymbol(), and nextGeneSymbol().
int feature::genecnt = 0 [static, private] |
Referenced by nextGeneSymbol().
1.5.6