prt.h

Go to the documentation of this file.
00001 #ifndef PRT_H
00002 #define PRT_H
00003 
00004 #include <iostream>
00005 #include <fstream>
00006 #include <string>
00007 #include <map>
00008 #include <vector>
00009 #include "dbtime.h"
00010 #include <set>
00011 #include "util.h"
00012 #include "refloc.h"
00013 #include "book.h"
00014 using namespace std;
00015 
00016 // Helper that splits one input line `ln` into two output strings:
00017 // `first` receives the 2-letter LINE CODE, `second` receives the content.
00018 void separate(const string &ln, string &first, string &second);
00019 
00020 #define KEY_LIMIT 25  // NOTE(review): not used anywhere in this header; presumably a limit applied when building ref keys -- confirm in the implementation file
00021 //#define DEBUG
00022 
00023 class ref  // NOTE(review): presumably one literature reference (paper); prt stores a vector<ref> named `papers` -- confirm
00024 {
00025         public:
00026                 ref() { if (field.empty()) init_field(); };  // calls init_field() whenever the shared static `field` map is still empty
00027                 ref(const ref &r);  // copy constructor; declared only, definition not shown in this listing
00028 
00029                 ref& operator=(const ref &r);  // copy assignment; declared only
00030 
00033                 friend ostream& operator<<(ostream &os, const ref &r);  // stream-insertion operator; declared friend, so it can access private members
00038                 bool next(string &ln, istream &in);  // NOTE(review): presumably loads the next reference from `in`, with `ln` as the current-line buffer -- confirm
00039 
00045                 //string makeID() const; // create a unique ID for this object
00046 
00049                 void writeRaw(ostream &os) const;  // const member taking the output stream; NOTE(review): presumably emits the unprocessed `record` -- confirm
00050 
00054                 const string* get(const string &code) const;  // returns a pointer to a const string for `code`; NOTE(review): presumably null when `code` is absent -- confirm
00055 
00056                 string getKey() const { return key; }  // returns a copy of `key`
00057                 bool bad() { return key.empty(); }  // true when `key` is empty; NOTE(review): only reads `key`, so it could be declared const
00058 
00066                 vector<string> process_volpp(string &s);  // `s` is taken by non-const reference, so the caller's string may be modified
00067 
00072                 string process_year(const string &s);
00073                 string firstAuthor() const;
00074 
00078                 static void init_field();  // invoked by the default constructor when `field` is empty
00079                 //static int getPaperKey();
00080                 static void dumpkeymap(const string &file);  // NOTE(review): presumably writes the static `paperkey` map to `file` -- confirm
00081                 static void readkeymap(const string &file);  // NOTE(review): presumably the inverse of dumpkeymap -- confirm
00082         private:
00090                 void process();  
00091 
00095                 void constructKey();  // NOTE(review): presumably fills in `key` -- confirm
00096 
00101                 map<string, string> record;  // NOTE(review): presumably the fields as read from input, keyed by line code -- confirm
00102                 
00103                 /* Derived information 
00104                  * digest includes:
00105                         medline, pubmed, journal type,
00106                         Digested information
00107                 */
00108                 map<string,string> digest; // stores processed information
00109 
00110                 vector<string> editor;  // all editors
00111                 string key;  // value returned by getKey(); an empty key makes bad() return true
00112 
00116                 static map<string,string> field;  // shared by all ref objects; populated through init_field()
00117 
00118                 static string field_str[14];  // string form of the entries, used to compose
00119                 // the map version (`field`) at start up.
00120 
00122                 static map<string, int> paperkey;  // shared by all ref objects; NOTE(review): presumably the map handled by dumpkeymap/readkeymap -- confirm
00123 };
00124 
00125 class prt  // NOTE(review): presumably reads protein database entries one at a time from a flat file (see `input`, `swpid`, `seq`) -- confirm
00126 {
00127         public:
00131                 prt(const string &infile);  // NOTE(review): presumably opens `infile` on `input`; single-argument constructor is not explicit -- confirm intent
00132 
00133                 bool next();  // read the next object
00134                 void clear(); // start a new object
00135                 void processID(); // helper function to process the first line
00136                 vector<string> getAC();
00137                 string getSWID() { return swpid; }  // returns a copy of `swpid`; NOTE(review): only reads a member, so it could be declared const
00138                 void writeAce(ostream &os1, ostream &os2);  // takes two output streams; what goes to each is not visible in this header
00139                 ostream& writeSeq(ostream &os, const int len=75);  // `len` defaults to 75; NOTE(review): presumably the output line width for `seq` -- confirm
00140                 void writeMappedTag(ostream &os);
00141 
00145                 static const int content_idx = 5;     // NOTE(review): presumably the column where line content starts -- confirm
00146                 static const int ftdescript_idx = 34;  // NOTE(review): presumably the column where a feature description starts -- confirm
00147 
00148                 static const int cc_dash;     // 74 dashes start end of trash; no in-class initializer, so the value comes from the out-of-class definition
00149 
00150                 // in pairs index {at,1_afterend}
00151                 // feature_key, from, to, description (optional note)
00152                 //static const int ftidx[8];
00153                 static map<string, string> field;
00154                 static string record_separator;
00155 
00158                 static map<string, string> swtag2ace;
00159                 static string swtag2ace_str[];  // variable sized: the array bound is omitted here and fixed by the definition
00160 
00161                 static void init();  // initialize class-wise parameters
00162 
00163         private:
00166                 void read_upto(const string &code);  // helper function to read into the record map
00169                 void tossccJunk();
00170                 //vector<string> splitFT(); // work on next_line
00171 
00172                 ifstream input;           // stay the same
00173 
00176                 map<string,string> record;  // filled by read_upto()
00177                 string next_line;  // internal buffer
00178                 string dates[3];  // overwrite
00179 
00182                 string seq;
00183                 vector<ref> papers;  // all the papers stored under this field
00184                 vector< vector<string> > dbxref;  // database cross reference
00185                 vector< vector<string> > feature;  // NOTE(review): original comment duplicated the dbxref one ("database cross reference"); presumably feature entries -- confirm
00186 
00187                 // processed information, all initialized to default
00188                 string swpid;  // should use this one as the id in ACE; returned by getSWID()
00189                 char type;   // p for protein, or d for DNA
00190                 char swprt_status; // s for standard, p for un_curated
00191                 int length;  // length in aa
00192 
00195                 static set<string> orgset;  // should it be
00196                 static set<string> papset;  // all papers processed
00197 };
00198 
00199 #endif

Generated on Wed Aug 10 11:56:58 2011 for Softwares from Orpara by  doxygen 1.5.6