00001
00002
00003 #ifndef DISET_H
00004 #define DISET_H
00005
00006
00007 #include <string>
00008 #include <map>
00009
00010
00011 #include <utility>
00012 #include <fstream>
00013 #include <sstream>
00014
00015
00016 #include <set>
00017 #include <vector>
00018 #include <iostream>
00019 #include <cstdlib>
00020
00021
00022
00031
00032
00033
00034
00035 using namespace std;
00036
00037
00038 template<class T> struct node
00039 {
00040 node() : rank(0), key() { parent=this; }
00041 node(const T &k) : rank(0), key(k) { parent=this; }
00042 node(const T &k, node* p) : parent(p), rank(0), key(k) { }
00043
00044 node(const node &n) { key = n.key; parent = this; rank = n.rank; }
00045
00046 ~node() { }
00047 bool operator==(const node& n) const { return key==n.key; }
00048 bool operator<(const node &n) const { return key < n.key; }
00049
00050 node& operator=(const node &n) { if (&n != this) { key = n.key; parent = this; rank = n.rank; } return *this; }
00051
00052 T key;
00053 node<T>* parent;
00054 int rank;
00055 };
00056
00057
00059
00060 template<class T> node<T>* getRoot(node<T> *x) {
00061 if (x != x->parent) x->parent = getRoot(x->parent);
00062 return x->parent;
00063 }
00064 template<class T> void mergeRoot(node<T> *&r1, node<T> *&r2) {
00065 if(r1 == r2) return;
00066 if (r1->rank > r2->rank) r2->parent = r1;
00067 else {
00068 r1->parent = r2;
00069 if (r1->rank == r2->rank) r2->rank++;
00070 }
00071 }
00072 template<class T> void join(node<T> &x, node<T> &y) {
00073 node<T> *r1 = getRoot(&x);
00074 node<T> *r2 = getRoot(&y);
00075 mergeRoot(r1, r2);
00076
00077 }
00078 template<class T> void join(node<T>* x, node<T>* y) {
00079 node<T> *r1 = getRoot(x);
00080 node<T> *r2 = getRoot(y);
00081 mergeRoot(r1, r2);
00082
00083 }
00084
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00107
00115 template<class T> class hatrees
00116 {
00117 public:
00118 hatrees() { }
00122 hatrees(const std::multimap<T,T> &mm) { readFromMap(mm); }
00123 ~hatrees();
00124 void clear() { nodes.clear(); result.clear(); }
00126 void readFromMap(const std::multimap<T, T> &m);
00127 void readFromFile(const string &file);
00128
00129
00130
00131
00132
00133
00134
00136
00137
00138
00140 void showStore(ostream &os) const;
00141
00143 int getNodeCount() const { return nodes.size(); }
00146 void showCluster(ostream &os, bool reverse= true);
00151 void showClusterIntId(ostream &ous, int &id);
00152
00154 int showClusterByLine(ostream &os);
00158 set<T> keyset() const;
00159 vector<T> keyarray() const;
00160
00166 void clusterArray(vector<set<T> > &vecset);
00167
00173 std::multimap<T,T>& getCluster() { if (result.empty()) transform(); return result; }
00174
00175 #ifdef USE_HASH_MAP
00176
00177
00178 typedef typename hash_map<T, node<T>* >::iterator niterator;
00179 typedef typename hash_map<T, node<T>* >::const_iterator const_niterator;
00180 #else
00181 typedef typename map<T, node<T>* >::iterator niterator;
00182 typedef typename map<T, node<T>* >::const_iterator const_niterator;
00183
00184
00185 #endif
00186
00187 private:
00188 #ifdef USE_HASH_MAP
00189 hash_map<T, node<T>* > nodes;
00190 #else
00191 map<T, node<T>* > nodes;
00192 #endif
00193
00196
00197 std::multimap<T, T> result;
00198
00199
00200
00201 void transform();
00202 };
00203
00205 template<class T> hatrees<T>::~hatrees() {
00206 niterator MI = nodes.begin();
00207 while (MI != nodes.end()) {
00208 delete MI->second;
00209 MI++;
00210 }
00211 }
00212
00213 template<class T> void hatrees<T>::readFromMap(const std::multimap<T, T> &m) {
00214 pair<niterator, bool> p1, p2;
00215 T f1, f2;
00216 typename std::multimap<T, T>::const_iterator imi = m.begin();
00217
00218 while (imi != m.end()) {
00219 f1 = imi->first;
00220 f2 = imi->second;
00221 p1 = nodes.insert(pair<T, node<T>* >(f1, new node<T>(f1)));
00222 p2 = nodes.insert(pair<T, node<T>* >(f2, new node<T>(f2)));
00223 join(p1.first->second, p2.first->second);
00224 imi++;
00225 }
00226 }
00227
00228 template<class T> void hatrees<T>::readFromFile(const string &file) {
00229 ifstream IN(file.c_str());
00230 if (IN.fail()) {
00231 cerr << "reading from " << file << "failed\n";
00232 exit(1);
00233 }
00234 string ln;
00235 pair<niterator, bool> p1, p2;
00236 T f1, f2;
00237
00238
00239
00240 int id=0;
00241
00242 getline(IN, ln);
00243 while (!IN.eof()) {
00244 istringstream ist(ln);
00245 ist >> f1 >> f2;
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256 p1 = nodes.insert(pair<T, node<T>* >(f1, new node<T>(f1)));
00257 p2 = nodes.insert(pair<T, node<T>* >(f2, new node<T>(f2)));
00258 join(p1.first->second, p2.first->second);
00259 getline(IN, ln);
00260 }
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283 }
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352 template<class T> void hatrees<T>::showStore(ostream &os) const {
00353 const_niterator it = nodes.begin();
00354 node<T>* root;
00355 os << "key\tcluster_representative\n";
00356 while (it != nodes.end()) {
00357 root = getRoot(it->second->parent);
00358 os << it->first << "\t" << root->key << endl;
00359 it++;
00360 }
00361 os << "Total number of unique keys is " << nodes.size() << endl;
00362 cerr << "Total number of unique keys is " << nodes.size() << endl;
00363 }
00364
00365
00366 template<class T> set<T> hatrees<T>::keyset() const {
00367 set<T> tmpset;
00368 const_niterator it = nodes.begin();
00369 while (it != nodes.end()) {
00370 tmpset.insert(it->first);
00371 it++;
00372 }
00373 return tmpset;
00374 }
00375
00376 template<class T> vector<T> hatrees<T>::keyarray() const {
00377 vector<T> tmp;
00378 const_niterator it = nodes.begin();
00379 while (it != nodes.end()) {
00380
00381 tmp.push_back(it->first);
00382 it++;
00383 }
00384 return tmp;
00385 }
00386
00387
00388 template<class T> void hatrees<T>::clusterArray(vector< set<T> > &vecset) {
00389 if (result.empty()) transform();
00390 vecset.clear();
00391
00392 typename std::multimap<T, T>::const_iterator mi = result.begin();
00393 while (mi != result.end()) {
00394 set<T> tmpset;
00395 tmpset.insert(mi->second);
00396
00397 T cluster_id = mi->first;
00398 mi++;
00399 while (mi != result.end() && mi->first == cluster_id) {
00400
00401 tmpset.insert(mi->second);
00402 mi++;
00403 }
00404 vecset.push_back(tmpset);
00405
00406 }
00407 }
00408
00409
00410
00411 template<class T> void hatrees<T>::showClusterIntId(ostream &ous, int &id) {
00412 if (result.empty()) transform();
00413 typename std::multimap<T, T>::const_iterator mi = result.begin();
00414 while (mi != result.end()) {
00415 set<T> tmpset;
00416 ous << mi->second << '\t' << id << endl;
00417
00418 T cluster_id = mi->first;
00419 mi++;
00420 while (mi != result.end() && mi->first == cluster_id) {
00421
00422 ous << mi->second << '\t' << id << endl;
00423 mi++;
00424 }
00425
00426
00427 ++id;
00428 }
00429 }
00430
00431
00432
00433
00434 template<class T> void hatrees<T>::showCluster(ostream &os, bool reverse) {
00435 if (result.empty()) transform();
00436
00437 typename std::multimap<T, T>::const_iterator mi = result.begin();
00438 if (reverse) {
00439 while (mi != result.end()) {
00440 os << mi->second << '\t' << mi->first << endl;
00441 mi++;
00442 }
00443 }
00444 else {
00445 while (mi != result.end()) {
00446 os << mi->first << '\t' << mi->second << endl;
00447 mi++;
00448 }
00449 }
00450
00451
00452 cerr << "Total members: " << result.size() << endl;
00453 }
00454
00455 template<class T> void hatrees<T>::transform() {
00456 niterator it = nodes.begin();
00457 node<T>* root;
00458 while (it != nodes.end()) {
00459 root = getRoot(it->second->parent);
00460 result.insert(pair<T, T>(root->key, it->first));
00461 it++;
00462 }
00463 }
00464
00467 template<class T> int hatrees<T>::showClusterByLine(ostream &os) {
00468 if (result.empty()) transform();
00469 os << "Output format: one cluster per line\n\n";
00470 int clusterCnt=0;
00471 typename std::multimap<T, T>::const_iterator mi = result.begin();
00472 while (mi != result.end()) {
00473 clusterCnt++;
00474 os << mi->second;
00475 T cluster_id = mi->first;
00476 mi++;
00477 while (mi != result.end() && mi->first == cluster_id) {
00478 os << '\t' << mi->second;
00479 mi++;
00480 }
00481 os << "\n\n";
00482 }
00483 os << "Total " << clusterCnt << " clusters\n";
00484 return clusterCnt;
00485 }
00486
00487 #endif