00001 // $Id: StRandomSelector.h,v 1.1 2009/09/03 11:57:11 rfatemi Exp $ 00002 // 00003 // $Log: StRandomSelector.h,v $ 00004 // Revision 1.1 2009/09/03 11:57:11 rfatemi 00005 // This class allows the user to randomly remove members of any group 00006 // 00007 00008 /* StRandomSelector.h 00009 * 00010 * Written by Wayne Witzke for the University of Kentucky Department of 00011 * Physics and Astronomy. 00012 * 00013 * This random selector will allow a developer to randomly pick TObjects from 00014 * a TObject. It is very iterator-like, but this class is not an iterator, 00015 * since there is just slightly too much work for this class to do to make it 00016 * really behave like a proper iterator. 00017 * 00018 * Originally this was going to be a template, taking container and element 00019 * types, but this proved to be basically impossible to implement sanely, both 00020 * because the ROOT library fails to implement an STL-like interface, and 00021 * because ROOT/CINT doesn't know how to handle templates properly (i.e., I 00022 * shouldn't have to enumerate the vast number of possible template parameters 00023 * and combinations thereof in order to make the template work. . . That's 00024 * just a stupid requirement and defeats the whole purpose of developing 00025 * generics/templates in the first place, and if ROOT/CINT *is* capable of 00026 * doing this, then why isn't the documentation for making it work in a palce 00027 * that is easily accessible?) 00028 * 00029 * Now, instead, it is going to follow the non-template-based ROOT library 00030 * conventions, which is an inferior way to implement this class. 00031 */ 00032 #ifndef STRANDOMSELECTOR_H 00033 #define STRANDOMSELECTOR_H 00034 00035 #include <TRandom3.h> 00036 #include <TCollection.h> 00037 #include <TIterator.h> 00038 #include <TObject.h> 00039 #include <Rtypes.h> 00040 00041 // This class can be used to select TObjects randomly (but sequentially) from 00042 // a given TCollection. That is, random TObjects in the collection will be 00043 // selected based on the probability specified for the object. If an TObject 00044 // is not randomly chosen for selection, it will be skipped when using the 00045 // random selector to iterate over the TCollection. 00046 // 00047 // WARNING: This class has not been fully tested for very, very large 00048 // collections, on the order of 100,000 or more elements, with absolute 00049 // thresholding enabled. While absolute thresholding should still *almost* 00050 // work for very large collections, and it might even work perfectly, there is 00051 // a chance that the returned and skipped sets of elements will not actually 00052 // confirm exactly to the absolute threshold and be only almost absolute. I 00053 // think that they may be off by at most plus or minus 1. 00054 class StRandomSelector 00055 { 00056 protected: 00057 // Protected member variables 00058 00059 // This is the random number generator that we will use. This should 00060 // use the Mersenne Twister algorithm to generate random numbers. 00061 TRandom3 mRand; 00062 00063 // This is the total number of TObjects that have been returned 00064 // through selection. 00065 Int_t mTotalElemReturned; 00066 00067 // This is the number of TObjects that have been skipped through 00068 // selection. 00069 Int_t mTotalElemSkipped; 00070 00071 // This is the probability of selecting an TObject when selection 00072 // occurs. This is a ratio, so values should be in the range [0,1]. 00073 Double_t mProbability; 00074 00075 // This determines if mProbability represents an absolute threshold 00076 // for the number of TObjects selected. If this is set, then the 00077 // mProbabiility not only determines the chance of selecting a given 00078 // TObject, but also determines the absolute total number of TObjects 00079 // that can possibly be returned or skipped. 00080 bool mAbsoluteThreshold; 00081 00082 // This is the collection of TObjects from which selections should be 00083 // made. 00084 const TCollection * mContainer; 00085 00086 // This is an iterator to the current TObject. 00087 TIterator * mIterator; 00088 00089 // Protected methods 00090 00091 // This method will perform basic initialization of an object. 00092 // As a quick reference, newContainer is a new TCollection, newProb is 00093 // the probability that should be set for this object, newAT is 00094 // whether or not the absolute threshold should be true or false for 00095 // this object, and seed is the seed that should be used for this 00096 // object. 00097 void initialize( 00098 const TCollection * newContainer, 00099 Double_t newProb, 00100 bool newAt, 00101 UInt_t newSeed 00102 ) 00103 { 00104 mRand.SetSeed( newSeed ); 00105 mProbability = newProb; 00106 mAbsoluteThreshold = newAt; 00107 00108 mContainer = newContainer; 00109 mIterator = 0; 00110 Rewind(); 00111 } 00112 00113 public: 00114 // Public constructors 00115 00116 // This is a basic initializer that does not take an initial 00117 // TCollection. 00118 // newProb is the probability of selecting an TObject from the 00119 // TCollection. This should be ratio in the range [0,1]. 00120 // newAt is a flag that specifies whether or not the specified 00121 // selection probability is an absolute threshold or not. If it 00122 // is an absolute threshold, then the probability represents the 00123 // absolute ratio of selected to skipped TObjects once all 00124 // TObjects have been exhausted. If the selection probability is 00125 // not an absolute threshold, then it is theoretically possible 00126 // for no Elements to be selected, or for all TObjects to be 00127 // selected, regardless of the probability. 00128 // seed is the seed that should be used when seeding the random number 00129 // generator. If the seed is zero a random seed will be 00130 // generated. NOTE: If random seeds are generated too quickly in 00131 // succession, they will be identical because of the way that 00132 // TRandom3 is generating random seeds (based, apparently, on the 00133 // current unix time, to the second). 00134 StRandomSelector( 00135 Double_t newProb = 1.0, 00136 bool newAT = false, 00137 UInt_t seed = 0 00138 ) 00139 { 00140 initialize( NULL, newProb, newAT, seed ); 00141 } 00142 00143 // This is a basic initializer allowing the initial specification of 00144 // a TCollection. 00145 // newElement is a TCollection pointer that contains a set of TObjects 00146 // from which this class should select (when asked). 00147 // newProb is the probability of selecting an TObject from the 00148 // TCollection. This should be ratio in the range [0,1]. 00149 // newAt is a flag that specifies whether or not the specified 00150 // selection probability is an absolute threshold or not. If it 00151 // is an absolute threshold, then the probability represents the 00152 // absolute ratio of selected to skipped TObjects once all 00153 // TObjects have been exhausted. If the selection probability is 00154 // not an absolute threshold, then it is theoretically possible 00155 // for no Elements to be selected, or for all TObjects to be 00156 // selected, regardless of the probability. 00157 // seed is the seed that should be used when seeding the random number 00158 // generator. If the seed is zero a random seed will be 00159 // generated. NOTE: If random seeds are generated too quickly in 00160 // succession, they will be identical because of the way that 00161 // TRandom3 is generating random seeds (based, apparently, on the 00162 // current unix time, to the second). 00163 StRandomSelector( 00164 TCollection * newContainer, 00165 Double_t newProb = 1.0, 00166 bool newAT = false, 00167 UInt_t seed = 0 00168 ) 00169 { 00170 initialize( newContainer, newProb, newAT, seed ); 00171 } 00172 00173 // We can no longer use the default copy constructor because of the 00174 // mIterator pointer. This method is called automatically whenever 00175 // a copy is made of an StRandomSelector. 00176 StRandomSelector( const StRandomSelector & toCopy ) 00177 : mRand(toCopy.mRand), 00178 mTotalElemReturned(toCopy.mTotalElemReturned), 00179 mTotalElemSkipped(toCopy.mTotalElemSkipped), 00180 mProbability(toCopy.mProbability), 00181 mAbsoluteThreshold(toCopy.mAbsoluteThreshold), 00182 mContainer(toCopy.mContainer), 00183 mIterator( 0 ) 00184 { 00185 if ( mContainer ) 00186 { 00187 mIterator = mContainer->MakeIterator(); 00188 // This next line is a good example of why you don't really 00189 // want to have pointers to iterators. 00190 (*mIterator) = (*toCopy.mIterator); 00191 } 00192 } 00193 00194 // Public methods 00195 00196 // This will rewind the selection process so that selection can start 00197 // anew at the beginning of the TCollection. This will NOT reseed the 00198 // object, however. To reseed, SetSeed must be called. 00199 void Rewind() 00200 { 00201 mTotalElemReturned = 0; 00202 mTotalElemSkipped = 0; 00203 00204 if ( mIterator ) 00205 delete mIterator; 00206 00207 if ( mContainer ) 00208 { 00209 mIterator = mContainer->MakeIterator(); 00210 } 00211 else 00212 { 00213 mIterator = NULL; 00214 } 00215 } 00216 00217 // This method will allow a user to manually skip TObjects. This 00218 // method will correctly maintain all internal state variables (so a 00219 // call to GetNumberSkipped() will have incremented after a call to 00220 // Skip()). 00221 void Skip( Int_t numberToSkip = 1 ) 00222 { 00223 if ( !mContainer ) 00224 return; 00225 00226 TObject * checkme; 00227 for ( int ii = 0; ii < numberToSkip; ++ii ) 00228 { 00229 checkme = mIterator->Next(); 00230 if ( !checkme ) 00231 { 00232 break; 00233 } 00234 ++mTotalElemSkipped; 00235 } 00236 } 00237 00238 // This next method will get the next TObject from the TCollection and 00239 // return it. This method does NOT select randomly and will never 00240 // skip TObjects. All internal state variables are updated properly 00241 // when this method is called (so that a call to GetNumberReturned() 00242 // will have incremented after a call to GetNext()). 00243 // 00244 // This will return NULL if there are no TObjects left to return. 00245 TObject * GetNext() 00246 { 00247 if ( !mContainer ) 00248 return NULL; 00249 00250 TObject * retVal = mIterator->Next(); 00251 if ( retVal ) 00252 { 00253 ++mTotalElemReturned; 00254 } 00255 00256 return retVal; 00257 } 00258 00259 // This will randomly select the next TObject to return. 00260 // 00261 // This will return NULL if there are no TObjects left to return. 00262 TObject * GetNextRandom(); 00263 00264 // Accessor methods 00265 00266 // These two methods allow the user to get and set the seed for the 00267 // random selection. SetSeed is called automatically when an object 00268 // is instantiated, but can be set again using these calls (for 00269 // instance, if a specific seed is desired so that a sequence can be 00270 // reproduced). This will not automatically rewind the random 00271 // selection mechanism, so that the new seed will take place only on 00272 // newly selected TObjects. NOTE: If random seeds are generated too 00273 // quickly in succession, they will be identical because of the way 00274 // that TRandom3 is generating random seeds (based, apparently, on the 00275 // current unix time, to the second). 00276 void SetSeed( UInt_t seed = 0 ) 00277 { 00278 mRand.SetSeed( seed ); 00279 } 00280 const UInt_t GetSeed() 00281 { 00282 return mRand.GetSeed(); 00283 } 00284 00285 // These two methods allow the user to change the probability of 00286 // selecting a TObject from the TCollection. The probability should 00287 // be passed as a ratio in the range [0,1]. Values greater than 1 00288 // will have the same effect as 1, while values less than 0 will have 00289 // the same effect as 0. It is important to note that the number 00290 // passed here is the probability of SELECTING an TObject. So, if you 00291 // want to select 3/4ths of the TObjects, then call SetProbability 00292 // with the number 0.75 This change will only effect selections that 00293 // occur after the probability change has taken place. 00294 void SetProbability( Double_t newProb ) 00295 { 00296 mProbability = newProb; 00297 } 00298 const Double_t GetProbability() 00299 { 00300 return mProbability; 00301 } 00302 00303 // These methods allow the user to select whether or not mProbability 00304 // represents an absolute threshold. That is, if mProbability 00305 // represents an absolute threshold, then selection will fail to 00306 // return additional TObjects once that threshold of returned-to-total 00307 // TObjects has been reached. If mProbability does *not* represent an 00308 // absolute threshold, then even if the returned-to-total ratio 00309 // exceeds the mProbability value, the selector will still randomly 00310 // determine on each remaining TObject whether or not that TObject is 00311 // returned. In the first case, there is no chance that the ratio of 00312 // returned-to-total will exceed mProbability. In the second case, it 00313 // is conceivable that no TObjects will be withheld. In addition, if 00314 // the absolute threshold is set to true, then the number of TObjects 00315 // withheld will not exceed (by much) 00316 // GetTotalNumber()*(100-mProbability), and if the number of TObjects 00317 // withheld exceeds this value, then every remaining TObject will be 00318 // returned. 00319 void SetAbsoluteThreshold( bool newAT ) 00320 { 00321 mAbsoluteThreshold = newAT; 00322 } 00323 const bool GetAbsoluteThreshold() 00324 { 00325 return mAbsoluteThreshold; 00326 } 00327 00328 // This method will return the number of TObjects in the TCollection. 00329 const Int_t GetTotalNumber() 00330 { 00331 if ( mContainer ) 00332 { 00333 return mContainer->GetEntries(); 00334 } 00335 else 00336 { 00337 return 0; 00338 } 00339 } 00340 00341 // This method will return the total number of TObjects that have been 00342 // returned so far. 00343 const Int_t GetNumberReturned() 00344 { 00345 return mTotalElemReturned; 00346 } 00347 00348 // This method will return the total number of TObjects that have been 00349 // skipped so far. 00350 const Int_t GetNumberSkipped() 00351 { 00352 return mTotalElemSkipped; 00353 } 00354 00355 // These next methods will allow the user to get and set the 00356 // Container. Setting the TCollection will cause all selection 00357 // information (current position in collection, total return/skipped) 00358 // to be reset. This will NOT cause the probability, absolute 00359 // threshold flag, or random seed to change. 00360 void SetContainer( TCollection * newCont ) 00361 { 00362 mContainer = newCont; 00363 Rewind(); 00364 } 00365 const TCollection * GetContainer() 00366 { 00367 return mContainer; 00368 } 00369 00370 // We have to clean up our pointer mIterator (yet another reason why 00371 // the STL is superior, sure it's quicker to pass around pointers, 00372 // buter iterator creation is unusual and so has very little overhead, 00373 // and iterators can be passed around either as references or as const 00374 // references). 00375 virtual ~StRandomSelector() 00376 { 00377 if ( mIterator ) 00378 { 00379 delete mIterator; 00380 } 00381 } 00382 00383 // This is required to tie this header to the class implementation defined 00384 // in the .cxx file, so that the library can be successfully loaded into a 00385 // root macro. 00386 ClassDef(StRandomSelector,0) 00387 }; 00388 00389 #endif // STARRANDOMSELECTOR_H
1.5.9