StRoot  1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
StRandomSelector.h
1 // $Id: StRandomSelector.h,v 1.1 2009/09/03 11:57:11 rfatemi Exp $
2 //
3 // $Log: StRandomSelector.h,v $
4 // Revision 1.1 2009/09/03 11:57:11 rfatemi
5 // This class allows the user to randomly remove members of any group
6 //
7 
8 /* StRandomSelector.h
9  *
10  * Written by Wayne Witzke for the University of Kentucky Department of
11  * Physics and Astronomy.
12  *
13  * This random selector will allow a developer to randomly pick TObjects from
14  * a TObject. It is very iterator-like, but this class is not an iterator,
15  * since there is just slightly too much work for this class to do to make it
16  * really behave like a proper iterator.
17  *
18  * Originally this was going to be a template, taking container and element
19  * types, but this proved to be basically impossible to implement sanely, both
20  * because the ROOT library fails to implement an STL-like interface, and
21  * because ROOT/CINT doesn't know how to handle templates properly (i.e., I
22  * shouldn't have to enumerate the vast number of possible template parameters
23  * and combinations thereof in order to make the template work. . . That's
24  * just a stupid requirement and defeats the whole purpose of developing
25  * generics/templates in the first place, and if ROOT/CINT *is* capable of
26  * doing this, then why isn't the documentation for making it work in a palce
27  * that is easily accessible?)
28  *
29  * Now, instead, it is going to follow the non-template-based ROOT library
30  * conventions, which is an inferior way to implement this class.
31  */
32 #ifndef STRANDOMSELECTOR_H
33 #define STRANDOMSELECTOR_H
34 
35 #include <TRandom3.h>
36 #include <TCollection.h>
37 #include <TIterator.h>
38 #include <TObject.h>
39 #include <Rtypes.h>
40 
41 // This class can be used to select TObjects randomly (but sequentially) from
42 // a given TCollection. That is, random TObjects in the collection will be
43 // selected based on the probability specified for the object. If an TObject
44 // is not randomly chosen for selection, it will be skipped when using the
45 // random selector to iterate over the TCollection.
46 //
47 // WARNING: This class has not been fully tested for very, very large
48 // collections, on the order of 100,000 or more elements, with absolute
49 // thresholding enabled. While absolute thresholding should still *almost*
50 // work for very large collections, and it might even work perfectly, there is
51 // a chance that the returned and skipped sets of elements will not actually
52 // confirm exactly to the absolute threshold and be only almost absolute. I
53 // think that they may be off by at most plus or minus 1.
55 {
56  protected:
57  // Protected member variables
58 
59  // This is the random number generator that we will use. This should
60  // use the Mersenne Twister algorithm to generate random numbers.
61  TRandom3 mRand;
62 
63  // This is the total number of TObjects that have been returned
64  // through selection.
65  Int_t mTotalElemReturned;
66 
67  // This is the number of TObjects that have been skipped through
68  // selection.
69  Int_t mTotalElemSkipped;
70 
71  // This is the probability of selecting an TObject when selection
72  // occurs. This is a ratio, so values should be in the range [0,1].
73  Double_t mProbability;
74 
75  // This determines if mProbability represents an absolute threshold
76  // for the number of TObjects selected. If this is set, then the
77  // mProbabiility not only determines the chance of selecting a given
78  // TObject, but also determines the absolute total number of TObjects
79  // that can possibly be returned or skipped.
80  bool mAbsoluteThreshold;
81 
82  // This is the collection of TObjects from which selections should be
83  // made.
84  const TCollection * mContainer;
85 
86  // This is an iterator to the current TObject.
87  TIterator * mIterator;
88 
89  // Protected methods
90 
91  // This method will perform basic initialization of an object.
92  // As a quick reference, newContainer is a new TCollection, newProb is
93  // the probability that should be set for this object, newAT is
94  // whether or not the absolute threshold should be true or false for
95  // this object, and seed is the seed that should be used for this
96  // object.
97  void initialize(
98  const TCollection * newContainer,
99  Double_t newProb,
100  bool newAt,
101  UInt_t newSeed
102  )
103  {
104  mRand.SetSeed( newSeed );
105  mProbability = newProb;
106  mAbsoluteThreshold = newAt;
107 
108  mContainer = newContainer;
109  mIterator = 0;
110  Rewind();
111  }
112 
113  public:
114  // Public constructors
115 
116  // This is a basic initializer that does not take an initial
117  // TCollection.
118  // newProb is the probability of selecting an TObject from the
119  // TCollection. This should be ratio in the range [0,1].
120  // newAt is a flag that specifies whether or not the specified
121  // selection probability is an absolute threshold or not. If it
122  // is an absolute threshold, then the probability represents the
123  // absolute ratio of selected to skipped TObjects once all
124  // TObjects have been exhausted. If the selection probability is
125  // not an absolute threshold, then it is theoretically possible
126  // for no Elements to be selected, or for all TObjects to be
127  // selected, regardless of the probability.
128  // seed is the seed that should be used when seeding the random number
129  // generator. If the seed is zero a random seed will be
130  // generated. NOTE: If random seeds are generated too quickly in
131  // succession, they will be identical because of the way that
132  // TRandom3 is generating random seeds (based, apparently, on the
133  // current unix time, to the second).
135  Double_t newProb = 1.0,
136  bool newAT = false,
137  UInt_t seed = 0
138  )
139  {
140  initialize( NULL, newProb, newAT, seed );
141  }
142 
143  // This is a basic initializer allowing the initial specification of
144  // a TCollection.
145  // newElement is a TCollection pointer that contains a set of TObjects
146  // from which this class should select (when asked).
147  // newProb is the probability of selecting an TObject from the
148  // TCollection. This should be ratio in the range [0,1].
149  // newAt is a flag that specifies whether or not the specified
150  // selection probability is an absolute threshold or not. If it
151  // is an absolute threshold, then the probability represents the
152  // absolute ratio of selected to skipped TObjects once all
153  // TObjects have been exhausted. If the selection probability is
154  // not an absolute threshold, then it is theoretically possible
155  // for no Elements to be selected, or for all TObjects to be
156  // selected, regardless of the probability.
157  // seed is the seed that should be used when seeding the random number
158  // generator. If the seed is zero a random seed will be
159  // generated. NOTE: If random seeds are generated too quickly in
160  // succession, they will be identical because of the way that
161  // TRandom3 is generating random seeds (based, apparently, on the
162  // current unix time, to the second).
164  TCollection * newContainer,
165  Double_t newProb = 1.0,
166  bool newAT = false,
167  UInt_t seed = 0
168  )
169  {
170  initialize( newContainer, newProb, newAT, seed );
171  }
172 
173  // We can no longer use the default copy constructor because of the
174  // mIterator pointer. This method is called automatically whenever
175  // a copy is made of an StRandomSelector.
176  StRandomSelector( const StRandomSelector & toCopy )
177  : mRand(toCopy.mRand),
178  mTotalElemReturned(toCopy.mTotalElemReturned),
179  mTotalElemSkipped(toCopy.mTotalElemSkipped),
180  mProbability(toCopy.mProbability),
181  mAbsoluteThreshold(toCopy.mAbsoluteThreshold),
182  mContainer(toCopy.mContainer),
183  mIterator( 0 )
184  {
185  if ( mContainer )
186  {
187  mIterator = mContainer->MakeIterator();
188  // This next line is a good example of why you don't really
189  // want to have pointers to iterators.
190  (*mIterator) = (*toCopy.mIterator);
191  }
192  }
193 
194  // Public methods
195 
196  // This will rewind the selection process so that selection can start
197  // anew at the beginning of the TCollection. This will NOT reseed the
198  // object, however. To reseed, SetSeed must be called.
199  void Rewind()
200  {
201  mTotalElemReturned = 0;
202  mTotalElemSkipped = 0;
203 
204  if ( mIterator )
205  delete mIterator;
206 
207  if ( mContainer )
208  {
209  mIterator = mContainer->MakeIterator();
210  }
211  else
212  {
213  mIterator = NULL;
214  }
215  }
216 
217  // This method will allow a user to manually skip TObjects. This
218  // method will correctly maintain all internal state variables (so a
219  // call to GetNumberSkipped() will have incremented after a call to
220  // Skip()).
221  void Skip( Int_t numberToSkip = 1 )
222  {
223  if ( !mContainer )
224  return;
225 
226  TObject * checkme;
227  for ( int ii = 0; ii < numberToSkip; ++ii )
228  {
229  checkme = mIterator->Next();
230  if ( !checkme )
231  {
232  break;
233  }
234  ++mTotalElemSkipped;
235  }
236  }
237 
238  // This next method will get the next TObject from the TCollection and
239  // return it. This method does NOT select randomly and will never
240  // skip TObjects. All internal state variables are updated properly
241  // when this method is called (so that a call to GetNumberReturned()
242  // will have incremented after a call to GetNext()).
243  //
244  // This will return NULL if there are no TObjects left to return.
245  TObject * GetNext()
246  {
247  if ( !mContainer )
248  return NULL;
249 
250  TObject * retVal = mIterator->Next();
251  if ( retVal )
252  {
253  ++mTotalElemReturned;
254  }
255 
256  return retVal;
257  }
258 
259  // This will randomly select the next TObject to return.
260  //
261  // This will return NULL if there are no TObjects left to return.
262  TObject * GetNextRandom();
263 
264  // Accessor methods
265 
266  // These two methods allow the user to get and set the seed for the
267  // random selection. SetSeed is called automatically when an object
268  // is instantiated, but can be set again using these calls (for
269  // instance, if a specific seed is desired so that a sequence can be
270  // reproduced). This will not automatically rewind the random
271  // selection mechanism, so that the new seed will take place only on
272  // newly selected TObjects. NOTE: If random seeds are generated too
273  // quickly in succession, they will be identical because of the way
274  // that TRandom3 is generating random seeds (based, apparently, on the
275  // current unix time, to the second).
276  void SetSeed( UInt_t seed = 0 )
277  {
278  mRand.SetSeed( seed );
279  }
280  const UInt_t GetSeed()
281  {
282  return mRand.GetSeed();
283  }
284 
285  // These two methods allow the user to change the probability of
286  // selecting a TObject from the TCollection. The probability should
287  // be passed as a ratio in the range [0,1]. Values greater than 1
288  // will have the same effect as 1, while values less than 0 will have
289  // the same effect as 0. It is important to note that the number
290  // passed here is the probability of SELECTING an TObject. So, if you
291  // want to select 3/4ths of the TObjects, then call SetProbability
292  // with the number 0.75 This change will only effect selections that
293  // occur after the probability change has taken place.
294  void SetProbability( Double_t newProb )
295  {
296  mProbability = newProb;
297  }
298  const Double_t GetProbability()
299  {
300  return mProbability;
301  }
302 
303  // These methods allow the user to select whether or not mProbability
304  // represents an absolute threshold. That is, if mProbability
305  // represents an absolute threshold, then selection will fail to
306  // return additional TObjects once that threshold of returned-to-total
307  // TObjects has been reached. If mProbability does *not* represent an
308  // absolute threshold, then even if the returned-to-total ratio
309  // exceeds the mProbability value, the selector will still randomly
310  // determine on each remaining TObject whether or not that TObject is
311  // returned. In the first case, there is no chance that the ratio of
312  // returned-to-total will exceed mProbability. In the second case, it
313  // is conceivable that no TObjects will be withheld. In addition, if
314  // the absolute threshold is set to true, then the number of TObjects
315  // withheld will not exceed (by much)
316  // GetTotalNumber()*(100-mProbability), and if the number of TObjects
317  // withheld exceeds this value, then every remaining TObject will be
318  // returned.
319  void SetAbsoluteThreshold( bool newAT )
320  {
321  mAbsoluteThreshold = newAT;
322  }
323  const bool GetAbsoluteThreshold()
324  {
325  return mAbsoluteThreshold;
326  }
327 
328  // This method will return the number of TObjects in the TCollection.
329  const Int_t GetTotalNumber()
330  {
331  if ( mContainer )
332  {
333  return mContainer->GetEntries();
334  }
335  else
336  {
337  return 0;
338  }
339  }
340 
341  // This method will return the total number of TObjects that have been
342  // returned so far.
343  const Int_t GetNumberReturned()
344  {
345  return mTotalElemReturned;
346  }
347 
348  // This method will return the total number of TObjects that have been
349  // skipped so far.
350  const Int_t GetNumberSkipped()
351  {
352  return mTotalElemSkipped;
353  }
354 
355  // These next methods will allow the user to get and set the
356  // Container. Setting the TCollection will cause all selection
357  // information (current position in collection, total return/skipped)
358  // to be reset. This will NOT cause the probability, absolute
359  // threshold flag, or random seed to change.
360  void SetContainer( TCollection * newCont )
361  {
362  mContainer = newCont;
363  Rewind();
364  }
365  const TCollection * GetContainer()
366  {
367  return mContainer;
368  }
369 
370  // We have to clean up our pointer mIterator (yet another reason why
371  // the STL is superior, sure it's quicker to pass around pointers,
372  // buter iterator creation is unusual and so has very little overhead,
373  // and iterators can be passed around either as references or as const
374  // references).
375  virtual ~StRandomSelector()
376  {
377  if ( mIterator )
378  {
379  delete mIterator;
380  }
381  }
382 
383  // This is required to tie this header to the class implementation defined
384  // in the .cxx file, so that the library can be successfully loaded into a
385  // root macro.
386  ClassDef(StRandomSelector,0)
387 };
388 
389 #endif // STARRANDOMSELECTOR_H