001    /*
002     * DropDuplicateRegX.java
003     *
004     * Created on August 9, 2006, 6:13 PM
005     *
006     * This file is part of the STAR Scheduler.
007     * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
008     *
009     * STAR Scheduler is free software; you can redistribute it and/or modify
010     * it under the terms of the GNU General Public License as published by
011     * the Free Software Foundation; either version 2 of the License, or
012     * (at your option) any later version.
013     *
014     * STAR Scheduler is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017     * GNU General Public License for more details.
018     *
019     * You should have received a copy of the GNU General Public License
020     * along with STAR Scheduler; if not, write to the Free Software
021     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022     */
023    
024    package gov.bnl.star.offline.scheduler.dataset.datasetManipulators;
025    
026    import gov.bnl.star.offline.scheduler.dataset.Dataset;
027    import gov.bnl.star.offline.scheduler.request.Request;
028    
029    /** A dataset manipulator used to drop entries with matching regular 
030     *  expression capture groups. This class is a composite between SortByRegX 
031     *  and DropDuplicatesForSortedList.
032     * 
033     *  @author lbhajdu
034     */
035    public class DropDuplicateRegX implements DatasetManipulator  {
036        
037        
038        private SortByRegX sortDatasetBy;
039        private DropDuplicatesFromSortedList dropDuplicatesFromSortedList;
040        
041        /** Creates a new instance of DropDuplicateRegX 
042         *  @param regX A regular expression capture group. If mroe then one entry matches the capture group, only one one copy will be kept, all other will be dropted.
043         **/
044        public DropDuplicateRegX(String regX) {
045            sortDatasetBy = new SortByRegX(regX);
046            dropDuplicatesFromSortedList = new DropDuplicatesFromSortedList(regX);
047        }
048        
049        /** Used to pass the dataset to the dataset manipulator
050          *
051          * Note: This DatasetManipulator -will- reorder the order of entries
052          *
053          * @param dataset The dataset to be modifyed 
054          * @param request The request object of the current request for with will use the dataset 
055         **/
056        public void modify(Dataset dataset, Request request) {
057            sortDatasetBy.modify(dataset, request);
058            dropDuplicatesFromSortedList.modify(dataset, request); 
059        }
060    
061        /** When more then one file is found matching a regular expression capture 
062         * group, normally the first one is kept. This behavior can be modified so 
063         * as to rank the copies and keep the best one.
064         *
065         *  Example: anything starting with .big. gets a higher ranking then anything starting with .small.:
066         *
067         *  addPreferredCopyRegx(.A.,.^big.*$.);
068         *  addPreferredCopyRegx(.B.,.^small.*$.);
069         *
070         *  Note that any entry not starting with .big. or .small. will get an even 
071         *    lower rating and is therefore lest likely to be picked.
072         *
073         *  @param preference A string that will be alphabetically sorted to determine the preference. .A. has a higher rank then .Z..
074         *         
075         *  @param Regx A regular expression the string must match in order to be assigned this ranking.
076         **/
077        public void  addPreferredCopyRegx(String preference, String RegX){
078            dropDuplicatesFromSortedList.addPreferredCopyRegx(preference, RegX);
079        }
080        
081        
082        public boolean requirementsSatisfied() {
083            return true;
084        }
085        
086    }