001    /*
002     * DropDuplicatesFromSortedList.java
003     *
004     * Created on July 8, 2006, 12:45 AM
005     *
006     * This file is part of the STAR Scheduler.
007     * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
008     *
009     * STAR Scheduler is free software; you can redistribute it and/or modify
010     * it under the terms of the GNU General Public License as published by
011     * the Free Software Foundation; either version 2 of the License, or
012     * (at your option) any later version.
013     *
014     * STAR Scheduler is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017     * GNU General Public License for more details.
018     *
019     * You should have received a copy of the GNU General Public License
020     * along with STAR Scheduler; if not, write to the Free Software
021     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022     */
023    
024    package gov.bnl.star.offline.scheduler.dataset.datasetManipulators;
025    
026    
027    import gov.bnl.star.offline.scheduler.dataset.Dataset;
028    import gov.bnl.star.offline.scheduler.request.Request;
029    
030    import java.io.BufferedReader;
031    import java.io.FileNotFoundException;
032    import java.io.FileReader;
033    import java.io.IOException;
034    import java.util.ArrayList;
035    import java.util.Collections;
036    import java.util.Enumeration;
037    import java.util.Hashtable;
038    import java.util.List;
039    import java.util.Vector;
040    
041    /**
042     *  A dataset manipulator used to drop entries with matching regular 
043     *  expression capture groups from a list sorted be that capture groups.
044     *  Note that it will not work if the list is not sorted by the capture 
045     *  group.
046     *
047     * @author lbhajdu
048     */
049    public class DropDuplicatesFromSortedList implements DatasetManipulator{
050        
051        String regXCaptureGroup;
052        
053        /**  Creates a new instance of DropDuplicatesFromSortedList 
054         *  Note any Entry that has a blank value for the capure group will not be dropped.
055         *
056         *  @param regXCaptureGroup A regular expression capture group. If mroe then one entry matches the capture group, only one one copy will be kept, all other will be dropted.  
057         */
058        public DropDuplicatesFromSortedList(String regXCaptureGroup) {this.regXCaptureGroup = regXCaptureGroup;}
059        
060        /** Used to pass the dataset to the dataset manipulator
061          * @param dataset The dataset to be modifyed 
062          * @param request The request object of the current request for with will use the dataset 
063         **/
064        public void modify(Dataset dataset, Request request){
065            
066            //System.out.println("Dropping duplicates of: " + regXCaptureGroup);
067              //super.modify(dataset, request);
068              
069            try {
070    
071                String datsetEntry = null;
072                List duplicateEntires  = new ArrayList();
073                BufferedReader currentDataset = new BufferedReader( new FileReader(dataset.getDatasetName() ));
074           
075    
076                while ((datsetEntry = currentDataset.readLine()) != null) { 
077                    
078                    if(duplicateEntires.size() > 0){
079                        
080                        if( (! datsetEntry.replaceAll(regXCaptureGroup,"$1").matches(""))  && datsetEntry.replaceAll(regXCaptureGroup,"$1").equals(((String) duplicateEntires.get(0)).replaceAll(regXCaptureGroup,"$1"))){
081                           duplicateEntires.add(datsetEntry); 
082                        }
083                        else{
084                           dataset.writeToBuffer(pickEntry(duplicateEntires));
085                           duplicateEntires.clear();
086                           duplicateEntires.add(datsetEntry); 
087                        }
088                    }
089                    else duplicateEntires.add(datsetEntry);
090                }
091                
092                dataset.writeToBuffer(pickEntry(duplicateEntires));
093                 
094            } catch (FileNotFoundException ex) {
095                ex.printStackTrace();
096            } catch (IOException ex) {
097                ex.printStackTrace();
098            }
099              
100            dataset.swap_buffer_dataset_with_dataset();  
101              
102        }
103        
104        
105        private String pickEntry(List duplicateEntires){
106            
107            
108          if( (duplicateEntires.size() == 1) || (keys.size() == 0) ) return (String) duplicateEntires.get(0); //if the list only has one item just return that
109    
110          for(Enumeration e = keys.elements(); e.hasMoreElements();){ //Write the data back out
111            
112                String  regx = (String) preferredCopyRegxTable.get(e.nextElement());
113    
114                for(int i = 0; i != duplicateEntires.size(); i++){
115                    String entry  =  (String) duplicateEntires.get(i);
116                    //System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>> looking for >>>>>" + regx); //used for debugging
117    
118                    if(entry.matches(regx)){
119                        //System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>> found >>>>>" + entry); //used for debugging
120                        return entry;
121                    }
122                }
123          }
124           
125    
126          return (String) duplicateEntires.get(0);  
127        }
128        
129        
130        
131        private Hashtable preferredCopyRegxTable  = new Hashtable();
132        private Vector keys = new Vector();
133        
134        
135        /**
136         * This function is used to select one entry when two or more entrys are found matching the regx
137         * Example of useage:
138         *
139         *  //pic NFS files farst the local files then HPSS files
140         *
141         * addPreferredCopyRegx("B", ".*::local::.*::.*::.*::.*::.*");
142         * addPreferredCopyRegx("A", ".*::NFS::.*::.*::.*::.*::.*");
143         * addPreferredCopyRegx("C", ".*::HPSS::.*::.*::.*::.*::.*");
144         *
145         *  @param pref A string that will be alphabetically sorted to determine the preference. .A. has a higher rank then .Z..
146         *
147         *  @param regx A regular expression the string must match in order to be assigned this ranking.
148         *
149         */
150        public void addPreferredCopyRegx(String pref, String  regx){
151            preferredCopyRegxTable.put(pref, regx);
152            this.keys = new Vector(preferredCopyRegxTable.keySet());
153            Collections.sort(keys); //sort the table keys    
154        }
155    
156        public boolean requirementsSatisfied() {
157            return true;
158        }
159        
160        
161        
162        
163        
164    }