001    /*
002     * SplitByMinMaxEntries.java
003     *
004     * Created on July 17, 2006, 4:27 PM
005     *
006     * This file is part of the STAR Scheduler.
007     * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
008     *
009     * STAR Scheduler is free software; you can redistribute it and/or modify
010     * it under the terms of the GNU General Public License as published by
011     * the Free Software Foundation; either version 2 of the License, or
012     * (at your option) any later version.
013     *
014     * STAR Scheduler is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017     * GNU General Public License for more details.
018     *
019     * You should have received a copy of the GNU General Public License
020     * along with STAR Scheduler; if not, write to the Free Software
021     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022     */
023    
024    package gov.bnl.star.offline.scheduler.dataset.datasetManipulators;
025    
026    import gov.bnl.star.offline.scheduler.dataset.Dataset;
027    import gov.bnl.star.offline.scheduler.request.Request;
028    import java.io.BufferedReader;
029    import java.io.FileNotFoundException;
030    import java.io.FileReader;
031    import java.io.IOException;
032    import java.util.ArrayList;
033    import java.util.Iterator;
034    import java.util.List;
035    
036    /**
037     * Add markers to the dataset that show where it should be split.
038     * @author Levente B. Hajdu
039     */
040    public class SplitByMinMaxEntries implements DatasetManipulator {
041        
042        int min = 1;
043        int max = Integer.MAX_VALUE;
044        
045        /** Creates a new instance of SplitByMinMaxEntries 
046         *  @param min The minimum size of any one block of entries
047         *  @param max The maximum size of any one block of entries
048         **/
049        public SplitByMinMaxEntries(int min, int max) {
050            if(min > max) throw new RuntimeException("The SplitByMinMax class had an error, min was bigger then max");
051            if(min < 1 ) throw new RuntimeException("The SplitByMinMax class had an error, min is smaller then 1");
052            //Done have to test max
053            this.min = min;
054            this.max = max;
055        }
056    
057        
058        /** Used to pass the dataset to the dataset manipulator
059          * @param dataset The dataset to be modifyed 
060          * @param request The request object of the current request for with will use the dataset 
061         **/    
062        public void modify(Dataset dataset, Request request) {
063            
064            System.out.println("Splitting dataset entries by size (minSize=" + min + " ,maxSize=" + max + " )");
065            String datsetEntry = null;
066            List groupSizeList = new ArrayList();
067            int groupSize = 0;
068            
069            
070            try {
071                
072                
073    // In pass one a table is built up with the size of each entry block.
074                
075                    BufferedReader currentDataset = new BufferedReader( new FileReader(dataset.getDatasetName() ));
076                    while ((datsetEntry = currentDataset.readLine()) != null) {
077                        
078                        if( datsetEntry.matches(dataset.getSplitRegX())){
079                            if(! (groupSizeList.size() == 0 && groupSize == 0)){ //This is here if the farst line is a startting split
080                                groupSizeList.add( new Integer(groupSize) );
081                                groupSize = 0;
082                            }
083                        } 
084                        else groupSize ++;
085                   }
086                   currentDataset.close();
087                    
088                   if(groupSize != 0){ //if the file does not end with a split
089                        groupSizeList.add( new Integer(groupSize) );
090                        groupSize = 0; //just for house keeping
091                   }
092                    
093                    
094    //             debug code to print out able print out table 
095    //               System.out.println("groupSizeList --->" + groupSizeList.size());  
096    //               for(int i = 0; i != groupSizeList.size(); i++ ){
097    //                   System.out.println("index ---> " + i + " size ---> " + ((Integer) groupSizeList.get(i)) );
098    //               }
099                    
100                    
101                    
102    //Pass # two is where the splitting and rewritting of the file is done.
103                    
104                    //System.out.println("debug point split1");
105                    
106                    currentDataset = new BufferedReader( new FileReader(dataset.getDatasetName() ));
107                    
108                    dataset.writeToBuffer(dataset.getSplitString()); //the starting split
109                    
110                    boolean firstLine = true;
111                    
112                    for(int i = 0; i != groupSizeList.size(); i++){
113                        List splits = makesplit( ((Integer) groupSizeList.get(i)).intValue() );
114                        for(int j = 0; j != splits.size(); j++){
115                            int subGroupsSize = ((Integer) splits.get(j)).intValue();
116                            for(int k = 0; k != subGroupsSize; k++){
117                                String line = currentDataset.readLine();
118                                while(line.matches(dataset.getSplitRegX())){  //This loop removes any splits as they will be replaced anyway  
119                                     line = currentDataset.readLine();
120                                }
121                                dataset.writeToBuffer(line);
122       
123                            }  
124                            
125                            if(! (groupSizeList.size() == (i+1) && splits.size() == (j+1))){
126                                dataset.writeToBuffer(dataset.getSplitString());
127                            }
128                           // dataset.writeToBuffer(dataset.getSplitString());
129                            
130                        }
131                        
132                        //datsetEntry = currentDataset.readLine(); //this is the split line that already exist
133                        //This for QA
134                        //if(! datsetEntry.matches(dataset.getSplitRegX()) ) throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted");
135         
136                    }
137                    
138                   
139                    //if the last line of the file is an enddding split, it is not read, it is just replaced
140                    dataset.writeToBuffer(dataset.getSplitString()); //The ending split
141                    
142    
143                    currentDataset.close();
144                    
145                    dataset.swap_buffer_dataset_with_dataset();
146                    
147            } catch (FileNotFoundException ex) {
148                ex.printStackTrace();
149                throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted");
150            } catch (IOException ex) {
151                ex.printStackTrace();
152                throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted");
153            }catch (Exception ex) {
154                ex.printStackTrace();
155                throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted");
156            }
157            
158            
159        }
160        
161        
162        /**Given a list of x files find a split the best meets the min and max given.
163         * The otuput is returnned as a list of integer. For example:
164         * for x = 19  min = 5 and max = 10 the split shoudl be 7-6-6
165         * for x = 19  min = 10 and max = 11 the split should be 10-9
166         *
167         *This function all first try to meet the min as many times a posable then
168         *slip what is left over and can not meet the min betweem these min groups but
169         *not going over the max. If all groups are maxed out and there is still something 
170         *left over it will simply be added to the back of the list.
171         * 
172         */
173        List makesplit(int x){
174            
175            List split = new ArrayList();
176            
177            //make (x/min) grups of 
178            while( split.size() < (x/min) ){
179                split.add(new Integer(min));
180            }
181            
182            boolean hasUnfilledGroups = true; //this is true just to start the loop
183            x = x % min; //x is now the remainder
184            
185            //pass x to the groups untill all groups are full
186            while(hasUnfilledGroups && (x != 0)){
187                
188                hasUnfilledGroups = false;
189                for(int i = 0; (i != split.size()) && (x != 0); i++){
190                    int subgroupSize =  ((Integer) split.get(i)).intValue();
191                    if((subgroupSize + 1) <= max){
192                        hasUnfilledGroups = true; //if there are unfilled gruops in this pass, there may be unfilled gruops still, setting this to true will do another pass
193                        x --;
194                        split.set(i,new Integer(subgroupSize + 1));
195                    }   
196                }
197            }
198            
199            if(x != 0) split.add(new Integer(x));   
200            
201            //for(int i = 0; i != split.size(); i++) System.out.print( "\n" + ((Integer) split.get(i)).toString() + "-");
202            
203    
204            
205            return split;   
206        }
207        
208        
209        
210        boolean requestSatisfied = true;
211        public boolean requirementsSatisfied() {
212            return requestSatisfied;
213        }
214        
215        
216        
217    }