001 /* 002 * SplitByMinMaxEntries.java 003 * 004 * Created on July 17, 2006, 4:27 PM 005 * 006 * This file is part of the STAR Scheduler. 007 * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory 008 * 009 * STAR Scheduler is free software; you can redistribute it and/or modify 010 * it under the terms of the GNU General Public License as published by 011 * the Free Software Foundation; either version 2 of the License, or 012 * (at your option) any later version. 013 * 014 * STAR Scheduler is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * along with STAR Scheduler; if not, write to the Free Software 021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 022 */ 023 024 package gov.bnl.star.offline.scheduler.dataset.datasetManipulators; 025 026 import gov.bnl.star.offline.scheduler.dataset.Dataset; 027 import gov.bnl.star.offline.scheduler.request.Request; 028 import java.io.BufferedReader; 029 import java.io.FileNotFoundException; 030 import java.io.FileReader; 031 import java.io.IOException; 032 import java.util.ArrayList; 033 import java.util.Iterator; 034 import java.util.List; 035 036 /** 037 * Add markers to the dataset that show where it should be split. 038 * @author Levente B. Hajdu 039 */ 040 public class SplitByMinMaxEntries implements DatasetManipulator { 041 042 int min = 1; 043 int max = Integer.MAX_VALUE; 044 045 /** Creates a new instance of SplitByMinMaxEntries 046 * @param min The minimum size of any one block of entries 047 * @param max The maximum size of any one block of entries 048 **/ 049 public SplitByMinMaxEntries(int min, int max) { 050 if(min > max) throw new RuntimeException("The SplitByMinMax class had an error, min was bigger then max"); 051 if(min < 1 ) throw new RuntimeException("The SplitByMinMax class had an error, min is smaller then 1"); 052 //Done have to test max 053 this.min = min; 054 this.max = max; 055 } 056 057 058 /** Used to pass the dataset to the dataset manipulator 059 * @param dataset The dataset to be modifyed 060 * @param request The request object of the current request for with will use the dataset 061 **/ 062 public void modify(Dataset dataset, Request request) { 063 064 System.out.println("Splitting dataset entries by size (minSize=" + min + " ,maxSize=" + max + " )"); 065 String datsetEntry = null; 066 List groupSizeList = new ArrayList(); 067 int groupSize = 0; 068 069 070 try { 071 072 073 // In pass one a table is built up with the size of each entry block. 074 075 BufferedReader currentDataset = new BufferedReader( new FileReader(dataset.getDatasetName() )); 076 while ((datsetEntry = currentDataset.readLine()) != null) { 077 078 if( datsetEntry.matches(dataset.getSplitRegX())){ 079 if(! (groupSizeList.size() == 0 && groupSize == 0)){ //This is here if the farst line is a startting split 080 groupSizeList.add( new Integer(groupSize) ); 081 groupSize = 0; 082 } 083 } 084 else groupSize ++; 085 } 086 currentDataset.close(); 087 088 if(groupSize != 0){ //if the file does not end with a split 089 groupSizeList.add( new Integer(groupSize) ); 090 groupSize = 0; //just for house keeping 091 } 092 093 094 // debug code to print out able print out table 095 // System.out.println("groupSizeList --->" + groupSizeList.size()); 096 // for(int i = 0; i != groupSizeList.size(); i++ ){ 097 // System.out.println("index ---> " + i + " size ---> " + ((Integer) groupSizeList.get(i)) ); 098 // } 099 100 101 102 //Pass # two is where the splitting and rewritting of the file is done. 103 104 //System.out.println("debug point split1"); 105 106 currentDataset = new BufferedReader( new FileReader(dataset.getDatasetName() )); 107 108 dataset.writeToBuffer(dataset.getSplitString()); //the starting split 109 110 boolean firstLine = true; 111 112 for(int i = 0; i != groupSizeList.size(); i++){ 113 List splits = makesplit( ((Integer) groupSizeList.get(i)).intValue() ); 114 for(int j = 0; j != splits.size(); j++){ 115 int subGroupsSize = ((Integer) splits.get(j)).intValue(); 116 for(int k = 0; k != subGroupsSize; k++){ 117 String line = currentDataset.readLine(); 118 while(line.matches(dataset.getSplitRegX())){ //This loop removes any splits as they will be replaced anyway 119 line = currentDataset.readLine(); 120 } 121 dataset.writeToBuffer(line); 122 123 } 124 125 if(! (groupSizeList.size() == (i+1) && splits.size() == (j+1))){ 126 dataset.writeToBuffer(dataset.getSplitString()); 127 } 128 // dataset.writeToBuffer(dataset.getSplitString()); 129 130 } 131 132 //datsetEntry = currentDataset.readLine(); //this is the split line that already exist 133 //This for QA 134 //if(! datsetEntry.matches(dataset.getSplitRegX()) ) throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted"); 135 136 } 137 138 139 //if the last line of the file is an enddding split, it is not read, it is just replaced 140 dataset.writeToBuffer(dataset.getSplitString()); //The ending split 141 142 143 currentDataset.close(); 144 145 dataset.swap_buffer_dataset_with_dataset(); 146 147 } catch (FileNotFoundException ex) { 148 ex.printStackTrace(); 149 throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted"); 150 } catch (IOException ex) { 151 ex.printStackTrace(); 152 throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted"); 153 }catch (Exception ex) { 154 ex.printStackTrace(); 155 throw new RuntimeException("There was an error, SplitByMinMax lost entry in dataset, no jobs will be submitted"); 156 } 157 158 159 } 160 161 162 /**Given a list of x files find a split the best meets the min and max given. 163 * The otuput is returnned as a list of integer. For example: 164 * for x = 19 min = 5 and max = 10 the split shoudl be 7-6-6 165 * for x = 19 min = 10 and max = 11 the split should be 10-9 166 * 167 *This function all first try to meet the min as many times a posable then 168 *slip what is left over and can not meet the min betweem these min groups but 169 *not going over the max. If all groups are maxed out and there is still something 170 *left over it will simply be added to the back of the list. 171 * 172 */ 173 List makesplit(int x){ 174 175 List split = new ArrayList(); 176 177 //make (x/min) grups of 178 while( split.size() < (x/min) ){ 179 split.add(new Integer(min)); 180 } 181 182 boolean hasUnfilledGroups = true; //this is true just to start the loop 183 x = x % min; //x is now the remainder 184 185 //pass x to the groups untill all groups are full 186 while(hasUnfilledGroups && (x != 0)){ 187 188 hasUnfilledGroups = false; 189 for(int i = 0; (i != split.size()) && (x != 0); i++){ 190 int subgroupSize = ((Integer) split.get(i)).intValue(); 191 if((subgroupSize + 1) <= max){ 192 hasUnfilledGroups = true; //if there are unfilled gruops in this pass, there may be unfilled gruops still, setting this to true will do another pass 193 x --; 194 split.set(i,new Integer(subgroupSize + 1)); 195 } 196 } 197 } 198 199 if(x != 0) split.add(new Integer(x)); 200 201 //for(int i = 0; i != split.size(); i++) System.out.print( "\n" + ((Integer) split.get(i)).toString() + "-"); 202 203 204 205 return split; 206 } 207 208 209 210 boolean requestSatisfied = true; 211 public boolean requirementsSatisfied() { 212 return requestSatisfied; 213 } 214 215 216 217 }