/*
 * SubsetValidator.java
 *
 * Created on July 21, 2006, 2:55 PM
 *
 * This file is part of the STAR Scheduler.
 * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
 *
 * STAR Scheduler is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * STAR Scheduler is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with STAR Scheduler; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package gov.bnl.star.offline.scheduler.dataset;

import gov.bnl.star.offline.scheduler.dataset.Dataset;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;

/**
 * Validates a split dataset file after processing: verifies the formatting
 * (the file must start and end with a split line and contain no empty
 * subsets) and, optionally, that each subset is homogeneous in host and/or
 * site and respects the configured minimum/maximum file counts.
 *
 * <p>Usage: configure the optional tests via the setters, call
 * {@link #validate(Dataset)}, then inspect {@link #getErrors()},
 * {@link #isErrorFatal()} and {@link #getDatasetIndexList()}.
 *
 * <p>NOTE(review): this class is not thread-safe — all test state is kept in
 * instance fields that are reset at the start of each {@code validate} call.
 *
 * @author Levente B. Hajdu
 */
public class SubsetValidator {

    /** Creates a new instance of SubsetValidator. */
    public SubsetValidator() {}

    // ---- Test configuration: which optional tests run, and their limits ----
    boolean mustHaveHomogeneousHost = false;
    boolean mustHaveHomogeneousSite = false;
    int maxFiles = -1;   // -1 disables the max-files test
    int minFiles = 1;    // values > 1 enable the min-files test

    /** Test that each subset only has files from one host.
     * @param mustHaveHomogeneousHost set to true to run this test, set to false not to run this test */
    public void setMustHaveHomogeneousHost(boolean mustHaveHomogeneousHost) { this.mustHaveHomogeneousHost = mustHaveHomogeneousHost; }

    /** @return true if the object will test for homogeneous hosts, else false */
    public boolean getMustHaveHomogeneousHost() { return mustHaveHomogeneousHost; }

    /** @param mustHaveHomogeneousSite set to true to test that all entries in a subset belong to the same site */
    public void setMustHaveHomogeneousSite(boolean mustHaveHomogeneousSite) { this.mustHaveHomogeneousSite = mustHaveHomogeneousSite; }

    /** @return true if testing that all entries in a subset belong to the same site, false if not testing */
    public boolean getMustHaveHomogeneousSite() { return mustHaveHomogeneousSite; }

    /** @param maxFiles test that no subset has more files than this value (-1 disables the test) */
    public void setMaxFiles(int maxFiles) { this.maxFiles = maxFiles; }

    /** @return a number &gt;= -1; -1 means the test will not be run, else no subset may have more files than this value */
    public int getMaxFiles() { return maxFiles; }

    /** @param minFiles test that no subset has fewer files than this value (values &lt;= 1 disable the test) */
    public void setMinFiles(int minFiles) { this.minFiles = minFiles; }

    /** @return a number &gt;= 1; 1 means the test will not be run, else no subset may have fewer files than this value */
    public int getMinFiles() { return minFiles; }

    // ---- The pass/fail state of each optional test ----
    boolean passedMustHaveHomogeneousHost = true;
    boolean passedMustHaveHomogeneousSite = true;
    boolean passedMaxFiles = true;
    boolean passedMinFile = true;

    // Current line number in the dataset file (1-based), used in error messages.
    private int line = 0;

    Dataset dataset;
    EntryParser entryParser;

    /**
     * Runs all configured tests over the dataset file, line by line.
     * Returns the logical AND of all tests: true if every test passed,
     * false if any one or more test failed. Per-test details are appended
     * to the string returned by {@link #getErrors()}.
     *
     * @param dataset the dataset whose on-disk file will be validated
     * @return true if all tests passed, false otherwise
     */
    public boolean validate(Dataset dataset) {

        errors = ""; // clear all errors from any previous pass
        this.dataset = dataset;
        this.entryParser = dataset.getEntryParser();

        // Reset all test state in case this validator is reused on another
        // (or the same) dataset. The formatting/index state below was
        // previously not reset, which broke repeated validate() calls.
        passedMustHaveHomogeneousHost = true;
        passedMustHaveHomogeneousSite = true;
        passedMaxFiles = true;
        passedMinFile = true;
        currentMaxSize = 0;
        currentMinSize = -1;
        lastHost = null;
        lastSite = null;
        line = 0;
        farstLine = true;
        startsWithSplit = true;
        lastLineIsSplit = true;
        hasNoEmptySplits = true;
        indexCount = -1;
        indexList = new ArrayList();
        currentIndex = null;

        String datsetEntry = null;
        BufferedReader currentDataset = null; // declared here so finally can close it
        try {

            currentDataset = new BufferedReader(new FileReader(dataset.getDatasetName()));
            while ((datsetEntry = currentDataset.readLine()) != null) {
                line++;
                if (mustHaveHomogeneousHost) {
                    validateMustHaveHomogeneousHost(datsetEntry);
                }
                if (mustHaveHomogeneousSite) {
                    validateMustHaveHomogeneousSite(datsetEntry);
                }
                if (maxFiles > 0) {
                    validateMaxFiles(datsetEntry);
                }
                if (minFiles > 1) {
                    validateMinFile(datsetEntry);
                }
                validateFormatting(datsetEntry);
                countFilesAndEvents(datsetEntry);
            }

        } catch (FileNotFoundException ex) {
            ex.printStackTrace();
        } catch (IOException ex) {
            ex.printStackTrace();
        } finally {
            // Always release the file handle; the original code leaked it.
            if (currentDataset != null) {
                try {
                    currentDataset.close();
                } catch (IOException ignored) {
                    // best effort — validation results are unaffected
                }
            }
        }

        // This error string can only be added after the last line has been tested.
        if (!lastLineIsSplit) errors = errors + "Formatting Error : The last line of the dataset file \"" + dataset.getDatasetName() + "\" should be " + dataset.getSplitString() + "\"\n";

        return passedMustHaveHomogeneousHost
                && passedMustHaveHomogeneousSite
                && passedMaxFiles
                && passedMinFile
                && startsWithSplit
                && lastLineIsSplit
                && hasNoEmptySplits;
    }

    /**
     * Reports whether any detected error is fatal. Host-homogeneity, site-
     * homogeneity and formatting failures are fatal; min/max file-count
     * failures are only warnings.
     *
     * @return false if there are no errors, true only if a fatal error was detected
     */
    public boolean isErrorFatal() {

        return !(passedMustHaveHomogeneousHost
                && passedMustHaveHomogeneousSite
                && startsWithSplit
                && lastLineIsSplit
                && hasNoEmptySplits);
    }

    // Accumulated human-readable error text from the most recent validate() run.
    private String errors = "";

    /** @return the accumulated error/warning messages from the last {@link #validate(Dataset)} call */
    public String getErrors() { return errors; }

    // Host of the previous entry in the current subset; null at a subset boundary.
    private String lastHost = null;

    /**
     * Tests that every entry in the current subset lives on the same host.
     * NFS-resident and localhost entries are exempt. Called once per line.
     *
     * @param entry one line of the dataset file
     */
    private void validateMustHaveHomogeneousHost(String entry) {

        // If we hit the split, reset everything and take no action.
        // NOTE: the original called dataset.getSplitRegX().matches(entry),
        // which swaps the regex and the input — inconsistent with every other
        // test in this class and wrong when the split regex contains
        // metacharacters. Corrected to match entry against the split regex.
        if (entry.matches(dataset.getSplitRegX())) {
            lastHost = null; // clear the last known host, not needed anymore for this subset
            return;
        }

        // NFS files are not tied to any node, so they always pass.
        if (entryParser.getSTORAGE_SERVISE(entry).equals("NFS")) return;
        String currentHost = entryParser.getHOST(entry);
        // Files on the local system cannot fail this test.
        if (currentHost.equals("localhost")) return;
        if (lastHost == null) { // first node-bound entry in a new subset
            lastHost = currentHost;
            currentIndex.setNode(currentHost); // node and host are the same
            return;
        }

        if (lastHost.equals(currentHost)) return;

        passedMustHaveHomogeneousHost = false;
        errors = errors + "Test MustHaveHomogeneousHost faild at line : " + line + " datasetfile \"" + dataset.getDatasetName() + "\" : The dataset has been split in such a way that the same job has files from more then one host. This may be an error. \n";

        return;
    }

    // Site of the previous entry in the current subset; null at a subset boundary.
    private String lastSite = null;

    /**
     * Tests that every entry in the current subset belongs to the same site.
     * Called once per line.
     *
     * @param entry one line of the dataset file
     */
    private void validateMustHaveHomogeneousSite(String entry) {
        // If we hit the split, reset everything and take no action.
        // (Same receiver/argument swap fix as in validateMustHaveHomogeneousHost.)
        if (entry.matches(dataset.getSplitRegX())) {
            lastSite = null; // clear the last known site, not needed anymore for this subset
            return;
        }

        String currentSite = entryParser.getSITE(entry);
        currentIndex.setSite(currentSite);

        if (lastSite == null) { // first entry in a new subset
            lastSite = currentSite;
            return;
        }

        // If it matches the previous entry's site the subset is still valid.
        if (lastSite.equals(currentSite)) return;

        passedMustHaveHomogeneousSite = false;

        errors = errors + "Test MustHaveHomogeneousSite faild at line : " + line + " datasetfile \"" + dataset.getDatasetName() + "\" : There are files from differnt sites in the same job. \n\n";

        return;
    }

    // Running file count of the current subset for the max-files test.
    int currentMaxSize = 0;

    /**
     * Tests that the current subset does not exceed {@link #getMaxFiles()}
     * files. Called once per line; the count resets at each split line.
     *
     * @param entry one line of the dataset file
     */
    private void validateMaxFiles(String entry) {

        if (maxFiles == -1) return; // test disabled (defensive; caller already guards)

        if (entry.matches(dataset.getSplitRegX())) { // split line: start a new subset
            currentMaxSize = 0;
            return;
        }

        currentMaxSize++;
        if (currentMaxSize > maxFiles) {
            passedMaxFiles = false;
            errors = errors + "Error at line : " + line + " datasetfile \"" + dataset.getDatasetName() + "\" : Some subsets in this dataset are above " + maxFiles + " file per job. Please report this error.\n";
        }
        return;
    }

    // Running file count of the current subset for the min-files test;
    // -1 means no entries have been seen yet.
    int currentMinSize = -1;

    /**
     * Tests that each subset holds at least {@link #getMinFiles()} files.
     * The check is performed when the closing split line of a subset is
     * reached. Called once per line.
     *
     * @param entry one line of the dataset file
     */
    private void validateMinFile(String entry) {

        if (entry.matches(dataset.getSplitRegX())) { // split line: close out the previous subset

            if (currentMinSize == -1) return; // leading split — nothing counted yet

            if (currentMinSize < minFiles) {
                passedMinFile = false;
                errors = errors + "Possible Error at line : " + line + " index : " + indexCount + " datasetfile \"" + dataset.getDatasetName() + "\" : This happens when an insignificant number of files have be returned to meet the minimum requirement for any one node when using a distrusted file system that requires splitting by node. Please determine if your jobs will still function properly without meeting this soft requirement.\n";
                // Reset before returning; the original skipped this, so the
                // count of an undersized subset leaked into the next one.
                currentMinSize = 0;
                return;
            }

            currentMinSize = 0; // start counting the next subset
            return;
        }

        if (currentMinSize == -1) currentMinSize = 0;
        currentMinSize++;
        return;
    }

    /**
     * Returns the number of subsets (indexes) in the dataset.
     * If {@link #validate(Dataset)} has not yet been called it returns zero.
     *
     * @return the subset count
     */
    public int getIndexCount() { return indexCount + 1; }

    // ---- Formatting-test state, reset at the start of each validate() ----
    private boolean farstLine = true;       // true until the first line has been seen
    private boolean startsWithSplit = true; // file must begin with a split line
    private boolean lastLineIsSplit = true; // file must end with a split line
    private boolean hasNoEmptySplits = true;
    private int indexCount = -1;            // index of the current subset (0-based)

    /**
     * Tracks split lines to enforce the file format: the first and last
     * lines must be splits and no two splits may be adjacent (which would
     * mean an empty subset). Also builds the {@link DatasetSubset} list.
     *
     * @param entry one line of the dataset file
     */
    private void validateFormatting(String entry) {

        if (entry.matches(dataset.getSplitRegX())) {
            indexCount++; // keep track of the index
            currentIndex = new DatasetSubset(indexCount, dataset);
            indexList.add(currentIndex);

            // Two consecutive split lines mean an empty subset.
            if (lastLineIsSplit && (!farstLine)) {
                hasNoEmptySplits = false;
                errors = errors + "Formatting Error : The split at index : " + indexCount + " line : " + line + " of datasetfile \"" + dataset.getDatasetName() + "\" has no data within it.\n";
            }
            lastLineIsSplit = true; // keep track of what the last line was
        } else {
            lastLineIsSplit = false; // keep track of what the last line was
        }

        if (farstLine) { // test that the first line is a split
            farstLine = false;
            if (entry.matches(dataset.getSplitRegX())) {
                startsWithSplit = true;
            } else {
                startsWithSplit = false;
                errors = errors + "Formatting Error : The first line of the dataset file \"" + dataset.getDatasetName() + "\" should be " + dataset.getSplitString() + "\"\n";
            }
        }
    }

    // Subsets discovered by validateFormatting(), in file order.
    private List indexList = new ArrayList();
    private DatasetSubset currentIndex;

    /** @return the list of {@link DatasetSubset}s built by the last {@link #validate(Dataset)} call */
    public List getDatasetIndexList() { return indexList; }

    /**
     * Counts files and (when available) events into the current subset.
     * Entries with an unparsable event count are counted as files only.
     *
     * @param entry one line of the dataset file
     */
    private void countFilesAndEvents(String entry) {
        if ((currentIndex != null) && (!entry.matches(dataset.getSplitRegX()))) {
            currentIndex.addOneToFilesInSubset();
            String events = entryParser.getNUMBER_OF_EVENTS(entry);
            if (events != null) {
                try {
                    currentIndex.addToEventsInSubset(Integer.parseInt(events));
                } catch (Exception e) {
                    // event count not numeric — count the file but skip events
                }
            }
        }
    }

}