/*
 * SubsetValidator.java
 *
 * Created on July 21, 2006, 2:55 PM
 *
 * This file is part of the STAR Scheduler.
 * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
 *
 * STAR Scheduler is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * STAR Scheduler is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with STAR Scheduler; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package gov.bnl.star.offline.scheduler.dataset;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import java.util.ArrayList;
import java.util.List;

/**
 * Used to verify that the dataset splits meet the user's requirements and the system requirements after processing.
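 *
 * <p>A minimal usage sketch (the dataset instance is assumed to be built elsewhere in the
 * scheduler, and the option values below are only illustrative):
 * <pre>
 *     SubsetValidator validator = new SubsetValidator();
 *     validator.setMustHaveHomogeneousHost(true);
 *     validator.setMaxFiles(100);
 *     boolean passed = validator.validate(dataset);
 *     if (!passed) {
 *         System.err.println(validator.getErrors());
 *         if (validator.isErrorFatal()) {
 *             // the dataset cannot be used as split
 *         }
 *     }
 * </pre>
 *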
 * @author Levetne B. Hajdu
 */
public class SubsetValidator {

    /** Creates a new instance of SubsetValidator */
    public SubsetValidator() {}

    //This group states which tests should be done, and holds the settings for the tests
    boolean mustHaveHomogeneousHost = false;
    boolean mustHaveHomogeneousSite = false;
    int maxFiles = -1;
    int minFiles = 1;

    /** Test that each split has files from only one host.
     * @param mustHaveHomogeneousHost set to true to run this test, set to false not to run this test **/
    public void setMustHaveHomogeneousHost(boolean mustHaveHomogeneousHost){ this.mustHaveHomogeneousHost = mustHaveHomogeneousHost; }
    /** @return true if the object will test that each split has files from only one host, else false **/
    public boolean getMustHaveHomogeneousHost(){ return mustHaveHomogeneousHost; }
    /** @param mustHaveHomogeneousSite set to true to test that all entries in an index belong to the same site **/
    public void setMustHaveHomogeneousSite(boolean mustHaveHomogeneousSite){ this.mustHaveHomogeneousSite = mustHaveHomogeneousSite; }
    /** @return true if testing that all entries in an index belong to the same site, false if not testing **/
    public boolean getMustHaveHomogeneousSite(){ return mustHaveHomogeneousSite; }
    /** @param maxFiles Test that no index has more files than this value **/
    public void setMaxFiles(int maxFiles){ this.maxFiles = maxFiles; }
    /** @return -1 if this test will not be run, else no index may have more files than the value returned **/
    public int getMaxFiles(){ return maxFiles; }
    /** @param minFiles Test that no index has fewer files than this value **/
    public void setMinFiles(int minFiles){ this.minFiles = minFiles; }
    /** @return the minimum number of files per index; this test is only run when the value is greater than 1 (the default) **/
    public int getMinFiles(){ return minFiles; }


    //The state of all the tests
    boolean passedMustHaveHomogeneousHost = true;
    boolean passedMustHaveHomogeneousSite = true;
    boolean passedMaxFiles = true;
    boolean passedMinFile = true;

    private int line = 0;

    Dataset dataset;
    EntryParser entryParser;
    /** Returns the result of all tests ANDed together. In other words, it returns true if all tests passed and false if one or more tests failed. */
    public boolean validate(Dataset dataset){

        errors = ""; //clear all errors from any previous passes
        this.dataset = dataset;
        this.entryParser = dataset.getEntryParser();
        //reset all tests in case the dataset is tested again
        passedMustHaveHomogeneousHost = true;
        passedMustHaveHomogeneousSite = true;
        passedMaxFiles = true;
        passedMinFile = true;
        currentMaxSize = 0;
        currentMinSize = -1;
        lastHost = null;
        lastSite = null;
        line = 0;

        String datasetEntry = null;
        try {

            BufferedReader currentDataset = new BufferedReader(new FileReader(dataset.getDatasetName()));
            while ((datasetEntry = currentDataset.readLine()) != null) {
                line++;
                if(mustHaveHomogeneousHost){
                    //System.out.println("MustHaveHomogeneousHost -->" + mustHaveHomogeneousHost);
                    validateMustHaveHomogeneousHost(datasetEntry);
                }
                if(mustHaveHomogeneousSite){
                    //System.out.println("passedMustHaveHomogeneousSite -->" + mustHaveHomogeneousSite);
                    validateMustHaveHomogeneousSite(datasetEntry);
                }
                if(maxFiles > 0){
                    //System.out.println("maxFiles -->" + maxFiles);
                    validateMaxFiles(datasetEntry);
                }
                if(minFiles > 1){
                    //System.out.println("minFiles -->" + minFiles);
                    validateMinFile(datasetEntry);
                }
                validateFormatting(datasetEntry);
                countFilesAndEvents(datasetEntry);

            }
            currentDataset.close();

        } catch (FileNotFoundException ex) {
            ex.printStackTrace();
        } catch (IOException ex) {
            ex.printStackTrace();
        }


        //This error string can only be added after the last line has been tested.
        if(! lastLineIsSplit) errors = errors + "Formatting Error : The last line of the dataset file \"" + dataset.getDatasetName() + "\" should be \"" + dataset.getSplitString() + "\"\n";


        return passedMustHaveHomogeneousHost
            && passedMustHaveHomogeneousSite
            && passedMaxFiles
            && passedMinFile
            && startsWithSplit
            && lastLineIsSplit
            && hasNoEmptySplits;

    }

    /** Returns false if no fatal error was detected, and true only if a detected error is fatal. */
    public boolean isErrorFatal(){

        return !( passedMustHaveHomogeneousHost
                  && passedMustHaveHomogeneousSite
                  && startsWithSplit
                  && lastLineIsSplit
                  && hasNoEmptySplits
                );
    }


    private String errors = "";
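    /** @return a human-readable description of every problem found by the last call to validate(); an empty string if none were found */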
    public String getErrors(){ return errors; }

    private String lastHost = null;
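    /* Checks that all file entries between two split lines come from the same host.
       NFS and localhost entries are ignored; the host bookkeeping is reset at every split line. */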
    private void validateMustHaveHomogeneousHost(String entry){

        if( entry.matches(dataset.getSplitRegX()) ){ //If we hit the split, reset everything and take no action
            lastHost = null; //clear the last known host; it is not needed anymore for this subset
            return;
        }

        if( entryParser.getSTORAGE_SERVISE(entry).matches("NFS") ) return; //NFS files are not tied to any node, so they are alright
        String currentHost = entryParser.getHOST(entry);
        if( currentHost.matches("localhost") ) return; //files on the local system are alright and will not cause the test to fail
        if(lastHost == null){ //if this is the first line in a new subset
            lastHost = currentHost;
            currentIndex.setNode(currentHost);
            return;
        }

        //if(currentIndex.getNode() == null){
        //   currentIndex.setNode(currentHost); //Node and host are the same
        //}

        if( lastHost.equals(currentHost) ) return;

        passedMustHaveHomogeneousHost = false;
        errors = errors + "Test MustHaveHomogeneousHost failed at line : " + line + " datasetfile \"" + dataset.getDatasetName() + "\" : The dataset has been split in such a way that the same job has files from more than one host. This may be an error. \n";

        return;
    }


    private String lastSite = null;
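    /* Checks that all file entries between two split lines belong to the same site.
       The site bookkeeping is reset at every split line. */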
    private void validateMustHaveHomogeneousSite(String entry){
        if( entry.matches(dataset.getSplitRegX()) ){ //If we hit the split, reset everything and take no action
            lastSite = null; //clear the last known site; it is not needed anymore for this subset
            return;
        }

        String currentSite = entryParser.getSITE(entry);
        currentIndex.setSite(currentSite);

        if(lastSite == null){ //if this is the first line in a new subset
            lastSite = currentSite;
            return;
        }

        if( lastSite.equals(currentSite) ) return; //if it matches the site of the last file it is valid, else something is wrong with the dataset subset

        passedMustHaveHomogeneousSite = false;

        errors = errors + "Test MustHaveHomogeneousSite failed at line : " + line + " datasetfile \"" + dataset.getDatasetName() + "\" : There are files from different sites in the same job. \n\n";

        return;
    }

    int currentMaxSize = 0;
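    /* Counts the file entries in the current subset and reports an error as soon as the count
       exceeds maxFiles. The count is reset at every split line. */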
    private void validateMaxFiles(String entry){

        if(maxFiles == -1) return;

        if( entry.matches(dataset.getSplitRegX()) ){ //If we hit the split, reset the count and take no action
            currentMaxSize = 0; //start counting again for the next subset
            return;
        }

        currentMaxSize++;
        if(currentMaxSize > maxFiles){
            passedMaxFiles = false;
            errors = errors + "Error at line : " + line + " datasetfile \"" + dataset.getDatasetName() + "\" :  Some subsets in this dataset are above " + maxFiles + " files per job. Please report this error.\n";

        }
        return;
    }

    int currentMinSize = -1;
    //boolean addedError = false;
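    /* Counts the file entries in the current subset and, at the next split line, reports an error
       if the finished subset held fewer than minFiles entries. */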
    private void validateMinFile(String entry){

        if( entry.matches(dataset.getSplitRegX()) ){ //If we hit the split, check the count for the finished subset

            if(currentMinSize == -1) return;

            if(currentMinSize < minFiles){
                //System.out.println("currentMinSize = " + currentMinSize + "  minFiles = " + minFiles);
                passedMinFile = false;
                errors = errors + "Possible Error at line : " + line + " index : " + indexCount + " datasetfile \"" + dataset.getDatasetName() + "\" :  This happens when an insufficient number of files has been returned to meet the minimum requirement for any one node when using a distributed file system that requires splitting by node. Please determine if your jobs will still function properly without meeting this soft requirement.\n";
                currentMinSize = 0; //start counting again for the next subset
                return;
            }

            currentMinSize = 0; //start counting again for the next subset
            return;
        }

        if(currentMinSize == -1) currentMinSize = 0;
        currentMinSize++;
        return;
    }


    /** This function returns the number of indexes in the dataset.
     *  If validate(dataset) has not yet been called it will return zero. */
    public int getIndexCount(){ return indexCount + 1; }


    private boolean farstLine = true;
    private boolean startsWithSplit = true;
    private boolean lastLineIsSplit = true;
    private boolean hasNoEmptySplits = true;
    private int indexCount = -1;

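    /* Checks the overall layout of the dataset file: the first line must be a split line, no two
       split lines may be adjacent (no empty subsets), and a new DatasetSubset is started at every
       split line. Whether the last line is a split is checked by validate() after the loop. */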
    private void validateFormatting(String entry){

        if( entry.matches(dataset.getSplitRegX()) ){
            indexCount++; //Keep track of the index
            currentIndex = new DatasetSubset(indexCount, dataset);
            indexList.add(currentIndex);

            if(lastLineIsSplit && (! farstLine) ){ //If this is a split and the last line was also a split, something is wrong
                hasNoEmptySplits = false;
                errors = errors + "Formatting Error : The split at index : " + indexCount + " line : " + line + " of datasetfile \"" + dataset.getDatasetName() + "\" has no data within it.\n";
            }
            lastLineIsSplit = true; //keep track of what the last line was
        }
        else{
            //think about testing each file here, if it does not take too long
            lastLineIsSplit = false; //keep track of what the last line was
        }

        if(farstLine){ //Test that the first line is a split
            farstLine = false;
            if( entry.matches(dataset.getSplitRegX()) ){
                startsWithSplit = true;
            }
            else{
                startsWithSplit = false;
                errors = errors + "Formatting Error : The first line of the dataset file \"" + dataset.getDatasetName() + "\" should be \"" + dataset.getSplitString() + "\"\n";
            }
        }
    }


    private List indexList = new ArrayList();
    private DatasetSubset currentIndex;
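    /** @return the list of DatasetSubset objects (one per index) built by the last call to validate(); empty if validate() has not been called */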
    public List getDatasetIndexList(){ return indexList; }


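    /* For every non-split entry, adds one file to the current subset and, when the entry carries
       an event count, adds that count to the subset's event total. */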
    private void countFilesAndEvents(String entry){
        if((currentIndex != null) && (! entry.matches(dataset.getSplitRegX())) ){
            currentIndex.addOneToFilesInSubset();
            String events = entryParser.getNUMBER_OF_EVENTS(entry);
            if(events != null){
                try {
                    currentIndex.addToEventsInSubset(Integer.parseInt(events));
                } catch (Exception e) {
                    //ignore entries with a malformed event count
                }
            }
        }
    }

}