001    /*
002     * GroupSplitBy.java
003     *
004     * Created on July 13, 2006, 7:26 PM
005     *
006     * This file is part of the STAR Scheduler.
007     * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
008     *
009     * STAR Scheduler is free software; you can redistribute it and/or modify
010     * it under the terms of the GNU General Public License as published by
011     * the Free Software Foundation; either version 2 of the License, or
012     * (at your option) any later version.
013     *
014     * STAR Scheduler is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017     * GNU General Public License for more details.
018     *
019     * You should have received a copy of the GNU General Public License
020     * along with STAR Scheduler; if not, write to the Free Software
021     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022     */
023    
024    package gov.bnl.star.offline.scheduler.dataset.datasetManipulators;
025    
026    import gov.bnl.star.offline.scheduler.dataset.Dataset;
027    import gov.bnl.star.offline.scheduler.dataset.datasetManipulators.DatasetManipulator;
028    import gov.bnl.star.offline.scheduler.request.Request;
029    import java.io.BufferedReader;
030    import java.io.FileNotFoundException;
031    import java.io.FileReader;
032    import java.io.IOException;
033    
034    /**
035     * This class markers the dataset for splitting between non-matching regular expression capture groups in an ordered list
036     * @author Levente Hajdu
037     */
038    public class SplitByRegX implements DatasetManipulator{
039        
040        String splitBy = "";
041        Dataset dataset;
042        
043        /** This function takes a an ordered list and splits makes on the boundary of dissimulator regular expression capture groups. 
044         *
045         *Example:
046         *SplitByRegX("^aaaa(*)aaaaa$")
047         *
048         *input:
049         *aaaaaaaaaa
050         *aaaabaaaaa
051         *aaaabaaaaa
052         *aaaacaaaaa
053         *aaaacaaaaa
054         *aaaacaaaaa
055         *
056         *output:
057         *aaaaaaaaaa
058         *----split-----
059         *aaaabaaaaa
060         *aaaabaaaaa
061         *----split-----
062         *aaaacaaaaa
063         *aaaacaaaaa
064         *aaaacaaaaa
065         *
066         *  @param splitBy A expression capture group used to test for boundarys between entries
067         */
068        public SplitByRegX(String splitBy) {this.splitBy = splitBy;}
069        
070        /** This function takes a an ordered list and splits makes on the boundary of dissimulator regular expression capture groups. 
071          * @param dataset The dataset to be modifyed 
072          * @param request The request object of the current request for with will use the dataset 
073          **/
074        public void modify(Dataset dataset, Request request){
075            
076            //System.out.println("Splitting dataset by: " + splitBy);
077            this.dataset = dataset;
078                
079    
080            try {
081                String datsetEntry = null;
082                String lastDatasetEntry = null;
083                String priorRegX = null;
084                String currentRegX = null;
085                boolean firstLine = true;
086                
087                //addStartEndSplit(datsetEntry); //and an ending split if not there
088                
089                
090                BufferedReader currentDataset = new BufferedReader( new FileReader(dataset.getDatasetName() ));
091                while ((datsetEntry = currentDataset.readLine()) != null) {
092                    
093                    if(datsetEntry != null) lastDatasetEntry = datsetEntry;
094                    
095                    if(firstLine) addStartEndSplit(datsetEntry); //add an starting split if not there
096                    firstLine = false;
097                    if( datsetEntry.matches(dataset.getSplitRegX()) ){ //If there is already a split there don't put another one'
098                        dataset.writeToBuffer(datsetEntry);
099                    }else{
100                        currentRegX = datsetEntry.replaceAll(splitBy,"$1");
101                        if(priorRegX != null){
102                            if(! currentRegX.equals(priorRegX)){
103                                dataset.writeToBuffer( dataset.getSplitString() );
104                            }
105                        }
106                        priorRegX = currentRegX;
107                    }
108                    dataset.writeToBuffer(datsetEntry);
109                }
110                addStartEndSplit(lastDatasetEntry); //add an ending split if not there
111                dataset.swap_buffer_dataset_with_dataset();
112                  
113            } catch (IOException ex) {
114                ex.printStackTrace();
115            }     
116       }
117        
118        
119        
120        public boolean requirementsSatisfied() {
121            return true;
122        }
123        
124        
125        /*This adds a starting and ending split if it is not already there */
126        private void addStartEndSplit(String curentLine ){
127            if(curentLine != null){
128                if(! curentLine.matches(dataset.getSplitRegX())){   
129                    dataset.writeToBuffer( dataset.getSplitString() );
130                } 
131            }
132        }
133        
134        
135    }