/*
 * DropDuplicatesFromSortedList.java
 *
 * Created on July 8, 2006, 12:45 AM
 *
 * This file is part of the STAR Scheduler.
 * Copyright (c) 2002-2006 STAR Collaboration - Brookhaven National Laboratory
 *
 * STAR Scheduler is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * STAR Scheduler is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with STAR Scheduler; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package gov.bnl.star.offline.scheduler.dataset.datasetManipulators;


import gov.bnl.star.offline.scheduler.dataset.Dataset;
import gov.bnl.star.offline.scheduler.request.Request;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import java.util.regex.Pattern;

/**
 * A dataset manipulator that drops entries whose regular expression capture
 * group value duplicates that of the preceding entries, keeping one entry per
 * run of equal values.
 *
 * Only adjacent entries are compared, so the list must already be sorted by
 * the capture group; it will not work on an unsorted list.
 *
 * @author lbhajdu
 */
public class DropDuplicatesFromSortedList implements DatasetManipulator {

    /** Regular expression whose first capture group identifies duplicate entries. */
    String regXCaptureGroup;

    /** Preference label -> regular expression an entry must match to get that rank. */
    private Hashtable preferredCopyRegxTable = new Hashtable();

    /** Preference labels of {@link #preferredCopyRegxTable}, sorted in ascending order. */
    private Vector keys = new Vector();

    /**
     * Creates a new instance of DropDuplicatesFromSortedList.
     * Note that any entry that has a blank value for the capture group will not be dropped.
     *
     * @param regXCaptureGroup A regular expression with a capture group. If more than one
     *        consecutive entry yields the same capture group value, only one copy will be
     *        kept; all others will be dropped.
     */
    public DropDuplicatesFromSortedList(String regXCaptureGroup) {
        this.regXCaptureGroup = regXCaptureGroup;
    }

    /**
     * Reads the dataset line by line, writes one entry per run of duplicates to the
     * dataset buffer, and then swaps the buffer in as the dataset.
     *
     * The expression is compiled once per call, so an invalid expression is reported
     * as soon as the dataset has been opened rather than part-way through it.
     *
     * @param dataset The dataset to be modified
     * @param request The request object of the current request which will use the dataset
     */
    public void modify(Dataset dataset, Request request) {

        BufferedReader currentDataset = null;

        try {
            currentDataset = new BufferedReader(new FileReader(dataset.getDatasetName()));
            Pattern groupPattern = Pattern.compile(regXCaptureGroup);

            List duplicateEntries = new ArrayList(); // the current run of duplicates
            String groupKey = null;                  // capture group value of that run
            String datasetEntry;

            while ((datasetEntry = currentDataset.readLine()) != null) {

                // Same substitution String.replaceAll(regXCaptureGroup, "$1") performs;
                // an entry the expression does not match is its own key.
                String entryKey = groupPattern.matcher(datasetEntry).replaceAll("$1");

                // A blank key never joins a run, so such entries are never dropped.
                boolean continuesRun = !duplicateEntries.isEmpty()
                        && entryKey.length() != 0
                        && entryKey.equals(groupKey);

                if (!continuesRun) {
                    if (!duplicateEntries.isEmpty()) {
                        dataset.writeToBuffer(pickEntry(duplicateEntries));
                        duplicateEntries.clear();
                    }
                    groupKey = entryKey;
                }
                duplicateEntries.add(datasetEntry);
            }

            // Flush the last run; an empty dataset has nothing to flush.
            if (!duplicateEntries.isEmpty()) {
                dataset.writeToBuffer(pickEntry(duplicateEntries));
            }

        } catch (FileNotFoundException ex) {
            ex.printStackTrace();
        } catch (IOException ex) {
            ex.printStackTrace();
        } finally {
            // Release the file handle before the buffer is swapped in.
            if (currentDataset != null) {
                try {
                    currentDataset.close();
                } catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }

        // NOTE(review): the swap also runs after a caught I/O error, as it always has;
        // confirm that swapping in a partially filled buffer is the intended behaviour.
        dataset.swap_buffer_dataset_with_dataset();
    }

    /**
     * Chooses the entry to keep from a run of duplicates.
     *
     * The preferences are tried in sorted key order and the first entry matching the
     * highest ranked expression wins. If there is only one entry, no preferences were
     * registered, or nothing matches, the first entry of the run is kept.
     *
     * @param duplicateEntries A non-empty list of entries (Strings) of one run
     * @return The entry to write back to the dataset
     */
    private String pickEntry(List duplicateEntries) {

        String firstEntry = (String) duplicateEntries.get(0);

        if (duplicateEntries.size() == 1 || keys.isEmpty()) {
            return firstEntry;
        }

        for (Enumeration e = keys.elements(); e.hasMoreElements();) {

            // Compile once per preference instead of once per entry.
            Pattern preferred = Pattern.compile((String) preferredCopyRegxTable.get(e.nextElement()));

            for (int i = 0; i < duplicateEntries.size(); i++) {
                String entry = (String) duplicateEntries.get(i);
                if (preferred.matcher(entry).matches()) {
                    return entry;
                }
            }
        }

        return firstEntry;
    }

    /**
     * This function is used to select one entry when two or more entries are found
     * matching the regular expression capture group.
     * Example of usage:
     *
     * //pick NFS files first, then local files, then HPSS files
     *
     * addPreferredCopyRegx("B", ".*::local::.*::.*::.*::.*::.*");
     * addPreferredCopyRegx("A", ".*::NFS::.*::.*::.*::.*::.*");
     * addPreferredCopyRegx("C", ".*::HPSS::.*::.*::.*::.*::.*");
     *
     * @param pref A string that will be alphabetically sorted to determine the preference.
     *        "A" has a higher rank than "Z". Registering the same string again replaces
     *        its regular expression.
     *
     * @param regx A regular expression the entry must match in order to be assigned this ranking.
     */
    public void addPreferredCopyRegx(String pref, String regx) {
        preferredCopyRegxTable.put(pref, regx);
        Vector sortedKeys = new Vector(preferredCopyRegxTable.keySet());
        Collections.sort(sortedKeys); // sort the table keys
        this.keys = sortedKeys;
    }

    /**
     * Always reports the requirements as satisfied.
     *
     * @return true
     */
    public boolean requirementsSatisfied() {
        return true;
    }

}