Merger/filtering script

Under:
Typically, a Starsim run produces either a single output file or a series of files with names like gstar.1.fz, gstar.2.fz, etc. Regardless of whether we run locally or on the Grid, there is a small chance that the file(s) will be truncated. To guard against the possibility of feeding incorrect data to the reconstruction stage, and/or to perform a split or merger of a few files, a KUMAC script has been developed. Among other things, it discards incomplete events and produces serially numbered files with names like rcf1319_01_100evts.fzd, which contain the name of the dataset, the serial number of the file (distinct from the numbering of the input files), and the number of events contained therein, all of which is helpful in setting up or debugging the production. It has recently been simplified (although it is still not easily readable) and wrapped into a utility shell script, which does the preparation work as well as the cleanup. The resulting script, named "filter.tcsh", takes a single argument, which is assumed to be the name of the dataset (and which is then used in naming the output files).

#! /usr/local/bin/tcsh -f
#
# filter.tcsh <dataset-name>
#
# Preparation stage: validates the argument, removes leftovers of a previous
# run, builds the numerically sorted list of input file numbers and reports
# disk usage before the KUMAC macro is generated and executed.
#
# The dataset name is used to name the output files, so refuse to run without it.
if ( $#argv < 1 ) then
    echo "Usage: $0 <dataset-name>"
    exit 1
endif
#
# remove the old list of files and the old macro; the macro is written in
# append mode further down, so a stale copy must not survive
rm -f process.list filter.kumac
#
# Turn gstar.<N>.fz into <N>.  The prefix and suffix are stripped with anchored
# patterns; a bracket expression such as [gstar.|.fz] would instead delete
# every one of those characters wherever it occurs in the name.
ls gstar.*.fz | sed -e 's/^gstar\.//' -e 's/\.fz$//' | sort -n >  process.list
#
# clean the trash bin before the next run, re-create
rm -fr trash
mkdir trash
echo `du --block-size=1000K -s | cut -f1` MB in the current directory
# -P keeps every filesystem on a single output line, so the "Available" value
# is always the 4th field regardless of how long the device name is.
echo `df -P --block-size=1000K . | tail -1 | awk '{print $4}'` MB available on disk
# Generate the KUMAC macro.  The here-document is expanded by the shell, so
# every dollar sign that belongs to KUIP is backslash-escaped below.
cat<<EOF>>filter.kumac
macro filter name
* Macro argument: name - dataset name, used to build the output file names.
input='gstar'
mess Start with filenames [input].*.fz, converting to [name]
ag/version batch
option  stat
option  date
option  nbox
filecase keep
pwd =\$shell('pwd');
nfiles=\$shell('cat process.list | wc -l | sed -e "s/[\ ]*//g"');

message Starting to process [nfiles]
* trace on
* Load the input file numbers listed in process.list into the vector runs.
ve/cr runs([nfiles]) I
ve/read runs process.list
ve/pri runs

* NOTE(review): this tests for an empty dataset name, while the message talks
* about the current directory - confirm which of the two is intended.
if (\$Len([name]).eq.0) then
  message cannot define current directory in [pwd]
  exit
endif
* Output file prefix: the bare dataset name, or OUTDIR/name/name when the
* environment variable OUTDIR is set.
namz=[name]
out =\$env('OUTDIR')
if ([out].ne.'') then
    namz = [out]/[name]/[name]
endif

* lenb is the divisor applied to the size reported by "ls -s" in the event loop.
lenb = 1000
message reading
* id holds the three breakpoint values read from / written to the file nn.
ve/cr    id(3)  I
* ve/read  id    N
message reading complete
nt=[nfiles]                    | total number of files to process
n1=runs(1)                    | first input file
n2=runs([nfiles])              | last input file
mm  = 0                        | number of output files
nn  = 0                        | number of processed files
cnt = 0                        | total number of events in this job
cno = 0                        | number of events when output has been opened
nev = 0                        | number of events in this output
ii  = 0                        | input active flag
io  = 0                        | output active flag
len0= 1200                    | minimum output file len
len1= [len0]+200              | average output file len - stop at end-of-file
len2= [len1]+200              | maximum output file len - stop always
ni  = [n1]                    | first input file
no  = 0                        | skip up to this file
nd  = [n1]                    | file to delete
ntrig = 10
*
* Restart support: if the breakpoint file nn exists, pick up the counters that
* a previous pass stored in it (files done, last input, next output number).
if (\$fexist(nn).gt.0) then
  ve/read id nn
  na=id(1); message [na] input files already done
  no=id(2); message first input files up to gstar.[no]
  mm=id(3); message first output files up to [name].[mm]
  mm=[mm]-1;
endif
*
* An existing histogram file is renamed to old.his and read back further down.
* NOTE(review): the HRGET call inside this block reads [hist] right after it
* has been moved to old.his, so the file is presumably gone - confirm whether
* that call is a leftover.
hist = [name].his
if (\$fexist([hist]).gt.0) then
  shell mv [hist] old.his
call HRGET(0,\$quote([hist]),' ')
endif
ghist [hist]
cdir  //pawc
mdir  cont
if (\$fexist(old.his).gt.0) then
  call HRGET(0,\$quote(old.his),' ')
endif

* Open the first input file and load the control and index libraries.
gfile p gstar.[n1].fz
mode  control prin 1 hist 0                | simu 2
  gexec ../.lib/control.sl
  gexec ../.lib/index.sl

message loaded libs

* Open the PostScript metafile [name].ps on unit 66 and write the title lines.
title=merging runs [n1]-[n2] in [name]
fort/file 66 [name].ps;  meta 66 -111
nextdcut cave x .1 10 10 .03 .03
Set DMOD 1; Igset TXFP -60; Igset CHHE .35
ITX 5 19.5  \$quote([title])
ITX .5  .1  \$quote([pwd])
*
*
* Main event loop.  Every pass handles one event:
*   - when no input is active (ii=0) the next existing input file listed in
*     the runs vector is opened; if none is left control goes to nexto;
*   - when no output is active (io=0) a new temporary .fzt output is opened;
*   - the next event is triggered; a non-zero iquest(1) marks the input file
*     as finished;
*   - once the output is long enough, or the input ended, the output is closed
*     at nexto and renamed to its final name carrying the event count.
*
*      do ni  = [ni],[n2]
frst=1
ag/version interactive
do iev=1,1000000000000
* new input file ?
    if ([ii].eq.0) then
      do nfoo=[frst],[nfiles]
        ni = runs([nfoo])

        file = [input].[ni].fz
        filz = [input].[ni].fz.gz
        hist = [input].[ni].his
        message processing index [nfoo] out of [nfiles]
        ve/print runs([nfoo])
*
        if (\$fexist([file]).gt.0) then
          message loop with [file]
* open the file for input, the same way the first input file is opened above
          gfile p [file]
          if (\$iquest(1).eq.0) then
            ii = 1
            nn = [nn]+1
* merge the per-file histograms, if any, into the ones already in memory
            if (\$fexist([hist]).gt.0) then
              if (\$hexist(-1).eq.0) then
                call HRGET(0,\$quote([hist]),' ')
              else
                call HRGET(0,\$quote([hist]),'A')
              endif
            endif
            call  indmes(\$quote([file]))
            goto  nextf
* iquest:
          endif
* fexist:
        endif
      enddo
* no usable input file is left: close whatever output is open
      goto nexto
    endif

nextf:
* new output file ?
    if ([io].eq.0) then
      mm = [mm]+1
* output serial numbers below 10 are zero-padded to two digits
      if ([mm].lt.10) then
        output=[namz]_0[mm]
      else
        output=[namz]_[mm]
      endif
      io  = 1
      cno = [cnt]
      gfile o [output].fzt
      iname = [name]_[mm].fzt
      call  indmes(\$quote([iname]))
    endif

* processing next event
    call rzcdir('//SLUGRZ',' ')
    trig [ntrig]
* keep iquest(99) in evt: it is printed and tested in the length checks below
    evt = \$iquest(99)

    if (\$iquest(1).ne.0) then
      ni = [ni]+1
      frst=[frst]+1
      ii = 0
    endif
    if ([ii].eq.0) goto nexto
* get output file length in MB:
    cmd = ls -s [output].fzt
    len = \$word(\$shell([cmd]))
    len = [len]/[lenb]
mess wrquest len=[len] ii=[ii] evt=[evt]
    if ([len].lt.[len0])                goto nextev
    if ([len].lt.[len1] .and. [ii].gt.0) goto nextev
    if ([len].lt.[len2] .and. [ii].gt.0 .and. [evt].eq.0) goto nextev
* output file done
nexto:
    cnt = \$iquest(100)
    if ([cnt]<0) then
      cnt = 0
    endif
    nev = [cnt]-[cno]
    io  = 0
*
    if ([nev].gt.0) then
      if ([nev].lt.199999) then
*      terminate last event, clear memory
        call  guout
        call  gtrigc
        gfile o
* rename temp file into the final one:
        cmv = mv [output].fzt [output]_[nev]evts.fzd
        i  = \$shell([cmv])
      endif
    endif
    message files inp = [ni] out = [mm] cnt = [cnt] done
*
    if ([ii].eq.0) then
      nj = [ni] - 1            | this file was finished, ni is NEXT to read
      mj = [mm] + 1            | this is next to start write after the BP
      message  writing breakpoint [nn] [ni] [mj]
      ve/inp  id [nn] [ni] [mj]
      ve/write id  nn  i6
      ntrig = 10
************************************
* moving files to TRASH
      while ([nd].lt.[ni]) do
        filed = [input].[nd].fz
        alrun = *.[nd].*
        if (\$fexist([filed]).gt.0) then
          shell mv [alrun] trash/
        endif
        nd = [nd] + 1
      endwhile
************************************
    else
      ntrig = [ntrig] + 1
    endif
    if ([ni].gt.[n2]) goto alldone
nextev:
enddo

* control histogram
alldone:
* Mark the job as complete only when every listed input file was processed.
if ([nn].eq.[nt]) then
shell touch filter.done 
endif 
cdir //pawc
tit = files [n1] - [n2] in set [name]
title_global \$quote([tit])
next; size 20.5 26; zone 2 4;
hi/pl 11;  hi/pl 12;  hi/pl 13;  hi/pl 14
* NOTE(review): if hexist only ever yields 0 or 1, the .gt.1 test below is
* never true and the ntuple plots are skipped - confirm this is intended.
if (\$hexist(1).gt.1) then
  n/pl 1.ntrack; n/pl 1.Nvertx; n/pl 1.NtpcHit; n/pl 1.Ntr10
endif
swn  111 0 20 0 20;  selnt 111
ITX  2.0  0.1  \$quote([pwd])
* Close the PostScript metafile opened on unit 66.
close 66; meta 0
physi
exit 
return
EOF
echo ------------------------------------------------------------------
echo Activating starsim for dataset $1
# Run the generated macro; the dataset name is handed over as the macro argument.
$STAR_BIN/starsim -w 1 -g 40 -b ./filter.kumac $1
# cleanup
# Not every file below exists after every run (e.g. filter.done is only created
# when all inputs were processed).  nonomatch keeps tcsh from aborting the whole
# command with "No match" when a wildcard matches nothing, and -f keeps rm quiet
# about names that are absent.
set nonomatch
rm -f ZEBRA.O process.list nn index paw.metafile *.his *.ps filter.done filter.kumac