root/cvsroot/COMP/CRAB/python/find_dupl.sh
Revision: 1.1
Committed: Wed Feb 29 17:21:00 2012 UTC by fanzago
Content type: application/x-sh
Branch: MAIN
Log Message:
Compares the files at the remote location with those found in the fjr files and prints the ones to be removed at the remote location.
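
For orientation, a typical invocation of the script below might look like this (crab_0_120229_171500 is only a placeholder project directory):

  ./find_dupl.sh -c crab_0_120229_171500 -v

This writes allfiles.list and goodfiles.list in the current directory and prints the SURLs of the root files considered duplicates at the remote storage element.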

File Contents

#!/bin/bash
set -o nounset

PROGNAME=$(basename $0)

function usage
{
cat <<EOF
Find a list of duplicate root files for a dataset at the SE that
should be removed.

Usage: $PROGNAME -c <crab_dir> [-h | --help]
where options are:
  -c              Mandatory argument, crab project directory
  -v|--verbose    Turn on debug statements (D=false)
  -q|--quiet      Suppress progress messages (D=false)
  -h|--help       This message

example: $PROGNAME -c <crab_dir> -v

This script creates two files in the present directory:

  allfiles.list  - all the root files for the dataset present at the SE
  goodfiles.list - root files for successful jobs as found in the crab_fjr_n.xml files

and finds the duplicate files from the difference. Note that at times jobs may finish
and root files may be transferred to the SE successfully, but crab may not immediately
know about the job completion. Those 'most recent' root files will be tagged as
duplicates, but they are not.
EOF

exit 1
}

[ $# -gt 0 ] || usage

crab_dir=""
let "verbose = 0"
let "quiet = 0"
while [ $# -gt 0 ]; do
  case $1 in
    -c) shift
        crab_dir=$1
        ;;
    -v | --verbose ) let "verbose = 1"
        ;;
    -q | --quiet ) let "quiet = 1"
        ;;
    -h | --help ) usage
        ;;
    * ) usage
        ;;
  esac
  shift
done

[ "$crab_dir" != "" ] && [ -e "$crab_dir" ] || usage

gflist=goodfiles.list
aflist=allfiles.list

# First of all get the list of good files by reading the fjr files
#export PERL5LIB=/afs/cern.ch/user/s/sarkar/public/perl/lib/perl5/site_perl/5.8.8:$PERL5LIB
#perl -w /afs/cern.ch/user/s/sarkar/public/ListGoodOutputFiles_new.pl $project/res > $gflist

[ $quiet -gt 0 ] || echo ">>> Find list of good files from fjr files..."
python /afs/cern.ch/user/s/sarkar/public/to_stage/find_goodfiles.py -c "$crab_dir" -q > "$gflist"
# Now find the remote directory name from the first good file
rdir=$(dirname "$(head -1 "$gflist")")
srmp=$(echo "$rdir" | awk -F= '{print $1}')

# Get the list of all root files for the project at the SE
[ $quiet -gt 0 ] || echo ">>> Find list of all root files at $rdir ..."
srmls "$rdir" 2> /dev/null | grep '\.root$' | awk '{if (NF==2) print $NF}' > "$aflist"

# Now compare the two lists
[ $quiet -gt 0 ] || echo ">>> Following is the list of duplicate files at $rdir ..."
for file in $(cat "$aflist")
do
  # Skip files that appear in the good-files list, matched by full path ...
  grep -q "$file" "$gflist" && continue

  # ... or by basename
  bname=$(basename "$file")
  grep -q "$bname" "$gflist" && continue

  # Anything left over is a duplicate; print its full SURL
  echo "${srmp}=${file}"
done

exit 0
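
The script only prints the SURLs of the suspected duplicates; it does not delete anything. A minimal cleanup sketch, assuming the SRM client tools (srmrm) are available and that each printed line is a full SURL (crab_0_120229_171500 is again a placeholder project directory). As the usage text warns, recently finished jobs can be flagged as duplicates, so inspect the list before removing anything:

  ./find_dupl.sh -c crab_0_120229_171500 -q > duplicates.list
  # duplicates.list holds one SURL per line; review it by hand first
  while read -r surl; do
      srmrm "$surl"
  done < duplicates.list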