ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/find_dupl.sh
Revision: 1.2
Committed: Wed Mar 14 15:18:01 2012 UTC (13 years, 1 month ago) by fanzago
Content type: application/x-sh
Branch: MAIN
CVS Tags: CRAB_2_9_1, CRAB_2_9_1_pre2, CRAB_2_9_1_pre1, CRAB_2_9_0, CRAB_2_9_0_pre2, CRAB_2_9_0_pre1, CRAB_2_8_8, CRAB_2_8_8_pre1, CRAB_2_8_7_patch3, CRAB_2_8_7_patch2, CRAB_2_8_7_patch1, CRAB_2_8_7, CRAB_2_8_7_pre2, CRAB_2_8_7_pre1, CRAB_2_8_6, CRAB_2_8_6_pre1, CRAB_2_8_5_patch3, CRAB_2_8_5_patch2, CRAB_2_8_5_patch1, CRAB_2_8_5, CRAB_2_8_5_pre5, CRAB_2_8_5_pre4, CRAB_2_8_5_pre3, CRAB_2_8_4_patch3, CRAB_2_8_5_pre2, CRAB_2_8_4_patch2, CRAB_2_8_5_pre1, CRAB_2_8_4_patch1, CRAB_2_8_4, CRAB_2_8_4_pre5, CRAB_2_8_4_pre4, CRAB_2_8_4_pre3, CRAB_2_8_4_pre2, CRAB_2_8_4_pre1, CRAB_2_8_3, CRAB_2_8_3_pre4, CRAB_2_8_3_pre3, CRAB_2_8_3_pre2, CRAB_2_8_3_pre1, CRAB_2_8_2_patch1, CRAB_2_8_2, CRAB_2_8_2_pre5, CRAB_2_8_2_pre4, CRAB_2_8_2_pre3, CRAB_2_8_2_pre2, CRAB_2_8_2_pre1, CRAB_2_8_1, HEAD
Changes since 1.1: +1 -1 lines
Error occurred while calculating annotation data.
Log Message:
removed a path

File Contents

# Content
#!/bin/bash
# Treat the expansion of an unset variable as an error.
set -o nounset

# Script name without its directory part; interpolated into the usage text.
# Parameter expansion is used instead of an unquoted `basename $0` so that a
# script path containing spaces or glob characters is handled correctly.
PROGNAME=${0##*/}
5
#######################################
# Print the help text and terminate the script.
# Globals:   PROGNAME (read) - script name shown in the synopsis
# Arguments: none
# Outputs:   help text on stdout
# Returns:   does not return; always exits with status 1
#######################################
usage() {
  # Unquoted delimiter on purpose: $PROGNAME must be expanded in the text.
  cat <<EOF
Find a list of duplicate root files for a dataset at the SE that
should be removed.

Usage: $PROGNAME -c <crab_dir> [-v | --verbose] [-q | --quiet] [-h | --help]
where options are:
  -c            Mandatory argument, crab project directory
  -v|--verbose  Turn on debug statements (D=false)
  -q|--quiet    Suppress the progress messages (D=false)
  -h|--help     This message

example: $PROGNAME -c <crab_dir> -v

This script creates two files in the present directory:

  allfiles.list  - all the root files for the dataset present at the SE
  goodfiles.list - root files for successful jobs as found in the crab_fjr_n.xml files

and finds the duplicate files from the difference. Note that at times jobs may finish
and their root files may be transferred to the SE successfully before crab knows about
job completion. Those 'most recent' root files will be tagged as duplicate, but they
are not.
EOF

  exit 1
}
33
# Command-line handling: fills crab_dir, verbose and quiet from the script's
# positional parameters. Any problem ends in usage(), which exits.

# Running without any argument just prints the help text.
[[ $# -gt 0 ]] || usage

crab_dir=""
verbose=0 # set by -v/--verbose; nothing in the visible script reads it
quiet=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    -c)
      # -c needs a value. Without this guard a trailing "-c" makes the
      # following "$1" abort with an unbound-variable error under nounset
      # instead of showing the usage.
      [[ $# -ge 2 ]] || usage
      shift
      crab_dir=$1
      ;;
    -v | --verbose)
      verbose=1
      ;;
    -q | --quiet)
      quiet=1
      ;;
    -h | --help)
      usage
      ;;
    *)
      usage
      ;;
  esac
  shift
done

# The crab project directory is mandatory and must exist. Quoting keeps the
# test well-formed when the value is empty or contains spaces.
[[ -n "$crab_dir" && -e "$crab_dir" ]] || usage
57
# Work files, created in the current directory.
gflist=goodfiles.list
aflist=allfiles.list

# First of all get the list of good files by reading the fjr files
# (earlier perl-based implementation, left disabled)
#export PERL5LIB=/afs/cern.ch/user/s/sarkar/public/perl/lib/perl5/site_perl/5.8.8:$PERL5LIB
#perl -w /afs/cern.ch/user/s/sarkar/public/ListGoodOutputFiles_new.pl $project/res > $gflist

[[ $quiet -gt 0 ]] || echo ">>> Find list of good files from fjr files..."
# find_goodfiles.py is looked up relative to the current directory.
python find_goodfiles.py -c "$crab_dir" -q > "$gflist"

# Now find the remote directory name from the first good file.
first_good=$(head -n 1 -- "$gflist")
if [[ -z "$first_good" ]]; then
  # Without a good file the remote directory is unknown; stop here instead of
  # querying the SE with an empty path.
  echo "$gflist is empty: no good files found for $crab_dir" >&2
  exit 1
fi
rdir=$(dirname -- "$first_good")
# Everything before the first '=' of the remote directory; it is put back in
# front of each reported file below.
srmp=${rdir%%=*}

# Get list of all files for the project
[[ $quiet -gt 0 ]] || echo ">>> Find list of all root files at $rdir ..."
# Keep the last field of two-field lines ending in ".root"; the dot is escaped
# so that it only matches a literal dot. srmls diagnostics are discarded.
srmls "$rdir" 2> /dev/null \
  | grep '\.root$' \
  | awk 'NF == 2 { print $NF }' > "$aflist"

# Now compare
[[ $quiet -gt 0 ]] || echo ">>> Following is the list of duplicate files at $rdir ..."
while IFS= read -r file; do
  [[ -n "$file" ]] || continue

  # A file is good when its base name occurs in the good-file list. The base
  # name is a suffix of the full path, so a separate full-path lookup cannot
  # change the outcome. -F matches the name literally, not as a regex.
  bname=${file##*/}
  grep -q -F -- "$bname" "$gflist" && continue

  printf '%s=%s\n' "$srmp" "$file"
done < "$aflist"

exit 0