Revision: | 1.2 |
Committed: | Wed Mar 14 15:18:01 2012 UTC (13 years, 1 month ago) by fanzago |
Content type: | application/x-sh |
Branch: | MAIN |
CVS Tags: | CRAB_2_9_1, CRAB_2_9_1_pre2, CRAB_2_9_1_pre1, CRAB_2_9_0, CRAB_2_9_0_pre2, CRAB_2_9_0_pre1, CRAB_2_8_8, CRAB_2_8_8_pre1, CRAB_2_8_7_patch3, CRAB_2_8_7_patch2, CRAB_2_8_7_patch1, CRAB_2_8_7, CRAB_2_8_7_pre2, CRAB_2_8_7_pre1, CRAB_2_8_6, CRAB_2_8_6_pre1, CRAB_2_8_5_patch3, CRAB_2_8_5_patch2, CRAB_2_8_5_patch1, CRAB_2_8_5, CRAB_2_8_5_pre5, CRAB_2_8_5_pre4, CRAB_2_8_5_pre3, CRAB_2_8_4_patch3, CRAB_2_8_5_pre2, CRAB_2_8_4_patch2, CRAB_2_8_5_pre1, CRAB_2_8_4_patch1, CRAB_2_8_4, CRAB_2_8_4_pre5, CRAB_2_8_4_pre4, CRAB_2_8_4_pre3, CRAB_2_8_4_pre2, CRAB_2_8_4_pre1, CRAB_2_8_3, CRAB_2_8_3_pre4, CRAB_2_8_3_pre3, CRAB_2_8_3_pre2, CRAB_2_8_3_pre1, CRAB_2_8_2_patch1, CRAB_2_8_2, CRAB_2_8_2_pre5, CRAB_2_8_2_pre4, CRAB_2_8_2_pre3, CRAB_2_8_2_pre2, CRAB_2_8_2_pre1, CRAB_2_8_1, HEAD |
Changes since 1.1: | +1 -1 lines |
Error occurred while calculating annotation data. | |
Log Message: | removed a path |
# | Content |
---|---|
#!/bin/bash
# Abort on any reference to an unset variable instead of silently expanding
# it to an empty string.
set -o nounset

# Script name without its directory part; interpolated into the usage text.
# Quoted (and guarded with --) so an invocation path containing spaces or a
# leading dash is handled correctly.
PROGNAME=$(basename -- "$0")
5 | |
#######################################
# Print the help text and terminate the script.
# Globals:   PROGNAME (read) - script name shown in the synopsis and example
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; always exits with status 1, including when help
#            was requested explicitly with -h/--help
#######################################
usage() {
  # Unquoted delimiter on purpose: $PROGNAME must be expanded in the text.
  cat <<EOF
Find a list of duplicate root files for a dataset at the SE that
should be removed.

Usage: $PROGNAME -c <crab_dir> [-v | --verbose] [-q | --quiet] [-h | --help]
where options are:
-c Mandatory argument, crab project directory
-v|--verbose Turn on debug statements (D=false)
-q|--quiet Suppress progress messages (D=false)
-h|--help This message

example: $PROGNAME -c <crab_dir> -v

This script creates two files in the present directory:

allfiles.list - all the root files for the dataset present at the SE
goodfiles.list - root files for successful jobs as found in the crab_fjr_n.xml files

and finds the duplicate files from the difference. Note, that at times jobs may finish
and root files transferred to the SE successfully, but crab may not immediately know about job
completion. Those 'most recent' root files will be tagged as duplicate, but they
are not.
EOF

  exit 1
}
33 | |
# Invoked without any argument: show the help text (usage exits the script).
[[ $# -gt 0 ]] || usage

crab_dir=""   # crab project directory, supplied with -c
verbose=0     # set by -v/--verbose; not consulted anywhere else in this script
quiet=0       # set by -q/--quiet; silences the ">>>" progress messages

while [[ $# -gt 0 ]]; do
  case "$1" in
    -c)
      # -c needs a value. Without this guard the assignment below would trip
      # 'set -o nounset' with a bare "unbound variable" error instead of
      # showing the help text.
      [[ $# -ge 2 ]] || usage
      shift
      crab_dir=$1
      ;;
    -v | --verbose)
      verbose=1
      ;;
    -q | --quiet)
      quiet=1
      ;;
    -h | --help)
      usage
      ;;
    *)
      # Anything unrecognised is treated as a request for help.
      usage
      ;;
  esac
  shift
done

# The project directory is mandatory and must exist. Quoted inside [[ ]] so
# an empty value or a path containing spaces is evaluated as a single word.
[[ -n "$crab_dir" && -e "$crab_dir" ]] || usage
57 | |
# Both lists are written to the current working directory.
gflist=goodfiles.list
aflist=allfiles.list

# First of all get the list of good files by reading the fjr files
#export PERL5LIB=/afs/cern.ch/user/s/sarkar/public/perl/lib/perl5/site_perl/5.8.8:$PERL5LIB
#perl -w /afs/cern.ch/user/s/sarkar/public/ListGoodOutputFiles_new.pl $project/res > $gflist

[[ $quiet -gt 0 ]] || echo ">>> Find list of good files from fjr files..."
python find_goodfiles.py -c "$crab_dir" -q > "$gflist"

# Everything below is derived from the first good file. With an empty list
# dirname would be called without an operand and srmls without a target, so
# stop here with a clear message instead.
if [[ ! -s "$gflist" ]]; then
  echo "$gflist is empty: no good files found for $crab_dir" >&2
  exit 1
fi

# Now find the remote directory name: the directory part of the first good
# file, and the part of it that precedes the first '='.
first_good=$(head -n 1 "$gflist")
rdir=$(dirname -- "$first_good")
srmp=${rdir%%=*}

# Get list of all files for the project: keep two-field srmls lines that end
# in ".root" (dot escaped so it only matches a literal dot) and store the
# last field.
[[ $quiet -gt 0 ]] || echo ">>> Find list of all root files at $rdir ..."
srmls "$rdir" 2> /dev/null | grep '\.root$' | awk 'NF == 2 { print $NF }' > "$aflist"

# Now compare
[[ $quiet -gt 0 ]] || echo ">>> Following is the list of duplicate files at $rdir ..."
while IFS= read -r file; do
  [[ -n "$file" ]] || continue

  # A file is good when its base name occurs in the good list. This also
  # covers a match on the full path, since the base name is a substring of
  # it. -F matches the name literally, so dots and other regex
  # metacharacters in file names cannot cause false matches.
  bname=$(basename -- "$file")
  grep -qF -- "$bname" "$gflist" && continue

  echo "${srmp}=${file}"
done < "$aflist"

exit 0