ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/downloadSample.sh
Revision: 1.12
Committed: Wed Jul 25 03:05:45 2012 UTC (12 years, 9 months ago) by paus
Content type: application/x-sh
Branch: MAIN
CVS Tags: Mit_029_pre1
Changes since 1.11: +18 -5 lines
Log Message:
Preparing for new tag (Mit_029).

File Contents

# User Rev Content
#!/bin/bash
#---------------------------------------------------------------------------------------------------
# Download a list of files
#
# Usage: downloadSample.sh <dataDir> <book> <dataset> <target> <nCopyProcs> <condorOutput>
#                          [onlyMissing]
#
#   dataDir       base directory of the source; the sample is read from $dataDir/$book/$dataset
#   book          book the dataset belongs to (second path component)
#   dataset       name of the dataset (third path component)
#   target        base directory the files are copied to ($target/$book/$dataset)
#   nCopyProcs    upper limit on the number of condor jobs the copy is split into
#   condorOutput  base directory for the file lists and the job .out/.err files
#   onlyMissing   optional; when non-empty, files that already exist at the target are not
#                 downloaded again even if their size differs from the source
#---------------------------------------------------------------------------------------------------

# Read the arguments
echo ""
echo " downloadSample.sh $*"
echo ""
if [ $# -lt 6 ]
then
  echo " ERROR -- expected at least 6 arguments but got $#."
  echo " usage: downloadSample.sh dataDir book dataset target nCopyProcs condorOutput [onlyMissing]"
  exit 1
fi
dataDir=$1; shift
book=$1; shift
dataset=$1; shift
target=$1; shift
nCopyProcs=$1; shift
condorOutput=$1; shift
onlyMissing=${1:-}

# subject of the grid proxy; only referenced by a disabled submit attribute further down
DN=$(grid-proxy-info -subject)

# Prepare environment
echo " "
echo " Process dataset: $dataset of book: $book"
echo " in directory : $dataDir"
echo " to target : $target"
echo " n copy procs : $nCopyProcs"
echo " condor output: $condorOutput"
echo " only missing : $onlyMissing"

# makedir is an external helper (not part of this file); it is called once per path level
mkdir -p "$condorOutput/$book/$dataset"
makedir --exe "$target"
makedir --exe "$target/$book"
makedir --exe "$target/$book/$dataset"

# full path of the worker script that every condor job executes
script=$(command -v downloadFiles.sh)
if [ -z "$script" ]
then
  # without the worker every submit file would carry an empty Executable
  echo " ERROR -- downloadFiles.sh not found in PATH."
  exit 1
fi

# cleanup our lists and remake a clean one
# (the glob characters stay outside the quotes so they still expand; $$ is the PID of this shell,
#  which tags every list written by this run)
#echo "rm -f $condorOutput/$book/$dataset/fileList*.$$.txt*"
rm -f "$condorOutput/$book/$dataset"/fileList*.$$.txt*

# make list of all files available at the source ($dataDir)
# --simple is handed to the external 'list' helper when we run on an mit.edu host and either end
# of the copy lives on castor at CERN
if [[ "$HOSTNAME" == *mit.edu* ]] && \
   [[ "$dataDir" == */castor/cern.ch* || "$target" == */castor/cern.ch* ]]
then
  opt="--simple"
else
  opt=""
fi

# ${opt:+...} drops the argument completely when no option is set
list ${opt:+"$opt"} "$dataDir/$book/$dataset" | sort \
  > "$condorOutput/$book/$dataset/fileList-all.$$.txt-bak"

# Make sure there are kerberos and globus tickets available
#
# The proxy file /tmp/x509up_u<uid> and the kerberos ticket cache are copied into ~/.krb5 so that
# they are available from the home directory.
id=$(id -u)
mkdir -p ~/.krb5/
# make the directory private (mode 700) before any credential is copied into it
chmod 700 ~/.krb5
cp "/tmp/x509up_u${id}" ~/.krb5/

# third blank separated field of the 'Ticket cache:' line printed by klist
KRB5CCNAME=$(klist -5 | grep 'Ticket cache:' | cut -d' ' -f 3)
if [ -n "$KRB5CCNAME" ]
then
  # second colon separated field -- presumably the path in a FILE:<path> cache name
  file=$(printf '%s\n' "$KRB5CCNAME" | cut -d: -f2)
  if [ -f "$file" ]
  then
    cp "$file" ~/.krb5/ticket
  else
    echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
    exit 1
  fi
else
  # no ticket cache reported at all: complain, but carry on as this script always did
  echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
fi

# make list of all remote files

#---------------------------------------------------------------------------------------------------
# normalize_file_list
#
# Reads a raw listing on stdin, keeps the lines containing 'root', sorts them and writes one
# "<size> <file>" line per entry to stdout, where <size> is the first blank separated field and
# <file> is the second field with any leading directory part removed.
#---------------------------------------------------------------------------------------------------
normalize_file_list() {
  local size path
  grep root | sort | \
  while read -r size path _
  do
    # ${path##*/} strips everything up to the last slash without forking basename per line
    echo "$size ${path##*/}"
  done
}

# start from an empty list so that it exists even when the raw listing could not be read
rm -f "$condorOutput/$book/$dataset/fileList-all.$$.txt"
: > "$condorOutput/$book/$dataset/fileList-all.$$.txt"

normalize_file_list < "$condorOutput/$book/$dataset/fileList-all.$$.txt-bak" \
  >> "$condorOutput/$book/$dataset/fileList-all.$$.txt"

# make list of all files that are already present at the target
# --simple is handed to the external 'list' helper when we run on an mit.edu host and either end
# of the copy lives on castor at CERN
if [[ "$HOSTNAME" == *mit.edu* ]] && \
   [[ "$dataDir" == */castor/cern.ch* || "$target" == */castor/cern.ch* ]]
then
  opt="--simple"
else
  opt=""
fi

# only entries containing 'root' are kept; ${opt:+...} drops the argument when no option is set
#dcache=/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus
#list $opt $dcache/$book/$dataset $target/$book/$dataset | grep root | sort \
list ${opt:+"$opt"} "$target/$book/$dataset" | grep root | sort \
  > "$condorOutput/$book/$dataset/fileList-done.$$.txt"

#---------------------------------------------------------------------------------------------------
# report_list_differences <all-list> <done-list>
#
# Prints a three part report based on a side-by-side diff (diff -y) of the two lists:
#   lines marked '|'  entries that differ between the lists
#   lines marked '<'  entries only present in <all-list>
#   lines marked '>'  entries only present in <done-list>
# The diff output is kept in a variable, so no scratch file is written to the current directory.
#---------------------------------------------------------------------------------------------------
report_list_differences() {
  local report
  report=$(diff -y "$1" "$2")

  echo ""
  echo " Files different in size: "
  grep '|' <<< "$report"
  echo ""
  echo " Files available in all and not done: "
  grep '<' <<< "$report"
  echo ""
  echo " Files done but not listed in all available: "
  grep '>' <<< "$report"
  echo ""
}

report_list_differences "$condorOutput/$book/$dataset/fileList-all.$$.txt" \
                        "$condorOutput/$book/$dataset/fileList-done.$$.txt"

# make list of missing files

#---------------------------------------------------------------------------------------------------
# find_missing_files <all-list> <done-list> <missing-list> <onlyMissing>
#
# Compares the "<size> <file>" entries of <all-list> with the entries of <done-list> and
# (re)creates <missing-list> with every entry that still has to be downloaded:
#   - the file name does not appear in <done-list>           -> always listed
#   - the file name appears but with a different size        -> listed only if <onlyMissing> is ""
# One message per listed file is printed to stdout.
#
# Names and sizes are compared as whole fields. A substring/regex match would take 'a.root' for
# present when only 'xa.root' exists and would accept size 23 when the target holds 123. Entries
# of <done-list> are matched on the last path component of their second field.
#---------------------------------------------------------------------------------------------------
find_missing_files() {
  local all=$1 finished=$2 missing=$3 only=$4

  # always start from an empty list
  : > "$missing"
  # a done list that was never written simply means that nothing is done yet
  [ -f "$finished" ] || finished=/dev/null

  awk -v out="$missing" -v only_missing="$only" '
    # first file: remember every name found at the target and the sizes seen for it
    FILENAME == ARGV[1] {
      n = split($2, part, "/")
      name = (n > 0) ? part[n] : ""
      lines[name] = (name in lines) ? lines[name] "\n" $0 : $0
      have[name, $1] = 1
      next
    }
    # second file: decide for every source entry whether it has to be copied
    /root/ {
      size = $1
      file = $2
      if (!(file in lines)) {
        printf " -missing-- %s with %s bytes\n", file, size
        print size " " file >> out
      } else if (!((file, size) in have) && only_missing == "") {
        printf " -fileSize- %s (remote: %s)\n", lines[file], size
        print size " " file >> out
      }
    }
  ' "$finished" "$all"
}

find_missing_files "$condorOutput/$book/$dataset/fileList-all.$$.txt" \
                   "$condorOutput/$book/$dataset/fileList-done.$$.txt" \
                   "$condorOutput/$book/$dataset/fileList.$$.txt" \
                   "$onlyMissing"

#---------------------------------------------------------------------------------------------------
# count_lines <file>
#
# Prints the number of lines in <file> as a bare integer. Reading through a redirect keeps the
# file name out of the wc output, and the arithmetic expansion removes any padding wc may add.
# Nothing is printed when the file cannot be read.
#---------------------------------------------------------------------------------------------------
count_lines() {
  local n
  n=$(wc -l < "$1") || return 1
  echo $(( n ))
}

nAll=$(count_lines "$condorOutput/$book/$dataset/fileList-all.$$.txt")
nMissing=$(count_lines "$condorOutput/$book/$dataset/fileList.$$.txt")
nDone=$(count_lines "$condorOutput/$book/$dataset/fileList-done.$$.txt")
echo ""
echo " Download Summary "
echo " All $nAll"
echo " Done $nDone"
echo " Missing $nMissing"
echo ""

# construct our job
# number of files that still have to be copied (one line per file in the missing list)
nFiles=$(wc -l < "$condorOutput/$book/$dataset/fileList.$$.txt")
nFiles=${nFiles//[[:space:]]/}
if [ -z "$nFiles" ] || [ "$nFiles" == "0" ]
then
  echo " "
  echo " No more files to download. EXIT."
  exit 0
fi

# the number of copy processes is used as a divisor below, so it has to be a positive integer
case "$nCopyProcs" in
  ''|*[!0-9]*)
    echo " ERROR -- number of copy processes is not a positive integer ($nCopyProcs)."
    exit 1
    ;;
esac
nCopyProcs=$(( 10#$nCopyProcs ))
if [ "$nCopyProcs" -lt 1 ]
then
  echo " ERROR -- number of copy processes is not a positive integer ($nCopyProcs)."
  exit 1
fi

# never start more jobs than there are files
if [ "$nFiles" -lt "$nCopyProcs" ]
then
  nCopyProcs=$nFiles
fi

# how many files per job? nFiles/nCopyProcs rounded up, so that nCopyProcs jobs cover all files
nFilesPerJob=$(( (nFiles + nCopyProcs - 1) / nCopyProcs ))

echo " n files to copy: $nFiles"
echo " n files/proc : $nFilesPerJob"

# counters for the submission loop: job index and first/last line of the list for that job
i=1
next=1
last=$nFilesPerJob

# make sure condor is properly setup for us
if [ -n "$CONDOR_LOCATION" ]
then
  unset CONDOR_LOCATION
  export CONDOR_CONFIG=/usr/local/condor/etc/condor_config
fi

# stage in the missing files if it is at CERN
#
# The list of missing files is copied to the lxplus account of $TICKET_HOLDER, stageSample.py is
# run there on it and the remote copy of the list is removed again. Problems are reported but do
# not stop the download.
if [[ "$dataDir" == */castor/cern.ch* ]]
then
  if [ -z "$TICKET_HOLDER" ]
  then
    echo " ERROR -- TICKET_HOLDER is not set, staging skipped."
  else
    echo " scp $condorOutput/$book/$dataset/fileList.$$.txt $TICKET_HOLDER@lxplus.cern.ch:"
    if scp "$condorOutput/$book/$dataset/fileList.$$.txt" "$TICKET_HOLDER@lxplus.cern.ch:"
    then
      echo " ssh $TICKET_HOLDER@lxplus.cern.ch ./stageSample.py --dataDir=$dataDir/$book/$dataset --fileList=fileList.$$.txt"
      ssh "$TICKET_HOLDER@lxplus.cern.ch" ./stageSample.py \
          "--dataDir=$dataDir/$book/$dataset" "--fileList=fileList.$$.txt"
      echo " ssh $TICKET_HOLDER@lxplus.cern.ch rm fileList.$$.txt"
      ssh "$TICKET_HOLDER@lxplus.cern.ch" rm "fileList.$$.txt"
    else
      # without the list on the remote side there is nothing stageSample.py could work on
      echo " ERROR -- copying the file list to lxplus failed, staging skipped."
    fi
  fi
fi

# loop over the condor jobs and submit them

#---------------------------------------------------------------------------------------------------
# job_ranges <nFiles> <nFilesPerJob> <nCopyProcs>
#
# Prints one "<next> <last>" line per job: the first and last line number of the file list that
# the job has to work on. Consecutive jobs get <nFilesPerJob> files each. The range of a job is
# cut off at <nFiles>, and the job number <nCopyProcs> takes everything that is left, so every
# file from 1 to <nFiles> is part of exactly one range.
# Prints nothing and returns 1 unless all arguments are integers and <nFilesPerJob> is positive.
#---------------------------------------------------------------------------------------------------
job_ranges() {
  local total=$1 per_job=$2 max_jobs=$3
  local value job=1 first=1 end

  for value in "$total" "$per_job" "$max_jobs"
  do
    case "$value" in
      ''|*[!0-9]*) return 1 ;;
    esac
  done
  # a step of zero would never advance
  [ "$per_job" -ge 1 ] || return 1

  while [ "$job" -le "$max_jobs" ] && [ "$first" -le "$total" ]
  do
    end=$(( first + per_job - 1 ))
    if [ "$job" -eq "$max_jobs" ] || [ "$end" -gt "$total" ]
    then
      end=$total
    fi
    echo "$first $end"

    first=$(( first + per_job ))
    job=$(( job + 1 ))
  done
}

# the ranges are read from descriptor 3 so that stdin stays untouched for the commands in the loop
while read -r next last <&3
do
  # say what we are going to submit
  echo " downloadFiles.sh $dataDir $book $dataset $target $condorOutput $$ $next $last"

  # condor log file for this job: /tmp/download:<book>+<dataset>+<next>-<last>.txt
  logFile="download:$book/$dataset/${next}-${last}.txt"
  logFile=/tmp/${logFile//\//+}
  rm -f "$logFile"

  # prepare the condor_submit files
  cat > "submit_$$.cmd" <<EOF
Universe = vanilla
Requirements = ( (Arch == "INTEL") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer) )
Notify_user = $TICKET_HOLDER@mit.edu
Notification = Error
Executable = $script
Arguments = $dataDir $book $dataset $target $condorOutput $$ $next $last
Rank = Mips
GetEnv = True
Input = /dev/null
Output = $condorOutput/$book/$dataset/${next}-${last}.out
Error = $condorOutput/$book/$dataset/${next}-${last}.err
Log = $logFile
should_transfer_files = YES
when_to_transfer_output = ON_EXIT

+AccountingGroup = "group_cmsuser.cmsu0284"

Queue
EOF

  #+x509userproxysubject = $DN

  # submit the jobs (the output of condor_submit is dropped, a failure is still reported)
  if ! condor_submit "submit_$$.cmd" >& /dev/null #>& lastSub
  then
    echo " ERROR -- condor_submit failed for files $next to $last."
  fi
  #cat submit_$$.cmd
  rm "submit_$$.cmd"
done 3< <(job_ranges "$nFiles" "$nFilesPerJob" "$nCopyProcs")

252     exit 0