ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/downloadSample.sh
Revision: 1.13
Committed: Thu Aug 9 21:16:03 2012 UTC (12 years, 8 months ago) by paus
Content type: application/x-sh
Branch: MAIN
CVS Tags: Mit_032, Mit_031, Mit_030, Mit_029c, Mit_029b, Mit_030_pre1, Mit_029a, Mit_029, HEAD
Changes since 1.12: +0 -2 lines
Log Message:
For version 029.

File Contents

# Content
#!/bin/bash
#---------------------------------------------------------------------------------------------------
# Download a list of files
#
# Usage: downloadSample.sh dataDir book dataset target nCopyProcs condorOutput [onlyMissing]
#
#   dataDir       directory of the source sample; $dataDir/$book/$dataset is listed
#   book dataset  sub-directories identifying the sample
#   target        destination directory; $target/$book/$dataset is created and listed
#   nCopyProcs    upper limit for the number of condor copy jobs
#   condorOutput  directory receiving the file lists and the condor job output
#   onlyMissing   optional; when non-empty, files found with a different size are not redone
#---------------------------------------------------------------------------------------------------

# Show how we were called
printf '\n downloadSample.sh %s\n\n' "$*"

# Read the arguments
dataDir=$1
book=$2
dataset=$3
target=$4
nCopyProcs=$5
condorOutput=$6
onlyMissing=$7
# all positional parameters have been consumed
set --

# subject of the grid proxy (not used further in this script)
DN=$(grid-proxy-info -subject)
# Prepare environment: report the settings, create the output directories and locate the
# script that the condor jobs will execute.
echo " "
echo " Process dataset: $dataset of book: $book"
echo " in directory : $dataDir"
echo " to target : $target"
echo " n copy procs : $nCopyProcs"
echo " condor output: $condorOutput"
echo " only missing : $onlyMissing"

mkdir -p "$condorOutput/$book/$dataset"
makedir --exe "$target"
makedir --exe "$target/$book"
makedir --exe "$target/$book/$dataset"

# Without the worker script every submitted job would have an empty Executable, so stop here.
script=$(command -v downloadFiles.sh)
if [[ -z "$script" ]]; then
  echo " ERROR -- downloadFiles.sh not found in PATH."
  exit 1
fi
# cleanup our lists and remake a clean one
# (only the glob part stays unquoted; the directory may contain any character)
#echo "rm -f $condorOutput/$book/$dataset/fileList*.$$.txt*"
rm -f "$condorOutput/$book/$dataset"/fileList*."$$".txt*

# make list of all files in the source directory
# On an mit.edu host with castor at CERN as source or target, `list` is given --simple.
if [[ "$HOSTNAME" == *mit.edu* ]] &&
   [[ "$dataDir" == */castor/cern.ch* || "$target" == */castor/cern.ch* ]]; then
  opt="--simple"
else
  opt=""
fi

# $opt is intentionally unquoted: an empty value must not become an empty argument
list $opt "$dataDir/$book/$dataset" | sort \
  > "$condorOutput/$book/$dataset/fileList-all.$$.txt-bak"
# Make sure there are kerberos and globus tickets available
# Copies of the grid proxy and of the kerberos ticket cache are placed in ~/.krb5.
id=$(id -u)
mkdir -p ~/.krb5/
# restrict the directory to its owner before any credential is copied into it
chmod 700 ~/.krb5
cp "/tmp/x509up_u${id}" ~/.krb5/

KRB5CCNAME=$(klist -5 | grep 'Ticket cache:' | cut -d' ' -f 3)
if [[ -n "$KRB5CCNAME" ]]; then
  # keep the second ':'-separated field (presumably the cache is reported as TYPE:path)
  file=$(echo "$KRB5CCNAME" | cut -d: -f2)
  if [[ -f "$file" ]]; then
    cp "$file" ~/.krb5/ticket
  else
    echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
    exit 1
  fi
else
  # NOTE(review): unlike the branch above this one does not exit; kept as is because it may
  # be deliberate for transfers that do not need kerberos -- confirm.
  echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
fi
# make list of all remote files

# Read "size path" lines on stdin and print "size basename" lines, one per input line.
# A single awk replaces the former per-line tr/cut/basename calls, and the lines are never
# exposed to shell word splitting or pathname expansion.
normalizeFileList() {
  awk '{
    path = $2
    sub(/\/+$/, "", path)          # ignore trailing slashes, as basename does
    n = split(path, part, "/")
    print $1, part[n]
  }'
}

# keep the entries mentioning "root", in sorted order of the original listing
grep root "$condorOutput/$book/$dataset/fileList-all.$$.txt-bak" | sort | normalizeFileList \
  > "$condorOutput/$book/$dataset/fileList-all.$$.txt"
# make list of all local files, i.e. the ones already present at the target
# The option for `list` is selected exactly as for the listing of the source directory.
if [[ "$HOSTNAME" == *mit.edu* ]] &&
   [[ "$dataDir" == */castor/cern.ch* || "$target" == */castor/cern.ch* ]]; then
  opt="--simple"
else
  opt=""
fi

# $opt is intentionally unquoted: an empty value must not become an empty argument
list $opt "$target/$book/$dataset" | grep root | sort \
  > "$condorOutput/$book/$dataset/fileList-done.$$.txt"
# Print a side-by-side comparison report of two file lists.
#   $1  list of all available files
#   $2  list of the files already done
# The diff output is kept in a private temporary file instead of diff.$$ in the current
# directory, which may not be writable and would be left behind on an interrupt.
reportListDifferences() {
  local allList=$1 doneList=$2 diffFile
  diffFile=$(mktemp "${TMPDIR:-/tmp}/downloadSample-diff.XXXXXX") || return 1
  diff -y "$allList" "$doneList" > "$diffFile"
  echo ""
  echo " Files different in size: "
  grep -F '|' "$diffFile"
  echo ""
  echo " Files available in all and not done: "
  grep -F '<' "$diffFile"
  echo ""
  echo " Files done but not listed in all available: "
  grep -F '>' "$diffFile"
  echo ""
  rm -f "$diffFile"
}

reportListDifferences "$condorOutput/$book/$dataset/fileList-all.$$.txt" \
                      "$condorOutput/$book/$dataset/fileList-done.$$.txt"
# make list of missing files

# Compare the list of available files with the list of files already done and write the
# entries that have to be (re)done.
#   $1  list of all available files, lines of "size name"
#   $2  list of done files; read as "size path", the layout this script also assumes for
#       the listing of the source directory
#   $3  output list; truncated first, then filled with "size name" lines
#   $4  onlyMissing; when non-empty, files present with a different size are not redone
# Names and sizes are compared as whole strings. The former grep calls matched substrings,
# so a remote size of 123 was accepted for a local size of 5123 and e.root was considered
# present when only pre.root existed.
selectFilesToDownload() {
  local allList=$1 doneList=$2 outList=$3 recheckSize=1
  if [[ -n "$4" ]]; then
    recheckSize=0
  fi
  : > "$outList"
  # the output path travels through the environment so that awk does not interpret
  # backslashes in it, which it would do for a -v assignment
  OUT_LIST="$outList" awk -v recheck="$recheckSize" '
    function base(path,   part, n) { n = split(path, part, "/"); return part[n] }
    BEGIN { out = ENVIRON["OUT_LIST"] }
    phase == "done" {
      name = base($2)
      doneSize[name] = $1 ""
      doneLine[name] = $0
      next
    }
    !/root/ { next }
    {
      size = $1 ""
      file = $2
      if (!(file in doneSize)) {
        print " -missing-- " file " with " size " bytes"
        print size, file >> out
      } else if (recheck && doneSize[file] != size) {
        # present, but with a different size
        print " -fileSize- " doneLine[file] " (remote: " size ")"
        print size, file >> out
      }
    }
  ' phase=done "$doneList" phase=all "$allList"
}

selectFilesToDownload "$condorOutput/$book/$dataset/fileList-all.$$.txt" \
                      "$condorOutput/$book/$dataset/fileList-done.$$.txt" \
                      "$condorOutput/$book/$dataset/fileList.$$.txt" \
                      "$onlyMissing"
# Print the number of lines of file $1; prints nothing when the file cannot be read.
# Reading through a redirection makes wc print the bare count, and any padding some wc
# implementations add is stripped, so the count no longer depends on cutting the first
# blank-separated column.
countLines() {
  local n
  n=$(wc -l < "$1") || return 0
  printf '%s\n' "${n//[[:space:]]/}"
}

nAll=$(countLines "$condorOutput/$book/$dataset/fileList-all.$$.txt")
nMissing=$(countLines "$condorOutput/$book/$dataset/fileList.$$.txt")
nDone=$(countLines "$condorOutput/$book/$dataset/fileList-done.$$.txt")
echo ""
echo " Download Summary "
echo " All $nAll"
echo " Done $nDone"
echo " Missing $nMissing"
echo ""
# construct our job
# nFiles is the number of entries in the list of files that still have to be copied.
nFiles=$(wc -l < "$condorOutput/$book/$dataset/fileList.$$.txt")
nFiles=${nFiles//[[:space:]]/}
if [[ -z "$nFiles" || "$nFiles" == "0" ]]; then
  echo " "
  echo " No more files to download. EXIT."
  exit 0
fi

# The number of copy processes is used as a divisor below: refuse anything but a positive
# integer instead of running into a division by zero.
if ! [[ "$nCopyProcs" =~ ^[1-9][0-9]*$ ]]; then
  echo " ERROR -- n copy procs must be a positive integer (got: '$nCopyProcs')."
  exit 1
fi

# never start more copy processes than there are files
if [[ "$nFiles" -lt "$nCopyProcs" ]]; then
  nCopyProcs=$nFiles
fi
# how many files per job?
# Integer division, rounded up whenever it leaves a remainder, so that nCopyProcs jobs are
# enough to cover all nFiles files.
nFilesPerJob=$(( nFiles / nCopyProcs ))
nFilesTmp=$(( nFilesPerJob * nCopyProcs ))
if (( nFilesTmp < nFiles )); then
  nFilesPerJob=$(( nFilesPerJob + 1 ))
fi

printf ' n files to copy: %s\n' "$nFiles"
printf ' n files/proc : %s\n' "$nFilesPerJob"
# job counter and the first range of files (1-based line numbers of the file list)
i=1; next=1; last=$nFilesPerJob

# make sure condor is properly setup for us:
# a CONDOR_LOCATION in the environment is dropped in favour of a fixed CONDOR_CONFIG
if [[ -n "$CONDOR_LOCATION" ]]; then
  unset CONDOR_LOCATION
  export CONDOR_CONFIG=/usr/local/condor/etc/condor_config
fi
# stage in the missing files if it is at CERN
# The list of files to copy is sent to lxplus, handed to stageSample.py there and removed
# again. The staging request is only issued when the list actually arrived.
if [[ "$dataDir" == */castor/cern.ch* ]]; then
  stageHost="$TICKET_HOLDER@lxplus.cern.ch"
  echo " scp $condorOutput/$book/$dataset/fileList.$$.txt $TICKET_HOLDER@lxplus.cern.ch:"
  if scp "$condorOutput/$book/$dataset/fileList.$$.txt" "$stageHost:"; then
    echo " ssh $TICKET_HOLDER@lxplus.cern.ch ./stageSample.py --dataDir=$dataDir/$book/$dataset --fileList=fileList.$$.txt"
    ssh "$stageHost" ./stageSample.py "--dataDir=$dataDir/$book/$dataset" "--fileList=fileList.$$.txt"
    echo " ssh $TICKET_HOLDER@lxplus.cern.ch rm fileList.$$.txt"
    ssh "$stageHost" rm "fileList.$$.txt"
  else
    echo " ERROR -- could not copy the file list to $stageHost, staging skipped."
  fi
fi
# loop over the condor jobs and submit them

# Print one "first last" pair per condor job: the 1-based, inclusive range of lines of the
# file list that the job has to copy.
#   $1  number of files in the list
#   $2  number of files per job
#   $3  maximum number of jobs
# The last range is clamped to the number of files. The former loop stopped as soon as the
# unclamped end of a range passed the end of the list, which silently dropped the final,
# shorter range (10 files on 4 jobs: file 10 was never submitted). The job that reaches
# the maximum takes all remaining files. Prints nothing for non-numeric input.
jobRanges() {
  local nFiles=$1 perJob=$2 maxJobs=$3
  local job=1 first=1 last
  [[ "$nFiles" =~ ^[0-9]+$ && "$perJob" =~ ^[1-9][0-9]*$ && "$maxJobs" =~ ^[0-9]+$ ]] ||
    return 0
  while (( job <= maxJobs && first <= nFiles )); do
    last=$(( first + perJob - 1 ))
    if (( job == maxJobs || last > nFiles )); then
      last=$nFiles
    fi
    printf '%s %s\n' "$first" "$last"
    first=$(( last + 1 ))
    job=$(( job + 1 ))
  done
}

# The ranges are read from descriptor 3 so that commands inside the loop keep their stdin.
while read -r next last <&3; do

  # say what we are going to submit
  echo " downloadFiles.sh $dataDir $book $dataset $target $condorOutput $$ $next $last"

  # the condor log goes to /tmp, with every '/' of the sample name replaced by '+'
  logFile="download:$book/$dataset/${next}-${last}.txt"
  logFile="/tmp/${logFile//\//+}"
  rm -f "$logFile"

  # prepare the condor_submit files
  cat > "submit_$$.cmd" <<EOF
Universe = vanilla
Requirements = ( (Arch == "INTEL") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer) )
Notify_user = $TICKET_HOLDER@mit.edu
Notification = Error
Executable = $script
Arguments = $dataDir $book $dataset $target $condorOutput $$ $next $last
Rank = Mips
GetEnv = True
Input = /dev/null
Output = $condorOutput/$book/$dataset/${next}-${last}.out
Error = $condorOutput/$book/$dataset/${next}-${last}.err
Log = $logFile
should_transfer_files = YES
when_to_transfer_output = ON_EXIT

+AccountingGroup = "group_cmsuser.cmsu0284"

Queue
EOF

  #+x509userproxysubject = $DN

  # submit the jobs; the output of condor_submit is discarded, a failure is reported
  if ! condor_submit "submit_$$.cmd" > /dev/null 2>&1; then
    echo " ERROR -- condor_submit failed for files $next-$last." >&2
  fi
  #cat submit_$$.cmd
  rm "submit_$$.cmd"

done 3< <(jobRanges "$nFiles" "$nFilesPerJob" "$nCopyProcs")
250 exit 0