ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/downloadSample.sh
Revision: 1.10
Committed: Sat Oct 22 13:07:04 2011 UTC (13 years, 6 months ago) by paus
Content type: application/x-sh
Branch: MAIN
CVS Tags: Mit_025c_branch1, Mit_025c_branch0, Mit_025c, Mit_025b, Mit_025a
Branch point for: Mit_025c_branch
Changes since 1.9: +11 -2 lines
Log Message:
Private file productions.

File Contents

# User Rev Content
#!/bin/bash
#---------------------------------------------------------------------------------------------------
# Download a list of files
#---------------------------------------------------------------------------------------------------

# Read the arguments (positional, in this exact order)
echo ""
echo "downloadSample.sh $*"
echo ""
dataDir=$1;      shift    # source data directory (may be remote, e.g. castor)
book=$1;         shift    # book (catalog area) the dataset belongs to
dataset=$1;      shift    # dataset name
target=$1;       shift    # download destination directory
nCopyProcs=$1;   shift    # number of parallel condor copy jobs
condorOutput=$1; shift    # directory for condor stdout/stderr and file lists
onlyMissing=$1;  shift    # non-empty: only fetch missing files, skip size mismatches

# Grid certificate subject; currently only referenced by a commented-out
# condor attribute further down, so an empty value is harmless.
DN=$(grid-proxy-info -subject)
# Prepare environment
echo " "
echo " Process dataset: $dataset of book: $book"
echo " in directory : $dataDir"
echo " to target : $target"
echo " n copy procs : $nCopyProcs"
echo " condor output: $condorOutput"
echo " only missing : $onlyMissing"

# Make sure the output and target directory trees exist.  'makedir' is a
# MitProd helper that also handles remote targets (castor etc.) -- TODO confirm.
mkdir -p "$condorOutput/$book/$dataset"
makedir --exe "$target"
makedir --exe "$target/$book"
makedir --exe "$target/$book/$dataset"

# Full path of the worker script that the condor jobs will execute
script=$(command -v downloadFiles.sh)
# cleanup our lists and remake a clean one
rm -f "$condorOutput/$book/$dataset"/fileList*.txt*

# Make the list of all files in the source area.  Use the plain listing
# format (--simple) when running on an MIT host but talking to castor.
# Literal [[ ]] substring tests replace the old 'grep mit.edu' pipelines,
# whose unescaped '.' acted as a regex wildcard.
if [[ "$HOSTNAME" == *mit.edu* ]] && \
   { [[ "$dataDir" == */castor/cern.ch* ]] || [[ "$target" == */castor/cern.ch* ]]; }
then
  opt="--simple"
else
  opt=""
fi

#echo "list $opt $dataDir/$book/$dataset > $condorOutput/$book/$dataset/fileList-all.txt-bak"
# $opt is intentionally unquoted: when empty it must vanish, not become "".
list $opt "$dataDir/$book/$dataset" > "$condorOutput/$book/$dataset/fileList-all.txt-bak"
# Make sure there is a kerberos ticket available.
# Fix: the ticket directory must exist (with safe permissions) BEFORE the
# grid proxy is copied into it; the original copied first and only created
# ~/.krb5 later inside the if-branch.
mkdir -p ~/.krb5/
chmod 0 ~/.krb5
chmod u=rwx ~/.krb5
id=$(id -u)
cp "/tmp/x509up_u${id}" ~/.krb5/
KRB5CCNAME=$(klist -5 | grep 'Ticket cache:' | cut -d' ' -f 3)
if ! [ -z "$KRB5CCNAME" ]
then
  # cache spec looks like "FILE:/tmp/krb5cc_<uid>"; keep the path part
  file=$(echo "$KRB5CCNAME" | cut -d: -f2)
  if [ -f "$file" ]
  then
    cp "$file" ~/.krb5/ticket
  else
    echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
    exit 1
  fi
else
  echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
  # NOTE(review): unlike the branch above, this path does not exit -- confirm
  # that continuing without a kerberos ticket is intended.
fi
# make list of all remote files: reduce each listing line that mentions
# 'root' to "<size> <basename>".  One awk pass replaces the old per-line
# tr|cut|basename loop (three process spawns per file).
awk '/root/ { n = split($2, parts, "/"); print $1, parts[n] }' \
    "$condorOutput/$book/$dataset/fileList-all.txt-bak" \
    >> "$condorOutput/$book/$dataset/fileList-all.txt"
# Make the list of all files already present at the target, with the same
# --simple selection logic as for the source listing (castor from MIT).
# Literal [[ ]] substring tests replace the old 'grep' pipelines, whose
# unescaped '.' acted as a regex wildcard.
if [[ "$HOSTNAME" == *mit.edu* ]] && \
   { [[ "$dataDir" == */castor/cern.ch* ]] || [[ "$target" == */castor/cern.ch* ]]; }
then
  opt="--simple"
else
  opt=""
fi

#echo "list $opt $target/$book/$dataset | grep root > $condorOutput/$book/$dataset/fileList-done.txt"
# $opt is intentionally unquoted: when empty it must vanish, not become "".
list $opt "$target/$book/$dataset" | grep root > "$condorOutput/$book/$dataset/fileList-done.txt"
# Make the list of missing files: every file in fileList-all.txt that is
# either absent from fileList-done.txt or (unless onlyMissing is set)
# present with a different size.
rm -f "$condorOutput/$book/$dataset/fileList.txt"
touch "$condorOutput/$book/$dataset/fileList.txt"
grep root "$condorOutput/$book/$dataset/fileList-all.txt" | \
while read -r line
do
  # $line is intentionally unquoted: the echo collapses the whitespace
  # so cut can split on single spaces
  size=$(echo $line | tr -s ' ' | cut -d ' ' -f 1)
  file=$(echo $line | tr -s ' ' | cut -d ' ' -f 2)
  # -F: match the name literally; a '.' in a file name must not act as a
  # regex wildcard and accidentally match a different file
  exists=$(grep -F -- "$file" "$condorOutput/$book/$dataset/fileList-done.txt")
  if [ "$exists" == "" ]
  then
    echo " -missing-- $file with $size bytes"
    echo "$size $file" >> "$condorOutput/$book/$dataset/fileList.txt"
  else
    # file is there -- now check that the size matches as well
    test=$(grep -F -- "$size $file" "$condorOutput/$book/$dataset/fileList-done.txt")
    if [ "$test" == "" ]
    then
      if [ "$onlyMissing" == "" ]
      then
        echo " -fileSize- $exists (remote: $size)"
        echo "$size $file" >> "$condorOutput/$book/$dataset/fileList.txt"
      fi
    fi
  fi
done
# Download bookkeeping summary.  Read the counts via input redirection so
# wc prints only the number -- the old "wc -l file | cut -d' ' -f1" breaks
# whenever wc pads its output with leading spaces.
nAll=$(wc -l < "$condorOutput/$book/$dataset/fileList-all.txt")
nMissing=$(wc -l < "$condorOutput/$book/$dataset/fileList.txt")
nDone=$(wc -l < "$condorOutput/$book/$dataset/fileList-done.txt")
echo ""
echo " Download Summary "
echo " All $nAll"
echo " Done $nDone"
echo " Missing $nMissing"
echo ""
# construct our job
nFiles=$(wc -l < "$condorOutput/$book/$dataset/fileList.txt")
if [ "$nFiles" == "" ] || [ "$nFiles" == "0" ]
then
  echo " "
  echo " No more files to download. EXIT."
  exit 0
elif [ "$nFiles" -lt "$nCopyProcs" ]
then
  # never start more copy processes than there are files
  nCopyProcs=$nFiles
fi

# How many files per job?  Integer division, then rounded up so that
# nFilesPerJob * nCopyProcs >= nFiles and every file is assigned to a job.
nFilesPerJob=$(( nFiles / nCopyProcs ))
nFilesTmp=$(( nFilesPerJob * nCopyProcs ))
if [ "$nFilesPerJob" == "1" ] && [ "$nFiles" -gt "$nCopyProcs" ]
then
  nFilesPerJob=2
elif [ "$nFilesTmp" -lt "$nFiles" ]
then
  nFilesPerJob=$(( nFilesPerJob + 1 ))
fi

echo " n files to copy: $nFiles"
echo " n files/proc : $nFilesPerJob"
i=1                  # job index
next=1               # first file (line number) of the current job
last=$nFilesPerJob   # last file (line number) of the current job

# Make sure condor is properly set up for us: drop any user-level
# CONDOR_LOCATION and point at the system-wide condor configuration.
# ($CONDOR_LOCATION is quoted -- the old unquoted [ -z $CONDOR_LOCATION ]
# only worked by accident for empty/unset values.)
if ! [ -z "$CONDOR_LOCATION" ]
then
  unset CONDOR_LOCATION
  export CONDOR_CONFIG=/usr/local/condor/etc/condor_config
fi
# Loop over the condor jobs and submit them.  Each job copies the file-list
# lines [next, last]; the last job (i == nCopyProcs) absorbs any remainder.
while [ "$i" -le "$nCopyProcs" ] && [ "$last" -le "$nFiles" ]
do
  if [ "$i" == "$nCopyProcs" ]
  then
    last=$nFiles
  fi

  # say what we are going to submit
  echo " downloadFiles.sh $dataDir $book $dataset $target $condorOutput $next $last"

  # condor log file in /tmp; '/' replaced so the name is flat
  logFile=$(echo "download:$book/$dataset/${next}-${last}.txt" | tr '/' '+')
  logFile=/tmp/$logFile
  rm -f "$logFile"

  # Prepare the condor_submit file.  The unquoted heredoc delimiter is
  # intentional: the $variables must be expanded into the submit file.
  cat > submit_$$.cmd <<EOF
Universe = vanilla
Requirements = ( (Arch == "INTEL") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer) )
Notify_user = paus@mit.edu
Notification = Error
Executable = $script
Arguments = $dataDir $book $dataset $target $condorOutput $next $last
Rank = Mips
GetEnv = True
Input = /dev/null
Output = $condorOutput/$book/$dataset/${next}-${last}.out
Error = $condorOutput/$book/$dataset/${next}-${last}.err
Log = $logFile
should_transfer_files = YES
when_to_transfer_output = ON_EXIT

+AccountingGroup = "group_cmsuser.cmsu0284"

Queue
EOF

  #+x509userproxysubject = $DN

  # submit the jobs
  condor_submit submit_$$.cmd >& /dev/null #>& lastSub
  #cat submit_$$.cmd
  rm submit_$$.cmd

  # update counters for the next slice
  next=$(( next + nFilesPerJob ))
  last=$(( last + nFilesPerJob ))
  i=$(( i + 1 ))
done

exit 0