ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/downloadSample.sh
Revision: 1.1
Committed: Sun Dec 5 01:01:21 2010 UTC (14 years, 5 months ago) by paus
Content type: application/x-sh
Branch: MAIN
CVS Tags: Mit_017
Log Message:
Next iteration with improved downloading tool.

File Contents

# User Rev Content
#!/bin/bash
#---------------------------------------------------------------------------------------------------
# Download a list of files
#
# Usage: downloadSample.sh <dataDir> <book> <dataset> <target> <nCopyProcs> <condorOutput>
#
#   dataDir      - remote base directory; the sample is listed at <dataDir>/<book>/<dataset>
#   book         - book the dataset belongs to
#   dataset      - name of the dataset to download
#   target       - local base directory; <target>/<book>/<dataset> is created and listed to find
#                  the files that are already present
#   nCopyProcs   - number of condor copy jobs the missing files are split across
#   condorOutput - base directory for the file lists and the condor output/error files
#---------------------------------------------------------------------------------------------------

# Read the arguments
echo ""
echo "downloadSample.sh $*"
echo ""

# All six arguments are positional and required: with any of them missing the paths built below
# collapse (e.g. to '//') and the script would work on the wrong directories.
if [ $# -lt 6 ]
then
  echo " ERROR -- expected 6 arguments but got $#." >&2
  echo " Usage: downloadSample.sh <dataDir> <book> <dataset> <target> <nCopyProcs> <condorOutput>" >&2
  exit 1
fi

dataDir=$1;      shift
book=$1;         shift
dataset=$1;      shift
target=$1;       shift
nCopyProcs=$1;   shift
condorOutput=$1; shift

# Prepare environment
echo " "
echo " Process dataset: $dataset of book: $book"
echo " in directory : $dataDir"
echo " to target : $target"
echo " n copy procs : $nCopyProcs"
echo " condor output: $condorOutput"

# Create the directory for the file lists / condor output and the download target directory.
# Nothing further down can work without them, so stop right away if they cannot be created.
if ! mkdir -p "$condorOutput/$book/$dataset" "$target/$book/$dataset"
then
  echo " ERROR -- could not create the working directories." >&2
  exit 1
fi

# Locate the worker script that every condor job executes (it has to be in the PATH).
script=$(command -v downloadFiles.sh)
if [ -z "$script" ]
then
  echo " ERROR -- downloadFiles.sh not found in PATH." >&2
  exit 1
fi

# cleanup our lists (the glob stays outside the quotes so it still expands)
rm -f "$condorOutput/$book/$dataset"/fileList*.txt*

# Pick the storage element matching the remote location. Only the host differs between the
# castor and the non-castor case.
# NOTE(review): storageEle/storagePath/storageUrl are not used anywhere in the visible script
# (they belonged to the srmls based listing, now replaced by 'list'); kept in case they are
# still wanted.
case "$dataDir" in
  */castor/cern.ch*) storageEle="srm-cms.cern.ch" ;;
  *)                 storageEle="se01.cmsaf.mit.edu" ;;
esac
storagePath='/srm/managerv2?SFN='
storageUrl="srm://${storageEle}:8443${storagePath}$dataDir/$book/$dataset"

# raw listing of the remote sample, converted to 'size file' format in the next step
#srmls $storageUrl | grep root | tr -s ' ' | cut -d' ' -f 2-3 2> /dev/null 1> \
#      $condorOutput/$book/$dataset/fileList-all.txt-bak
list "$dataDir/$book/$dataset" > "$condorOutput/$book/$dataset/fileList-all.txt-bak"

# Make sure there is a kerberos ticket available

# The private credential directory has to exist before anything is copied into it. It is
# created with access for the owner only, in one step, so it is never left without permissions.
mkdir -p ~/.krb5/
chmod 700 ~/.krb5

# Keep a copy of the x509 proxy next to the ticket.
# NOTE(review): the proxy file name is hard-coded to uid 5410 -- presumably the account this
# script was written for; confirm before running it under another account.
cp /tmp/x509up_u5410 ~/.krb5/

# klist reports the cache as 'Ticket cache: TYPE:path'; the third word is 'TYPE:path'
KRB5CCNAME=$(klist -5 | grep 'Ticket cache:' | cut -d' ' -f 3)
if [ -n "$KRB5CCNAME" ]
then
  # strip the cache type to get the plain file name of the ticket
  file=$(printf '%s\n' "$KRB5CCNAME" | cut -d: -f2)
  if [ -f "$file" ]
  then
    cp "$file" ~/.krb5/ticket
  else
    echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
    exit 1
  fi
else
  # NOTE(review): unlike the case above this one has never stopped the script; kept non-fatal
  # to preserve the existing behavior -- confirm whether it should 'exit 1' as well.
  echo " ERROR -- missing kerberos ticket ($KRB5CCNAME)."
fi

# make list of all remote files

#---------------------------------------------------------------------------------------------------
# makeRemoteFileList RAWLIST OUTLIST
#
# Convert the raw remote listing RAWLIST into 'size file' lines appended to OUTLIST.
#   - only lines containing 'root' are considered
#   - field 1 of a line is taken as the size, field 2 as the file (fields are blank separated)
#   - the directory part of the file is removed
# OUTLIST is created even when no line matches, so it can always be read afterwards.
#---------------------------------------------------------------------------------------------------
makeRemoteFileList()
{
  awk '/root/ {
         name = $2
         sub(/\/+$/, "", name)    # ignore trailing slashes, as basename does
         sub(/^.*\//, "", name)   # remove the directory part
         print $1 " " name
       }' "$1" >> "$2"
}

#echo " converting all entries"
makeRemoteFileList "$condorOutput/$book/$dataset/fileList-all.txt-bak" \
                   "$condorOutput/$book/$dataset/fileList-all.txt"

# make list of all local files

# The '--simple' option of 'list' is only requested when running on an mit.edu host while the
# sample itself is located on castor at CERN.
opt=''
if [[ "$HOSTNAME" == *mit.edu* && "$dataDir" == */castor/cern.ch* ]]
then
  opt='--simple'
fi

# ${opt:+...} drops the option completely when it is empty instead of passing an empty argument
#echo "list $opt $target/$book/$dataset | grep root > $condorOutput/$book/$dataset/fileList-done.txt"
list ${opt:+"$opt"} "$target/$book/$dataset" | grep root \
  > "$condorOutput/$book/$dataset/fileList-done.txt"

# make list of missing files

#---------------------------------------------------------------------------------------------------
# findMissingFiles ALLLIST DONELIST MISSINGLIST
#
# For every 'size file' line of ALLLIST (lines containing 'root' only) check whether DONELIST
# contains the same size and file. Entries that are not found are reported on stdout and
# appended to MISSINGLIST.
#
# The comparison is a literal, whole-word match: '123 a.root' is not satisfied by
# '4123 a.root', and the '.' in a file name only matches a '.'. A file that is present with a
# different size is therefore listed as missing.
#---------------------------------------------------------------------------------------------------
findMissingFiles()
{
  local allList=$1 doneList=$2 missingList=$3
  local size file rest

  grep root "$allList" | \
  while read -r size file rest
  do
    if ! grep -q -F -w -- "$size $file" "$doneList"
    then
      echo " -missing- $file with $size bytes"
      echo "$size $file" >> "$missingList"
    # else
    #   echo " -exists-- $file with $size bytes - exists"
    fi
  done
}

# always start from an empty (but existing) list
rm -f "$condorOutput/$book/$dataset/fileList.txt"
touch "$condorOutput/$book/$dataset/fileList.txt"
findMissingFiles "$condorOutput/$book/$dataset/fileList-all.txt" \
                 "$condorOutput/$book/$dataset/fileList-done.txt" \
                 "$condorOutput/$book/$dataset/fileList.txt"

# construct our job

# Number of files still to be downloaded. Reading through stdin makes wc print the number only;
# blanks are removed because some wc implementations pad the number.
nFiles=$(wc -l < "$condorOutput/$book/$dataset/fileList.txt")
nFiles=${nFiles//[[:space:]]/}
if [ -z "$nFiles" ] || [ "$nFiles" -eq 0 ]
then
  echo " "
  echo " No more files to download. EXIT."
  exit 0
fi

# The number of copy processes is used as a divisor below: it has to be a positive integer.
case "$nCopyProcs" in
  ''|*[!0-9]*)
    echo " ERROR -- number of copy processes is not a positive integer ($nCopyProcs)." >&2
    exit 1
    ;;
esac
nCopyProcs=$(( 10#$nCopyProcs ))   # force base 10, a leading zero would mean octal
if [ "$nCopyProcs" -eq 0 ]
then
  echo " ERROR -- number of copy processes is not a positive integer ($nCopyProcs)." >&2
  exit 1
fi

# never start more processes than there are files
if [ "$nFiles" -lt "$nCopyProcs" ]
then
  nCopyProcs=$nFiles
fi

# integer division: the remainder is picked up by the last job
nFilesPerJob=$(( nFiles / nCopyProcs ))
echo " n files all : $nFiles"
echo " n files/proc : $nFilesPerJob"

# Submit one condor job per copy process. Each job gets the range of entries next..last
# (1-based, inclusive) of the list of missing files; the last job also takes the entries left
# over by the integer division.
i=1
next=1
last=$nFilesPerJob

while [ "$i" -le "$nCopyProcs" ]
do
  if [ "$i" == "$nCopyProcs" ]
  then
    last=$nFiles
  fi

  # say what we are going to submit
  echo " downloadFiles.sh $dataDir $book $dataset $target $condorOutput $next $last"

  # the condor log file goes to /tmp, with the '/' of book/dataset replaced to get a flat name
  logFile="download:$book/$dataset/${next}-${last}.txt"
  logFile="/tmp/${logFile//\//+}"
  rm -f "$logFile"

  # prepare the condor_submit files (private temporary file instead of a predictable name in
  # the current directory)
  submitFile=$(mktemp "${TMPDIR:-/tmp}/submit_$$.XXXXXX")
  if [ -z "$submitFile" ]
  then
    echo " ERROR -- could not create the condor submit file." >&2
    exit 1
  fi
  cat > "$submitFile" <<EOF
Universe = vanilla
Requirements = ( (Arch == "X86_64" || Arch == "INTEL") && (OpSys == "LINUX") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (HasFileTransfer) )
Notify_user = paus@mit.edu
Notification = Error
Executable = $script
Arguments = $dataDir $book $dataset $target $condorOutput $next $last
Rank = Mips
GetEnv = True
Input = /dev/null
Output = $condorOutput/$book/$dataset/${next}-${last}.out
Error = $condorOutput/$book/$dataset/${next}-${last}.err
Log = $logFile
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
Queue
EOF

  # submit the jobs; the output of condor_submit is not shown, but a failed submission is
  # reported (and the remaining jobs are still submitted)
  if ! condor_submit "$submitFile" >& /dev/null #>& lastSub
  then
    echo " ERROR -- condor_submit failed for files $next-$last." >&2
  fi
  #cat $submitFile
  rm -f "$submitFile"

  # update counters
  next=$(( next + nFilesPerJob ))
  last=$(( last + nFilesPerJob ))
  i=$(( i + 1 ))
done

exit 0