#!/bin/bash
#---------------------------------------------------------------------------------------------------
# Download a list of files
#---------------------------------------------------------------------------------------------------
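#
# Usage:  downloadFiles.sh <dataDir> <book> <dataset> <target> <condorOutput> <first> <last>
#
# Downloads files <first> through <last> (1-based) as listed in
# <condorOutput>/<book>/<dataset>/fileList.txt, where each line reads
# "<size> <filePath>", calling downloadFile.sh once per file.
#---------------------------------------------------------------------------------------------------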

# read the arguments
echo ""
echo "downloadFiles.sh $*"
echo ""
dataDir=$1;      shift
book=$1;         shift
dataset=$1;      shift
target=$1;       shift
condorOutput=$1; shift
first=$1;        shift
last=$1;         shift

# prepare environment
echo " "
echo " Process dataset: $dataset of book: $book"
echo "   in directory : $dataDir"
echo "   to target    : $target"
echo "   condor output: $condorOutput"
echo "   file range   : $first -- $last"

mkdir -p "$condorOutput/$book/$dataset"
script=`which downloadFile.sh`
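# stop right away if downloadFile.sh is not on the PATH (which returns nothing then)
if [ -z "$script" ]
then
  echo " ERROR - downloadFile.sh not found in path."
  exit 1
fi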

# make sure the request is good
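# the file list must exist or the nFiles count below comes out empty
if [ ! -f "$condorOutput/$book/$dataset/fileList.txt" ]
then
  echo " ERROR - no fileList.txt in $condorOutput/$book/$dataset"
  exit 1
fi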
nFiles=`wc -l "$condorOutput/$book/$dataset/fileList.txt" | cut -d ' ' -f 1`
if [ "$first" -gt "$nFiles" ] || [ "$last" -gt "$nFiles" ]
then
  echo "Request makes no sense: nFiles=$nFiles but first=$first and last=$last"
  exit 1
fi

# see how many we do in this job
nFilesPerJob=$(($last - $first + 1))
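# select lines $first..$last of the list: take the first $last lines, then keep
# the last $nFilesPerJob of those; the second column holds the file name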
fList=`head -n $last "$condorOutput/$book/$dataset/fileList.txt" | tail -n $nFilesPerJob | cut -d' ' -f 2`

echo LIST $fList

# spread the jobs out by a bit
#sleep $first

# loop through our list now
for file in $fList
do
  file=`basename $file`
  # find this file's line in the file list for further analysis
  line=`grep $file "$condorOutput/$book/$dataset/fileList.txt"`
  # extract the file size (first column); export makes it visible to downloadFile.sh
  export size=`echo $line | tr -s ' ' | cut -d ' ' -f 1`
  # now run the download
  echo "$script $dataDir/$book/$dataset/$file $target/$book/$dataset/$file"
  $script "$dataDir/$book/$dataset/$file" "$target/$book/$dataset/$file"
done

exit 0