#!/bin/bash
#===================================================================================================
# Spawn download activities to optimize the bandwidth of the download process, one step closer to
# Phedex.
#
# C.Paus, Nov 2011
#---------------------------------------------------------------------------------------------------
# Improvements to implement on a rainy Friday afternoon:
# + connect back to an existing task
# - optimize sleep time to allow processes to get to submit their missing file requests
# - allow for priorities
#===================================================================================================

#---------------------------------------------------------------------------------------------------
findActivity()
{
  # Count the condor jobs whose "ClusterId Cmd Args" summary line mentions downloadFiles.sh.
  #
  # Globals: USER (read) - passed to condor_q to select the jobs; when it is empty no name is
  #                        passed at all (same as the former unquoted expansion)
  # Outputs: the number of matching jobs on stdout (0 when condor_q prints nothing or fails)
  # Returns: always 0
  #
  # The name is matched as a fixed string (-F) so that the '.' is not a regex wildcard.
  condor_q -global ${USER:+"$USER"} -format "%d " ClusterId -format "%s " Cmd -format "%s \n" Args \
    | grep -c -F -- 'downloadFiles.sh' \
    || true   # grep exits 1 when the count is 0; the count is still printed, keep status 0
}

#---------------------------------------------------------------------------------------------------
showAccounting()
{
  # Print the multi-line accounting summary.
  #
  # Arguments: $1 - master pid
  #            $2 - index of the last download task that was started
  #            $3 - total number of download tasks
  #            $4 - number of running batch jobs
  #            $5 - number of running processes
  # Outputs:   the summary on stdout
  #
  # All values are kept local: printing a summary must not overwrite the script-wide counters
  # of the same names.
  local pid=$1 iDownload=$2 nDownloads=$3 nJobs=$4 nProcs=$5

  printf '%s\n' \
    " " \
    " spawnDownload.sh ==== Accounting Summary ==== " \
    " " \
    " Master pid : $pid " \
    " Last download tasks: $iDownload / $nDownloads " \
    " Running processes : $nProcs " \
    " Running batch : $nJobs " \
    " "
}

#---------------------------------------------------------------------------------------------------
showAccountingShort()
{
  # Print the one-line accounting summary.
  #
  # Arguments: $1 - master pid
  #            $2 - index of the last download task that was started
  #            $3 - total number of download tasks
  #            $4 - number of running batch jobs
  #            $5 - number of running processes
  # Outputs:   one line on stdout
  #
  # Values are local so the script-wide counters of the same names stay untouched.
  local pid=$1 iDownload=$2 nDownloads=$3 nJobs=$4 nProcs=$5

  echo " Downloads ($pid): $iDownload / $nDownloads nProcs: $nProcs nJobs: $nJobs "
}

#---------------------------------------------------------------------------------------------------
loopCondition()
{
  # Decide whether the steering loop has to keep running.
  #
  # Arguments: $1 - index of the last download task that was started
  #            $2 - total number of download tasks
  #            $3 - number of running batch jobs
  # Outputs:   'true' while tasks remain to be started or batch jobs are still running,
  #            'false' otherwise
  #
  # The operands are quoted: with empty values the unquoted test degenerated into the
  # one-argument form `[ -lt ]`, which is always true and kept the loop alive forever. A missing
  # or non-numeric value now makes the comparison fail (with a message from `[`) and counts as
  # false.
  local iDownload=$1 nDownloads=$2 nJobs=$3

  if [ "$iDownload" -lt "$nDownloads" ] || [ "$nJobs" -gt 0 ]
  then
    echo 'true'
  else
    echo 'false'
  fi
}

#---------------------------------------------------------------------------------------------------
spawnCondition()
{
  # Decide whether another download process may be started now.
  #
  # Arguments: $1 - index of the last download task that was started
  #            $2 - total number of download tasks
  #            $3 - number of running batch jobs
  #            $4 - number of running processes
  # Globals:   NPROC_MAX (read) - upper limit for the running processes
  #            NJOBS_MAX (read) - upper limit for the running batch jobs
  # Outputs:   'true' when tasks remain and both counts are below their limits, else 'false'
  #
  # Operands are quoted so that a missing count fails the comparison (no spawn) instead of
  # changing the meaning of the test; the values are local so the script-wide counters of the
  # same names are not overwritten.
  local iDownload=$1 nDownloads=$2 nJobs=$3 nProcs=$4

  if [ "$iDownload" -lt "$nDownloads" ] \
     && [ "$nProcs" -lt "$NPROC_MAX" ] \
     && [ "$nJobs" -lt "$NJOBS_MAX" ]
  then
    echo 'true'
  else
    echo 'false'
  fi
}

#---------------------------------------------------------------------------------------------------
addProcess()
{
  # Start a command in the background and register it in the pid file.
  #
  # Arguments: $1    - pid file the bookkeeping line is appended to
  #            $2    - log file receiving stdout and stderr of the command
  #            $3... - the command and its arguments; the first argument of the command is taken
  #                    to be the file that describes the download
  # Outputs:   a " Spawning pid: ..." line on stdout
  # Returns:   1 without starting anything when no command is given, 0 otherwise
  if [ "$#" -lt 3 ]
  then
    echo " addProcess: need <pidFile> <logFile> <command> [arguments...]" >&2
    return 1
  fi

  local pidFile=$1 logFile=$2
  shift 2
  local commandLine="$*"
  local file=${2:-}
  local childPid
  local download=""
  local -a words=()

  # run the command exactly as it was passed, no second round of word splitting or globbing
  "$@" > "$logFile" 2>&1 &
  childPid=$!

  # put some useful information into the pid file: the content of the download description
  if [ -n "$file" ] && [ -r "$file" ]
  then
    download=$(cat -- "$file")
    # Collapse all whitespace to single blanks for the pid file entry. This keeps the
    # one-line-per-process format the unquoted echo used to produce, but without expanding
    # glob characters found in the description. read returns non-zero at end of input even
    # though the words were stored, hence the '|| true'.
    read -r -d '' -a words <<< "$download" || true
  fi

  echo "$childPid :: $commandLine ::${words[*]:+ ${words[*]}}" >> "$pidFile"
  echo " Spawning pid: $childPid :: $commandLine :: $download"
}

#---------------------------------------------------------------------------------------------------
cleanUpPids()
{
  # Split the pid file into processes that still exist and processes that are gone.
  #
  # Arguments: $1 - pid file; the first word of every line is the process id
  # Outputs:   a progress line on stdout
  # Files:     $1      - rewritten to hold only the entries whose process still exists
  #            $1.done - entries of processes that are gone are appended here
  #            $1.tmp  - scratch file, renamed to $1 at the end
  #
  # Every entry is written out directly. Looking it up again with `grep ^$pid` was a prefix
  # match, so pid 12 also copied the entries of 123 and 1234, which duplicated lines and
  # inflated the process count.
  local pidFile=$1
  local entry pid rest

  # prepare empty temporary pid file
  rm -f -- "$pidFile.tmp"
  touch -- "$pidFile.tmp"
  echo " Cleaning Pids..."

  # parse the existing pid file
  if [ -f "$pidFile" ]
  then
    while IFS= read -r entry || [ -n "$entry" ]
    do
      read -r pid rest <<< "$entry"
      # nothing to check on a blank line
      [ -n "$pid" ] || continue

      # signal 0 delivers nothing, it only tests whether the process can be signalled
      if kill -0 "$pid" > /dev/null 2>&1
      then
        printf '%s\n' "$entry" >> "$pidFile.tmp"
      else
        printf '%s\n' "$entry" >> "$pidFile.done"
      fi
    done < "$pidFile"
  fi

  # overwrite the existing with the active pids
  mv -- "$pidFile.tmp" "$pidFile"
}

#===================================================================================================
# Main activity starts here
#===================================================================================================
# defaults
LOGDIR=/home/cmsprod/public_html/download

# maximum number of processes and jobs, and the sleep time per loop iteration: a non-empty value
# from the environment wins and is announced, otherwise the built-in default is used
if [ -n "$NPROC_MAX" ]
then
  echo " Using environment NPROC_MAX: $NPROC_MAX"
fi
: "${NPROC_MAX:=8}"

if [ -n "$NJOBS_MAX" ]
then
  echo " Using environment NJOBS_MAX: $NJOBS_MAX"
fi
: "${NJOBS_MAX:=80}"

if [ -n "$SLEEP" ]
then
  echo " Using environment SLEEP: $SLEEP"
fi
: "${SLEEP:=30}"

echo " "
echo " spawnDownload.sh ==== Configuration ==== "
echo " "
echo " Maximal Jobs : $NJOBS_MAX "
echo " Maximal Processes : $NPROC_MAX "
echo " Sleeping time : $SLEEP "
echo " "

# read command line arguments
#   $1 - input file with one download task per line (lines starting with '#' are skipped)
#   $2 - optional: master pid of an earlier task to connect back to
INPUT_FILE=$1
if [ -z "$INPUT_FILE" ] || [ ! -r "$INPUT_FILE" ]
then
  # without this check an empty name makes the later 'cat' wait on stdin forever
  echo " ERROR - usage: $0 <inputFile> [ <masterPid> ]  (input file missing or unreadable)" >&2
  exit 1
fi
MASTER_PID=${2:-$$}

# prepare pid file
pidFile=$LOGDIR/pids.$MASTER_PID
iDownload=0
if [ -z "$2" ]
then
  # fresh task: start from an empty pid file
  rm -f -- "$pidFile"
  touch -- "$pidFile"
else
  # connecting back: make sure the pid file exists, the accounting reads it right away
  [ -e "$pidFile" ] || touch -- "$pidFile"
  # continue after the highest index N found among download.<masterPid>.N[.log]; stays 0 when
  # no such file exists (the index is taken from the file name, so dots in LOGDIR do not matter)
  for spawnedFile in "$LOGDIR"/download."$MASTER_PID".*
  do
    spawnedIndex=${spawnedFile##*/}
    spawnedIndex=${spawnedIndex#"download.$MASTER_PID."}
    spawnedIndex=${spawnedIndex%%.*}
    case "$spawnedIndex" in
      ''|*[!0-9]*) continue ;;   # unmatched glob or foreign file name
    esac
    if [ "$((10#$spawnedIndex))" -gt "$iDownload" ]
    then
      iDownload=$((10#$spawnedIndex))
    fi
  done
  unset spawnedFile spawnedIndex
fi

# prepare work accounting
# number of download tasks: every line of the input file that does not start with '#'
nDownloads=$(grep -c -v '^#' -- "$INPUT_FILE")
nDownloads=${nDownloads:-0}
nJobs=$(findActivity)
# number of registered processes: one line per process in the pid file
nProcs=$(wc -l < "$pidFile")
nProcs=${nProcs:-0}
# quoted so that an empty value cannot shift the arguments that follow it
showAccounting "$MASTER_PID" "$iDownload" "$nDownloads" "$nJobs" "$nProcs"

startTime=$(date +%s)
nowTime=$(date +%s); duration=$((nowTime - startTime))

# the loop which finishes when activity has ceased
# (all arguments are quoted so that an empty counter cannot shift the ones that follow it)
while [ "$(loopCondition "$iDownload" "$nDownloads" "$nJobs")" == "true" ]
do

  # short message we are still in the loop
  nowTime=$(date +%s); duration=$((nowTime - startTime))
  echo " "
  echo " Loop continues... (spawnDownload.sh: duration - $duration)"
  sleep "$SLEEP"

  # are we spawning a new process?
  if [ "$(spawnCondition "$iDownload" "$nDownloads" "$nJobs" "$nProcs")" == "true" ]
  then
    iDownload=$((iDownload + 1))
    #echo " Download file: $INPUT_FILE - line $iDownload ($nDownloads)"
    # the task is the iDownload-th line of the input file, not counting lines starting with '#'
    downloadFile=$LOGDIR/download.$MASTER_PID.$iDownload
    grep -v '^#' -- "$INPUT_FILE" | sed -n "${iDownload}p" > "$downloadFile"
    addProcess "$pidFile" "$downloadFile.log" download.sh "$downloadFile"
  fi

  # full accounting step
  cleanUpPids "$pidFile"
  # get updated counts on batch jobs and number of processes
  nJobs=$(findActivity)
  nProcs=$(wc -l < "$pidFile")
  echo " ==== Last update before decision ==== "
  showAccountingShort "$MASTER_PID" "$iDownload" "$nDownloads" "$nJobs" "$nProcs"

  makeDownloadIndex.sh
  #cp $pidFile $LOGDIR/download.$MASTER_PID.*.log ~/public_html/download/

done

# cleanup the steering files
rm -f -- "$pidFile" "$pidFile.tmp"

exit 0