#!/usr/bin/env python
#---------------------------------------------------------------------------------------------------
# Script to automatically download a MIT dataset to our local cluster
#
# The download of the MIT dataset is organized in accordance with the dataset production logic. In
# general it is allowed to download the dataset from any location of a properly configured storage
# element. The script will do all the most obvious tests to ensure an efficient and safe download.
# For performance reasons a checksum is not calculated. This omission is considered completely safe
# as failures will be identified in the analysis phase and the rare occasions will be more effective
# to fix by hand.
#
# At present the download proceeds in one thread (one file at a time) which for performance reasons
# might not be optimal.
#
# Author: C.Paus (July 1, 2008)
#---------------------------------------------------------------------------------------------------
17 |
+ |
# Missing but desired features:
#  + accounting of size of each file
#  + accounting of locally available files (avoid copying already existing files)
#  + determine full list of files before starting to copy
#  + minimal success check of the copy
#  + calculate total data volume (to copy, already copied etc.)
#  + add feature to check the castor status
#  - add time estimates and progressions for copies
#  - multi downloads to enhance performance
#---------------------------------------------------------------------------------------------------
27 |
+ |
import os,sys,getopt,re,string |
28 |
+ |
|
29 |
+ |
def Seconds():
    """Return the current time as whole seconds since the Unix epoch.

    The value is taken from the interpreter clock, which is equivalent to the
    output of ``date +%s`` but needs no sub-process and cannot come back
    empty.
    """
    import time  # local import so the function does not depend on the file header
    return int(time.time())
33 |
+ |
|
34 |
+ |
def InSkipList(file,list):
    """Tell whether `file` equals one of the entries of `list`.

    file -- name to look for
    list -- iterable of names to skip (compared entry by entry with ==)

    Returns True on the first equal entry, False otherwise (also for an
    empty `list`).
    """
    return any(entry == file for entry in list)
39 |
+ |
|
40 |
+ |
def DecodeSrmLs(line):
    """Decode one '<size> <path>' line into [size, fileName].

    line -- text line; the first field is taken as the size and the second
            as a '/'-separated path

    Fields may be separated by any run of blanks or tabs. The size is
    returned as the string found in the line and the file name is the last
    path component.

    Raises IndexError when the line has fewer than two fields.
    """
    fields = line.split()
    size = fields[0]
    fileName = fields[1].split("/")[-1]
    return [ size, fileName ]
48 |
+ |
|
49 |
+ |
def DecodeRfDir(line):
    """Decode one long-listing line into [size, fileName].

    line -- text line whose last field is the file name and whose fifth
            field is the size

    Fields may be separated by any run of blanks, so column-aligned output
    does not have to be squeezed before it is passed in. The size is
    returned as the string found in the line.

    Raises IndexError when the line has too few fields.
    """
    fields = line.split()
    fileName = fields.pop()
    size = fields[4]
    return [ size, fileName ]
56 |
+ |
|
57 |
+ |
def BuildFileList(cmd):
    """Run a listing command and return its result as {fileName: size}.

    cmd -- shell command whose output lines look like '<size> <path>'

    The key is the last '/'-separated component of the path and the value is
    the size converted to int. Blank lines are ignored; lines that do not
    start with a numeric size followed by a path are reported and skipped
    instead of aborting the whole listing.

    Reads the module-level flag `debug`.
    """
    fileList = {}

    if debug == 1:
        print(' Debug:: list: ' + cmd)

    pipe = os.popen(cmd)                                 # run command
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 2 or not fields[0].isdigit():
            print(' WARNING - ignoring unexpected listing line: ' + line.strip())
            continue
        size = fields[0]
        fileName = fields[1].split("/")[-1]
        if debug == 1:
            print(' Debug:: adding: ' + fileName + ' with size ' + size)
        fileList[fileName] = int(size)

    return fileList
84 |
+ |
|
85 |
+ |
def BuildStagedFileList(storagePath,allFileList,cacheFile):
    """Return {fileName: stagerStatus} for every file in `allFileList`.

    storagePath -- storage path; the part after the last '=' is used as the
                   directory of the files
    allFileList -- dictionary keyed by file name (values are not used)
    cacheFile   -- file with the stager status lines of an earlier run

    If the directory does not contain '/castor/' every file is reported as
    'STAGED'. Otherwise the status is the last blank-separated field of the
    line whose first field is the full file name, taken from `cacheFile`
    when it exists and `noCache` is 0, else from 'stager_qry -M'. Files
    without a matching line get the status 'undefined'.

    The cache is read once into a dictionary instead of being searched with
    one external command per file.

    Reads the module-level flags `debug` and `noCache`.
    """
    rfPath = storagePath.split("=")[-1]

    # if this is not castor, trick it and mark them as staged
    if not re.search('/castor/',rfPath):
        return dict((name, 'STAGED') for name in allFileList)

    # here we deal with castor
    if debug == 1:
        print(' Debug:: rfpath: ' + rfPath)

    useCache = os.path.exists(cacheFile) and noCache == 0
    cachedStatus = {}
    if useCache:
        print(' Using the cached stager queries at ' + cacheFile)
        cacheInput = open(cacheFile)
        try:
            for line in cacheInput:
                fields = line.rstrip('\n').split(" ")
                cachedStatus[fields[0]] = fields[-1]     # a later line overrides an earlier one
        finally:
            cacheInput.close()

    fileList = {}
    for name in allFileList:
        fullFile = rfPath + '/' + name
        if debug == 1:
            print(' Debug:: full file name: ' + fullFile)

        if useCache:
            fileList[name] = cachedStatus.get(fullFile, 'undefined')
            continue

        status = 'undefined'
        pipe = os.popen('stager_qry -M ' + fullFile)     # run command
        try:
            for line in pipe.readlines():
                fields = line[:-1].split(" ")
                if fields[0] == fullFile:
                    status = fields[-1]
        finally:
            pipe.close()
        fileList[name] = status

    return fileList
121 |
+ |
|
122 |
+ |
def CacheStagedFileList(cacheFile,storagePath,stagedFileList):
    """Write the stager status of all files to `cacheFile`.

    cacheFile      -- file to (over)write
    storagePath    -- storage path; the part after the last '=' is used as
                      the directory of the files
    stagedFileList -- dictionary {fileName: status}

    One line '<directory>/<fileName> xyz@castorns <status>' is written per
    file. The file is closed even when a write fails. Returns None.
    """
    print(' Caching stager query status to ' + cacheFile)
    rfPath = storagePath.split("=")[-1]
    fileOutput = open(cacheFile,'w')
    try:
        for name, status in stagedFileList.items():
            fileOutput.write(rfPath + '/' + name + ' xyz@castorns ' + status + '\n')
    finally:
        fileOutput.close()
131 |
+ |
|
132 |
+ |
def CopyFile(storageEle,storagePath,storageUrl,file,localDir,size=None):
    """Copy one file into `localDir` and return the seconds the copy took.

    storageEle  -- storage element name; 'srm-cms.cern.ch' selects 'rfcp',
                   anything else 'lcg-cp'
    storagePath -- storage path; the part after the last '=' is the source
                   directory for 'rfcp'
    storageUrl  -- source URL prefix for 'lcg-cp'
    file        -- name of the file to copy
    localDir    -- local target directory
    size        -- size of the file in bytes; when omitted the module-level
                   variable `size` is used if it exists (this is what callers
                   passing five arguments rely on)

    A file with a size below one byte is not copied and 0 is returned. A
    failing copy command is reported on the output; the elapsed time is
    returned regardless of the outcome.

    Reads the module-level flag `debug`.
    NOTE(review): the file names are spliced into a shell command unquoted.
    """
    if size is None:
        size = globals().get('size')
    if size is not None:
        size = int(size)

    if size is None:
        sizeText = ' (size: unknown) '
    else:
        sizeText = ' (size: %d MB) '%(size//1024//1024)
    print(' working on file: ' + file + ' to ' + localDir + sizeText)

    target = localDir + '/' + file
    if storageEle == 'srm-cms.cern.ch':
        rfPath = storagePath.split("=")[-1]
        cpy = 'rfcp ' + rfPath + '/' + file + ' ' + target
    else:
        cpy = 'lcg-cp ' + storageUrl + '/' + file + ' file:////' + target

    # Check whether the file size makes sense (zero length files are probably not yet ready to
    # copy and will not be transferred)
    if size is not None and size < 1:
        print(' WARNING - file size is <1b. Probably this file is not yet ready. Stop copy.')
        return 0

    if debug == 1:
        print(' Debug:: copy: ' + cpy)
    start = Seconds()
    status = os.system(cpy)
    end = Seconds()
    if status != 0:
        print(' ERROR - copy command failed with status %d: %s'%(status,cpy))

    return end - start
161 |
+ |
|
162 |
+ |
def StageFile(storagePath,storageUrl,file,storageEle=None):
    """Issue the staging request for one file and return the exit status.

    storagePath -- storage path; the part after the last '=' is the source
                   directory for 'stager_get'
    storageUrl  -- source URL prefix used in the echoed 'lcg-cp' command
    file        -- name of the file to stage
    storageEle  -- storage element name; when omitted the module-level
                   variable `storageEle` is used (this is what callers
                   passing three arguments rely on)

    For 'srm-cms.cern.ch' the command 'stager_get -M' is run; for any other
    storage element the corresponding 'lcg-cp' command is only echoed. A
    non-zero exit status is reported on the output.

    Reads the module-level names `debug`, `localPath`, `mitCfg`, `version`
    and `mitDataset`.
    """
    print(' staging in file: ' + file)
    if storageEle is None:
        storageEle = globals().get('storageEle', '')

    if storageEle == 'srm-cms.cern.ch':
        rfPath = storagePath.split("=")[-1]
        stg = 'stager_get -M ' + rfPath + '/' + file
    else:
        stg = 'echo lcg-cp ' + storageUrl + '/' + file + ' file:////' + localPath + '/' \
              + mitCfg + '/' + version + '/' + mitDataset + '/' + file

    if debug == 1:
        print(' Debug:: stage: ' + stg)
    status = os.system(stg)
    if status != 0:
        print(' ERROR - staging command failed with status %d: %s'%(status,stg))
    return status
176 |
+ |
|
177 |
+ |
#=================================================================================================== |
178 |
+ |
# Main starts here |
179 |
+ |
#=================================================================================================== |
180 |
+ |
# Define string to explain usage of the script (every option accepted below is listed)
usage = "".join([
    "Usage: downloadSample.py --cmsDataset=<name> | --mitDataset=<name>\n",
    " --mitCfg=<name>\n",
    " --version=<version>\n",
    " --cmssw=<name>\n",
    " --pattern=<expression>\n",
    " --localStorageUrl=<name>\n",
    " --localPath=<dir>\n",
    " --noCache\n",
    " --skip=<file list>\n",
    " --forceCopy\n",
    " --backward\n",
    " --debug\n",
    " --help\n",
])

# Define the valid options which can be specified and check out the command line
valid = ['cmsDataset=','mitDataset=','mitCfg=','version=','cmssw=','pattern=','localStorageUrl=',
         'localPath=','noCache','skip=',
         'forceCopy','backward',
         'debug','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    ex = sys.exc_info()[1]             # spelling that parses on every python version
    print(usage)
    print(str(ex))
    sys.exit(1)
203 |
+ |
|
204 |
+ |
# -------------------------------------------------------------------------------------------------- |
205 |
+ |
# Get all parameters for the production |
206 |
+ |
# -------------------------------------------------------------------------------------------------- |
207 |
+ |
# Defaults used for every option which is not given on the command line
cmsDataset      = None
mitDataset      = None
skip            = ''
skipList        = []
mitCfg          = 'filler'
version         = '012'
cmssw           = ''
blockLocal      = 0                    # set to 1 once --localPath was given explicitly
localStorageUrl = ''
localPath       = '/server/02b/mitprod'
pattern         = ''
noCache         = 0
backward        = ''
forceCopy       = False
debug           = 0
cmsswCfg        = 'cmssw.cfg'

# Overwrite the defaults with what was specified on the command line
for opt, arg in opts:
    if opt == '--help':
        print(usage)
        sys.exit(0)
    elif opt == '--cmsDataset':
        cmsDataset = arg
    elif opt == '--mitDataset':
        mitDataset = arg
    elif opt == '--mitCfg':
        mitCfg = arg
    elif opt == '--version':
        version = arg
    elif opt == '--cmssw':
        cmssw = arg
    elif opt == '--pattern':
        pattern = arg
    elif opt == '--localStorageUrl':
        localStorageUrl = arg
    elif opt == '--localPath':
        blockLocal = 1
        localPath = arg
    elif opt == '--skip':
        skip = arg
        skipList = skip.split(',')
    elif opt == '--noCache':
        noCache = 1
    elif opt == '--backward':
        backward = ' -r '
    elif opt == '--forceCopy':
        forceCopy = True
    elif opt == '--debug':
        debug = 1
258 |
+ |
|
259 |
+ |
# Deal with obvious problems
if cmsDataset is None and mitDataset is None:
    raise RuntimeError('Neither --cmsDataset nor --mitDataset provided. One of them is required.')

crabFile = mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError('Crab file not found: %s' % crabFile)

# the cmssw configuration may be either cmssw.cfg or cmssw.py
cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    cmsswCfg = 'cmssw.py'
    cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        raise RuntimeError(' XXXX ERROR no valid configuration found XXXX')
277 |
+ |
|
278 |
+ |
# Resolve the other mitCfg parameters from the configuration file
prodFile = mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    prodFile = prodFile + '.' + cmssw

# only one of the two dataset names is known at this point, the other one is looked up
if cmsDataset is None:
    cmsDataset = ''
else:
    mitDataset = ''

try:
    prodInput = open(prodFile)
    try:
        prodLines = prodInput.readlines()
    finally:
        prodInput.close()
except IOError:
    print(' WARNING - could not read production file: ' + prodFile)
    prodLines = []

join = 0
fullLine = ''
bSlash = '\\'
for line in prodLines:
    line = line.rstrip('\n')
    # get rid of empty or commented lines
    if line == '' or line[0] == '#':
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    names = fullLine.split()                 # splitting every blank
    if len(names) < 3:                       # not a complete dataset entry
        continue

    if names[0] == cmsDataset:
        mitDataset = names[1]                # this is the equivalent MIT name of the dataset
        nevents = int(names[2])              # number of events to be used in the production
        if len(names) > 4 and names[4] != "-" and blockLocal == 0:
            localPath = names[4]
    if names[1] == mitDataset:
        cmsDataset = names[0]                # this is the equivalent CMS name of the dataset
        nevents = int(names[2])              # number of events to be used in the production
        if len(names) > 4 and names[4] != "-" and blockLocal == 0:
            localPath = names[4]

if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(1)                              # an error has to be visible in the exit status
332 |
+ |
|
333 |
+ |
#cmd = 'grep ' + cmsDataset + ' ' + mitCfg + '/' + version + '/' + 'Productions' |
334 |
+ |
#for file in os.popen(cmd).readlines(): # run command |
335 |
+ |
# line = file[:-1] # strip '\n' |
336 |
+ |
# # test whether there is a directory |
337 |
+ |
# names = line.split() # splitting every blank |
338 |
+ |
# mitDataset = names[1] # this is the equivalent MIT name of the dataset |
339 |
+ |
# nevents = int(names[2]) # number of events to be used in the production |
340 |
+ |
|
341 |
+ |
# Say what we do now
print('\n Preparing dataset for transfer: ' + cmsDataset + ' [MIT: ' + mitDataset + ']\n')

# --------------------------------------------------------------------------------------------------
# Deal with storage element area
# --------------------------------------------------------------------------------------------------
pMitDset = re.compile('XX-MITDATASET-XX')
pMitCfg  = re.compile('XX-MITCFG-XX')
pMitVers = re.compile('XX-MITVERSION-XX')

# read the crab configuration once; the settings of interest are picked from these lines
crabFile = mitCfg + '/' + version + '/' + 'crab.cfg'
crabInput = open(crabFile)
try:
    crabLines = [ crabLine.rstrip('\n') for crabLine in crabInput ]
finally:
    crabInput.close()

def _lastCrabLine(key):
    # Return the last line of crab.cfg starting with key, or None if there is no such line
    found = None
    for crabLine in crabLines:
        if crabLine.startswith(key):
            found = crabLine
    return found

def _fillPlaceholders(text):
    # Replace the dataset, configuration and version placeholders in text
    text = pMitDset.sub(mitDataset,text)
    text = pMitCfg .sub(mitCfg,    text)
    text = pMitVers.sub(version,   text)
    return text

# find the foreseen storage place: the name is what follows the last '='
storageEle = ''
eleLine = _lastCrabLine('storage_element')
if eleLine is not None:
    storageEle = re.sub(r"\s", "", eleLine.split("=").pop())

# the storage directory is everything after the first '=' (the value itself may contain '=')
storagePath = ''
pathLine = _lastCrabLine('storage_path')
if pathLine is not None:
    storagePath = "=".join(_fillPlaceholders(pathLine).split("=")[1:])
    storagePath = re.sub(r"\s", "", storagePath)
storageUrl = 'srm://' + storageEle + ':8443' + storagePath

# the remote user directory is optional
userRemoteDir = ''
remoteLine = _lastCrabLine('user_remote_dir')
if remoteLine is not None:
    userRemoteDir = "=".join(_fillPlaceholders(remoteLine).split("=")[1:])
    userRemoteDir = re.sub(r"\s", "", userRemoteDir)
    userRemoteDir = re.sub("/XX-CRABID-XX", "", userRemoteDir)

if userRemoteDir != '':
    storagePath += userRemoteDir
    storageUrl  += userRemoteDir

if localStorageUrl != '':
    storageEle  = ''
    storagePath = ''
    storageUrl  = localStorageUrl
elif eleLine is None or pathLine is None:
    print(' ERROR - storage_element or storage_path not found in ' + crabFile)
    sys.exit(1)

print(' --> StorageUrl: ' + storageUrl)
396 |
+ |
|
397 |
+ |
#--------------------------------------------------------------------------------------------------- |
398 |
+ |
# create the local storage area |
399 |
+ |
#--------------------------------------------------------------------------------------------------- |
400 |
+ |
print(' Make local path: ' + localPath)
localDir = localPath + '/' + mitCfg + '/' + version + '/' + mitDataset
mkd = 'mkdir -p ' + localDir
status = os.system(mkd)

if status != 0:
    print(' ERROR - could not create local directory ' + localDir)
    sys.exit(1)

print(' --> LocalDir: ' + localDir)

# free space in bytes available on the volume holding the local directory, asked from the
# file system directly so that no 'df' output columns have to be guessed
volume = os.statvfs(localDir)
free = volume.f_bavail * volume.f_frsize
419 |
+ |
|
420 |
+ |
#--------------------------------------------------------------------------------------------------- |
421 |
+ |
# create a list af all files to be copied |
422 |
+ |
#--------------------------------------------------------------------------------------------------- |
423 |
+ |
# list of all files at the source (the directory is what follows the last '=' of the path)
path = storagePath.split('=').pop()
cmd = 'list ' + path + ' | grep root | sort ' + backward

if pattern != "":
    cmd += ' | grep ' + pattern

print(' Find file: ' + cmd)
cacheFile = '/tmp/.cache_' + mitDataset
allFileList = BuildFileList(cmd)
stagedFileList = BuildStagedFileList(storagePath,allFileList,cacheFile)
CacheStagedFileList(cacheFile,storagePath,stagedFileList)

# list of the files which are already available locally
cmd = 'list ' + localPath + '/' + mitCfg + '/' + version + '/' + mitDataset + ' | grep root'
print('List: ' + cmd)
doneFileList = BuildFileList(cmd)
448 |
+ |
|
449 |
+ |
#--------------------------------------------------------------------------------------------------- |
450 |
+ |
# go through the lists: first check files are consistent, then copy the remaining files |
451 |
+ |
#--------------------------------------------------------------------------------------------------- |
452 |
+ |
# initialize data volumes
b2G = 1.0/(1024.*1024.*1024)
nTotal = 0
totalDataVolume = 0
nDone = 0
doneDataVolume = 0

# a file counts as done only if it exists locally with the same size as at the source
for file, size in allFileList.items():
    nTotal += 1
    totalDataVolume += size
    if (file in doneFileList) and (doneFileList[file] == size):
        nDone += 1
        doneDataVolume += size

print(' ')
print(' Summary of data volume\n')
print(' --> number of files to copy: %8d (total: %d) '%(nTotal-nDone,nTotal))
print(' --> volume to copy [GB]: %8.2f (total: %.2f) '%(b2G*(totalDataVolume-doneDataVolume),
                                                         b2G*totalDataVolume))
print(' --> free volume [GB]: %8.2f '%(b2G*free))
print(' ')

if free*0.85 < (totalDataVolume-doneDataVolume):
    print(' ERROR - probably no enough space on volume. See above (some safety assumed)!')
    sys.exit(1)

# every local file has to be known at the source and has to have the same size
for file, size in doneFileList.items():
    if file in allFileList:
        if allFileList[file] != size:
            print(' ERROR - file sizes did not match: ' + file +
                  ' [ local: %10d, remote: %10d ]'%(size,allFileList[file]))
            sys.exit(1)
    else:
        print(' ERROR - file from done list is not in the all files list. File: ' + file)
        sys.exit(1)

# copy what is missing; the data volumes were already accounted for above
totalSizeMb = 0.
totalTimeSc = 0.
for file, size in allFileList.items():
    if debug == 1:
        print(' Debug:: ' + file + ' -> size %d'%size)

    if file in doneFileList:
        print(' --> done, size match: %10d - %s'%(size,file))
        continue

    if InSkipList(file,skipList):
        print(' --> skipping file: %10d - %s'%(size,file))
        continue

    print(' --> copying file: %10d - %s (castor stat: %s)'%
          (size,file,stagedFileList[file]))
    if stagedFileList[file] == "STAGED" or forceCopy:
        sizeMb = size/1024./1024.
        deltaT = CopyFile(storageEle,storagePath,storageUrl,file,localDir)
        if deltaT > 0:
            print(' time required [sec]: %7d rate [MB/sec]: %9.3f'%
                  (deltaT,sizeMb/deltaT))
        else:
            print(' time required [sec]: %7d rate [MB/sec]: ?'%(deltaT))
        totalTimeSc += deltaT
        totalSizeMb += sizeMb
    else:
        # not on disk yet: request the staging and pick the file up in a later run
        print(' skipping file: %s'%(stagedFileList[file]))
        StageFile(storagePath,storageUrl,file)

print('')
if totalTimeSc > 0:
    print(' Performance: volume copied [GB] %9.3f; time [sec] %9d; -> rate [MB/sec] %9.3f'%
          (totalSizeMb/1024.,totalTimeSc,totalSizeMb/totalTimeSc))
else:
    print(' Performance: volume copied [GB] %9.3f; time [sec] %9d; -> rate [MB/sec] ?'%
          (totalSizeMb/1024.,totalTimeSc))
print('')