1 |
paus |
1.2 |
#!/usr/bin/env python
|
2 |
|
|
#---------------------------------------------------------------------------------------------------
|
3 |
|
|
# Script to automatically download a MIT dataset to our local cluster
|
4 |
|
|
#
|
5 |
|
|
# The download of the MIT dataset is organized in accordance with the dataset production logic. In
|
6 |
|
|
# general it is allowed to download the dataset from any location of a properly configured storage
|
7 |
|
|
# element. The script will perform all of the most obvious tests to ensure an efficient and safe download. For
|
8 |
|
|
# performance reasons a checksum is not calculated. This omission is considered completely safe as
|
9 |
|
|
# failures will be identified in the analysis phase and the rare occasions will be more effective to
|
10 |
|
|
# fix by hand.
|
11 |
|
|
#
|
12 |
|
|
# At present the download proceeds in one thread (one file at a time) which for performance reasons
|
13 |
|
|
# might not be optimal.
|
14 |
|
|
#
|
15 |
|
|
# Author: C.Paus (July 1, 2008)
|
16 |
|
|
#---------------------------------------------------------------------------------------------------
|
17 |
|
|
# Missing but desired features:
|
18 |
|
|
# + accounting of size of each file
|
19 |
|
|
# + accounting of locally available files (avoid copying already existing files)
|
20 |
|
|
# + determine full list of files before starting to copy
|
21 |
|
|
# + minimal success check of the copy
|
22 |
|
|
# + calculate total data volume (to copy, already copied etc.)
|
23 |
|
|
# + add feature to check the castor status
|
24 |
|
|
# - add time estimates and progressions for copies
|
25 |
|
|
# - multi downloads to enhance performance
|
26 |
|
|
#---------------------------------------------------------------------------------------------------
|
27 |
|
|
import os,sys,getopt,re,string
|
28 |
|
|
|
29 |
paus |
1.5 |
# Host name of the dCache door. CopyFile and RecoverFile insert it as 'dcap://<door>/' into the
# dccp commands they build for the 'se01.cmsaf.mit.edu' storage element.
dCacheDoor = 't2srv0012.cmsaf.mit.edu'
|
30 |
|
|
|
31 |
paus |
1.2 |
def Seconds():
    """Return the current time as whole seconds since the Unix epoch.

    CopyFile and RecoverFile call this before and after a transfer and use
    the difference as the elapsed time in seconds.

    The value is taken from the interpreter clock instead of spawning
    'date +%s' for every call; this also removes the failure mode in which
    an empty pipe left the result variable unbound.
    """
    # local import: 'time' is not part of the file-level import list
    import time
    return int(time.time())
|
35 |
|
|
|
36 |
|
|
def InSkipList(file,list):
    """Tell whether 'file' is one of the entries of the skip list 'list'.

    file -- file name to look for
    list -- sequence of file names that must not be copied

    Returns True when an entry equal to 'file' exists, False otherwise
    (in particular for an empty list).
    """
    # NOTE: the parameter names shadow builtins; they are kept because they
    #       are part of the existing interface.
    return file in list
|
41 |
|
|
|
42 |
|
|
def DecodeSrmLs(line):
    """Decode one listing line of the form '<size> <path>'.

    line -- text line; surrounding white space is ignored

    Returns the two element list [size, file name], both as strings, where
    the file name is the last '/' separated component of the path. A line
    without a second blank separated field raises IndexError.

    The only call to this function in this file is commented out.
    """
    fields = line.strip().split(" ")
    size = fields[0]
    # keep only what follows the last slash of the path
    name = fields[1].split("/")[-1]
    return [size, name]
|
50 |
|
|
|
51 |
|
|
def DecodeRfDir(line):
    """Decode one long-format directory listing line.

    line -- text line; surrounding white space is ignored

    Returns the two element list [size, file name], both as strings. The
    file name is the last blank separated field and the size is the fifth
    field. The size is looked up after the name has been taken off the end,
    so at least six fields are needed, otherwise IndexError is raised.

    The only call to this function in this file is commented out.
    """
    fields = line.strip().split(" ")
    name = fields[-1]
    leading = fields[:-1]
    return [leading[4], name]
|
58 |
|
|
|
59 |
|
|
def BuildFileList(cmd):
    """Run a listing command and collect the files it reports.

    cmd -- shell command line; every non-empty line of its output is
           expected to start with '<size> <path>'

    Returns a dictionary that maps the file name (last '/' separated
    component of the path) to its size converted to an integer. A size field
    that is not a number raises ValueError.

    Reads the module-level flag 'debug'.
    """
    fileList = {}

    if debug == 1:
        print(' Debug:: list: ' + cmd)

    # read everything first so the pipe is closed even if decoding fails
    pipe = os.popen(cmd)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        # splitting on any white space copes with a missing final newline
        # and with repeated blanks between the two fields
        fields = line.split()
        if len(fields) < 2:
            # blank or incomplete line: nothing to register
            continue
        size = fields[0]
        name = fields[1].split("/")[-1]
        if debug == 1:
            print(' Debug:: adding: ' + name + ' with size ' + size)
        fileList[name] = int(size)

    return fileList
|
86 |
|
|
|
87 |
|
|
def BuildStagedFileList(storagePath,allFileList,cacheFile):
    """Determine the stager status of every file in 'allFileList'.

    storagePath -- storage path; the text after the last '=' is used as the
                   directory that holds the files
    allFileList -- dictionary keyed by file name (the values are not used)
    cacheFile   -- file with the result of earlier stager queries

    Returns a dictionary that maps each file name to a status string:
      - 'STAGED' for every file when the directory is not below '/castor/'
      - otherwise the last field of the matching query line, or 'undefined'
        when no line starts with the full file name.

    For castor directories the status is read with 'grep' from 'cacheFile'
    when that file exists and the module-level 'noCache' is 0, else it is
    queried with 'stager_qry -M'. Also reads the module-level flag 'debug'.
    """
    fileList = {}
    rfPath = storagePath.split("=")[-1]

    # anything that is not castor has no stager: pretend the files are staged
    if '/castor/' not in rfPath:
        for name in allFileList:
            fileList[name] = 'STAGED'
        return fileList

    # here we deal with castor
    if debug == 1:
        print(' Debug:: rfpath: ' + rfPath)

    # the decision does not change while looping, so take it once
    useCache = os.path.exists(cacheFile) and noCache == 0
    if useCache:
        print(' Using the cached stager queries at ' + cacheFile)

    for name in allFileList:
        fullFile = rfPath + '/' + name
        if debug == 1:
            print(' Debug:: full file name: ' + fullFile)
        if useCache:
            cmd = 'grep ' + name + ' ' + cacheFile
        else:
            cmd = 'stager_qry -M ' + fullFile

        fileList[name] = 'undefined'
        pipe = os.popen(cmd)
        try:
            lines = pipe.readlines()
        finally:
            pipe.close()
        for line in lines:
            fields = line.rstrip('\n').split(" ")
            # only lines that start with exactly this file are of interest
            if fields[0] == fullFile:
                fileList[name] = fields[-1]

    return fileList
|
123 |
|
|
|
124 |
|
|
def CacheStagedFileList(cacheFile,storagePath,stagedFileList):
    """Write the stager status of all files to 'cacheFile'.

    cacheFile      -- file to (over)write
    storagePath    -- storage path; the text after the last '=' is used as
                      the directory that holds the files
    stagedFileList -- dictionary that maps file name to status string

    One line '<directory>/<file> xyz@castorns <status>' is written per file.
    This is the layout BuildStagedFileList decodes: full file name first,
    status last. Nothing is returned.
    """
    print(' Caching stager query status to ' + cacheFile)
    rfPath = storagePath.split("=")[-1]
    fileOutput = open(cacheFile,'w')
    try:
        for name, status in stagedFileList.items():
            fileOutput.write(rfPath + '/' + name + ' xyz@castorns ' + status + '\n')
    finally:
        # make sure the handle is released even when a write fails
        fileOutput.close()
|
133 |
|
|
|
134 |
paus |
1.5 |
def CopyFile(storageEle,storagePath,storageUrl,file,localDir,fromCern):
    """Copy one file from the storage element into the local directory.

    storageEle  -- name of the storage element; selects the copy tool
    storagePath -- storage path; the text after the last '=' is the remote
                   directory used by rfcp and dccp
    storageUrl  -- remote URL of the directory, used by lcg-cp
    file        -- name of the file to copy
    localDir    -- local directory that receives the file
    fromCern    -- when true, rfcp is not used even for 'srm-cms.cern.ch'

    Copy tool:
      'srm-cms.cern.ch' and not fromCern -> rfcp
      'se01.cmsaf.mit.edu'               -> dccp through the dCache door
      anything else                      -> lcg-cp

    Returns the time the copy command took in seconds, or 0 when the file
    was not copied because its size is below one byte.

    NOTE: the size is not a parameter; it is read from the module-level
          variable 'size' that the calling loop sets. 'debug' and
          'dCacheDoor' are module-level names as well.
    """
    deltaT = 0
    print(' working on file: ' + file + ' to ' + localDir +
          ' (size: %d MB) '%(int(size)//1024//1024))

    # the caller hands over the complete local directory, no need to rebuild it
    target = localDir + '/' + file
    rfPath = storagePath.split("=")[-1]

    if storageEle == 'srm-cms.cern.ch' and not fromCern:
        cpy = 'rfcp ' + rfPath + '/' + file + ' ' + target
    elif storageEle == 'se01.cmsaf.mit.edu':
        cpy = 'dccp dcap://' + dCacheDoor + '/' + rfPath + '/' + file + ' ' + target
    else:
        cpy = 'lcg-cp ' + storageUrl + '/' + file + ' file:////' + target

    # Check whether the file size makes sense (zero length files are probably not yet ready to
    # copy and will not be transferred)
    if size < 1:
        print(' WARNING - file size is <1b. Probably this file is not yet ready. Stop copy.')
        return deltaT

    if debug == 1:
        print(' Debug:: copy: ' + cpy)
    start = Seconds()
    status = os.system(cpy)
    deltaT = Seconds() - start
    if status != 0:
        # do not let a failed transfer pass silently
        print(' ERROR - copy command failed (status %d): %s'%(status,cpy))

    return deltaT
|
173 |
|
|
|
174 |
paus |
1.5 |
def RecoverFile(storageEle,storagePath,storageUrl,file,localDir):
    """Copy one file from the local directory back to the storage element.

    storageEle  -- name of the storage element; selects the copy tool
    storagePath -- storage path; the text after the last '=' is the remote
                   directory used by rfcp and dccp
    storageUrl  -- remote URL of the directory, used by lcg-cp
    file        -- name of the file to copy
    localDir    -- local directory that holds the file

    Copy tool:
      'srm-cms.cern.ch'    -> rfcp
      'se01.cmsaf.mit.edu' -> dccp through the dCache door
      anything else        -> lcg-cp

    Returns the time the copy command took in seconds, or 0 when the file
    was not copied because its size is below one byte.

    NOTE: the size is not a parameter; it is read from the module-level
          variable 'size' that the calling loop sets. 'debug' and
          'dCacheDoor' are module-level names as well.
    """
    deltaT = 0
    print(' working on file: ' + file + ' from ' + localDir +
          ' (size: %d MB) '%(int(size)//1024//1024))

    # the caller hands over the complete local directory, no need to rebuild it
    source = localDir + '/' + file
    rfPath = storagePath.split("=")[-1]

    if storageEle == 'srm-cms.cern.ch':
        cpy = 'rfcp ' + source + ' ' + rfPath + '/' + file
    elif storageEle == 'se01.cmsaf.mit.edu':
        cpy = 'dccp ' + source + ' dcap://' + dCacheDoor + '/' + rfPath + '/' + file
        print(' using dccp.... ' + cpy)
    else:
        cpy = 'lcg-cp ' + 'file:////' + source + ' ' + storageUrl + '/' + file

    # Check whether the file size makes sense (zero length files are probably not yet ready to
    # copy and will not be transferred)
    if size < 1:
        print(' WARNING - file size is <1b. Probably this file is not yet ready. Stop recovery.')
        return deltaT

    if debug == 1:
        print(' Debug:: copy: ' + cpy)
    start = Seconds()
    status = os.system(cpy)
    deltaT = Seconds() - start
    if status != 0:
        # do not let a failed transfer pass silently
        print(' ERROR - copy command failed (status %d): %s'%(status,cpy))

    return deltaT
|
213 |
|
|
|
214 |
paus |
1.2 |
def StageFile(storagePath,storageUrl,file,fromCern=None):
    """Ask for one file to be staged in.

    storagePath -- storage path; the text after the last '=' is the remote
                   directory handed to the stager
    storageUrl  -- remote URL of the directory
    file        -- name of the file
    fromCern    -- optional; when left out the module-level 'fromCern' is
                   used, which is what the three argument form always did.
                   The main loop passes it as a fourth argument, which the
                   previous three parameter signature rejected.

    For 'srm-cms.cern.ch' without fromCern a 'stager_get -M' request is
    issued. In all other cases the lcg-cp command is only echoed, nothing is
    staged or copied. Nothing is returned.

    Reads the module-level names storageEle, localPath, mitCfg, version,
    mitDataset and debug.
    """
    print(' staging in file: ' + file)
    if fromCern is None:
        fromCern = globals().get('fromCern', False)

    if storageEle == 'srm-cms.cern.ch' and not fromCern:
        rfPath = storagePath.split("=")[-1]
        stg = 'stager_get -M ' + rfPath + '/' + file
    else:
        stg = 'echo lcg-cp ' + storageUrl + '/' + file + ' file:////' + localPath + '/' \
              + mitCfg + '/' + version + '/' + mitDataset + '/' + file

    if debug == 1:
        print(' Debug:: stage: ' + stg)
    status = os.system(stg)
    if status != 0:
        # do not let a failed request pass silently
        print(' ERROR - stage command failed (status %d): %s'%(status,stg))
|
228 |
|
|
|
229 |
|
|
#===================================================================================================
|
230 |
|
|
# Main starts here
|
231 |
|
|
#===================================================================================================
|
232 |
|
|
# Define string to explain usage of the script
|
233 |
|
|
# Usage text; it is printed for --help and when the command line cannot be decoded. Every entry
# of the list of valid options below has a line here (--pattern and --noCache were missing).
usage = "Usage: downloadSample.py --cmsDataset=<name> | --mitDataset=<name>\n"
usage += " --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<pattern>\n"
usage += " --localStorageUrl=<name>\n"
usage += " --localPath=<dir>\n"
usage += " --skip=<file list>\n"
usage += " --noCache\n"
usage += " --fromCern\n"
usage += " --forceCopy\n"
usage += " --backward\n"
usage += " --stopOnError\n"
usage += " --debug\n"
usage += " --test\n"
usage += " --help\n"

# Define the valid options which can be specified and check out the command line
valid = ['cmsDataset=','mitDataset=','mitCfg=','version=','cmssw=','pattern=','localStorageUrl=',
         'localPath=','noCache','skip=',
         'fromCern','forceCopy','backward','stopOnError',
         'debug','test','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # fetch the exception this way so the statement is valid for every python version
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)
|
259 |
|
|
|
260 |
|
|
# --------------------------------------------------------------------------------------------------
|
261 |
|
|
# Get all parameters for the production
|
262 |
|
|
# --------------------------------------------------------------------------------------------------
|
263 |
|
|
# Set defaults for each option
|
264 |
|
|
# Defaults; each of them can be replaced by the command line option of the same name
cmsDataset = None
mitDataset = None
skip = ''
skipList = []
mitCfg = 'filefi'
version = '014'
cmssw = ''
blockLocal = 0                     # set to 1 once --localPath was given explicitly
localStorageUrl = ''
localPath = '/server/02b/mitprod'
pattern = ''
noCache = 0
backward = ''                      # becomes ' -r ' and is appended to the 'sort' of the listing
fromCern = False
stopOnError = False
forceCopy = False
debug = 0
test = 0
cmsswCfg = 'cmssw.cfg'

# Read new values from the command line (every option matches exactly one branch; --forceCopy
# used to be tested twice)
for opt, arg in opts:
    if opt == '--help':
        print(usage)
        sys.exit(0)
    elif opt == '--cmsDataset':
        cmsDataset = arg
    elif opt == '--mitDataset':
        mitDataset = arg
    elif opt == '--mitCfg':
        mitCfg = arg
    elif opt == '--version':
        version = arg
    elif opt == '--cmssw':
        cmssw = arg
    elif opt == '--pattern':
        pattern = arg
    elif opt == '--localStorageUrl':
        localStorageUrl = arg
    elif opt == '--localPath':
        blockLocal = 1
        localPath = arg
    elif opt == '--skip':
        skip = arg
        skipList = skip.split(',')
    elif opt == '--noCache':
        noCache = 1
    elif opt == '--stopOnError':
        stopOnError = True
    elif opt == '--backward':
        backward = ' -r '
    elif opt == '--fromCern':
        fromCern = True
    elif opt == '--forceCopy':
        forceCopy = True
    elif opt == '--debug':
        debug = 1
    elif opt == '--test':
        test = 1
|
325 |
paus |
1.2 |
|
326 |
|
|
# Deal with obvious problems
|
327 |
|
|
# One of the two dataset names is needed; the other one is looked up later
if cmsDataset is None and mitDataset is None:
    cmd = 'Neither --cmsDataset nor --mitDataset option provided. One of them is required.'
    raise RuntimeError(cmd)

# The crab configuration has to exist
crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    cmd = 'Crab file not found: %s' % crabFile
    raise RuntimeError(cmd)

# The cmssw configuration may be called cmssw.cfg or, failing that, cmssw.py
cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    cmsswCfg = 'cmssw.py'
    cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        cmd = ' XXXX ERROR no valid configuration found XXXX'
        raise RuntimeError(cmd)
|
344 |
|
|
|
345 |
|
|
# Resolve the other mitCfg parameters from the configuration file
|
346 |
paus |
1.4 |
# The Productions file (optionally with the cmssw name as extension) relates the CMS and the MIT
# name of a dataset. Fields used per entry: 0 = CMS name, 1 = MIT name, 2 = number of events,
# 4 = local path or '-'.
cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

# The name that was not given is blanked and filled from the matching entry
join = 0
if cmsDataset is None:
    cmsDataset = ''
else:
    mitDataset = ''

fullLine = ''
bSlash = '\\'
for line in os.popen(cmd).readlines():  # run command
    line = line[:-1]
    # get rid of empty or commented lines
    if line == '' or line[0] == '#':
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming (a trailing backslash continues the entry)
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
    else:
        join = 0
        names = fullLine.split()       # splitting every blank
        if not names:
            # line of blanks only
            continue
        if names[0] == cmsDataset:
            mitDataset = names[1]      # this is the equivalent MIT name of the dataset
            nevents = int(names[2])    # number of events to be used in the production
            # a path from the file is used unless --localPath was given
            if names[4] != "-" and blockLocal == 0:
                localPath = names[4]
        if names[1] == mitDataset:
            cmsDataset = names[0]      # this is the equivalent CMS name of the dataset
            nevents = int(names[2])    # number of events to be used in the production
            if names[4] != "-" and blockLocal == 0:
                localPath = names[4]

if mitDataset == "":
    print("ERROR - dataset not defined.")
    # this is a failure: exit with a non-zero status like the other error exits of the script
    sys.exit(1)

# Say what we do now
print('\n Preparing dataset for transfer: ' + cmsDataset + ' [MIT: ' + mitDataset + ']\n')
|
410 |
|
|
|
411 |
|
|
# --------------------------------------------------------------------------------------------------
|
412 |
|
|
# Deal with storage element area
|
413 |
|
|
# --------------------------------------------------------------------------------------------------
|
414 |
|
|
# Place holders of the crab configuration that are replaced by the actual values
pMitDset = re.compile('XX-MITDATASET-XX')
pMitCfg = re.compile('XX-MITCFG-XX')
pMitVers = re.compile('XX-MITVERSION-XX')

# find the foreseen storage place: the value of 'storage_element' is what follows the last '='
crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
cmd = 'grep ^storage_element ' + crabFile
for file in os.popen(cmd).readlines():  # run command
    line = file[:-1]                    # strip '\n'
    # decode the storage element name
    names = line.split("=")             # splitting every '='
    storageEle = names.pop()
    storageEle = re.sub(r"\s", "",storageEle)

# the value of 'storage_path' may contain '=' itself: everything after the first '=' is kept
cmd = 'grep ^storage_path ' + crabFile
for file in os.popen(cmd).readlines():  # run command
    line = file[:-1]                    # strip '\n'
    line = pMitDset.sub(mitDataset,line)
    line = pMitCfg.sub(mitCfg, line)
    line = pMitVers.sub(version, line)
    # decode the storage directory name
    names = line.split("=")             # splitting every '='
    names = names[1:]
    storagePath = "=".join(names)
    storagePath = re.sub(r"\s", "",storagePath)

##storage_element = srm-cms.cern.ch
##storage_path = /srm/managerv2?SFN=/castor/cern.ch

## Hardwire
if fromCern:
    storageEle = 'srm-cms.cern.ch'
    storagePath = '/srm/managerv2?SFN=/castor/cern.ch'

storageUrl = 'srm://' + storageEle + ':8443' + storagePath

# 'user_remote_dir' is optional: start from an empty value so that the test further down also
# works for a configuration without that entry
userRemoteDir = ''
cmd = 'grep ^user_remote_dir ' + crabFile
for file in os.popen(cmd).readlines():  # run command
    line = file[:-1]                    # strip '\n'
    line = pMitDset.sub(mitDataset,line)
    line = pMitCfg.sub(mitCfg, line)
    line = pMitVers.sub(version, line)
    # decode the storage directory name
    names = line.split("=")             # splitting every '='
    names = names[1:]
    userRemoteDir = "=".join(names)
    userRemoteDir = re.sub(r"\s","",userRemoteDir)
    userRemoteDir = re.sub("/XX-CRABID-XX","",userRemoteDir)

## Hardwire
if fromCern:
    userRemoteDir = "/user/p/paus/" + mitCfg + "/" + version + "/" + mitDataset

if userRemoteDir != '':
    storagePath += userRemoteDir
    storageUrl += userRemoteDir

# an explicitly given url replaces everything derived from the crab configuration
if localStorageUrl != '':
    storageEle = ''
    storagePath = ''
    storageUrl = localStorageUrl

print(' --> StorageUrl: ' + storageUrl)
|
476 |
|
|
|
477 |
|
|
#---------------------------------------------------------------------------------------------------
|
478 |
|
|
# create the local storage area
|
479 |
|
|
#---------------------------------------------------------------------------------------------------
|
480 |
|
|
print(' Make local path: ' + localPath)
localDir = localPath + '/' + mitCfg + '/' + version + '/' + mitDataset
mkd = 'mkdir -p ' + localDir
status = os.system(mkd)

if status != 0:
    print(' ERROR - could not create local directory ' + localDir)
    sys.exit(1)

print(' --> LocalDir: ' + localDir)

# Free space of the volume in bytes, taken from the last line df prints
cmd = 'df --block-size=1 ' + localDir + ' | tr -s \' \' | tail -1'
for line in os.popen(cmd).readlines():  # run command
    line = line.strip()
    f = line.split(" ")
    # When the last line starts with the file system name the available space is the fourth
    # field, otherwise the third field is taken (the name is not part of the line then).
    # The test used to read line[0:0], an empty slice that never equals '/', so lines that
    # start with a device path were decoded with the wrong field.
    if line[0:1] == '/' or line[0:4] == 'fuse':
        free = int(f[3])
    else:
        free = int(f[2])
|
499 |
|
|
|
500 |
|
|
#---------------------------------------------------------------------------------------------------
|
501 |
|
|
# create a list of all files to be copied
|
502 |
|
|
#---------------------------------------------------------------------------------------------------
|
503 |
|
|
# Command to list the remote files: the directory is what follows the last '=' of the path
f = storagePath.split('=')
path = f.pop()
cmd = 'list ' + path + ' | grep root | sort ' + backward
if fromCern:
    # reduce the srmls output to the two fields '<size> <path>'
    cmd = 'srmls ' + storageUrl + '|grep root|sort ' + backward + '|tr -s \' \'|cut -d\' \' -f 2-3'

if pattern != "":
    cmd += ' | grep ' + pattern

print(' Find file: ' + cmd)
cacheFile = '/tmp/.cache_' + mitDataset
allFileList = BuildFileList(cmd)
stagedFileList = BuildStagedFileList(storagePath,allFileList,cacheFile)
cacheStaged = CacheStagedFileList(cacheFile,storagePath,stagedFileList)

# Files that are already in the local directory. The command is printed after it has been
# built; it used to be printed before, which repeated the remote listing command.
cmd = 'list ' + localPath + '/' + mitCfg + '/' + version + '/' + mitDataset + ' | grep root'
print('List: ' + cmd)
doneFileList = BuildFileList(cmd)
|
529 |
|
|
|
530 |
|
|
#---------------------------------------------------------------------------------------------------
|
531 |
|
|
# go through the lists: first check files are consistent, then copy the remaining files
|
532 |
|
|
#---------------------------------------------------------------------------------------------------
|
533 |
|
|
# initialize data volumes
|
534 |
|
|
# Conversion factor from bytes to GB and the counters of the summary
b2G = 1.0/(1024.*1024.*1024)
nTotal = nDone = 0
totalDataVolume = doneDataVolume = 0

# A file counts as done only when it is in the local list with the very same size
for file, size in allFileList.items():
    nTotal += 1
    totalDataVolume += size
    if file not in doneFileList or doneFileList[file] != size:
        continue
    nDone += 1
    doneDataVolume += size

print(' ')
print(' Summary of data volume\n')
print(' --> number of files to copy: %8d (total: %d) '%(nTotal-nDone,nTotal))
print(' --> volume to copy [GB]: %8.2f (total: %.2f) '%(b2G*(totalDataVolume-doneDataVolume),
                                                         b2G*totalDataVolume))
print(' --> free volume [GB]: %8.2f '%(b2G*free))
print(' ')

# Stop unless the outstanding volume fits into 85% of the free space
if (totalDataVolume-doneDataVolume) > free*0.85:
    print(' ERROR - probably no enough space on volume. See above (some safety assumed)!')
    sys.exit(1)
|
558 |
|
|
|
559 |
|
|
# Go through the local files: a file unknown to the remote list is copied back with RecoverFile,
# a file known with another size is reported (and stops the script with --stopOnError).
# The loop variable has to be called 'size': RecoverFile reads it as a module-level name.
for file, size in doneFileList.items():
    if file not in allFileList:
        print(' ERROR - file from done list is not in the all files list. File: ' + file)
        print(' RECOVER - File: ' + file)
        sizeMb = size/1024./1024.
        deltaT = RecoverFile(storageEle,storagePath,storageUrl,file,localDir)
        if deltaT > 0:
            print(' time required [sec]: %7d rate [MB/sec]: %9.3f'%(deltaT,sizeMb/deltaT))
        else:
            print(' time required [sec]: %7d rate [MB/sec]: ?'%(deltaT))
        continue

    if allFileList[file] == size:
        # local and remote size agree, nothing to report
        continue

    print(' ERROR - file sizes did not match: ' + file +
          ' [ local: %10d, remote: %10d ]'%(size,allFileList[file]))
    if stopOnError:
        sys.exit(1)
|
578 |
|
|
|
579 |
|
|
#sys.exit(1)
|
580 |
paus |
1.2 |
|
581 |
|
|
# Copy what is missing and keep track of volume and time for the performance summary.
# The loop variable has to be called 'size': CopyFile reads it as a module-level name.
# totalDataVolume and doneDataVolume are left alone here: they were summed up for the summary
# above, and the statements that touched them again in this loop double counted the total and
# overwrote the done volume ('= +size').
totalSizeMb = 0.
totalTimeSc = 0.
for file, size in allFileList.items():
    if debug == 1:
        print(' Debug:: ' + file + ' -> size %d'%size)

    # NOTE: only the presence in the local list is tested here, the size is not compared
    if file in doneFileList:
        print(' --> done, size match: %10d - %s'%(size,file))
        continue

    if InSkipList(file,skipList):
        print(' --> skipping file: %10d - %s'%(size,file))
        continue

    print(' --> copying file: %10d - %s (castor stat: %s)'%(size,file,stagedFileList[file]))
    if test == 1:
        print(' testing only.')
    elif stagedFileList[file] == "STAGED" or forceCopy:
        sizeMb = size/1024./1024.
        deltaT = CopyFile(storageEle,storagePath,storageUrl,file,localDir,fromCern)
        if deltaT > 0:
            print(' time required [sec]: %7d rate [MB/sec]: %9.3f'%(deltaT,sizeMb/deltaT))
        else:
            print(' time required [sec]: %7d rate [MB/sec]: ?'%(deltaT))
        totalTimeSc += deltaT
        totalSizeMb += sizeMb
    else:
        # not staged yet: request the staging, the copy is left for a later run
        print(' skipping file: %s'%(stagedFileList[file]))
        # StageFile is declared with three parameters and reads the module-level fromCern
        # itself; the fourth argument that used to be passed made this call fail
        StageFile(storagePath,storageUrl,file)

print('')
if totalTimeSc > 0:
    print(' Performance: volume copied [GB] %9.3f; time [sec] %9d; -> rate [MB/sec] %9.3f'%
          (totalSizeMb/1024.,totalTimeSc,totalSizeMb/totalTimeSc))
else:
    print(' Performance: volume copied [GB] %9.3f; time [sec] %9d; -> rate [MB/sec] ?'%
          (totalSizeMb/1024.,totalTimeSc))
print('')
|