1 |
paus |
1.2 |
#!/usr/bin/env python
|
2 |
|
|
#---------------------------------------------------------------------------------------------------
|
3 |
|
|
# Script to automatically download a MIT dataset to our local cluster
|
4 |
|
|
#
|
5 |
|
|
# The download of the MIT dataset is organized in accordance with the dataset production logic. In
|
6 |
|
|
# general it is allowed to download the dataset from any location of a properly configured storage
|
7 |
|
|
# element. The script performs the most obvious tests to ensure an efficient and safe download. For
|
8 |
|
|
# performance reasons a checksum is not calculated. This omission is considered completely safe as
|
9 |
|
|
# failures will be identified in the analysis phase and the rare occasions will be more effective to
|
10 |
|
|
# fix by hand.
|
11 |
|
|
#
|
12 |
|
|
# At present the download proceeds in one thread (one file at a time) which for performance reasons
|
13 |
|
|
# might not be optimal.
|
14 |
|
|
#
|
15 |
|
|
# Author: C.Paus (July 1, 2008)
|
16 |
|
|
#---------------------------------------------------------------------------------------------------
|
17 |
|
|
# Missing but desired features:
|
18 |
|
|
# + accounting of size of each file
|
19 |
|
|
# + accounting of locally available files (avoid copying already existing files)
|
20 |
|
|
# + determine full list of files before starting to copy
|
21 |
|
|
# + minimal success check of the copy
|
22 |
|
|
# + calculate total data volume (to copy, already copied etc.)
|
23 |
|
|
# + add feature to check the castor status
|
24 |
|
|
# - add time estimates and progressions for copies
|
25 |
|
|
# - multi downloads to enhance performance
|
26 |
|
|
#---------------------------------------------------------------------------------------------------
|
27 |
|
|
import os,sys,getopt,re,string
|
28 |
|
|
|
29 |
paus |
1.5 |
dCacheDoor = 't2srv0012.cmsaf.mit.edu'
|
30 |
|
|
|
31 |
paus |
1.6 |
def Domain():
    """Return the domain part of this machine's node name.

    The node name reported by os.uname() is cut at its dots and everything after the first
    component is glued back together, e.g. 'node.cmsaf.mit.edu' gives 'cmsaf.mit.edu'. A node
    name without any dot yields the empty string.
    """
    nodeName = os.uname()[1]
    return '.'.join(nodeName.split('.')[1:])
|
35 |
|
|
|
36 |
paus |
1.2 |
def Seconds():
    """Return the current time as whole seconds since the Unix epoch.

    The value is the same quantity `date +%s` prints, but it is read directly from the system
    clock: no sub-process is started per call and the function can no longer fail with an
    unbound result when the external command produces no output.
    """
    import time                                      # local import, file header stays untouched
    return int(time.time())
|
40 |
|
|
|
41 |
|
|
def InSkipList(file,list):
    """Tell whether *file* is one of the entries of the skip list *list*.

    Returns True as soon as an entry compares equal to *file*, False otherwise (in particular
    for an empty list).
    """
    return any(entry == file for entry in list)
|
46 |
|
|
|
47 |
|
|
def DecodeSrmLs(line):
    """Decode one '<size> <path>' listing line into [ size, file name ].

    The line is stripped and cut at single blanks; the first field is returned unchanged (as a
    string) as the size and the last '/'-separated component of the second field as the name.
    Raises IndexError if the line has no second field.
    """
    fields = line.strip().split(" ")
    baseName = fields[1].split("/")[-1]
    return [ fields[0], baseName ]
|
55 |
|
|
|
56 |
|
|
def DecodeRfDir(line):
    """Decode one long-format directory listing line into [ size, file name ].

    The line is stripped and cut at single blanks. The last field is taken as the file name and
    the fifth of the remaining fields (index 4) as the size; both are returned as strings.
    Raises IndexError if fewer than five fields precede the name.
    """
    fields = line.strip().split(" ")
    name = fields.pop()                              # last column is the file name
    return [ fields[4], name ]
|
63 |
|
|
|
64 |
|
|
def BuildFileList(cmd):
    """Run the listing command *cmd* and return a dictionary { file name: size }.

    Each non-blank output line is cut at single blanks; the first field is the size (converted
    to int) and the second field the path, of which only the last '/'-separated component is
    kept as dictionary key. The module-level flag `debug` switches on diagnostic printout.

    Raises IndexError for a non-blank line without a second field and ValueError when the size
    field is not an integer.
    """
    fileList = {}

    if debug == 1:
        print(' Debug:: list: ' + cmd)

    pipe = os.popen(cmd)                             # run command
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        # only the line terminator is removed: a last line lacking '\n' keeps its final character
        line = line.rstrip('\n')
        if line.strip() == '':
            continue                                 # blank lines carry no file information
        f = line.split(" ")
        size = f[0]
        file = f[1].split("/")[-1]
        if debug == 1:
            print(' Debug:: adding: ' + file + ' with size ' + size)
        fileList[file] = int(size)

    return fileList
|
91 |
|
|
|
92 |
|
|
def BuildStagedFileList(storagePath,allFileList,cacheFile):
    """Return a dictionary { file name: stager status } for all files in *allFileList*.

    The physical path is whatever follows the last '=' in *storagePath*. When it does not
    contain '/castor/' every file is simply marked 'STAGED'. Otherwise the status is the last
    blank-separated field of the line that starts with the full file name, taken either from
    *cacheFile* (via grep, when the file exists and the module flag `noCache` is 0) or from a
    'stager_qry -M' query. Files for which no such line is found stay 'undefined'.

    The module-level flag `debug` switches on diagnostic printout.
    """
    # initialize the basics
    fileList = {}
    rfPath = storagePath.split("=")[-1]

    # if this is not castor, trick it and mark them as staged
    if not re.search('/castor/',rfPath):
        for file in allFileList:
            fileList[file] = 'STAGED'
        return fileList

    # here we deal with castor
    if debug == 1:
        print(' Debug:: rfpath: ' + rfPath)

    # decide once whether the cache is used, the answer is the same for every file
    useCache = os.path.exists(cacheFile) and noCache == 0
    if useCache:
        print(' Using the cached stager queries at ' + cacheFile)

    for file in allFileList:
        fullFile = rfPath + '/' + file
        if debug == 1:
            print(' Debug:: full file name: ' + fullFile)
        if useCache:
            cmd = 'grep ' + file + ' ' + cacheFile
        else:
            cmd = 'stager_qry -M ' + fullFile

        fileList[file] = 'undefined'
        pipe = os.popen(cmd)                         # run command
        try:
            lines = pipe.readlines()
        finally:
            pipe.close()
        for line in lines:
            # strip only the terminator so an unterminated last line keeps its status intact
            f = line.rstrip('\n').split(" ")
            if f[0] == fullFile:
                fileList[file] = f[-1]

    return fileList
|
128 |
|
|
|
129 |
|
|
def CacheStagedFileList(cacheFile,storagePath,stagedFileList):
    """Write the stager status of all files to *cacheFile* (overwriting it).

    One line '<path>/<file> xyz@castorns <status>' is written per entry of *stagedFileList*,
    where <path> is whatever follows the last '=' in *storagePath*. Full file name first and
    status last is the layout BuildStagedFileList() reads back. Returns None.
    """
    print(' Caching stager query status to ' + cacheFile)
    rfPath = storagePath.split("=")[-1]
    fileOutput = open(cacheFile,'w')
    try:
        for file, status in stagedFileList.items():
            fileOutput.write(rfPath + '/' + file + ' xyz@castorns ' + status + '\n')
    finally:
        # make sure the handle is released even if a write fails
        fileOutput.close()
|
138 |
|
|
|
139 |
paus |
1.5 |
def CopyFile(storageEle,storagePath,storageUrl,file,localDir,fromCern):
    """Copy one remote file into *localDir* and return the time the copy took in seconds.

    The copy command depends on the storage element:
      'srm-cms.cern.ch' (and not *fromCern*) -> rfcp from the path following '=' in storagePath
      'se01.cmsaf.mit.edu'                   -> dccp through the dCache door `dCacheDoor`
      anything else                          -> lcg-cp from *storageUrl*

    NOTE: the size of the file is not a parameter, it is read from the module-level variable
    `size`, which the calling loop sets for the file being processed. A size below one byte
    suppresses the copy and 0 is returned. The module-level flag `debug` prints the command.

    A non-zero exit status of the copy command is reported on stdout; the elapsed time is
    returned in any case.
    """
    deltaT = 0
    print(' working on file: ' + file + ' to ' + localDir +
          ' (size: %d MB) '%(int(size)/1024/1024))

    # localDir is <localPath>/<mitCfg>/<version>/<mitDataset> as set up by the caller
    target = localDir + '/' + file
    rfPath = storagePath.split("=")[-1]

    if storageEle == 'srm-cms.cern.ch' and not fromCern:
        cpy = 'rfcp ' + rfPath + '/' + file + ' ' + target
    elif storageEle == 'se01.cmsaf.mit.edu':
        cpy = 'dccp dcap://' + dCacheDoor + '/' + rfPath + '/' + file + ' ' + target
    else:
        cpy = 'lcg-cp ' + storageUrl + '/' + file + ' file:////' + target

    # Check whether the file size makes sense (zero length files are probably not yet ready to
    # copy and will not be transferred)
    if size < 1:
        print(' WARNING - file size is <1b. Probably this file is not yet ready. Stop copy.')
    else:
        if debug == 1:
            print(' Debug:: copy: ' + cpy)
        start = Seconds()
        status = os.system(cpy)
        end = Seconds()
        deltaT = end - start
        if status != 0:
            # minimal success check of the copy: do not let a failed command pass unnoticed
            print(' ERROR - copy command failed (status: %d): %s'%(status,cpy))

    return deltaT
|
178 |
|
|
|
179 |
paus |
1.5 |
def RecoverFile(storageEle,storagePath,storageUrl,file,localDir):
    """Copy one file from *localDir* back to the remote storage, return the time taken [sec].

    This is the reverse direction of CopyFile(). The copy command depends on the storage
    element:
      'srm-cms.cern.ch'    -> rfcp to the path following '=' in storagePath
      'se01.cmsaf.mit.edu' -> dccp through the dCache door `dCacheDoor` (command always printed)
      anything else        -> lcg-cp to *storageUrl*

    NOTE: the size of the file is not a parameter, it is read from the module-level variable
    `size`, which the calling loop sets for the file being processed. A size below one byte
    suppresses the copy and 0 is returned. The module-level flag `debug` prints the command.

    A non-zero exit status of the copy command is reported on stdout; the elapsed time is
    returned in any case.
    """
    deltaT = 0
    print(' working on file: ' + file + ' from ' + localDir +
          ' (size: %d MB) '%(int(size)/1024/1024))

    # localDir is <localPath>/<mitCfg>/<version>/<mitDataset> as set up by the caller
    source = localDir + '/' + file
    rfPath = storagePath.split("=")[-1]

    if storageEle == 'srm-cms.cern.ch':
        cpy = 'rfcp ' + source + ' ' + rfPath + '/' + file
    elif storageEle == 'se01.cmsaf.mit.edu':
        cpy = 'dccp ' + source + ' dcap://' + dCacheDoor + '/' + rfPath + '/' + file
        print(' using dccp.... ' + cpy)
    else:
        cpy = 'lcg-cp ' + 'file:////' + source + ' ' + storageUrl + '/' + file

    # Check whether the file size makes sense (zero length files are probably not yet ready to
    # copy and will not be transferred)
    if size < 1:
        print(' WARNING - file size is <1b. Probably this file is not yet ready. Stop recovery.')
    else:
        if debug == 1:
            print(' Debug:: copy: ' + cpy)
        start = Seconds()
        status = os.system(cpy)
        end = Seconds()
        deltaT = end - start
        if status != 0:
            # minimal success check of the copy: do not let a failed command pass unnoticed
            print(' ERROR - recovery command failed (status: %d): %s'%(status,cpy))

    return deltaT
|
218 |
|
|
|
219 |
paus |
1.2 |
def StageFile(storagePath,storageUrl,file,fromCern=None):
    """Ask the stager to bring *file* online so that a later run can copy it.

    For the storage element 'srm-cms.cern.ch' (module-level `storageEle`) and not *fromCern* a
    'stager_get -M' request is issued for the path following '=' in *storagePath*. In all other
    cases the lcg-cp command that would copy the file is only echoed, nothing is transferred.

    *fromCern* is optional so that both existing call styles work: the script calls this
    function with four arguments while it used to be declared with three. When the argument is
    omitted the module-level `fromCern` flag is used (False if that is not defined).

    Also reads the module-level names localPath, mitCfg, version, mitDataset and debug.
    """
    if fromCern is None:
        fromCern = globals().get('fromCern',False)

    print(' staging in file: ' + file)
    if storageEle == 'srm-cms.cern.ch' and not fromCern:
        rfPath = storagePath.split("=")[-1]
        stg = 'stager_get -M ' + rfPath + '/' + file
    else:
        stg = 'echo lcg-cp ' + storageUrl + '/' + file + ' file:////' + localPath + '/' \
              + mitCfg + '/' + version + '/' + mitDataset + '/' + file

    if debug == 1:
        print(' Debug:: stage: ' + stg)
    status = os.system(stg)
    if status != 0:
        print(' ERROR - staging command failed (status: %d): %s'%(status,stg))
|
233 |
|
|
|
234 |
|
|
#===================================================================================================
# Main starts here
#===================================================================================================
# Define string to explain usage of the script (every option accepted below is listed here)
usage = "Usage: downloadSample.py --cmsDataset=<name> | --mitDataset=<name>\n"
usage += " --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<pattern>\n"
usage += " --localStorageUrl=<name>\n"
usage += " --localPath=<dir>\n"
usage += " --skip=<file list>\n"
usage += " --noCache\n"
usage += " --fromCern\n"
usage += " --forceCopy\n"
usage += " --backward\n"
usage += " --stopOnError\n"
usage += " --debug\n"
usage += " --test\n"
usage += " --help\n"

# Define the valid options which can be specified and check out the command line
valid = ['cmsDataset=','mitDataset=','mitCfg=','version=','cmssw=','pattern=','localStorageUrl=',
         'localPath=','noCache','skip=',
         'fromCern','forceCopy','backward','stopOnError',
         'debug','test','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # fetch the exception this way to stay independent of the 'except X, e' / 'except X as e'
    # syntax difference between Python versions
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)
|
264 |
|
|
|
265 |
|
|
# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
cmsDataset = None
mitDataset = None
skip = ''
skipList = []
mitCfg = 'filefi'
version = '023'
cmssw = ''
blockLocal = 0                                       # set to 1 once --localPath is given
localStorageUrl = ''
localPath = '/mnt/hadoop/cmsprod'
pattern = ''
noCache = 0
backward = ''                                        # extra option handed to 'sort'
fromCern = False
stopOnError = False
forceCopy = False
debug = 0
test = 0
cmsswCfg = 'cmssw.cfg'

# Read new values from the command line (option names are distinct, so one branch per option)
for opt, arg in opts:
    if opt == '--help':
        print(usage)
        sys.exit(0)
    elif opt == '--cmsDataset':
        cmsDataset = arg
    elif opt == '--mitDataset':
        mitDataset = arg
    elif opt == '--mitCfg':
        mitCfg = arg
    elif opt == '--version':
        version = arg
    elif opt == '--cmssw':
        cmssw = arg
    elif opt == '--pattern':
        pattern = arg
    elif opt == '--localStorageUrl':
        localStorageUrl = arg
    elif opt == '--localPath':
        # an explicit local path must not be overwritten by the Productions file later on
        blockLocal = 1
        localPath = arg
    elif opt == '--skip':
        skip = arg
        skipList = skip.split(',')
    elif opt == '--noCache':
        noCache = 1
    elif opt == '--stopOnError':
        stopOnError = True
    elif opt == '--backward':
        backward = ' -r '
    elif opt == '--fromCern':
        fromCern = True
    elif opt == '--forceCopy':
        forceCopy = True
    elif opt == '--debug':
        debug = 1
    elif opt == '--test':
        test = 1
|
330 |
paus |
1.2 |
|
331 |
|
|
# Deal with obvious problems
if cmsDataset is None and mitDataset is None:
    cmd = '--cmsDataset option not provided. This is required.'
    raise RuntimeError(cmd)

if 'MIT_PROD_DIR' not in os.environ:
    # fail with a readable message instead of a bare KeyError
    raise RuntimeError('Environment variable MIT_PROD_DIR is not set.')

# the storage element table has to exist for this configuration and version
seFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'seTable'
if not os.path.exists(seFile):
    cmd = 'Storage element file not found: %s' % seFile
    raise RuntimeError(cmd)

# the cmssw configuration may exist as 'cmssw.cfg' or, as a fallback, as 'cmssw.py'
cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    cmsswCfg = 'cmssw.py'
    cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        cmd = ' XXXX ERROR no valid configuration found XXXX'
        raise RuntimeError(cmd)
|
349 |
|
|
|
350 |
|
|
# Resolve the other mitCfg parameters from the configuration file
cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

# only one of the two dataset names is used for the lookup, the other one is filled from the file
join = 0
if cmsDataset is None:
    cmsDataset = ''
else:
    mitDataset = ''

fullLine = ''
bSlash = '\\'
for line in os.popen(cmd).readlines():               # run command
    line = line.rstrip('\n')                         # strip the terminator only
    # get rid of empty or commented lines
    if line == '' or line[0] == '#':
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    # the line is complete: columns are <cms dataset> <mit dataset> <nevents> <...> <local path>
    names = fullLine.split()                         # splitting every blank
    if not names:
        continue                                     # line made of blanks only
    if names[0] == cmsDataset:
        mitDataset = names[1]                        # the equivalent MIT name of the dataset
        nevents = int(names[2])                      # number of events used in the production
        if names[4] != "-" and blockLocal == 0:
            localPath = names[4]
    if names[1] == mitDataset:
        cmsDataset = names[0]                        # the equivalent CMS name of the dataset
        nevents = int(names[2])                      # number of events used in the production
        if names[4] != "-" and blockLocal == 0:
            localPath = names[4]

if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(1)                                      # this is a failure, signal it to the caller

# Say what we do now
print('\n Preparing dataset for transfer: ' + cmsDataset + ' [MIT: ' + mitDataset + ']\n')
|
415 |
|
|
|
416 |
|
|
# --------------------------------------------------------------------------------------------------
# Deal with storage element area
# --------------------------------------------------------------------------------------------------
pMitDset = re.compile('XX-MITDATASET-XX')
pMitCfg = re.compile('XX-MITCFG-XX')
pMitVers = re.compile('XX-MITVERSION-XX')

# decide on the foreseen default storage place (where are we running)
storageTag = 'T2_US_MIT'
domain = Domain()
if re.search(r'mit\.edu',domain):
    storageTag = 'T2_US_MIT'
elif re.search(r'cern\.ch',domain):
    storageTag = 'T0_CH_CERN'

# read the storage element description, a line of the form <tag>:<element>:<path>:<user dir>
storageEle = None
storagePath = None
userRemoteDir = None
cmd = 'grep ^' + storageTag + ' ' + seFile
for line in os.popen(cmd).readlines():               # run command
    print(' LINE: ' + line)
    line = line.rstrip('\n')                         # strip '\n'
    line = line.replace(' ','')
    f = line.split(':')
    storageEle = f[1]
    storagePath = f[2]
    userRemoteDir = f[3]
    print(' Storage -- Ele: ' + storageEle
          + ' Path: ' + storagePath + ' UserDir: ' + userRemoteDir)

# Hardwire
if fromCern:
    storageEle = 'srm-cms.cern.ch'
    storagePath = '/srm/managerv2?SFN=/castor/cern.ch'
    userRemoteDir = "/user/p/paus/" + mitCfg + "/" + version + "/" + mitDataset

if storageEle is None:
    # without this test a missing table entry only shows up as a NameError further down
    raise RuntimeError('No storage element entry for ' + storageTag + ' found in ' + seFile)

# determine the storage URL
storageUrl = 'srm://' + storageEle + ':8443' + storagePath
if userRemoteDir != '':
    storagePath += userRemoteDir
    storageUrl += userRemoteDir

# an explicitly given storage URL overrules everything determined so far
if localStorageUrl != '':
    storageEle = ''
    storagePath = ''
    storageUrl = localStorageUrl

print(' --> StorageUrl: ' + storageUrl)
|
460 |
|
|
|
461 |
|
|
#---------------------------------------------------------------------------------------------------
# create the local storage area
#---------------------------------------------------------------------------------------------------
print(' Make local path: ' + localPath)
localDir = localPath + '/' + mitCfg + '/' + version + '/' + mitDataset
mkd = 'mkdir -p ' + localDir
status = os.system(mkd)

if status != 0:
    print(' ERROR - could not create local directory ' + localDir)
    sys.exit(1)

print(' --> LocalDir: ' + localDir)

# determine the free space [bytes] from the last line of the df output
free = 0                                             # stays 0 if df does not produce any output
cmd = 'df --block-size=1 ' + localDir + ' | tr -s \' \' | tail -1'
for line in os.popen(cmd).readlines():               # run command
    line = line.strip()
    f = line.split(" ")
    # A line that starts with the file system name ('/dev/...', 'fuse...') has the available
    # space in the fourth column. Otherwise the name sits on the line before (long names make
    # df wrap) and the available space is the third column.
    if line.startswith('/') or line.startswith('fuse'):
        free = int(f[3])
    else:
        free = int(f[2])
|
483 |
|
|
|
484 |
|
|
#---------------------------------------------------------------------------------------------------
# create a list of all files to be copied
#---------------------------------------------------------------------------------------------------
# remote listing: 'list' on the physical path by default, 'srmls' when reading from CERN
f = storagePath.split('=')
path = f.pop()
cmd = 'list ' + path + ' | grep root | sort ' + backward
if fromCern:
    cmd = 'srmls ' + storageUrl + '|grep root|sort ' + backward + '|tr -s \' \'|cut -d\' \' -f 2-3'

if pattern != "":
    cmd += ' | grep ' + pattern

print(' Find file: ' + cmd)
cacheFile = '/tmp/.cache_' + mitDataset
allFileList = BuildFileList(cmd)
stagedFileList = BuildStagedFileList(storagePath,allFileList,cacheFile)
CacheStagedFileList(cacheFile,storagePath,stagedFileList)

# local listing: the files which already exist in the local dataset directory
cmd = 'list ' + localPath + '/' + mitCfg + '/' + version + '/' + mitDataset + ' | grep root'
print('List: ' + cmd)                                # show the command which is run next
doneFileList = BuildFileList(cmd)
|
513 |
|
|
|
514 |
|
|
#---------------------------------------------------------------------------------------------------
# go through the lists: first check files are consistent, then copy the remaining files
#---------------------------------------------------------------------------------------------------
# initialize data volumes
b2G = 1.0/(1024.*1024.*1024)                         # conversion factor bytes -> GB
nTotal = 0
totalDataVolume = 0
nDone = 0
doneDataVolume = 0

# a file counts as done only if it exists locally with exactly the remote size
for file, size in allFileList.items():
    nTotal += 1
    totalDataVolume += size
    if file not in doneFileList:
        continue
    if doneFileList[file] != size:
        continue
    nDone += 1
    doneDataVolume += size

print(' ')
print(' Summary of data volume\n')
print(' --> number of files to copy: %8d (total: %d) '%(nTotal-nDone,nTotal))
print(' --> volume to copy [GB]: %8.2f (total: %.2f) '%(b2G*(totalDataVolume-doneDataVolume),
                                                        b2G*totalDataVolume))
print(' --> free volume [GB]: %8.2f '%(b2G*free))
print(' ')

# refuse to start unless 85% of the free space covers what is left to copy
if free*0.85 < (totalDataVolume-doneDataVolume):
    print(' ERROR - probably no enough space on volume. See above (some safety assumed)!')
    sys.exit(1)

# NOTE: the loop variable has to be called 'size', RecoverFile() reads it as a module variable
for file, size in doneFileList.items():
    if file not in allFileList:
        # local file without remote counterpart: copy it back to the storage element
        print(' ERROR - file from done list is not in the all files list. File: ' + file)
        print(' RECOVER - File: ' + file)
        sizeMb = size/1024./1024.
        deltaT = RecoverFile(storageEle,storagePath,storageUrl,file,localDir)
        if deltaT > 0:
            print(' time required [sec]: %7d rate [MB/sec]: %9.3f'%
                  (deltaT,sizeMb/deltaT))
        else:
            print(' time required [sec]: %7d rate [MB/sec]: ?'%(deltaT))
        continue

    if allFileList[file] != size:
        print(' ERROR - file sizes did not match: ' + file +
              ' [ local: %10d, remote: %10d ]'%(size,allFileList[file]))
        if stopOnError:
            sys.exit(1)
|
564 |
paus |
1.2 |
|
565 |
|
|
# Copy all files which are not yet available locally and keep track of the performance. The data
# volume totals are not touched here: adding the sizes once more would count every file twice.
# NOTE: the loop variable has to be called 'size', CopyFile() reads it as a module variable.
totalSizeMb = 0.
totalTimeSc = 0.
for file, size in allFileList.items():
    if debug == 1:
        print(' Debug:: ' + file + ' -> size %d'%size)

    if file in doneFileList:
        # presence in the local list is all that is tested here, the sizes are not compared
        print(' --> done, size match: %10d - %s'%(size,file))
        continue

    if InSkipList(file,skipList):
        print(' --> skipping file: %10d - %s'%(size,file))
        continue

    print(' --> copying file: %10d - %s (castor stat: %s)'%
          (size,file,stagedFileList[file]))
    if test == 1:
        print(' testing only.')
    elif stagedFileList[file] == "STAGED" or forceCopy:
        sizeMb = size/1024./1024.
        deltaT = CopyFile(storageEle,storagePath,storageUrl,file,localDir,fromCern)
        if deltaT > 0:
            print(' time required [sec]: %7d rate [MB/sec]: %9.3f'%
                  (deltaT,sizeMb/deltaT))
        else:
            print(' time required [sec]: %7d rate [MB/sec]: ?'%(deltaT))
        totalTimeSc += deltaT
        totalSizeMb += sizeMb
    else:
        # file is not on disk at the storage element: request staging and leave it for a later run
        print(' skipping file: %s'%(stagedFileList[file]))
        # three arguments match the declaration of StageFile(), which reads the fromCern flag
        # from the module itself
        StageFile(storagePath,storageUrl,file)

print('')
if totalTimeSc > 0:
    print(' Performance: volume copied [GB] %9.3f; time [sec] %9d; -> rate [MB/sec] %9.3f'%
          (totalSizeMb/1024.,totalTimeSc,totalSizeMb/totalTimeSc))
else:
    print(' Performance: volume copied [GB] %9.3f; time [sec] %9d; -> rate [MB/sec] ?'%
          (totalSizeMb/1024.,totalTimeSc))
print('')
|