ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/downloadSample.py
(Generate patch)

Comparing UserCode/MitProd/Processing/bin/downloadSample.py (file contents):
Revision 1.2 by paus, Sat Jun 5 02:36:28 2010 UTC vs.
Revision 1.6 by paus, Mon Sep 19 21:45:40 2011 UTC

# Line 26 | Line 26
26   #---------------------------------------------------------------------------------------------------
27   import os,sys,getopt,re,string
28  
29 + dCacheDoor = 't2srv0012.cmsaf.mit.edu'
30 +
31 + def Domain():
32 +    domain = os.uname()[1]
33 +    f = domain.split('.')
34 +    return '.'.join(f[1:])
35 +    
36   def Seconds():
37      for secs in os.popen('date +%s').readlines():
38          secs = int(secs[:-1])
# Line 129 | Line 136 | def CacheStagedFileList(cacheFile,storag
136          fileOutput.write(line)
137      fileOutput.close()
138  
139 < def CopyFile(storageEle,storagePath,storageUrl,file,localDir):
139 > def CopyFile(storageEle,storagePath,storageUrl,file,localDir,fromCern):
140      deltaT = 0
141      print '     working on file: ' + file + ' to ' + localDir + \
142            '  (size: %d MB) '%(int(size)/1024/1024)
143 <    if storageEle == 'srm-cms.cern.ch':
143 >    if    storageEle == 'srm-cms.cern.ch' and not fromCern:
144          f = storagePath.split("=");
145          rfPath = f[-1]
146          cpy  = 'rfcp ' + rfPath + '/' + file + ' ' + localPath + '/' \
147                 + mitCfg + '/' + version + '/' + mitDataset + '/' + file
148          #print '     using rfcp.... ' + cpy
149          #sys.exit(0)
150 +    elif storageEle == 'se01.cmsaf.mit.edu':
151 +        f = storagePath.split("=");
152 +        rfPath = f[-1]
153 +        #cpy  = 'scp paus@cgate.mit.edu:' + rfPath + '/' + file + ' ' + localPath + '/' \
154 +        #       + mitCfg + '/' + version + '/' + mitDataset + '/' + file
155 +        cpy  = 'dccp dcap://' + dCacheDoor + '/' \
156 +               + rfPath + '/' + file + ' ' + localPath + '/' \
157 +               + mitCfg + '/' + version + '/' + mitDataset + '/' + file
158 +        #print '     using rfcp.... ' + cpy
159 +        #sys.exit(0)
160      else:
161          #storageUrl = 'srm://' + storageEle + ':8443' + storagePath
162          cpy  = 'lcg-cp ' + storageUrl + '/' + file + ' file:////' + localPath + '/' \
# Line 159 | Line 176 | def CopyFile(storageEle,storagePath,stor
176      
177      return deltaT    
178  
179 + def RecoverFile(storageEle,storagePath,storageUrl,file,localDir):
180 +    deltaT = 0
181 +    print '     working on file: ' + file + ' from ' + localDir + \
182 +          '  (size: %d MB) '%(int(size)/1024/1024)
183 +    if    storageEle == 'srm-cms.cern.ch':
184 +        f = storagePath.split("=");
185 +        rfPath = f[-1]
186 +        cpy  = 'rfcp ' + localPath + '/' + mitCfg + '/' + version + '/' + mitDataset + '/' + file \
187 +               + ' ' + rfPath + '/' + file
188 +        #print '     using rfcp.... ' + cpy
189 +        #sys.exit(0)
190 +    elif storageEle == 'se01.cmsaf.mit.edu':
191 +        f = storagePath.split("=");
192 +        rfPath = f[-1]
193 +        #cpy  = 'scp paus@cgate.mit.edu:' + rfPath + '/' + file + ' ' + localPath + '/' \
194 +        #       + mitCfg + '/' + version + '/' + mitDataset + '/' + file
195 +        cpy  = 'dccp ' + localPath + '/' \
196 +               + mitCfg + '/' + version + '/' + mitDataset + '/' + file \
197 +               + ' dcap://' + dCacheDoor + '/' + rfPath + '/' + file
198 +        print '     using dccp.... ' + cpy
199 +        #sys.exit(0)
200 +    else:
201 +        #storageUrl = 'srm://' + storageEle + ':8443' + storagePath
202 +        cpy  = 'lcg-cp ' + 'file:////' + localPath + '/' + mitCfg + '/' + version + '/' \
203 +               + mitDataset + '/' + file + ' ' + storageUrl + '/' + file
204 +
205 +    # Check whether the file size makes sense (zero-length files are probably not yet ready to
206 +    # copy and will not be transferred)
207 +    if size < 1:
208 +        print ' WARNING - file size is <1b. Probably this file is not yet ready. Stop recovery.'
209 +    else:
210 +        if debug == 1:
211 +            print ' Debug:: copy: ' + cpy
212 +        start = Seconds()
213 +        status = os.system(cpy)
214 +        end = Seconds()
215 +        deltaT = end - start
216 +    
217 +    return deltaT    
218 +
219   def StageFile(storagePath,storageUrl,file):
220      print '     staging in file: ' + file
221 <    if storageEle == 'srm-cms.cern.ch':
221 >    if storageEle == 'srm-cms.cern.ch' and not fromCern:
222          f = storagePath.split("=");
223          rfPath = f[-1]
224          stg  = 'stager_get -M ' + rfPath + '/' + file
# Line 185 | Line 242 | usage += "                         --cms
242   usage += "                         --localStorageUrl=<name>\n"
243   usage += "                         --localPath=<dir>\n"
244   usage += "                         --skip=<file list>\n"
245 + usage += "                         --fromCern\n"
246 + usage += "                         --forceCopy\n"
247   usage += "                         --backward\n"
248 + usage += "                         --stopOnError\n"
249   usage += "                         --debug\n"
250 + usage += "                         --test\n"
251   usage += "                         --help\n"
252  
253   # Define the valid options which can be specified and check out the command line
254   valid = ['cmsDataset=','mitDataset=','mitCfg=','version=','cmssw=','pattern=','localStorageUrl=',
255           'localPath=','noCache','skip=',
256 <         'forceCopy','backward',
257 <         'debug','help']
256 >         'fromCern','forceCopy','backward','stopOnError',
257 >         'debug','test','help']
258   try:
259      opts, args = getopt.getopt(sys.argv[1:], "", valid)
260   except getopt.GetoptError, ex:
# Line 209 | Line 270 | cmsDataset      = None
270   mitDataset      = None
271   skip            = ''
272   skipList        = []
273 < mitCfg          = 'filler'
274 < version         = '012'
273 > mitCfg          = 'filefi'
274 > version         = '023'
275   cmssw           = ''
276   blockLocal      = 0
277   localStorageUrl = ''
278 < localPath       = '/server/02b/mitprod'
278 > localPath       = '/mnt/hadoop/cmsprod'
279   pattern         = ''
280   noCache         = 0
281   backward        = ''
282 + fromCern        = False
283 + stopOnError     = False
284   forceCopy       = False
285   debug           = 0
286 + test            = 0
287   cmsswCfg        = 'cmssw.cfg'
288  
289   # Read new values from the command line
# Line 249 | Line 313 | for opt, arg in opts:
313          skipList        = skip.split(',')
314      if opt == '--noCache':
315          noCache         = 1
316 +    if opt == '--stopOnError':
317 +        stopOnError     = True
318      if opt == '--backward':
319          backward        = ' -r '
320 +    if opt == '--fromCern':
321 +        fromCern        = True
322 +    if opt == '--forceCopy':
323 +        forceCopy       = True
324      if opt == '--forceCopy':
325          forceCopy       = True
326      if opt == '--debug':
327          debug           = 1
328 +    if opt == '--test':
329 +        test            = 1
330  
331   # Deal with obvious problems
332   if cmsDataset == None and mitDataset == None:
333      cmd = '--cmsDataset option not provided. This is required.'
334      raise RuntimeError, cmd
335  
336 < crabFile  = mitCfg + '/' + version + '/' + 'crab.cfg'
337 < if not os.path.exists(crabFile):
338 <    cmd = 'Crab file not found: %s' % crabFile
336 > seFile   = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'seTable'
337 > if not os.path.exists(seFile):
338 >    cmd = 'Storage element file not found: %s' % seFile
339      raise RuntimeError, cmd
340 < cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
340 > cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
341   if not os.path.exists(cmsswFile):
342      cmd = 'Cmssw file not found: %s' % cmsswFile
343      cmsswCfg = 'cmssw.py'
344 <    cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
344 >    cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
345      if not os.path.exists(cmsswFile):
346          cmd = 'Cmssw file not found: %s' % cmsswFile
347          cmd = ' XXXX ERROR no valid configuration found XXXX'
348          raise RuntimeError, cmd
349  
350   # Resolve the other mitCfg parameters from the configuration file
351 < cmd = 'cat ' + mitCfg + '/' + version + '/' + 'Productions'
351 > cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
352   if cmssw != '':
353      cmd = cmd + '.' + cmssw
354  
# Line 347 | Line 419 | print '\n Preparing dataset for transfer
419   pMitDset = re.compile('XX-MITDATASET-XX')
420   pMitCfg  = re.compile('XX-MITCFG-XX')
421   pMitVers = re.compile('XX-MITVERSION-XX')
422 < # find the foreseen storage place
423 < crabFile  = mitCfg + '/' + version + '/' + 'crab.cfg'
424 < cmd = 'grep ^storage_element ' + crabFile
425 < for file in os.popen(cmd).readlines():   # run command
426 <    line        = file[:-1]              # strip '\n'
427 <    # decode the storage element name
428 <    names       = line.split("=")        # splitting every '='
357 <    storageEle  = names.pop()
358 <    storageEle  = re.sub("\s", "",storageEle)
359 < # Compile search and replacement sequences just for the path
360 < cmd = 'grep ^storage_path ' + crabFile
361 < for file in os.popen(cmd).readlines():   # run command
362 <    line        = file[:-1]              # strip '\n'
363 <    line        = pMitDset.sub(mitDataset,line);
364 <    line        = pMitCfg .sub(mitCfg,    line);
365 <    line        = pMitVers.sub(version,   line);
366 <    # decode the storage directory name
367 <    names       = line.split("=")        # splitting every '='
368 <    names       = names[1:]
369 <    storagePath = "=".join(names)
370 <    storagePath = re.sub("\s", "",storagePath)
371 < storageUrl = 'srm://' + storageEle + ':8443' + storagePath
422 > # decide on the foreseen default storage place (where we are running)
423 > storageTag = 'T2_US_MIT'
424 > domain = Domain()
425 > if   re.search('mit.edu',domain):
426 >    storageTag = 'T2_US_MIT'
427 > elif re.search('cern.ch',domain):
428 >    storageTag = 'T0_CH_CERN'
429  
430 < cmd = 'grep ^user_remote_dir ' + crabFile
431 < for file in os.popen(cmd).readlines():   # run command
432 <    line        = file[:-1]              # strip '\n'
433 <    line        = pMitDset.sub(mitDataset,line);
434 <    line        = pMitCfg .sub(mitCfg,    line);
435 <    line        = pMitVers.sub(version,   line);
436 <    # decode the storage directory name
437 <    names       = line.split("=")        # splitting every '='
438 <    names       = names[1:]
439 <    userRemoteDir = "=".join(names)
440 <    userRemoteDir = re.sub("\s","",userRemoteDir)
441 <    userRemoteDir = re.sub("/XX-CRABID-XX","",userRemoteDir)
430 > cmd = 'grep ^' + storageTag + ' ' + seFile
431 > for line in os.popen(cmd).readlines():   # run command
432 >    print ' LINE: ' + line
433 >    line = line[:-1]                     # strip '\n'
434 >    line = line.replace(' ','')
435 >    f = line.split(':')
436 >    storageEle    = f[1]
437 >    storagePath   = f[2]
438 >    userRemoteDir = f[3]
439 >    print ' Storage -- Ele: ' + storageEle \
440 >          + '  Path: ' + storagePath + '  UserDir: ' + userRemoteDir
441 >
442 > # Hardwire
443 > if fromCern:
444 >    storageEle    = 'srm-cms.cern.ch'
445 >    storagePath   = '/srm/managerv2?SFN=/castor/cern.ch'
446 >    userRemoteDir = "/user/p/paus/" + mitCfg + "/" + version + "/" + mitDataset
447  
448 + # determine the storage URL
449 + storageUrl = 'srm://' + storageEle + ':8443' + storagePath
450   if userRemoteDir != '':
451      storagePath += userRemoteDir
452      storageUrl  += userRemoteDir
# Line 424 | Line 488 | cmd = ''
488   f    = storagePath.split('=')
489   path = f.pop()
490   cmd  = 'list ' + path + ' | grep root | sort ' + backward
491 + if fromCern:
492 +    cmd  = 'srmls ' + storageUrl + '|grep root|sort ' + backward + '|tr -s \' \'|cut -d\' \' -f 2-3'
493  
494   ##if storageEle == 'srm.cern.ch' or storageEle == 'srm-cms.cern.ch':
495   ##    cmd  = 'rfdir ' + path + ' | grep root | tr -s \' \' | sort ' + backward
496   ##else:
497   ##    cmd  = 'list ' + path + ' | grep root | sort ' + backward
432 - ##    #cmd  = 'srmls ' + storageUrl + ' | grep root | sort ' + backward
498  
499   if pattern != "":
500      cmd += ' | grep ' + pattern
# Line 481 | Line 546 | for file, size in doneFileList.iteritems
546          if allFileList[file] != size:
547              print ' ERROR - file sizes did not match: ' + file + \
548                    ' [ local: %10d, remote: %10d ]'%(size,allFileList[file])
549 <            sys.exit(1)
549 >            if stopOnError:
550 >                sys.exit(1)
551 >            continue
552      else:
553          print ' ERROR - file from done list is not in the all files list. File: ' + file
554 <        sys.exit(1)
554 >        print ' RECOVER - File: ' + file
555 >        sizeMb = size/1024./1024.
556 >        deltaT = RecoverFile(storageEle,storagePath,storageUrl,file,localDir)
557 >        if deltaT > 0:
558 >            print '     time required [sec]: %7d rate [MB/sec]: %9.3f'%\
559 >                  (deltaT,sizeMb/deltaT)
560 >        else:
561 >            print '     time required [sec]: %7d rate [MB/sec]: ?'%(deltaT)
562 >        
563 >        #sys.exit(1)
564  
565   totalSizeMb = 0.
566   totalTimeSc = 0.
# Line 500 | Line 576 | for file, size in allFileList.iteritems(
576          if not InSkipList(file,skipList):
577              print ' --> copying file:     %10d - %s (castor stat: %s)'% \
578                    (size,file,stagedFileList[file])
579 <            if stagedFileList[file] == "STAGED" or forceCopy:
580 <
579 >            if test == 1:
580 >                print '     testing only.'
581 >            elif stagedFileList[file] == "STAGED" or forceCopy:
582                  sizeMb = size/1024./1024.
583 <                deltaT = CopyFile(storageEle,storagePath,storageUrl,file,localDir)
583 >                deltaT = CopyFile(storageEle,storagePath,storageUrl,file,localDir,fromCern)
584                  if deltaT > 0:
585                      print '     time required [sec]: %7d rate [MB/sec]: %9.3f'%\
586                            (deltaT,sizeMb/deltaT)
# Line 513 | Line 590 | for file, size in allFileList.iteritems(
590                  totalSizeMb += sizeMb
591              else:
592                  print '     skipping file:    %s'%(stagedFileList[file])
593 <                StageFile(storagePath,storageUrl,file)
593 >                StageFile(storagePath,storageUrl,file,fromCern)
594  
595          else:
596              print ' --> skipping file:    %10d - %s'%(size,file)

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines