   2     from crab_logger import Logger
   3     from crab_exceptions import *
   4     from crab_util import *
   5  +  from BlackWhiteListParser import BlackWhiteListParser
   6     import common
   6  -  import PsetManipulator
   7  -  import DataDiscovery
   8  -  import DataDiscovery_DBS2
   9  -  import DataLocation
   7     import Scram
   8
   9  <  import os, string, re, shutil, glob
   9  >  import os, string, glob
  10
  11     class Cmssw(JobType):
  12         def __init__(self, cfg_params, ncjobs):
  13             JobType.__init__(self, 'CMSSW')
  14             common.logger.debug(3,'CMSSW::__init__')
  15
  19  -          # Marco.
  16             self._params = {}
  17             self.cfg_params = cfg_params
  18
  19  +          # init BlackWhiteListParser
  20  +          self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
  21  +
  22             try:
  23                 self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
  24             except KeyError:
  25  <              self.MaxTarBallSize = 100.0
  25  >              self.MaxTarBallSize = 9.5
  26
  27             # number of jobs requested to be created, limit obj splitting
  28             self.ncjobs = ncjobs
  35             self.executable = ''
  36             self.executable_arch = self.scram.getArch()
  37             self.tgz_name = 'default.tgz'
  38  +          self.additional_tgz_name = 'additional.tgz'
  39             self.scriptName = 'CMSSW.sh'
  40             self.pset = ''         # script use case (Daniele)
  41             self.datasetPath = ''  # script use case (Daniele)
  44             self.fjrFileName = 'crab_fjr.xml'
  45
  46             self.version = self.scram.getSWVersion()
  47  +
  48  +          #
  49  +          # Try to block creation in case of arch/version mismatch
  50  +          #
  51  +
  52  +          a = string.split(self.version, "_")
  53  +
  54  +          if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
  55  +              msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
  56  +              raise CrabException(msg)
  57  +          if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
  58  +              msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
  59  +              raise CrabException(msg)
  60  +
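A minimal standalone sketch of the arch/version gate added above, assuming release strings of the form CMSSW_major_minor_patch; the helper name is hypothetical and not part of the patch:

def check_arch_compatibility(version, arch):
    # "CMSSW_1_5_2" -> major 1, minor 5
    parts = version.split("_")
    major, minor = int(parts[1]), int(parts[2])
    if major == 1 and minor < 5 and arch.startswith("slc4"):
        raise ValueError("CMS does not support %s with %s architecture" % (version, arch))
    if major == 1 and minor >= 5 and arch.startswith("slc3"):
        raise ValueError("CMS does not support %s with %s architecture" % (version, arch))

check_arch_compatibility("CMSSW_1_5_2", "slc4_ia32_gcc345")  # passes silently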
  61             common.taskDB.setDict('codeVersion',self.version)
  62             self.setParam_('application', self.version)
  63
  65
  66             ## get DBS mode
  67             try:
  68  <              self.use_dbs_2 = int(self.cfg_params['CMSSW.use_dbs_2'])
  68  >              self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
  69             except KeyError:
  70  <              self.use_dbs_2 = 0
  70  >              self.use_dbs_1 = 0
  71
  72             try:
  73                 tmp = cfg_params['CMSSW.datasetpath']
  88                 self.setParam_('dataset', 'None')
  89                 self.setParam_('owner', 'None')
  90             else:
  91  <              datasetpath_split = self.datasetPath.split("/")
  92  <              self.setParam_('dataset', datasetpath_split[1])
  93  <              self.setParam_('owner', datasetpath_split[-1])
  94  <
  91  >              try:
  92  >                  datasetpath_split = self.datasetPath.split("/")
  93  >                  # standard style
  94  >                  self.setParam_('datasetFull', self.datasetPath)
  95  >                  if self.use_dbs_1 == 1 :
  96  >                      self.setParam_('dataset', datasetpath_split[1])
  97  >                      self.setParam_('owner', datasetpath_split[-1])
  98  >                  else:
  99  >                      self.setParam_('dataset', datasetpath_split[1])
 100  >                      self.setParam_('owner', datasetpath_split[2])
 101  >              except:
 102  >                  self.setParam_('dataset', self.datasetPath)
 103  >                  self.setParam_('owner', self.datasetPath)
 104  >
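A sketch of the two path layouts this hunk distinguishes, under the assumption (taken from the surrounding code) that DBS-1 paths carry the owner as the last component while DBS-2 paths carry the analogous field in position 2; the function and sample path are illustrative only:

def dataset_and_owner(dataset_path, use_dbs_1):
    parts = dataset_path.split("/")   # a leading "/" makes parts[0] == ""
    if use_dbs_1:
        return parts[1], parts[-1]
    return parts[1], parts[2]

print(dataset_and_owner("/PrimaryDS/ProcessedDS/TIER", use_dbs_1=False))  # ('PrimaryDS', 'ProcessedDS')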
 105             self.setTaskid_()
 106             self.setParam_('taskId', self.cfg_params['taskId'])
 107
 151                     self.output_file.append(tmp)
 152                     pass
 153                 else:
 154  <                  log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
 154  >                  log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
 155                     pass
 156                 pass
 157             except KeyError:
 158  <              log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
 158  >              log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
 159                 pass
 160
 161             # script_exe file as additional file in inputSandbox
 181                 tmp = string.strip(tmp)
 182                 dirname = ''
 183                 if not tmp[0]=="/": dirname = "."
 184  <              files = glob.glob(os.path.join(dirname, tmp))
 184  >              files = []
 185  >              if string.find(tmp,"*")>-1:
 186  >                  files = glob.glob(os.path.join(dirname, tmp))
 187  >                  if len(files)==0:
 188  >                      raise CrabException("No additional input file found with this pattern: "+tmp)
 189  >              else:
 190  >                  files.append(tmp)
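The new wildcard handling, condensed into a self-contained helper (hypothetical name; the patch inlines this logic):

import glob, os

def expand_additional_files(pattern, dirname="."):
    if "*" in pattern:
        matches = glob.glob(os.path.join(dirname, pattern))
        if not matches:
            raise IOError("No additional input file found with this pattern: " + pattern)
        return matches
    return [pattern]   # plain filename: existence is checked later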
 191                 for file in files:
 192                     if not os.path.exists(file):
 193                         raise CrabException("Additional input file not found: "+file)
 194                     pass
 195  <                  storedFile = common.work_space.shareDir()+file
 196  <                  shutil.copyfile(file, storedFile)
 197  <                  self.additional_inbox_files.append(string.strip(storedFile))
 195  >                  # fname = string.split(file, '/')[-1]
 196  >                  # storedFile = common.work_space.pathForTgz()+'share/'+fname
 197  >                  # shutil.copyfile(file, storedFile)
 198  >                  self.additional_inbox_files.append(string.strip(file))
 199                     pass
 200                 pass
 201             common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
 253             except KeyError:
 254                 self.sourceSeedVtx = None
 255                 common.logger.debug(5,"No vertex seed given")
 256  +
 257  +          try:
 258  +              self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
 259  +          except KeyError:
 260  +              self.sourceSeedG4 = None
 261  +              common.logger.debug(5,"No g4 sim hits seed given")
 262  +
 263  +          try:
 264  +              self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
 265  +          except KeyError:
 266  +              self.sourceSeedMix = None
 267  +              common.logger.debug(5,"No mix seed given")
 268  +
 269             try:
 270                 self.firstRun = int(cfg_params['CMSSW.first_run'])
 271             except KeyError:
 272                 self.firstRun = None
 273                 common.logger.debug(5,"No first run given")
 274             if self.pset != None: #CarlosDaniele
 275  <              self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
 275  >              ver = string.split(self.version,"_")
 276  >              if (int(ver[1])>=1 and int(ver[2])>=5):
 277  >                  import PsetManipulator150 as pp
 278  >              else:
 279  >                  import PsetManipulator as pp
 280  >              PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
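The version-dispatched import above as an isolated sketch; it assumes the two CRAB modules PsetManipulator and PsetManipulator150 are importable, and the wrapper name is invented for illustration:

def load_pset_manipulator(version, pset_file):
    # deferred import: only the module matching the release must be loadable
    ver = version.split("_")
    if int(ver[1]) >= 1 and int(ver[2]) >= 5:
        import PsetManipulator150 as pp
    else:
        import PsetManipulator as pp
    return pp.PsetManipulator(pset_file)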
 281
 282             #DBSDLS-start
 283             ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
 299                     self.jobSplittingForScript()
 300                 else:
 301                     self.jobSplittingNoInput()
 302  <          else:
 302  >          else:
 303                 self.jobSplittingByBlocks(blockSites)
 304
 305             # modify Pset
 307             try:
 308                 if (self.datasetPath): # standard job
 309                     # allow processing a fraction of events in a file
 310  <                  self.PsetEdit.inputModule("INPUT")
 311  <                  self.PsetEdit.maxEvent("INPUTMAXEVENTS")
 312  <                  self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
 310  >                  PsetEdit.inputModule("INPUT")
 311  >                  PsetEdit.maxEvent("INPUTMAXEVENTS")
 312  >                  PsetEdit.skipEvent("INPUTSKIPEVENTS")
 313                 else: # pythia like job
 314  <                  self.PsetEdit.maxEvent(self.eventsPerJob)
 314  >                  PsetEdit.maxEvent(self.eventsPerJob)
 315                     if (self.firstRun):
 316  <                      self.PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
 316  >                      PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
 317                     if (self.sourceSeed) :
 318  <                      self.PsetEdit.pythiaSeed("INPUT")
 318  >                      PsetEdit.pythiaSeed("INPUT")
 319                         if (self.sourceSeedVtx) :
 320  <                          self.PsetEdit.pythiaSeedVtx("INPUTVTX")
 320  >                          PsetEdit.vtxSeed("INPUTVTX")
 321  >                      if (self.sourceSeedG4) :
 322  >                          PsetEdit.g4Seed("INPUTG4")
 323  >                      if (self.sourceSeedMix) :
 324  >                          PsetEdit.mixSeed("INPUTMIX")
 325                 # add FrameworkJobReport to parameter-set
 326  <              self.PsetEdit.addCrabFJR(self.fjrFileName)
 327  <              self.PsetEdit.psetWriter(self.configFilename())
 326  >              PsetEdit.addCrabFJR(self.fjrFileName)
 327  >              PsetEdit.psetWriter(self.configFilename())
 328             except:
 329                 msg='Error while manipulating ParameterSet: exiting...'
 330                 raise CrabException(msg)
 331
 332         def DataDiscoveryAndLocation(self, cfg_params):
 333
 334  +          import DataDiscovery
 335  +          import DataDiscovery_DBS2
 336  +          import DataLocation
 337             common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
 338
 339             datasetPath=self.datasetPath
 340
 341             ## Contact the DBS
 342  <          common.logger.message("Contacting DBS...")
 342  >          common.logger.message("Contacting Data Discovery Services ...")
 343             try:
 344
 345  <              if self.use_dbs_2 == 1 :
 290  <                  self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
 291  <              else :
 345  >              if self.use_dbs_1 == 1 :
 346                     self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
 347  +              else :
 348  +                  self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
 349                 self.pubdata.fetchDBSInfo()
 350
 351             except DataDiscovery.NotExistingDatasetError, ex :
 367                 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
 368                 raise CrabException(msg)
 369
 314  -          ## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner)
 315  -          common.logger.message("Required data are :"+self.datasetPath)
 316  -
 370             self.filesbyblock=self.pubdata.getFiles()
 371             self.eventsbyblock=self.pubdata.getEventsPerBlock()
 372             self.eventsbyfile=self.pubdata.getEventsPerFile()
 373
 374             ## get max number of events
 375             self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
 323  -          common.logger.message("The number of available events is %s\n"%self.maxEvents)
 376
 325  -          common.logger.message("Contacting DLS...")
 377             ## Contact the DLS and build a list of sites hosting the fileblocks
 378             try:
 379                 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
 391                 allSites.append(oneSite)
 392             allSites = self.uniquelist(allSites)
 393
 394  <          common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
 395  <          common.logger.debug(6, "List of Sites: "+str(allSites))
 394  >          # screen output
 395  >          common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
 396  >
 397             return sites
 398
 399         def jobSplittingByBlocks(self, blockSites):
 455             jobCount = 0
 456             list_of_lists = []
 457
 458  +          # list tracking which jobs belong to which block
 459  +          jobsOfBlock = {}
 460  +
 461             # ---- Iterate over the blocks in the dataset until ---- #
 462             # ---- we've met the requested total # of events    ---- #
 463             while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
 464                 block = blocks[blockCount]
 465                 blockCount += 1
 466  +              if block not in jobsOfBlock.keys() :
 467  +                  jobsOfBlock[block] = []
 468
 469                 if self.eventsbyblock.has_key(block) :
 470                     numEventsInBlock = self.eventsbyblock[block]
 514                         common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
 515                         self.jobDestination.append(blockSites[block])
 516                         common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
 517  +                      # fill jobs of block dictionary
 518  +                      jobsOfBlock[block].append(jobCount+1)
 519                         # reset counter
 520                         jobCount = jobCount + 1
 521                         totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
 539                         common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
 540                         self.jobDestination.append(blockSites[block])
 541                         common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
 542  +                      jobsOfBlock[block].append(jobCount+1)
 543                         # reset counter
 544                         jobCount = jobCount + 1
 545                         totalEventCount = totalEventCount + eventsPerJobRequested
 560                         common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
 561                         self.jobDestination.append(blockSites[block])
 562                         common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
 563  +                      jobsOfBlock[block].append(jobCount+1)
 564                         # increase counter
 565                         jobCount = jobCount + 1
 566                         totalEventCount = totalEventCount + eventsPerJobRequested
 578             self.ncjobs = self.total_number_of_jobs = jobCount
 579             if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
 580                 common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
 581  <          common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
 581  >          common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
 582  >
 583  >          # screen output
 584  >          screenOutput = "List of jobs and available destination sites:\n\n"
 585  >
 586  >          # keep track of blocks with no sites, to print a warning at the end
 587  >          noSiteBlock = []
 588  >          bloskNoSite = []
 589  >
 590  >          blockCounter = 0
 591  >          for block in blocks:
 592  >              if block in jobsOfBlock.keys() :
 593  >                  blockCounter += 1
 594  >                  screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
 595  >                  if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
 596  >                      noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
 597  >                      bloskNoSite.append( blockCounter )
 598
 599  +          common.logger.message(screenOutput)
 600  +          if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
 601  +              msg = 'WARNING: No sites are hosting any part of data for block:\n '
 602  +              virgola = ""
 603  +              if len(bloskNoSite) > 1:
 604  +                  virgola = ","
 605  +              for block in bloskNoSite:
 606  +                  msg += ' ' + str(block) + virgola
 607  +              msg += '\n Related jobs:\n '
 608  +              virgola = ""
 609  +              if len(noSiteBlock) > 1:
 610  +                  virgola = ","
 611  +              for range_jobs in noSiteBlock:
 612  +                  msg += str(range_jobs) + virgola
 613  +              msg += '\n will not be submitted and this block of data can not be analyzed!\n'
 614  +              common.logger.message(msg)
 615  +
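A compact sketch of the bookkeeping this hunk adds: map each block to its jobs, then report blocks whose (black/white-list filtered) site list is empty; ', '.join(...) stands in for the manual comma ("virgola") handling, and all names are illustrative:

def blocks_without_sites(jobs_of_block, sites_of_block):
    return [(b, jobs) for b, jobs in jobs_of_block.items() if not sites_of_block.get(b)]

pairs = blocks_without_sites({"b1": [1, 2], "b2": [3]}, {"b1": ["T2_XX"], "b2": []})
warning = ", ".join("block %s (jobs %s)" % (b, j) for b, j in pairs)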
 616             self.list_of_args = list_of_lists
 617             return
 618
 621         Perform job splitting based on number of events per job
 622         """
 623         common.logger.debug(5,'Splitting per events')
 624  <      common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
 625  <      common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
 626  <      common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
 624  >
 625  >      if (self.selectEventsPerJob):
 626  >          common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
 627  >      if (self.selectNumberOfJobs):
 628  >          common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
 629  >      if (self.selectTotalNumberEvents):
 630  >          common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
 631
 632         if (self.total_number_of_events < 0):
 633             msg='Cannot split jobs per Events with "-1" as total number of events'
 661             ## Since there is no input, any site is good
 662             # self.jobDestination.append(["Any"])
 663             self.jobDestination.append([""]) #must be empty to write correctly the xml
 664  <          args=''
 664  >          args=[]
 665             if (self.firstRun):
 666                 ## pythia first run
 667                 #self.list_of_args.append([(str(self.firstRun)+str(i))])
 668  <              args=args+(str(self.firstRun)+str(i))
 668  >              args.append(str(self.firstRun)+str(i))
 669             else:
 670                 ## no first run
 671                 #self.list_of_args.append([str(i)])
 672  <              args=args+str(i)
 672  >              args.append(str(i))
 673             if (self.sourceSeed):
 674  +              args.append(str(self.sourceSeed)+str(i))
 675                 if (self.sourceSeedVtx):
 676  <                  ## pythia + vtx random seed
 677  <                  #self.list_of_args.append([
 678  <                  #     str(self.sourceSeed)+str(i),
 679  <                  #     str(self.sourceSeedVtx)+str(i)
 680  <                  #     ])
 681  <                  args=args+str(',')+str(self.sourceSeed)+str(i)+str(',')+str(self.sourceSeedVtx)+str(i)
 682  <              else:
 683  <                  ## only pythia random seed
 684  <                  #self.list_of_args.append([(str(self.sourceSeed)+str(i))])
 685  <                  args=args +str(',')+str(self.sourceSeed)+str(i)
 686  <          else:
 687  <              ## no random seed
 589  <              if str(args)=='': args=args+(str(self.firstRun)+str(i))
 590  <              arguments=args.split(',')
 591  <              if len(arguments)==3:self.list_of_args.append([str(arguments[0]),str(arguments[1]),str(arguments[2])])
 592  <              elif len(arguments)==2:self.list_of_args.append([str(arguments[0]),str(arguments[1])])
 593  <              else :self.list_of_args.append([str(arguments[0])])
 676  >                  ## + vtx random seed
 677  >                  args.append(str(self.sourceSeedVtx)+str(i))
 678  >              if (self.sourceSeedG4):
 679  >                  ## + G4 random seed
 680  >                  args.append(str(self.sourceSeedG4)+str(i))
 681  >              if (self.sourceSeedMix):
 682  >                  ## + Mix random seed
 683  >                  args.append(str(self.sourceSeedMix)+str(i))
 684  >              pass
 685  >          pass
 686  >          self.list_of_args.append(args)
 687  >          pass
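The list-based argument building that replaces the old comma-joined string, reduced to a sketch (hypothetical helper; seeds default to None, matching the config lookups above):

def build_job_args(i, first_run=None, seed=None, vtx=None, g4=None, mix=None):
    args = []
    args.append(str(first_run) + str(i) if first_run else str(i))
    for s in (seed, vtx, g4, mix):   # each optional seed appends one more argument
        if s:
            args.append(str(s) + str(i))
    return args

assert build_job_args(3, first_run=10, seed=123, vtx=456) == ['103', '1233', '4563']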
 688
 689  <      # print self.list_of_args
 689  >      # print self.list_of_args
 690
 691         return
 692
 781             try: # create tar ball
 782                 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
 783                 ## First find the executable
 784  <              if (executable != ''):
 784  >              if (self.executable != ''):
 785                     exeWithPath = self.scram.findFile_(executable)
 786                     if ( not exeWithPath ):
 787                         raise CrabException('User executable '+executable+' not found')
 791                     # the exe is private, so we must ship
 792                     common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
 793                     path = swArea+'/'
 794  <                  exe = string.replace(exeWithPath, path,'')
 795  <                  tar.add(path+exe,executable)
 794  >                  # distinguish case when script is in user project area or given by full path somewhere else
 795  >                  if exeWithPath.find(path) >= 0 :
 796  >                      exe = string.replace(exeWithPath, path,'')
 797  >                      tar.add(path+exe,exe)
 798  >                  else :
 799  >                      tar.add(exeWithPath,os.path.basename(executable))
 800                     pass
 801                 else:
 802                     # the exe is from release, we'll find it on WN
 827                 pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
 828                 if os.path.isdir(pa):
 829                     tar.add(pa,paDir)
 830  +
 831  +              ### FEDE FOR DBS PUBLICATION
 832  +              ## Add PRODCOMMON dir to tar
 833  +              prodcommonDir = 'ProdCommon'
 834  +              prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
 835  +              if os.path.isdir(prodcommonPath):
 836  +                  tar.add(prodcommonPath,prodcommonDir)
 837  +              #############################
 838
 839                 common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
 840                 tar.close()
 860
 861             return
 862
 863  +      def additionalInputFileTgz(self):
 864  +          """
 865  +          Put all additional files into a tar ball and return its name
 866  +          """
 867  +          import tarfile
 868  +          tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
 869  +          tar = tarfile.open(tarName, "w:gz")
 870  +          for file in self.additional_inbox_files:
 871  +              tar.add(file,string.split(file,'/')[-1])
 872  +          common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
 873  +          tar.close()
 874  +          return tarName
 875  +
 876         def wsSetupEnvironment(self, nj):
 877             """
 878             Returns part of a job script which prepares
 884             ## OLI_Daniele at this level middleware already known
 885
 886             txt += 'if [ $middleware == LCG ]; then \n'
 887  +          txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
 888  +          txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
 889  +          txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
 890             txt += self.wsSetupCMSLCGEnvironment_()
 891             txt += 'elif [ $middleware == OSG ]; then\n'
 892             txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
 893             txt += ' echo "Created working directory: $WORKING_DIR"\n'
 894             txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
 895             txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
 896  <          txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
 897  <          txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
 898  <          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
 896  >          txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
 897  >          txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
 898  >          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
 899             txt += ' rm -f $RUNTIME_AREA/$repo \n'
 900             txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
 901             txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
 905             txt += ' echo "Change to working directory: $WORKING_DIR"\n'
 906             txt += ' cd $WORKING_DIR\n'
 907             txt += self.wsSetupCMSOSGEnvironment_()
 908  +          txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
 909  +          txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
 910             txt += 'fi\n'
 911
 912             # Prepare JobType-specific part
 929             txt += ' cd $RUNTIME_AREA\n'
 930             txt += ' /bin/rm -rf $WORKING_DIR\n'
 931             txt += ' if [ -d $WORKING_DIR ] ;then\n'
 932  <          txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
 933  <          txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
 934  <          txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
 935  <          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
 932  >          txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
 933  >          txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
 934  >          txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
 935  >          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
 936             txt += ' rm -f $RUNTIME_AREA/$repo \n'
 937             txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
 938             txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
 941             txt += ' exit 1 \n'
 942             txt += 'fi \n'
 943             txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
 820  -          txt += 'export SCRAM_ARCH='+self.executable_arch+'\n'
 944             txt += 'cd '+self.version+'\n'
 945  +          ########## FEDE FOR DBS2 ######################
 946  +          txt += 'SOFTWARE_DIR=`pwd`\n'
 947  +          txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
 948  +          ###############################################
 949             ### needed grep for bug in scramv1 ###
 950             txt += scram+' runtime -sh\n'
 951             txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
 971             txt += ' cd $RUNTIME_AREA\n'
 972             txt += ' /bin/rm -rf $WORKING_DIR\n'
 973             txt += ' if [ -d $WORKING_DIR ] ;then\n'
 974  <          txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
 975  <          txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
 976  <          txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
 977  <          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
 974  >          txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
 975  >          txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
 976  >          txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
 977  >          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
 978             txt += ' rm -f $RUNTIME_AREA/$repo \n'
 979             txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
 980             txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
 986
 987             # Prepare job-specific part
 988             job = common.job_list[nj]
 989  +          ### FEDE FOR DBS OUTPUT PUBLICATION
 990  +          if (self.datasetPath):
 991  +              txt += '\n'
 992  +              txt += 'DatasetPath='+self.datasetPath+'\n'
 993  +
 994  +              datasetpath_split = self.datasetPath.split("/")
 995  +
 996  +              txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
 997  +              txt += 'DataTier='+datasetpath_split[2]+'\n'
 998  +              #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
 999  +              txt += 'ApplicationFamily=cmsRun\n'
1000  +
1001  +          else:
1002  +              txt += 'DatasetPath=MCDataTier\n'
1003  +              txt += 'PrimaryDataset=null\n'
1004  +              txt += 'DataTier=null\n'
1005  +              #txt += 'ProcessedDataset=null\n'
1006  +              txt += 'ApplicationFamily=MCDataTier\n'
1007             if self.pset != None: #CarlosDaniele
1008                 pset = os.path.basename(job.configFilename())
1009                 txt += '\n'
1010  +              txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
1011                 if (self.datasetPath): # standard job
1012                     #txt += 'InputFiles=$2\n'
1013                     txt += 'InputFiles=${args[1]}\n'
1014                     txt += 'MaxEvents=${args[2]}\n'
1015                     txt += 'SkipEvents=${args[3]}\n'
1016                     txt += 'echo "Inputfiles:<$InputFiles>"\n'
1017  <                  txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
1017  >                  txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1018                     txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1019  <                  txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
1019  >                  txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1020                     txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1021  <                  txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
1021  >                  txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1022                 else: # pythia like job
1023  <                  if (self.sourceSeed):
1024  <                      txt += 'FirstRun=${args[1]}\n'
1023  >                  seedIndex=1
1024  >                  if (self.firstRun):
1025  >                      txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1026                         txt += 'echo "FirstRun: <$FirstRun>"\n'
1027  <                      txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" $RUNTIME_AREA/'+pset+' > tmp_1.cfg\n'
1028  <                  else:
1029  <                      txt += '# Copy untouched pset\n'
 883  <                      txt += 'cp $RUNTIME_AREA/'+pset+' tmp_1.cfg\n'
1027  >                      txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1028  >                      seedIndex=seedIndex+1
1029  >
1030                     if (self.sourceSeed):
1031  <                      # txt += 'Seed=$2\n'
1032  <                      txt += 'Seed=${args[2]}\n'
1033  <                      txt += 'echo "Seed: <$Seed>"\n'
1034  <                      txt += 'sed "s#\<INPUT\>#$Seed#" tmp_1.cfg > tmp_2.cfg\n'
1031  >                      txt += 'Seed=${args['+str(seedIndex)+']}\n'
1032  >                      txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1033  >                      seedIndex=seedIndex+1
1034  >                      ## the following seeds are not always present
1035                         if (self.sourceSeedVtx):
1036  <                          # txt += 'VtxSeed=$3\n'
 891  <                          txt += 'VtxSeed=${args[3]}\n'
1036  >                          txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1037                             txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1038  <                          txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp_2.cfg > pset.cfg\n'
1039  <                      else:
1040  <                          txt += 'mv tmp_2.cfg pset.cfg\n'
1041  <                  else:
1042  <                      txt += 'mv tmp_1.cfg pset.cfg\n'
1043  <                      # txt += '# Copy untouched pset\n'
1044  <                      # txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
1045  <
1038  >                          txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1039  >                          seedIndex += 1
1040  >                      if (self.sourceSeedG4):
1041  >                          txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1042  >                          txt += 'echo "G4Seed: <$G4Seed>"\n'
1043  >                          txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1044  >                          seedIndex += 1
1045  >                      if (self.sourceSeedMix):
1046  >                          txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1047  >                          txt += 'echo "MixSeed: <$mixSeed>"\n'
1048  >                          txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1049  >                          seedIndex += 1
1050  >                      pass
1051  >                  pass
1052  >              txt += 'mv -f '+pset+' pset.cfg\n'
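Each placeholder is now rewritten in place with the same one-line shell idiom; a sketch of the generator (hypothetical helper) showing the "sed ... > tmp && mv -f tmp ..." pattern the hunk repeats for every optional seed:

def sed_in_place(placeholder, shell_var, pset):
    # emits one line of wrapper script that substitutes the placeholder,
    # then atomically replaces the pset so edits can be chained
    return 'sed "s#' + placeholder + '#$' + shell_var + '#" ' + pset + ' > tmp && mv -f tmp ' + pset + '\n'

line = sed_in_place('\\<INPUTVTX\\>', 'VtxSeed', 'pset.cfg')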
1053
1054             if len(self.additional_inbox_files) > 0:
1055  <              for file in self.additional_inbox_files:
1056  <                  relFile = file.split("/")[-1]
1057  <                  txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
 906  <                  txt += ' cp $RUNTIME_AREA/'+relFile+' .\n'
 907  <                  txt += ' chmod +x '+relFile+'\n'
 908  <                  txt += 'fi\n'
1055  >              txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1056  >              txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1057  >              txt += 'fi\n'
1058             pass
1059
1060             if self.pset != None: #CarlosDaniele
1065                 txt += 'cat pset.cfg\n'
1066                 txt += 'echo "****** end pset.cfg ********"\n'
1067                 txt += '\n'
1068  +              ### FEDE FOR DBS OUTPUT PUBLICATION
1069  +              txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1070  +              txt += 'echo "PSETHASH = $PSETHASH" \n'
1071  +              ##############
1072  +              txt += '\n'
1073                 # txt += 'echo "***** cat pset1.cfg *********"\n'
1074                 # txt += 'cat pset1.cfg\n'
1075                 # txt += 'echo "****** end pset1.cfg ********"\n'
1111             txt += ' echo "Successful untar" \n'
1112             txt += 'fi \n'
1113             txt += '\n'
1114  <          txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1114  >          txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1115             txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1116  <          txt += ' export PYTHONPATH=ProdAgentApi\n'
1116  >          #### FEDE FOR DBS OUTPUT PUBLICATION
1117  >          txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1118  >          #txt += ' export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1119  >          #txt += ' export PYTHONPATH=ProdAgentApi\n'
1120             txt += 'else\n'
1121  <          txt += ' export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1121  >          txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1122  >          #txt += ' export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1123  >          #txt += ' export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1124  >          txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1125  >          ###################
1126             txt += 'fi\n'
1127             txt += '\n'
1128
1145         def executableArgs(self):
1146             if self.scriptExe: #CarlosDaniele
1147                 return self.scriptExe + " $NJob"
1148  <          else:
1149  <              return " -p pset.cfg"
1148  >          else:
1149  >              # if >= CMSSW_1_5_X, add -e
1150  >              version_array = self.scram.getSWVersion().split('_')
1151  >              major = 0
1152  >              minor = 0
1153  >              try:
1154  >                  major = int(version_array[1])
1155  >                  minor = int(version_array[2])
1156  >              except:
1157  >                  msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1158  >                  raise CrabException(msg)
1159  >              if major >= 1 and minor >= 5 :
1160  >                  return " -e -p pset.cfg"
1161  >              else:
1162  >                  return " -p pset.cfg"
1163
1164         def inputSandbox(self, nj):
1165             """
1177             if not self.pset is None:
1178                 inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1179             ## additional input files
1180  <          for file in self.additional_inbox_files:
1181  <              inp_box.append(file)
1180  >          tgz = self.additionalInputFileTgz()
1181  >          inp_box.append(tgz)
1182             return inp_box
1183
1184         def outputSandbox(self, nj):
1208             txt += '# directory content\n'
1209             txt += 'ls \n'
1210
1211  <          for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1211  >          txt += 'output_exit_status=0\n'
1212  >
1213  >          for fileWithSuffix in (self.output_file_sandbox):
1214                 output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1215                 txt += '\n'
1216                 txt += '# check output file\n'
1217  <              txt += 'ls '+fileWithSuffix+'\n'
1218  <              txt += 'ls_result=$?\n'
1219  <              txt += 'if [ $ls_result -ne 0 ] ; then\n'
1220  <              txt += ' echo "ERROR: Problem with output file"\n'
1217  >              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1218  >              txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1219  >              txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1220  >              txt += 'else\n'
1221  >              txt += ' exit_status=60302\n'
1222  >              txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1223                 if common.scheduler.boss_scheduler_name == 'condor_g':
1224                     txt += ' if [ $middleware == OSG ]; then \n'
1225                     txt += ' echo "prepare dummy output file"\n'
1226                     txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1227                     txt += ' fi \n'
1228  +              txt += 'fi\n'
1229  +
1230  +          for fileWithSuffix in (self.output_file):
1231  +              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1232  +              txt += '\n'
1233  +              txt += '# check output file\n'
1234  +              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1235  +              txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1236  +              txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1237                 txt += 'else\n'
1238  <              txt += ' cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1238  >              txt += ' exit_status=60302\n'
1239  >              txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1240  >              txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
1241  >              txt += ' output_exit_status=$exit_status\n'
1242  >              if common.scheduler.boss_scheduler_name == 'condor_g':
1243  >                  txt += ' if [ $middleware == OSG ]; then \n'
1244  >                  txt += ' echo "prepare dummy output file"\n'
1245  >                  txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1246  >                  txt += ' fi \n'
1247                 txt += 'fi\n'
1248  <
1249  <          txt += 'cd $RUNTIME_AREA\n'
1248  >          file_list = []
1249  >          for fileWithSuffix in (self.output_file):
1250  >              file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1251  >
1252  >          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1253             txt += 'cd $RUNTIME_AREA\n'
1056  -          ### OLI_DANIELE
1057  -          txt += 'if [ $middleware == OSG ]; then\n'
1058  -          txt += ' cd $RUNTIME_AREA\n'
1059  -          txt += ' echo "Remove working directory: $WORKING_DIR"\n'
1060  -          txt += ' /bin/rm -rf $WORKING_DIR\n'
1061  -          txt += ' if [ -d $WORKING_DIR ] ;then\n'
1062  -          txt += ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1063  -          txt += ' echo "JOB_EXIT_STATUS = 60999"\n'
1064  -          txt += ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1065  -          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1066  -          txt += ' rm -f $RUNTIME_AREA/$repo \n'
1067  -          txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1068  -          txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1069  -          txt += ' fi\n'
1070  -          txt += 'fi\n'
1071  -          txt += '\n'
1072  -
1073  -          file_list = ''
1074  -          ## Add to filelist only files to be possibly copied to SE
1075  -          for fileWithSuffix in self.output_file:
1076  -              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1077  -              file_list=file_list+output_file_num+' '
1078  -          file_list=file_list[:-1]
1079  -          txt += 'file_list="'+file_list+'"\n'
1080  -
1254             return txt
1255
1256         def numberFile_(self, file, txt):
1261             # take away last extension
1262             name = p[0]
1263             for x in p[1:-1]:
1264  <              name=name+"."+x
1264  >              name=name+"."+x
1265             # add "_txt"
1266             if len(p)>1:
1267  <              ext = p[len(p)-1]
1268  <              result = name + '_' + txt + "." + ext
1267  >              ext = p[len(p)-1]
1268  >              result = name + '_' + txt + "." + ext
1269             else:
1270  <              result = name + '_' + txt
1270  >              result = name + '_' + txt
1271
1272             return result
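numberFile_ in brief: the job tag is spliced in before the last extension, or appended when there is none (standalone restatement for illustration, not patch code):

def number_file(name, tag):
    parts = name.split(".")
    if len(parts) > 1:
        return ".".join(parts[:-1]) + "_" + tag + "." + parts[-1]
    return name + "_" + tag

assert number_file("output.root", "$NJob") == "output_$NJob.root"
assert number_file("logfile", "3") == "logfile_3"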
1273
1280             req='Member("VO-cms-' + \
1281                 self.version + \
1282                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1283  +          ## SL add requirement for OS version only if SL4
1284  +          #reSL4 = re.compile( r'slc4' )
1285  +          if self.executable_arch: # and reSL4.search(self.executable_arch):
1286  +              req+=' && Member("VO-cms-' + \
1287  +                  self.executable_arch + \
1288  +                  '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1289
1290             req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1291
1305             txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
1306             txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1307             txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1308  +          txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1309             txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1310             txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1311             txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1312  +          txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1313             txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1314             txt += ' else\n'
1315             txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325             txt += ' cd $RUNTIME_AREA\n'
1326             txt += ' /bin/rm -rf $WORKING_DIR\n'
1327             txt += ' if [ -d $WORKING_DIR ] ;then\n'
1328  <          txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1329  <          txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1330  <          txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1331  <          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1332  <          txt += ' rm -f $RUNTIME_AREA/$repo \n'
1333  <          txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1334  <          txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1328  >          txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1329  >          txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1330  >          txt += ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1331  >          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1332  >          txt += ' rm -f $RUNTIME_AREA/$repo \n'
1333  >          txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1334  >          txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1335             txt += ' fi\n'
1336             txt += '\n'
1337             txt += ' exit 1\n'
1390             txt += ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1391             return txt
1392
1393  +      ### FEDE FOR DBS OUTPUT PUBLICATION
1394  +      def modifyReport(self, nj):
1395  +          """
1396  +          insert the part of the script that modifies the FrameworkJob Report
1397  +          """
1398  +
1399  +          txt = ''
1400  +          try:
1401  +              publish_data = int(self.cfg_params['USER.publish_data'])
1402  +          except KeyError:
1403  +              publish_data = 0
1404  +          if (publish_data == 1):
1405  +              txt += 'echo "Modify Job Report" \n'
1406  +              #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1407  +              ################ FEDE FOR DBS2 #############################################
1408  +              txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1409  +              #############################################################################
1410  +              #try:
1411  +              #    publish_data = int(self.cfg_params['USER.publish_data'])
1412  +              #except KeyError:
1413  +              #    publish_data = 0
1414  +
1415  +              txt += 'if [ -z "$SE" ]; then\n'
1416  +              txt += ' SE="" \n'
1417  +              txt += 'fi \n'
1418  +              txt += 'if [ -z "$SE_PATH" ]; then\n'
1419  +              txt += ' SE_PATH="" \n'
1420  +              txt += 'fi \n'
1421  +              txt += 'echo "SE = $SE"\n'
1422  +              txt += 'echo "SE_PATH = $SE_PATH"\n'
1423  +
1424  +              #if (publish_data == 1):
1425  +              #processedDataset = self.cfg_params['USER.processed_datasetname']
1426  +              processedDataset = self.cfg_params['USER.publish_data_name']
1427  +              txt += 'ProcessedDataset='+processedDataset+'\n'
1428  +              #### LFN=/store/user/<user>/processedDataset_PSETHASH
1429  +              txt += 'if [ "$SE_PATH" == "" ]; then\n'
1430  +              #### FEDE: added slash in LFN ##############
1431  +              txt += ' FOR_LFN=/copy_problems/ \n'
1432  +              txt += 'else \n'
1433  +              txt += ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1434  +              ##### FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1435  +              txt += ' FOR_LFN=/store$tmp \n'
1436  +              txt += 'fi \n'
1437  +              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1438  +              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1439  +              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1440  +              #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1441  +              txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1442  +              txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1443  +              #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1444  +
1445  +              txt += 'modifyReport_result=$?\n'
1446  +              txt += 'echo modifyReport_result = $modifyReport_result\n'
1447  +              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1448  +              txt += ' exit_status=1\n'
1449  +              txt += ' echo "ERROR: Problem with ModifyJobReport"\n'
1450  +              txt += 'else\n'
1451  +              txt += ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1452  +              txt += 'fi\n'
1453  +          else:
1454  +              txt += 'echo "no data publication required"\n'
1455  +              #txt += 'ProcessedDataset=no_data_to_publish \n'
1456  +              #### FEDE: added slash in LFN ##############
1457  +              #txt += 'FOR_LFN=/local/ \n'
1458  +              #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1459  +              #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1460  +          return txt
1461  +
1462  +      def cleanEnv(self):
1463  +          ### OLI_DANIELE
1464  +          txt = ''
1465  +          txt += 'if [ $middleware == OSG ]; then\n'
1466  +          txt += ' cd $RUNTIME_AREA\n'
1467  +          txt += ' echo "Remove working directory: $WORKING_DIR"\n'
1468  +          txt += ' /bin/rm -rf $WORKING_DIR\n'
1469  +          txt += ' if [ -d $WORKING_DIR ] ;then\n'
1470  +          txt += ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1471  +          txt += ' echo "JOB_EXIT_STATUS = 60999"\n'
1472  +          txt += ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1473  +          txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1474  +          txt += ' rm -f $RUNTIME_AREA/$repo \n'
1475  +          txt += ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1476  +          txt += ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1477  +          txt += ' fi\n'
1478  +          txt += 'fi\n'
1479  +          txt += '\n'
1480  +          return txt
1481  +
1482         def setParam_(self, param, value):
1483             self._params[param] = value
1484
1491         def getTaskid(self):
1492             return self._taskId
1493
1224  -      #######################################################################
1494         def uniquelist(self, old):
1495             """
1496             remove duplicates from a list
1499             for e in old:
1500                 nd[e]=0
1501             return nd.keys()
1502  +
1503  +
1504  +      def checkOut(self, limit):
1505  +          """
1506  +          check the dimension of the output files
1507  +          """
1508  +          txt = 'echo "*****************************************"\n'
1509  +          txt += 'echo "** Starting output sandbox limit check **"\n'
1510  +          txt += 'echo "*****************************************"\n'
1511  +          allOutFiles = ""
1512  +          listOutFiles = []
1513  +          for fileOut in (self.output_file+self.output_file_sandbox):
1514  +              if fileOut.find('crab_fjr') == -1:
1515  +                  allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1516  +                  listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1517  +          txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1518  +          txt += 'ls -gGhrta;\n'
1519  +          txt += 'sum=0;\n'
1520  +          txt += 'for file in '+str(allOutFiles)+' ; do\n'
1521  +          txt += ' if [ -e $file ]; then\n'
1522  +          txt += ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1523  +          txt += ' sum=`expr $sum + $tt`\n'
1524  +          txt += ' else\n'
1525  +          txt += ' echo "WARNING: output file $file not found!"\n'
1526  +          txt += ' fi\n'
1527  +          txt += 'done\n'
1528  +          txt += 'echo "Total Output dimension: $sum";\n'
1529  +          txt += 'limit='+str(limit)+';\n'
1530  +          txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1531  +          txt += 'if [ $limit -lt $sum ]; then\n'
1532  +          txt += ' echo "WARNING: output files are too big - something will be lost;"\n'
1533  +          txt += ' echo " checking the output file sizes..."\n'
1534  +          """
1535  +          txt += ' dim=0;\n'
1536  +          txt += ' exclude=0;\n'
1537  +          txt += ' for files in '+str(allOutFiles)+' ; do\n'
1538  +          txt += ' sumTemp=0;\n'
1539  +          txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1540  +          txt += ' if [ $file != $file2 ]; then\n'
1541  +          txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1542  +          txt += ' sumTemp=`expr $sumTemp + $tt`;\n'
1543  +          txt += ' fi\n'
1544  +          txt += ' done\n'
1545  +          txt += ' if [ $sumTemp -lt $limit ]; then\n'
1546  +          txt += ' if [ $dim -lt $sumTemp ]; then\n'
1547  +          txt += ' dim=$sumTemp;\n'
1548  +          txt += ' exclude=$file;\n'
1549  +          txt += ' fi\n'
1550  +          txt += ' fi\n'
1551  +          txt += ' done\n'
1552  +          txt += ' echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1553  +          """
1554  +          txt += ' tot=0;\n'
1555  +          txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1556  +          txt += ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1557  +          txt += ' tot=`expr $tot + $tt`;\n'
1558  +          txt += ' if [ $limit -lt $tot ]; then\n'
1559  +          txt += ' tot=`expr $tot - $tt`;\n'
1560  +          txt += ' fileLast=$file;\n'
1561  +          txt += ' break;\n'
1562  +          txt += ' fi\n'
1563  +          txt += ' done\n'
1564  +          txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1565  +          txt += ' flag=0;\n'
1566  +          txt += ' for filess in '+str(allOutFiles)+' ; do\n'
1567  +          txt += ' if [ $fileLast = $filess ]; then\n'
1568  +          txt += ' flag=1;\n'
1569  +          txt += ' fi\n'
1570  +          txt += ' if [ $flag -eq 1 ]; then\n'
1571  +          txt += ' rm -f $filess;\n'
1572  +          txt += ' fi\n'
1573  +          txt += ' done\n'
1574  +          txt += ' ls -agGhrt;\n'
1575  +          txt += ' echo "WARNING: output files are too big: they cannot be put in the output_sandbox.";\n'
1576  +          txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
1577  +          txt += ' exit_status=70000;\n'
1578  +          txt += 'else\n'
1579  +          txt += ' echo "Total Output dimension $sum is fine.";\n'
1580  +          txt += 'fi\n'
1581  +          txt += 'echo "*****************************************"\n'
1582  +          txt += 'echo "*** Ending output sandbox limit check ***"\n'
1583  +          txt += 'echo "*****************************************"\n'
1584  +          return txt
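The shell emitted by checkOut() accumulates file sizes and deletes everything from the first file that pushes the running total over the limit; the same policy in plain Python, as an illustrative sketch only:

def files_to_drop(sizes, limit):
    # sizes: ordered list of (filename, bytes)
    total, kept = 0, 0
    for name, size in sizes:
        if total + size > limit:
            return [n for n, _ in sizes[kept:]]
        total += size
        kept += 1
    return []

assert files_to_drop([("a.root", 40), ("b.root", 50), ("c.root", 30)], 100) == ["c.root"]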