2 |
|
from crab_logger import Logger |
3 |
|
from crab_exceptions import * |
4 |
|
from crab_util import * |
5 |
< |
from BlackWhiteListParser import BlackWhiteListParser |
5 |
> |
from BlackWhiteListParser import SEBlackWhiteListParser |
6 |
|
import common |
7 |
|
import Scram |
8 |
|
from LFNBaseName import * |
10 |
|
import os, string, glob |
11 |
|
|
12 |
|
class Cmssw(JobType): |
13 |
< |
def __init__(self, cfg_params, ncjobs): |
13 |
> |
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew): |
14 |
|
JobType.__init__(self, 'CMSSW') |
15 |
|
common.logger.debug(3,'CMSSW::__init__') |
16 |
+ |
self.skip_blocks = skip_blocks |
17 |
|
|
18 |
|
self.argsList = [] |
19 |
|
|
20 |
|
self._params = {} |
21 |
|
self.cfg_params = cfg_params |
22 |
|
# init BlackWhiteListParser |
23 |
< |
self.blackWhiteListParser = BlackWhiteListParser(cfg_params) |
23 |
> |
self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params) |
24 |
|
|
25 |
< |
self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5)) |
25 |
> |
### Temporary patch to automatically skip the ISB size check: |
26 |
> |
server=self.cfg_params.get('CRAB.server_name',None) |
27 |
> |
size = 9.5 |
28 |
> |
if server: size = 99999 |
29 |
> |
### D.S. |
30 |
> |
self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',size)) |
31 |
|
|
32 |
|
# number of jobs requested to be created, limit obj splitting |
33 |
|
self.ncjobs = ncjobs |
65 |
|
if not cfg_params.has_key('CMSSW.datasetpath'): |
66 |
|
msg = "Error: datasetpath not defined " |
67 |
|
raise CrabException(msg) |
68 |
+ |
|
69 |
+ |
### Temporary: added to remove input file control in the case of PU |
70 |
+ |
self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None) |
71 |
+ |
|
72 |
|
tmp = cfg_params['CMSSW.datasetpath'] |
73 |
|
log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp) |
74 |
< |
if string.lower(tmp)=='none': |
74 |
> |
|
75 |
> |
if tmp =='': |
76 |
> |
msg = "Error: datasetpath not defined " |
77 |
> |
raise CrabException(msg) |
78 |
> |
elif string.lower(tmp)=='none': |
79 |
|
self.datasetPath = None |
80 |
|
self.selectNoInput = 1 |
81 |
|
else: |
83 |
|
self.selectNoInput = 0 |
84 |
|
|
85 |
|
self.dataTiers = [] |
86 |
< |
|
87 |
< |
self.debug_pset = cfg_params.get('USER.debug_pset',False) |
88 |
< |
|
86 |
> |
self.debugWrap = '' |
87 |
> |
self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False) |
88 |
> |
if self.debug_wrapper: self.debugWrap='--debug' |
89 |
|
## now the application |
90 |
|
self.executable = cfg_params.get('CMSSW.executable','cmsRun') |
91 |
|
log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable) |
108 |
|
self.output_file_sandbox.append(self.fjrFileName) |
109 |
|
|
110 |
|
# other output files to be returned via sandbox or copied to SE |
111 |
+ |
outfileflag = False |
112 |
|
self.output_file = [] |
113 |
|
tmp = cfg_params.get('CMSSW.output_file',None) |
114 |
|
if tmp : |
115 |
< |
tmpOutFiles = string.split(tmp,',') |
116 |
< |
log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles)) |
117 |
< |
for tmp in tmpOutFiles: |
118 |
< |
tmp=string.strip(tmp) |
104 |
< |
self.output_file.append(tmp) |
105 |
< |
pass |
106 |
< |
else: |
107 |
< |
log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n") |
108 |
< |
pass |
115 |
> |
self.output_file = [x.strip() for x in tmp.split(',')] |
116 |
> |
outfileflag = True #output found |
117 |
> |
#else: |
118 |
> |
# log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n") |
119 |
|
|
120 |
|
# script_exe file as additional file in inputSandbox |
121 |
|
self.scriptExe = cfg_params.get('USER.script_exe',None) |
129 |
|
msg ="Error. script_exe not defined" |
130 |
|
raise CrabException(msg) |
131 |
|
|
132 |
+ |
# use parent files... |
133 |
+ |
self.useParent = self.cfg_params.get('CMSSW.use_parent',False) |
134 |
+ |
|
135 |
|
## additional input files |
136 |
|
if cfg_params.has_key('USER.additional_input_files'): |
137 |
|
tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',') |
176 |
|
self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events']) |
177 |
|
self.selectTotalNumberEvents = 1 |
178 |
|
if self.selectNumberOfJobs == 1: |
179 |
< |
if int(self.total_number_of_events) < int(self.theNumberOfJobs): |
179 |
> |
if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs): |
180 |
|
msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs ' |
181 |
|
raise CrabException(msg) |
182 |
|
else: |
206 |
|
tmp.strip() |
207 |
|
self.incrementSeeds.append(tmp) |
208 |
|
|
209 |
< |
## Old method of dealing with seeds |
210 |
< |
## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then |
198 |
< |
## remove |
199 |
< |
self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None) |
200 |
< |
if self.sourceSeed: |
201 |
< |
print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds." |
202 |
< |
self.incrementSeeds.append('sourceSeed') |
203 |
< |
self.incrementSeeds.append('theSource') |
204 |
< |
|
209 |
> |
## FUTURE: Can remove in CRAB 2.4.0 |
210 |
> |
self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None) |
211 |
|
self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None) |
212 |
< |
if self.sourceSeedVtx: |
207 |
< |
print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds." |
208 |
< |
self.incrementSeeds.append('VtxSmeared') |
209 |
< |
|
210 |
< |
self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None) |
211 |
< |
if self.sourceSeedG4: |
212 |
< |
print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds." |
213 |
< |
self.incrementSeeds.append('g4SimHits') |
214 |
< |
|
212 |
> |
self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None) |
213 |
|
self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None) |
214 |
< |
if self.sourceSeedMix: |
215 |
< |
print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds." |
216 |
< |
self.incrementSeeds.append('mix') |
214 |
> |
if self.sourceSeed or self.sourceSeedVtx or self.sourceSeedG4 or self.sourceSeedMix: |
215 |
> |
msg = 'pythia_seed, vtx_seed, g4_seed, and mix_seed are no longer valid settings. You must use increment_seeds or preserve_seeds' |
216 |
> |
raise CrabException(msg) |
217 |
|
|
218 |
|
self.firstRun = cfg_params.get('CMSSW.first_run',None) |
219 |
|
|
222 |
– |
if self.pset != None: #CarlosDaniele |
223 |
– |
import PsetManipulator as pp |
224 |
– |
PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset |
225 |
– |
|
220 |
|
# Copy/return |
227 |
– |
|
221 |
|
self.copy_data = int(cfg_params.get('USER.copy_data',0)) |
222 |
|
self.return_data = int(cfg_params.get('USER.return_data',0)) |
223 |
|
|
233 |
|
blockSites = self.DataDiscoveryAndLocation(cfg_params) |
234 |
|
#DBSDLS-end |
235 |
|
|
243 |
– |
|
236 |
|
## Select Splitting |
237 |
|
if self.selectNoInput: |
238 |
|
if self.pset == None: |
242 |
|
else: |
243 |
|
self.jobSplittingByBlocks(blockSites) |
244 |
|
|
245 |
< |
# modify Pset |
246 |
< |
if self.pset != None: |
247 |
< |
try: |
248 |
< |
# Add FrameworkJobReport to parameter-set, set max events. |
249 |
< |
# Reset later for data jobs by writeCFG which does all modifications |
250 |
< |
PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5 |
251 |
< |
PsetEdit.maxEvent(self.eventsPerJob) |
252 |
< |
PsetEdit.psetWriter(self.configFilename()) |
253 |
< |
except: |
254 |
< |
msg='Error while manipulating ParameterSet: exiting...' |
255 |
< |
raise CrabException(msg) |
256 |
< |
self.tgzNameWithPath = self.getTarBall(self.executable) |
245 |
> |
# modify Pset only the first time |
246 |
> |
if isNew: |
247 |
> |
if self.pset != None: |
248 |
> |
import PsetManipulator as pp |
249 |
> |
PsetEdit = pp.PsetManipulator(self.pset) |
250 |
> |
try: |
251 |
> |
# Add FrameworkJobReport to parameter-set, set max events. |
252 |
> |
# Reset later for data jobs by writeCFG which does all modifications |
253 |
> |
PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5 |
254 |
> |
PsetEdit.maxEvent(self.eventsPerJob) |
255 |
> |
PsetEdit.psetWriter(self.configFilename()) |
256 |
> |
## If present, add TFileService to output files |
257 |
> |
if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)): |
258 |
> |
tfsOutput = PsetEdit.getTFileService() |
259 |
> |
if tfsOutput: |
260 |
> |
if tfsOutput in self.output_file: |
261 |
> |
common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files") |
262 |
> |
else: |
263 |
> |
outfileflag = True #output found |
264 |
> |
self.output_file.append(tfsOutput) |
265 |
> |
common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)") |
266 |
> |
pass |
267 |
> |
pass |
268 |
> |
## If present and requested, add PoolOutputModule to output files |
269 |
> |
if int(cfg_params.get('CMSSW.get_edm_output',0)): |
270 |
> |
edmOutput = PsetEdit.getPoolOutputModule() |
271 |
> |
if edmOutput: |
272 |
> |
if edmOutput in self.output_file: |
273 |
> |
common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files") |
274 |
> |
else: |
275 |
> |
self.output_file.append(edmOutput) |
276 |
> |
common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)") |
277 |
> |
pass |
278 |
> |
pass |
279 |
> |
except CrabException: |
280 |
> |
msg='Error while manipulating ParameterSet: exiting...' |
281 |
> |
raise CrabException(msg) |
282 |
> |
## Prepare inputSandbox TarBall (only the first time) |
283 |
> |
self.tgzNameWithPath = self.getTarBall(self.executable) |
284 |
|
|
285 |
|
def DataDiscoveryAndLocation(self, cfg_params): |
286 |
|
|
293 |
|
## Contact the DBS |
294 |
|
common.logger.message("Contacting Data Discovery Services ...") |
295 |
|
try: |
296 |
< |
self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params) |
296 |
> |
self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks) |
297 |
|
self.pubdata.fetchDBSInfo() |
298 |
|
|
299 |
|
except DataDiscovery.NotExistingDatasetError, ex : |
309 |
|
self.filesbyblock=self.pubdata.getFiles() |
310 |
|
self.eventsbyblock=self.pubdata.getEventsPerBlock() |
311 |
|
self.eventsbyfile=self.pubdata.getEventsPerFile() |
312 |
+ |
self.parentFiles=self.pubdata.getParent() |
313 |
|
|
314 |
|
## get max number of events |
315 |
|
self.maxEvents=self.pubdata.getMaxEvents() |
426 |
|
|
427 |
|
# ---- Iterate over the files in the block until we've met the requested ---- # |
428 |
|
# ---- total # of events or we've gone over all the files in this block ---- # |
429 |
+ |
pString='' |
430 |
|
while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ): |
431 |
|
file = files[fileCount] |
432 |
+ |
if self.useParent: |
433 |
+ |
parent = self.parentFiles[file] |
434 |
+ |
for f in parent : |
435 |
+ |
pString += '\\\"' + f + '\\\"\,' |
436 |
+ |
common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent)) |
437 |
+ |
common.logger.write("File "+str(file)+" has the following parents: "+str(parent)) |
438 |
|
if newFile : |
439 |
|
try: |
440 |
|
numEventsInFile = self.eventsbyfile[file] |
455 |
|
# end job using last file, use remaining events in block |
456 |
|
# close job and touch new file |
457 |
|
fullString = parString[:-2] |
458 |
< |
list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)]) |
458 |
> |
if self.useParent: |
459 |
> |
fullParentString = pString[:-2] |
460 |
> |
list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)]) |
461 |
> |
else: |
462 |
> |
list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)]) |
463 |
|
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).") |
464 |
|
self.jobDestination.append(blockSites[block]) |
465 |
|
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
471 |
|
eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount |
472 |
|
jobSkipEventCount = 0 |
473 |
|
# reset file |
474 |
+ |
pString = "" |
475 |
|
parString = "" |
476 |
|
filesEventCount = 0 |
477 |
|
newFile = 1 |
484 |
|
elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) : |
485 |
|
# close job and touch new file |
486 |
|
fullString = parString[:-2] |
487 |
< |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
487 |
> |
if self.useParent: |
488 |
> |
fullParentString = pString[:-2] |
489 |
> |
list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
490 |
> |
else: |
491 |
> |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
492 |
|
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.") |
493 |
|
self.jobDestination.append(blockSites[block]) |
494 |
|
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
499 |
|
eventsRemaining = eventsRemaining - eventsPerJobRequested |
500 |
|
jobSkipEventCount = 0 |
501 |
|
# reset file |
502 |
+ |
pString = "" |
503 |
|
parString = "" |
504 |
|
filesEventCount = 0 |
505 |
|
newFile = 1 |
509 |
|
else : |
510 |
|
# close job but don't touch new file |
511 |
|
fullString = parString[:-2] |
512 |
< |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
512 |
> |
if self.useParent: |
513 |
> |
fullParentString = pString[:-2] |
514 |
> |
list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
515 |
> |
else: |
516 |
> |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
517 |
|
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.") |
518 |
|
self.jobDestination.append(blockSites[block]) |
519 |
|
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
527 |
|
jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file]) |
528 |
|
# remove all but the last file |
529 |
|
filesEventCount = self.eventsbyfile[file] |
530 |
+ |
if self.useParent: |
531 |
+ |
for f in parent : pString += '\\\"' + f + '\\\"\,' |
532 |
|
parString = '\\\"' + file + '\\\"\,' |
533 |
|
pass # END if |
534 |
|
pass # END while (iterate over files in the block) |
657 |
|
self.list_of_args.append([str(i)]) |
658 |
|
return |
659 |
|
|
660 |
< |
def split(self, jobParams): |
660 |
> |
def split(self, jobParams,firstJobID): |
661 |
|
|
662 |
|
njobs = self.total_number_of_jobs |
663 |
|
arglist = self.list_of_args |
667 |
|
|
668 |
|
listID=[] |
669 |
|
listField=[] |
670 |
< |
for job in range(njobs): |
671 |
< |
jobParams[job] = arglist[job] |
670 |
> |
for id in range(njobs): |
671 |
> |
job = id + int(firstJobID) |
672 |
> |
jobParams[id] = arglist[id] |
673 |
|
listID.append(job+1) |
674 |
|
job_ToSave ={} |
675 |
|
concString = ' ' |
676 |
|
argu='' |
677 |
< |
if len(jobParams[job]): |
678 |
< |
argu += concString.join(jobParams[job] ) |
677 |
> |
if len(jobParams[id]): |
678 |
> |
argu += concString.join(jobParams[id] ) |
679 |
|
job_ToSave['arguments']= str(job+1)+' '+argu |
680 |
< |
job_ToSave['dlsDestination']= self.jobDestination[job] |
680 |
> |
job_ToSave['dlsDestination']= self.jobDestination[id] |
681 |
|
listField.append(job_ToSave) |
682 |
|
msg="Job "+str(job)+" Arguments: "+str(job+1)+" "+argu+"\n" \ |
683 |
< |
+" Destination: "+str(self.jobDestination[job]) |
683 |
> |
+" Destination: "+str(self.jobDestination[id]) |
684 |
|
common.logger.debug(5,msg) |
685 |
|
common._db.updateJob_(listID,listField) |
686 |
|
self.argsList = (len(jobParams[0])+1) |
753 |
|
tar.add(module,moduleDir) |
754 |
|
|
755 |
|
## Now check if any data dir(s) is present |
712 |
– |
swAreaLen=len(swArea) |
756 |
|
self.dataExist = False |
757 |
< |
for root, dirs, files in os.walk(swArea): |
758 |
< |
if "data" in dirs: |
759 |
< |
self.dataExist=True |
760 |
< |
common.logger.debug(5,"data "+root+"/data"+" to be tarred") |
761 |
< |
tar.add(root+"/data",root[swAreaLen:]+"/data") |
757 |
> |
todo_list = [(i, i) for i in os.listdir(swArea+"/src")] |
758 |
> |
while len(todo_list): |
759 |
> |
entry, name = todo_list.pop() |
760 |
> |
if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS': |
761 |
> |
continue |
762 |
> |
if os.path.isdir(swArea+"/src/"+entry): |
763 |
> |
entryPath = entry + '/' |
764 |
> |
todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)] |
765 |
> |
if name == 'data': |
766 |
> |
self.dataExist=True |
767 |
> |
common.logger.debug(5,"data "+entry+" to be tarred") |
768 |
> |
tar.add(swArea+"/src/"+entry,"src/"+entry) |
769 |
> |
pass |
770 |
> |
pass |
771 |
|
|
772 |
|
### CMSSW ParameterSet |
773 |
|
if not self.pset is None: |
777 |
|
|
778 |
|
|
779 |
|
## Add ProdCommon dir to tar |
780 |
< |
prodcommonDir = 'ProdCommon' |
781 |
< |
prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon' |
782 |
< |
if os.path.isdir(prodcommonPath): |
783 |
< |
tar.add(prodcommonPath,prodcommonDir) |
780 |
> |
prodcommonDir = './' |
781 |
> |
prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/' |
782 |
> |
neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv'] |
783 |
> |
for file in neededStuff: |
784 |
> |
tar.add(prodcommonPath+file,prodcommonDir+file) |
785 |
|
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
786 |
|
|
787 |
|
##### ML stuff |
792 |
|
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
793 |
|
|
794 |
|
##### Utils |
795 |
< |
Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py'] |
795 |
> |
Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py'] |
796 |
|
for file in Utils_file_list: |
797 |
|
tar.add(path+file,file) |
798 |
|
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
803 |
|
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
804 |
|
|
805 |
|
tar.close() |
806 |
< |
except : |
807 |
< |
raise CrabException('Could not create tar-ball') |
806 |
> |
except IOError: |
807 |
> |
raise CrabException('Could not create tar-ball '+self.tgzNameWithPath) |
808 |
> |
except tarfile.TarError: |
809 |
> |
raise CrabException('Could not create tar-ball '+self.tgzNameWithPath) |
810 |
|
|
811 |
|
## check for tarball size |
812 |
|
tarballinfo = os.stat(self.tgzNameWithPath) |
885 |
|
txt += 'DatasetPath='+self.datasetPath+'\n' |
886 |
|
|
887 |
|
datasetpath_split = self.datasetPath.split("/") |
888 |
< |
|
888 |
> |
### FEDE FOR NEW LFN ### |
889 |
> |
self.primaryDataset = datasetpath_split[1] |
890 |
> |
######################## |
891 |
|
txt += 'PrimaryDataset='+datasetpath_split[1]+'\n' |
892 |
|
txt += 'DataTier='+datasetpath_split[2]+'\n' |
893 |
|
txt += 'ApplicationFamily=cmsRun\n' |
894 |
|
|
895 |
|
else: |
896 |
|
txt += 'DatasetPath=MCDataTier\n' |
897 |
+ |
### FEDE FOR NEW LFN ### |
898 |
+ |
self.primaryDataset = 'null' |
899 |
+ |
######################## |
900 |
|
txt += 'PrimaryDataset=null\n' |
901 |
|
txt += 'DataTier=null\n' |
902 |
|
txt += 'ApplicationFamily=MCDataTier\n' |
906 |
|
txt += 'cp $RUNTIME_AREA/'+pset+' .\n' |
907 |
|
if (self.datasetPath): # standard job |
908 |
|
txt += 'InputFiles=${args[1]}; export InputFiles\n' |
909 |
< |
txt += 'MaxEvents=${args[2]}; export MaxEvents\n' |
910 |
< |
txt += 'SkipEvents=${args[3]}; export SkipEvents\n' |
909 |
> |
if (self.useParent): |
910 |
> |
txt += 'ParentFiles=${args[2]}; export ParentFiles\n' |
911 |
> |
txt += 'MaxEvents=${args[3]}; export MaxEvents\n' |
912 |
> |
txt += 'SkipEvents=${args[4]}; export SkipEvents\n' |
913 |
> |
else: |
914 |
> |
txt += 'MaxEvents=${args[2]}; export MaxEvents\n' |
915 |
> |
txt += 'SkipEvents=${args[3]}; export SkipEvents\n' |
916 |
|
txt += 'echo "Inputfiles:<$InputFiles>"\n' |
917 |
+ |
if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n' |
918 |
|
txt += 'echo "MaxEvents:<$MaxEvents>"\n' |
919 |
|
txt += 'echo "SkipEvents:<$SkipEvents>"\n' |
920 |
|
else: # pythia like job |
932 |
|
if self.pset != None: |
933 |
|
# FUTURE: Can simply for 2_1_x and higher |
934 |
|
txt += '\n' |
935 |
< |
if self.debug_pset==True: |
935 |
> |
if self.debug_wrapper==True: |
936 |
|
txt += 'echo "***** cat ' + psetName + ' *********"\n' |
937 |
|
txt += 'cat ' + psetName + '\n' |
938 |
|
txt += 'echo "****** end ' + psetName + ' ********"\n' |
939 |
|
txt += '\n' |
940 |
< |
txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n' |
940 |
> |
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
941 |
> |
txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n' |
942 |
> |
else: |
943 |
> |
txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n' |
944 |
|
txt += 'echo "PSETHASH = $PSETHASH" \n' |
945 |
|
txt += '\n' |
946 |
|
return txt |
956 |
|
if os.path.isfile(self.tgzNameWithPath): |
957 |
|
txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n' |
958 |
|
txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n' |
959 |
< |
txt += 'ls -Al \n' |
959 |
> |
if self.debug_wrapper: |
960 |
> |
txt += 'ls -Al \n' |
961 |
|
txt += 'untar_status=$? \n' |
962 |
|
txt += 'if [ $untar_status -ne 0 ]; then \n' |
963 |
|
txt += ' echo "ERROR ==> Untarring .tgz file failed"\n' |
967 |
|
txt += ' echo "Successful untar" \n' |
968 |
|
txt += 'fi \n' |
969 |
|
txt += '\n' |
970 |
< |
txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n' |
970 |
> |
txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n' |
971 |
|
txt += 'if [ -z "$PYTHONPATH" ]; then\n' |
972 |
< |
txt += ' export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n' |
972 |
> |
txt += ' export PYTHONPATH=$RUNTIME_AREA/\n' |
973 |
|
txt += 'else\n' |
974 |
< |
txt += ' export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n' |
974 |
> |
txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n' |
975 |
|
txt += 'echo "PYTHONPATH=$PYTHONPATH"\n' |
976 |
|
txt += 'fi\n' |
977 |
|
txt += '\n' |
998 |
|
if len(self.additional_inbox_files)>0: |
999 |
|
for file in self.additional_inbox_files: |
1000 |
|
txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n' |
1001 |
< |
txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n' |
1001 |
> |
# txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n' |
1002 |
> |
# txt += 'mv $RUNTIME_AREA/IMProv/ . \n' |
1003 |
|
|
1004 |
+ |
txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n' |
1005 |
|
txt += 'if [ -z "$PYTHONPATH" ]; then\n' |
1006 |
< |
txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n' |
1006 |
> |
txt += ' export PYTHONPATH=$RUNTIME_AREA/\n' |
1007 |
|
txt += 'else\n' |
1008 |
< |
txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n' |
1008 |
> |
txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n' |
1009 |
|
txt += 'echo "PYTHONPATH=$PYTHONPATH"\n' |
1010 |
|
txt += 'fi\n' |
1011 |
|
txt += '\n' |
1012 |
|
|
1013 |
|
return txt |
1014 |
|
|
943 |
– |
def modifySteeringCards(self, nj):
    """
    Modify the steering card provided by the user, writing a new card
    into the share directory.

    NOTE(review): the body contains only this docstring, so the method is
    currently a no-op for this job type — presumably kept to satisfy the
    generic JobType interface; confirm against the JobType base class.

    Parameters:
        nj: job index (unused here).
    """
1015 |
|
|
1016 |
|
def executableName(self): |
1017 |
|
if self.scriptExe: |
1056 |
|
## User Declared output files |
1057 |
|
for out in (self.output_file+self.output_file_sandbox): |
1058 |
|
n_out = nj + 1 |
1059 |
< |
out_box.append(self.numberFile_(out,str(n_out))) |
1059 |
> |
out_box.append(numberFile(out,str(n_out))) |
1060 |
|
return out_box |
1061 |
|
|
995 |
– |
def prepareSteeringCards(self):
    """Perform initial modifications of the user's steering card file.

    No-op for this job type: the body simply returns without doing any
    work, matching the original placeholder implementation.
    """
    return
1062 |
|
|
1063 |
|
def wsRenameOutput(self, nj): |
1064 |
|
""" |
1068 |
|
txt = '\n#Written by cms_cmssw::wsRenameOutput\n' |
1069 |
|
txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n' |
1070 |
|
txt += 'echo ">>> current directory content:"\n' |
1071 |
< |
txt += 'ls \n' |
1071 |
> |
if self.debug_wrapper: |
1072 |
> |
txt += 'ls -Al\n' |
1073 |
|
txt += '\n' |
1074 |
|
|
1075 |
|
for fileWithSuffix in (self.output_file): |
1076 |
< |
output_file_num = self.numberFile_(fileWithSuffix, '$NJob') |
1076 |
> |
output_file_num = numberFile(fileWithSuffix, '$NJob') |
1077 |
|
txt += '\n' |
1078 |
|
txt += '# check output file\n' |
1079 |
|
txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n' |
1094 |
|
txt += 'fi\n' |
1095 |
|
file_list = [] |
1096 |
|
for fileWithSuffix in (self.output_file): |
1097 |
< |
file_list.append(self.numberFile_(fileWithSuffix, '$NJob')) |
1097 |
> |
file_list.append(numberFile(fileWithSuffix, '$NJob')) |
1098 |
|
|
1099 |
|
txt += 'file_list="'+string.join(file_list,' ')+'"\n' |
1100 |
|
txt += '\n' |
1101 |
|
txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n' |
1102 |
|
txt += 'echo ">>> current directory content:"\n' |
1103 |
< |
txt += 'ls \n' |
1103 |
> |
if self.debug_wrapper: |
1104 |
> |
txt += 'ls -Al\n' |
1105 |
|
txt += '\n' |
1106 |
|
txt += 'cd $RUNTIME_AREA\n' |
1107 |
|
txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n' |
1108 |
|
return txt |
1109 |
|
|
1046 |
– |
def numberFile_(self, file, txt): |
1047 |
– |
""" |
1048 |
– |
append _'txt' before last extension of a file |
1049 |
– |
""" |
1050 |
– |
p = string.split(file,".") |
1051 |
– |
# take away last extension |
1052 |
– |
name = p[0] |
1053 |
– |
for x in p[1:-1]: |
1054 |
– |
name=name+"."+x |
1055 |
– |
# add "_txt" |
1056 |
– |
if len(p)>1: |
1057 |
– |
ext = p[len(p)-1] |
1058 |
– |
result = name + '_' + txt + "." + ext |
1059 |
– |
else: |
1060 |
– |
result = name + '_' + txt |
1061 |
– |
|
1062 |
– |
return result |
1063 |
– |
|
1110 |
|
def getRequirements(self, nj=[]): |
1111 |
|
""" |
1112 |
|
return job requirements to add to jdl files |
1122 |
|
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
1123 |
|
|
1124 |
|
req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)' |
1125 |
< |
if common.scheduler.name() == "glitecoll": |
1125 |
> |
if ( common.scheduler.name() == "glitecoll" ) or ( common.scheduler.name() == "glite"): |
1126 |
|
req += ' && other.GlueCEStateStatus == "Production" ' |
1127 |
|
|
1128 |
|
return req |
1201 |
|
publish_data = int(self.cfg_params.get('USER.publish_data',0)) |
1202 |
|
if (publish_data == 1): |
1203 |
|
processedDataset = self.cfg_params['USER.publish_data_name'] |
1204 |
< |
LFNBaseName = LFNBase(processedDataset) |
1204 |
> |
if (self.primaryDataset == 'null'): |
1205 |
> |
self.primaryDataset = processedDataset |
1206 |
> |
if (common.scheduler.name().upper() == "CAF" or common.scheduler.name().upper() == "LSF"): |
1207 |
> |
### FEDE FOR NEW LFN ### |
1208 |
> |
LFNBaseName = LFNBase(self.primaryDataset, processedDataset, LocalUser=True) |
1209 |
> |
self.user = getUserName(LocalUser=True) |
1210 |
> |
######################## |
1211 |
> |
else : |
1212 |
> |
### FEDE FOR NEW LFN ### |
1213 |
> |
LFNBaseName = LFNBase(self.primaryDataset, processedDataset) |
1214 |
> |
self.user = getUserName() |
1215 |
> |
######################## |
1216 |
|
|
1217 |
|
txt += 'if [ $copy_exit_status -eq 0 ]; then\n' |
1218 |
< |
txt += ' FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName) |
1218 |
> |
### FEDE FOR NEW LFN ### |
1219 |
> |
#txt += ' FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName) |
1220 |
> |
txt += ' FOR_LFN=%s/${PSETHASH}/\n'%(LFNBaseName) |
1221 |
> |
######################## |
1222 |
|
txt += 'else\n' |
1223 |
|
txt += ' FOR_LFN=/copy_problems/ \n' |
1224 |
|
txt += ' SE=""\n' |
1226 |
|
txt += 'fi\n' |
1227 |
|
|
1228 |
|
txt += 'echo ">>> Modify Job Report:" \n' |
1229 |
< |
txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n' |
1229 |
> |
txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n' |
1230 |
|
txt += 'ProcessedDataset='+processedDataset+'\n' |
1231 |
|
txt += 'echo "ProcessedDataset = $ProcessedDataset"\n' |
1232 |
|
txt += 'echo "SE = $SE"\n' |
1233 |
|
txt += 'echo "SE_PATH = $SE_PATH"\n' |
1234 |
|
txt += 'echo "FOR_LFN = $FOR_LFN" \n' |
1235 |
|
txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n' |
1236 |
< |
txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n' |
1237 |
< |
txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n' |
1236 |
> |
### FEDE FOR NEW LFN ### |
1237 |
> |
txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' + self.user + '-$ProcessedDataset-$PSETHASH $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n' |
1238 |
> |
txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' + self.user + '-$ProcessedDataset-$PSETHASH $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n' |
1239 |
> |
######################## |
1240 |
|
txt += 'modifyReport_result=$?\n' |
1241 |
|
txt += 'if [ $modifyReport_result -ne 0 ]; then\n' |
1242 |
|
txt += ' modifyReport_result=70500\n' |
1256 |
|
txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n' |
1257 |
|
txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n' |
1258 |
|
txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n' |
1259 |
< |
txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --MonitorID $MonitorID --MonitorJobID $MonitorJobID`\n' |
1260 |
< |
txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n' |
1261 |
< |
txt += ' tmp_executable_exit_status=`echo $cmd_out | awk -F\; \'{print $1}\' | awk -F \' \' \'{print $NF}\'`\n' |
1262 |
< |
txt += ' if [ -n $tmp_executable_exit_status ];then\n' |
1201 |
< |
txt += ' executable_exit_status=$tmp_executable_exit_status\n' |
1202 |
< |
txt += ' fi\n' |
1259 |
> |
txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n' |
1260 |
> |
if self.debug_wrapper : |
1261 |
> |
txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n' |
1262 |
> |
txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n' |
1263 |
|
txt += ' if [ $executable_exit_status -eq 50115 ];then\n' |
1264 |
|
txt += ' echo ">>> crab_fjr.xml contents: "\n' |
1265 |
< |
txt += ' cat $RUNTIME_AREA/crab_fjr_NJob.xml\n' |
1265 |
> |
txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n' |
1266 |
|
txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n' |
1267 |
+ |
txt += ' elif [ $executable_exit_status -eq -999 ];then\n' |
1268 |
+ |
txt += ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n' |
1269 |
|
txt += ' else\n' |
1270 |
|
txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n' |
1271 |
|
txt += ' fi\n' |
1274 |
|
txt += ' fi\n' |
1275 |
|
#### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap |
1276 |
|
|
1277 |
< |
if self.datasetPath: |
1277 |
> |
txt += ' if [ $executable_exit_status -eq 0 ];then\n' |
1278 |
> |
txt += ' echo ">>> Executable succeded $executable_exit_status"\n' |
1279 |
> |
if (self.datasetPath and not (self.dataset_pu or self.useParent)) : |
1280 |
|
# VERIFY PROCESSED DATA |
1217 |
– |
txt += ' if [ $executable_exit_status -eq 0 ];then\n' |
1281 |
|
txt += ' echo ">>> Verify list of processed files:"\n' |
1282 |
< |
txt += ' echo $InputFiles |tr -d "\\\\" |tr "," "\\n"|tr -d "\\"" > input-files.txt\n' |
1283 |
< |
txt += ' grep LFN $RUNTIME_AREA/crab_fjr_$NJob.xml |cut -d">" -f2|cut -d"<" -f1|grep "/" > processed-files.txt\n' |
1282 |
> |
txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n' |
1283 |
> |
txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n' |
1284 |
|
txt += ' cat input-files.txt | sort | uniq > tmp.txt\n' |
1285 |
|
txt += ' mv tmp.txt input-files.txt\n' |
1286 |
|
txt += ' echo "cat input-files.txt"\n' |
1301 |
|
txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n' |
1302 |
|
txt += ' echo " ==> diff input-files.txt processed-files.txt"\n' |
1303 |
|
txt += ' fi\n' |
1304 |
< |
txt += ' fi\n' |
1305 |
< |
txt += '\n' |
1304 |
> |
txt += ' elif [ $executable_exit_status -ne 0 ] || [ $executable_exit_status -ne 50015 ] || [ $executable_exit_status -ne 50017 ];then\n' |
1305 |
> |
txt += ' echo ">>> Executable failed $executable_exit_status"\n' |
1306 |
> |
txt += ' func_exit\n' |
1307 |
> |
txt += ' fi\n' |
1308 |
> |
txt += '\n' |
1309 |
|
txt += 'else\n' |
1310 |
|
txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n' |
1311 |
|
txt += 'fi\n' |
1342 |
|
stderr = 'CMSSW_$NJob.stderr' |
1343 |
|
if (self.return_data == 1): |
1344 |
|
for file in (self.output_file+self.output_file_sandbox): |
1345 |
< |
listOutFiles.append(self.numberFile_(file, '$NJob')) |
1345 |
> |
listOutFiles.append(numberFile(file, '$NJob')) |
1346 |
|
listOutFiles.append(stdout) |
1347 |
|
listOutFiles.append(stderr) |
1348 |
|
else: |
1349 |
|
for file in (self.output_file_sandbox): |
1350 |
< |
listOutFiles.append(self.numberFile_(file, '$NJob')) |
1350 |
> |
listOutFiles.append(numberFile(file, '$NJob')) |
1351 |
|
listOutFiles.append(stdout) |
1352 |
|
listOutFiles.append(stderr) |
1353 |
|
txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n' |