[ViewVC] Diff of: cvsroot/COMP/CRAB/python/cms

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.64 by slacapra, Thu Jan 18 18:29:51 2007 UTC vs.
Revision 1.137 by slacapra, Fri Nov 16 11:09:31 2007 UTC

#	Line 2 \| Line 2 \| from JobType import JobType
2		from crab_logger import Logger
3		from crab_exceptions import *
4		from crab_util import *
5	+	from BlackWhiteListParser import BlackWhiteListParser
6		import common
6	–	import PsetManipulator
7	–
8	–	import DataDiscovery
9	–	import DataLocation
7		import Scram
8
9	<	import os, string, re, shutil
9	>	import os, string, glob
10
11		class Cmssw(JobType):
12		def __init__(self, cfg_params, ncjobs):
13		JobType.__init__(self, 'CMSSW')
14		common.logger.debug(3,'CMSSW::__init__')
15
19	–	# Marco.
16		self._params = {}
17		self.cfg_params = cfg_params
18
19	+	# init BlackWhiteListParser
20	+	self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21	+
22	+	try:
23	+	self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24	+	except KeyError:
25	+	self.MaxTarBallSize = 9.5
26	+
27		# number of jobs requested to be created, limit obj splitting
28		self.ncjobs = ncjobs
29
30		log = common.logger
31	<
31	>
32		self.scram = Scram.Scram(cfg_params)
33		self.additional_inbox_files = []
34		self.scriptExe = ''
35		self.executable = ''
36	+	self.executable_arch = self.scram.getArch()
37		self.tgz_name = 'default.tgz'
38	+	self.additional_tgz_name = 'additional.tgz'
39		self.scriptName = 'CMSSW.sh'
40	<	self.pset = '' #scrip use case Da
40	>	self.pset = '' #scrip use case Da
41		self.datasetPath = '' #scrip use case Da
42
43		# set FJR file name
44		self.fjrFileName = 'crab_fjr.xml'
45
46		self.version = self.scram.getSWVersion()
47	+
48	+	#
49	+	# Try to block creation in case of arch/version mismatch
50	+	#
51	+
52	+	a = string.split(self.version, "_")
53	+
54	+	if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55	+	msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
56	+	common.logger.message(msg)
57	+	if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58	+	msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59	+	raise CrabException(msg)
60	+
61		common.taskDB.setDict('codeVersion',self.version)
62		self.setParam_('application', self.version)
63
64		### collect Data cards
65	+
66		try:
67		tmp = cfg_params['CMSSW.datasetpath']
68		log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
#	Line 52 \| Line 73 \| class Cmssw(JobType):
73		self.datasetPath = tmp
74		self.selectNoInput = 0
75		except KeyError:
76	<	msg = "Error: datasetpath not defined "
76	>	msg = "Error: datasetpath not defined "
77		raise CrabException(msg)
78
79		# ML monitoring
#	Line 61 \| Line 82 \| class Cmssw(JobType):
82		self.setParam_('dataset', 'None')
83		self.setParam_('owner', 'None')
84		else:
85	<	datasetpath_split = self.datasetPath.split("/")
86	<	self.setParam_('dataset', datasetpath_split[1])
87	<	self.setParam_('owner', datasetpath_split[-1])
85	>	try:
86	>	datasetpath_split = self.datasetPath.split("/")
87	>	# standard style
88	>	self.setParam_('datasetFull', self.datasetPath)
89	>	self.setParam_('dataset', datasetpath_split[1])
90	>	self.setParam_('owner', datasetpath_split[2])
91	>	except:
92	>	self.setParam_('dataset', self.datasetPath)
93	>	self.setParam_('owner', self.datasetPath)
94
95		self.setTaskid_()
96		self.setParam_('taskId', self.cfg_params['taskId'])
#	Line 87 \| Line 114 \| class Cmssw(JobType):
114		try:
115		self.pset = cfg_params['CMSSW.pset']
116		log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
117	<	if self.pset.lower() != 'none' :
117	>	if self.pset.lower() != 'none' :
118		if (not os.path.exists(self.pset)):
119		raise CrabException("User defined PSet file "+self.pset+" does not exist")
120		else:
#	Line 114 \| Line 141 \| class Cmssw(JobType):
141		self.output_file.append(tmp)
142		pass
143		else:
144	<	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
144	>	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
145		pass
146		pass
147		except KeyError:
148	<	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
148	>	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
149		pass
150
151		# script_exe file as additional file in inputSandbox
#	Line 131 \| Line 158 \| class Cmssw(JobType):
158		self.additional_inbox_files.append(string.strip(self.scriptExe))
159		except KeyError:
160		self.scriptExe = ''
161	+
162		#CarlosDaniele
163		if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
164	<	msg ="WARNING. script_exe not defined"
164	>	msg ="Error. script_exe not defined"
165		raise CrabException(msg)
166
167		## additional input files
168		try:
169		tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
170	<	common.logger.debug(5,"Additional input files: "+str(tmpAddFiles))
171	<	for tmpFile in tmpAddFiles:
172	<	tmpFile = string.strip(tmpFile)
173	<	if not os.path.exists(tmpFile):
174	<	raise CrabException("Additional input file not found: "+tmpFile)
170	>	for tmp in tmpAddFiles:
171	>	tmp = string.strip(tmp)
172	>	dirname = ''
173	>	if not tmp[0]=="/": dirname = "."
174	>	files = []
175	>	if string.find(tmp,"*")>-1:
176	>	files = glob.glob(os.path.join(dirname, tmp))
177	>	if len(files)==0:
178	>	raise CrabException("No additional input file found with this pattern: "+tmp)
179	>	else:
180	>	files.append(tmp)
181	>	for file in files:
182	>	if not os.path.exists(file):
183	>	raise CrabException("Additional input file not found: "+file)
184		pass
185	<	storedFile = common.work_space.shareDir()+ tmpFile
186	<	shutil.copyfile(tmpFile, storedFile)
187	<	self.additional_inbox_files.append(string.strip(storedFile))
185	>	# fname = string.split(file, '/')[-1]
186	>	# storedFile = common.work_space.pathForTgz()+'share/'+fname
187	>	# shutil.copyfile(file, storedFile)
188	>	self.additional_inbox_files.append(string.strip(file))
189		pass
152	–	common.logger.debug(5,"Inbox files so far : "+str(self.additional_inbox_files))
190		pass
191	+	common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
192		except KeyError:
193		pass
194
#	Line 168 \| Line 206 \| class Cmssw(JobType):
206		except KeyError:
207		self.eventsPerJob = -1
208		self.selectEventsPerJob = 0
209	<
209	>
210		## number of jobs
211		try:
212		self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
#	Line 184 \| Line 222 \| class Cmssw(JobType):
222		self.total_number_of_events = 0
223		self.selectTotalNumberEvents = 0
224
225	<	if self.pset != None: #CarlosDaniele
225	>	if self.pset != None: #CarlosDaniele
226		if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
227		msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
228		raise CrabException(msg)
#	Line 205 \| Line 243 \| class Cmssw(JobType):
243		except KeyError:
244		self.sourceSeedVtx = None
245		common.logger.debug(5,"No vertex seed given")
246	+
247	+	try:
248	+	self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
249	+	except KeyError:
250	+	self.sourceSeedG4 = None
251	+	common.logger.debug(5,"No g4 sim hits seed given")
252	+
253	+	try:
254	+	self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
255	+	except KeyError:
256	+	self.sourceSeedMix = None
257	+	common.logger.debug(5,"No mix seed given")
258	+
259		try:
260		self.firstRun = int(cfg_params['CMSSW.first_run'])
261		except KeyError:
262		self.firstRun = None
263		common.logger.debug(5,"No first run given")
264		if self.pset != None: #CarlosDaniele
265	<	self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
265	>	import PsetManipulator as pp
266	>	PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
267
268		#DBSDLS-start
269	<	## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
269	>	## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
270		self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
271		self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
272		self.jobDestination=[] # Site destination(s) for each job (list of lists)
#	Line 223 \| Line 275 \| class Cmssw(JobType):
275		blockSites = {}
276		if self.datasetPath:
277		blockSites = self.DataDiscoveryAndLocation(cfg_params)
278	<	#DBSDLS-end
278	>	#DBSDLS-end
279
280		self.tgzNameWithPath = self.getTarBall(self.executable)
281	<
281	>
282		## Select Splitting
283	<	if self.selectNoInput:
283	>	if self.selectNoInput:
284		if self.pset == None: #CarlosDaniele
285		self.jobSplittingForScript()
286		else:
287		self.jobSplittingNoInput()
288	<	else:
288	>	else:
289		self.jobSplittingByBlocks(blockSites)
290
291		# modify Pset
#	Line 241 \| Line 293 \| class Cmssw(JobType):
293		try:
294		if (self.datasetPath): # standard job
295		# allow to processa a fraction of events in a file
296	<	self.PsetEdit.inputModule("INPUT")
297	<	self.PsetEdit.maxEvent("INPUTMAXEVENTS")
298	<	self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
296	>	PsetEdit.inputModule("INPUTFILE")
297	>	PsetEdit.maxEvent(0)
298	>	PsetEdit.skipEvent(0)
299		else: # pythia like job
300	<	self.PsetEdit.maxEvent(self.eventsPerJob)
300	>	PsetEdit.maxEvent(self.eventsPerJob)
301		if (self.firstRun):
302	<	self.PsetEdit.pythiaFirstRun("INPUTFIRSTRUN") #First Run
302	>	PsetEdit.pythiaFirstRun(0) #First Run
303		if (self.sourceSeed) :
304	<	self.PsetEdit.pythiaSeed("INPUT")
304	>	PsetEdit.pythiaSeed(0)
305		if (self.sourceSeedVtx) :
306	<	self.PsetEdit.pythiaSeedVtx("INPUTVTX")
306	>	PsetEdit.vtxSeed(0)
307	>	if (self.sourceSeedG4) :
308	>	PsetEdit.g4Seed(0)
309	>	if (self.sourceSeedMix) :
310	>	PsetEdit.mixSeed(0)
311		# add FrameworkJobReport to parameter-set
312	<	self.PsetEdit.addCrabFJR(self.fjrFileName)
313	<	self.PsetEdit.psetWriter(self.configFilename())
312	>	PsetEdit.addCrabFJR(self.fjrFileName)
313	>	PsetEdit.psetWriter(self.configFilename())
314		except:
315		msg='Error while manipuliating ParameterSet: exiting...'
316		raise CrabException(msg)
317
318		def DataDiscoveryAndLocation(self, cfg_params):
319
320	+	import DataDiscovery
321	+	import DataLocation
322		common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
323
324		datasetPath=self.datasetPath
325
268	–	## TODO
269	–	dataTiersList = ""
270	–	dataTiers = dataTiersList.split(',')
271	–
326		## Contact the DBS
327	<	common.logger.message("Contacting DBS...")
327	>	common.logger.message("Contacting Data Discovery Services ...")
328		try:
329	<	self.pubdata=DataDiscovery.DataDiscovery(datasetPath, dataTiers, cfg_params)
329	>
330	>	self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
331		self.pubdata.fetchDBSInfo()
332
333		except DataDiscovery.NotExistingDatasetError, ex :
334		msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
335		raise CrabException(msg)
281	–
336		except DataDiscovery.NoDataTierinProvenanceError, ex :
337		msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338		raise CrabException(msg)
339		except DataDiscovery.DataDiscoveryError, ex:
340	<	msg = 'ERROR ***: failed Data Discovery in DBS %s'%ex.getErrorMessage()
340	>	msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
341		raise CrabException(msg)
342
289	–	## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner)
290	–	## self.DBSPaths=self.pubdata.getDBSPaths()
291	–	common.logger.message("Required data are :"+self.datasetPath)
292	–
343		self.filesbyblock=self.pubdata.getFiles()
344		self.eventsbyblock=self.pubdata.getEventsPerBlock()
345		self.eventsbyfile=self.pubdata.getEventsPerFile()
296	–	# print str(self.filesbyblock)
297	–	# print 'self.eventsbyfile',len(self.eventsbyfile)
298	–	# print str(self.eventsbyfile)
346
347		## get max number of events
348	<	self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
302	<	common.logger.message("The number of available events is %s\n"%self.maxEvents)
348	>	self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
349
304	–	common.logger.message("Contacting DLS...")
350		## Contact the DLS and build a list of sites hosting the fileblocks
351		try:
352		dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
#	Line 309 \| Line 354 \| class Cmssw(JobType):
354		except DataLocation.DataLocationError , ex:
355		msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
356		raise CrabException(msg)
357	<
357	>
358
359		sites = dataloc.getSites()
360		allSites = []
#	Line 319 \| Line 364 \| class Cmssw(JobType):
364		allSites.append(oneSite)
365		allSites = self.uniquelist(allSites)
366
367	<	common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
368	<	common.logger.debug(6, "List of Sites: "+str(allSites))
367	>	# screen output
368	>	common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
369	>
370		return sites
371	<
371	>
372		def jobSplittingByBlocks(self, blockSites):
373		"""
374		Perform job splitting. Jobs run over an integer number of files
#	Line 372 \| Line 418 \| class Cmssw(JobType):
418		totalNumberOfJobs = 999999999
419		else :
420		totalNumberOfJobs = self.ncjobs
421	<
421	>
422
423		blocks = blockSites.keys()
424		blockCount = 0
#	Line 382 \| Line 428 \| class Cmssw(JobType):
428		jobCount = 0
429		list_of_lists = []
430
431	+	# list tracking which jobs are in which jobs belong to which block
432	+	jobsOfBlock = {}
433	+
434		# ---- Iterate over the blocks in the dataset until ---- #
435		# ---- we've met the requested total # of events ---- #
436		while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
437		block = blocks[blockCount]
438		blockCount += 1
439	<
439	>	if block not in jobsOfBlock.keys() :
440	>	jobsOfBlock[block] = []
441
442	<	numEventsInBlock = self.eventsbyblock[block]
443	<	common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
444	<
445	<	files = self.filesbyblock[block]
446	<	numFilesInBlock = len(files)
447	<	if (numFilesInBlock <= 0):
448	<	continue
449	<	fileCount = 0
450	<
451	<	# ---- New block => New job ---- #
452	<	parString = "\\{"
453	<	# counter for number of events in files currently worked on
454	<	filesEventCount = 0
455	<	# flag if next while loop should touch new file
456	<	newFile = 1
457	<	# job event counter
458	<	jobSkipEventCount = 0
459	<
460	<	# ---- Iterate over the files in the block until we've met the requested ---- #
461	<	# ---- total # of events or we've gone over all the files in this block ---- #
462	<	while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
463	<	file = files[fileCount]
464	<	if newFile :
465	<	try:
466	<	numEventsInFile = self.eventsbyfile[file]
467	<	common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
468	<	# increase filesEventCount
469	<	filesEventCount += numEventsInFile
470	<	# Add file to current job
471	<	parString += '\\\"' + file + '\\\"\,'
472	<	newFile = 0
473	<	except KeyError:
474	<	common.logger.message("File "+str(file)+" has unknown number of events: skipping")
475	<
476	<
477	<	# if less events in file remain than eventsPerJobRequested
478	<	if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
479	<	# if last file in block
480	<	if ( fileCount == numFilesInBlock-1 ) :
481	<	# end job using last file, use remaining events in block
442	>	if self.eventsbyblock.has_key(block) :
443	>	numEventsInBlock = self.eventsbyblock[block]
444	>	common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
445	>
446	>	files = self.filesbyblock[block]
447	>	numFilesInBlock = len(files)
448	>	if (numFilesInBlock <= 0):
449	>	continue
450	>	fileCount = 0
451	>
452	>	# ---- New block => New job ---- #
453	>	parString = ""
454	>	# counter for number of events in files currently worked on
455	>	filesEventCount = 0
456	>	# flag if next while loop should touch new file
457	>	newFile = 1
458	>	# job event counter
459	>	jobSkipEventCount = 0
460	>
461	>	# ---- Iterate over the files in the block until we've met the requested ---- #
462	>	# ---- total # of events or we've gone over all the files in this block ---- #
463	>	while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
464	>	file = files[fileCount]
465	>	if newFile :
466	>	try:
467	>	numEventsInFile = self.eventsbyfile[file]
468	>	common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
469	>	# increase filesEventCount
470	>	filesEventCount += numEventsInFile
471	>	# Add file to current job
472	>	parString += '\\\"' + file + '\\\"\,'
473	>	newFile = 0
474	>	except KeyError:
475	>	common.logger.message("File "+str(file)+" has unknown number of events: skipping")
476	>
477	>
478	>	# if less events in file remain than eventsPerJobRequested
479	>	if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
480	>	# if last file in block
481	>	if ( fileCount == numFilesInBlock-1 ) :
482	>	# end job using last file, use remaining events in block
483	>	# close job and touch new file
484	>	fullString = parString[:-2]
485	>	list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
486	>	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
487	>	self.jobDestination.append(blockSites[block])
488	>	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
489	>	# fill jobs of block dictionary
490	>	jobsOfBlock[block].append(jobCount+1)
491	>	# reset counter
492	>	jobCount = jobCount + 1
493	>	totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
494	>	eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
495	>	jobSkipEventCount = 0
496	>	# reset file
497	>	parString = ""
498	>	filesEventCount = 0
499	>	newFile = 1
500	>	fileCount += 1
501	>	else :
502	>	# go to next file
503	>	newFile = 1
504	>	fileCount += 1
505	>	# if events in file equal to eventsPerJobRequested
506	>	elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
507		# close job and touch new file
508		fullString = parString[:-2]
509	<	fullString += '\\}'
510	<	list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
436	<	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
509	>	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
510	>	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
511		self.jobDestination.append(blockSites[block])
512		common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
513	+	jobsOfBlock[block].append(jobCount+1)
514		# reset counter
515		jobCount = jobCount + 1
516	<	totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517	<	eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
516	>	totalEventCount = totalEventCount + eventsPerJobRequested
517	>	eventsRemaining = eventsRemaining - eventsPerJobRequested
518		jobSkipEventCount = 0
519		# reset file
520	<	parString = "\\{"
520	>	parString = ""
521		filesEventCount = 0
522		newFile = 1
523		fileCount += 1
524	+
525	+	# if more events in file remain than eventsPerJobRequested
526		else :
527	<	# go to next file
528	<	newFile = 1
529	<	fileCount += 1
530	<	# if events in file equal to eventsPerJobRequested
531	<	elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
532	<	# close job and touch new file
533	<	fullString = parString[:-2]
534	<	fullString += '\\}'
535	<	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
536	<	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
537	<	self.jobDestination.append(blockSites[block])
538	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
539	<	# reset counter
540	<	jobCount = jobCount + 1
541	<	totalEventCount = totalEventCount + eventsPerJobRequested
542	<	eventsRemaining = eventsRemaining - eventsPerJobRequested
543	<	jobSkipEventCount = 0
544	<	# reset file
545	<	parString = "\\{"
546	<	filesEventCount = 0
470	<	newFile = 1
471	<	fileCount += 1
472	<
473	<	# if more events in file remain than eventsPerJobRequested
474	<	else :
475	<	# close job but don't touch new file
476	<	fullString = parString[:-2]
477	<	fullString += '\\}'
478	<	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
479	<	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
480	<	self.jobDestination.append(blockSites[block])
481	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
482	<	# increase counter
483	<	jobCount = jobCount + 1
484	<	totalEventCount = totalEventCount + eventsPerJobRequested
485	<	eventsRemaining = eventsRemaining - eventsPerJobRequested
486	<	# calculate skip events for last file
487	<	# use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
488	<	jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
489	<	# remove all but the last file
490	<	filesEventCount = self.eventsbyfile[file]
491	<	parString = "\\{"
492	<	parString += '\\\"' + file + '\\\"\,'
493	<	pass # END if
494	<	pass # END while (iterate over files in the block)
527	>	# close job but don't touch new file
528	>	fullString = parString[:-2]
529	>	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
530	>	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
531	>	self.jobDestination.append(blockSites[block])
532	>	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
533	>	jobsOfBlock[block].append(jobCount+1)
534	>	# increase counter
535	>	jobCount = jobCount + 1
536	>	totalEventCount = totalEventCount + eventsPerJobRequested
537	>	eventsRemaining = eventsRemaining - eventsPerJobRequested
538	>	# calculate skip events for last file
539	>	# use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
540	>	jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
541	>	# remove all but the last file
542	>	filesEventCount = self.eventsbyfile[file]
543	>	parString = ""
544	>	parString += '\\\"' + file + '\\\"\,'
545	>	pass # END if
546	>	pass # END while (iterate over files in the block)
547		pass # END while (iterate over blocks in the dataset)
548		self.ncjobs = self.total_number_of_jobs = jobCount
549		if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
550		common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
551	<	common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
552	<
551	>	common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
552	>
553	>	# screen output
554	>	screenOutput = "List of jobs and available destination sites:\n\n"
555	>
556	>	# keep trace of block with no sites to print a warning at the end
557	>	noSiteBlock = []
558	>	bloskNoSite = []
559	>
560	>	blockCounter = 0
561	>	for block in blocks:
562	>	if block in jobsOfBlock.keys() :
563	>	blockCounter += 1
564	>	screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
565	>	if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
566	>	noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
567	>	bloskNoSite.append( blockCounter )
568	>
569	>	common.logger.message(screenOutput)
570	>	if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
571	>	msg = 'WARNING: No sites are hosting any part of data for block:\n '
572	>	virgola = ""
573	>	if len(bloskNoSite) > 1:
574	>	virgola = ","
575	>	for block in bloskNoSite:
576	>	msg += ' ' + str(block) + virgola
577	>	msg += '\n Related jobs:\n '
578	>	virgola = ""
579	>	if len(noSiteBlock) > 1:
580	>	virgola = ","
581	>	for range_jobs in noSiteBlock:
582	>	msg += str(range_jobs) + virgola
583	>	msg += '\n will not be submitted and this block of data can not be analyzed!\n'
584	>	common.logger.message(msg)
585	>
586		self.list_of_args = list_of_lists
587		return
588
#	Line 506 \| Line 591 \| class Cmssw(JobType):
591		Perform job splitting based on number of event per job
592		"""
593		common.logger.debug(5,'Splitting per events')
594	<	common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
595	<	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
596	<	common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
594	>
595	>	if (self.selectEventsPerJob):
596	>	common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
597	>	if (self.selectNumberOfJobs):
598	>	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
599	>	if (self.selectTotalNumberEvents):
600	>	common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
601
602		if (self.total_number_of_events < 0):
603		msg='Cannot split jobs per Events with "-1" as total number of events'
604		raise CrabException(msg)
605
606		if (self.selectEventsPerJob):
607	<	self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
607	>	if (self.selectTotalNumberEvents):
608	>	self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
609	>	elif(self.selectNumberOfJobs) :
610	>	self.total_number_of_jobs =self.theNumberOfJobs
611	>	self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
612	>
613		elif (self.selectNumberOfJobs) :
614		self.total_number_of_jobs = self.theNumberOfJobs
615		self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
#	Line 536 \| Line 630 \| class Cmssw(JobType):
630		for i in range(self.total_number_of_jobs):
631		## Since there is no input, any site is good
632		# self.jobDestination.append(["Any"])
633	<	self.jobDestination.append([""]) #must be empty to write correctly the xml
634	<	args=''
633	>	self.jobDestination.append([""]) #must be empty to write correctly the xml
634	>	args=[]
635		if (self.firstRun):
636		## pythia first run
637		#self.list_of_args.append([(str(self.firstRun)+str(i))])
638	<	args=args+(str(self.firstRun)+str(i))
638	>	args.append(str(self.firstRun)+str(i))
639		else:
640		## no first run
641		#self.list_of_args.append([str(i)])
642	<	args=args+str(i)
642	>	args.append(str(i))
643		if (self.sourceSeed):
644	+	args.append(str(self.sourceSeed)+str(i))
645		if (self.sourceSeedVtx):
646	<	## pythia + vtx random seed
647	<	#self.list_of_args.append([
648	<	# str(self.sourceSeed)+str(i),
649	<	# str(self.sourceSeedVtx)+str(i)
650	<	# ])
651	<	args=args+str(',')+str(self.sourceSeed)+str(i)+str(',')+str(self.sourceSeedVtx)+str(i)
652	<	else:
653	<	## only pythia random seed
654	<	#self.list_of_args.append([(str(self.sourceSeed)+str(i))])
655	<	args=args +str(',')+str(self.sourceSeed)+str(i)
656	<	else:
657	<	## no random seed
658	<	if str(args)=='': args=args+(str(self.firstRun)+str(i))
659	<	arguments=args.split(',')
565	<	if len(arguments)==3:self.list_of_args.append([str(arguments[0]),str(arguments[1]),str(arguments[2])])
566	<	elif len(arguments)==2:self.list_of_args.append([str(arguments[0]),str(arguments[1])])
567	<	else :self.list_of_args.append([str(arguments[0])])
568	<
569	<	# print self.list_of_args
646	>	## + vtx random seed
647	>	args.append(str(self.sourceSeedVtx)+str(i))
648	>	if (self.sourceSeedG4):
649	>	## + G4 random seed
650	>	args.append(str(self.sourceSeedG4)+str(i))
651	>	if (self.sourceSeedMix):
652	>	## + Mix random seed
653	>	args.append(str(self.sourceSeedMix)+str(i))
654	>	pass
655	>	pass
656	>	self.list_of_args.append(args)
657	>	pass
658	>
659	>	# print self.list_of_args
660
661		return
662
#	Line 595 \| Line 685 \| class Cmssw(JobType):
685		return
686
687		def split(self, jobParams):
688	<
688	>
689		common.jobDB.load()
690		#### Fabio
691		njobs = self.total_number_of_jobs
#	Line 603 \| Line 693 \| class Cmssw(JobType):
693		# create the empty structure
694		for i in range(njobs):
695		jobParams.append("")
696	<
696	>
697		for job in range(njobs):
698		jobParams[job] = arglist[job]
699		# print str(arglist[job])
#	Line 614 \| Line 704 \| class Cmssw(JobType):
704
705		common.jobDB.save()
706		return
707	<
707	>
708		def getJobTypeArguments(self, nj, sched):
709		result = ''
710		for i in common.jobDB.arguments(nj):
711		result=result+str(i)+" "
712		return result
713	<
713	>
714		def numberOfJobs(self):
715		# Fabio
716		return self.total_number_of_jobs
#	Line 629 \| Line 719 \| class Cmssw(JobType):
719		"""
720		Return the TarBall with lib and exe
721		"""
722	<
722	>
723		# if it exist, just return it
724		#
725		# Marco. Let's start to use relative path for Boss XML files
#	Line 652 \| Line 742 \| class Cmssw(JobType):
742		# print "swVersion = ", swVersion
743		swReleaseTop = self.scram.getReleaseTop_()
744		#print "swReleaseTop = ", swReleaseTop
745	<
745	>
746		## check if working area is release top
747		if swReleaseTop == '' or swArea == swReleaseTop:
748		return
#	Line 665 \| Line 755 \| class Cmssw(JobType):
755		exeWithPath = self.scram.findFile_(executable)
756		if ( not exeWithPath ):
757		raise CrabException('User executable '+executable+' not found')
758	<
758	>
759		## then check if it's private or not
760		if exeWithPath.find(swReleaseTop) == -1:
761		# the exe is private, so we must ship
762		common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
763		path = swArea+'/'
764	<	exe = string.replace(exeWithPath, path,'')
765	<	tar.add(path+exe,exe)
764	>	# distinguish case when script is in user project area or given by full path somewhere else
765	>	if exeWithPath.find(path) >= 0 :
766	>	exe = string.replace(exeWithPath, path,'')
767	>	tar.add(path+exe,exe)
768	>	else :
769	>	tar.add(exeWithPath,os.path.basename(executable))
770		pass
771		else:
772		# the exe is from release, we'll find it on WN
773		pass
774	<
774	>
775		## Now get the libraries: only those in local working area
776		libDir = 'lib'
777		lib = swArea+'/' +libDir
778		common.logger.debug(5,"lib "+lib+" to be tarred")
779		if os.path.exists(lib):
780		tar.add(lib,libDir)
781	<
781	>
782		## Now check if module dir is present
783		moduleDir = 'module'
784		module = swArea + '/' + moduleDir
#	Line 703 \| Line 797 \| class Cmssw(JobType):
797		pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
798		if os.path.isdir(pa):
799		tar.add(pa,paDir)
800	<
800	>
801	>	### FEDE FOR DBS PUBLICATION
802	>	## Add PRODCOMMON dir to tar
803	>	prodcommonDir = 'ProdCommon'
804	>	prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
805	>	if os.path.isdir(prodcommonPath):
806	>	tar.add(prodcommonPath,prodcommonDir)
807	>	#############################
808	>
809		common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
810		tar.close()
811		except :
812		raise CrabException('Could not create tar-ball')
813	<
813	>
814	>	## check for tarball size
815	>	tarballinfo = os.stat(self.tgzNameWithPath)
816	>	if ( tarballinfo.st_size > self.MaxTarBallSize10241024 ) :
817	>	raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
818	>
819		## create tar-ball with ML stuff
820	<	self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
820	>	self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
821		try:
822		tar = tarfile.open(self.MLtgzfile, "w:gz")
823		path=os.environ['CRABDIR'] + '/python/'
#	Line 720 \| Line 827 \| class Cmssw(JobType):
827		tar.close()
828		except :
829		raise CrabException('Could not create ML files tar-ball')
830	<
830	>
831		return
832	<
832	>
833	>	def additionalInputFileTgz(self):
834	>	"""
835	>	Put all additional files into a tar ball and return its name
836	>	"""
837	>	import tarfile
838	>	tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
839	>	tar = tarfile.open(tarName, "w:gz")
840	>	for file in self.additional_inbox_files:
841	>	tar.add(file,string.split(file,'/')[-1])
842	>	common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
843	>	tar.close()
844	>	return tarName
845	>
846		def wsSetupEnvironment(self, nj):
847		"""
848		Returns part of a job script which prepares
849		the execution environment for the job 'nj'.
850		"""
851		# Prepare JobType-independent part
852	<	txt = ''
853	<
854	<	## OLI_Daniele at this level middleware already known
735	<
736	<	txt += 'if [ $middleware == LCG ]; then \n'
852	>	txt = ''
853	>	txt += 'echo ">>> setup environment"\n'
854	>	txt += 'if [ $middleware == LCG ]; then \n'
855		txt += self.wsSetupCMSLCGEnvironment_()
856		txt += 'elif [ $middleware == OSG ]; then\n'
857		txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
858	<	txt += ' echo "Created working directory: $WORKING_DIR"\n'
741	<	txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
858	>	txt += ' if [ ! $? == 0 ] ;then\n'
859		txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
860	<	txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
861	<	txt += ' echo "JobExitCode=10016" \| tee -a $RUNTIME_AREA/$repo\n'
862	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
746	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
747	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
748	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
860	>	txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
861	>	txt += ' echo "JobExitCode=10016" \| tee -a $RUNTIME_AREA/$repo\n'
862	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
863		txt += ' exit 1\n'
864		txt += ' fi\n'
865	+	txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
866		txt += '\n'
867		txt += ' echo "Change to working directory: $WORKING_DIR"\n'
868		txt += ' cd $WORKING_DIR\n'
869	<	txt += self.wsSetupCMSOSGEnvironment_()
869	>	txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
870	>	txt += self.wsSetupCMSOSGEnvironment_()
871	>	#txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
872	>	#txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
873		txt += 'fi\n'
874
875		# Prepare JobType-specific part
876		scram = self.scram.commandName()
877		txt += '\n\n'
878	<	txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
878	>	txt += 'echo ">>> specific cmssw setup environment:"\n'
879	>	txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
880		txt += scram+' project CMSSW '+self.version+'\n'
881		txt += 'status=$?\n'
882		txt += 'if [ $status != 0 ] ; then\n'
883	<	txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
884	<	txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
885	<	txt += ' echo "JobExitCode=10034" \| tee -a $RUNTIME_AREA/$repo\n'
886	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
768	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
769	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
770	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
771	<	## OLI_Daniele
883	>	txt += ' echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
884	>	txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
885	>	txt += ' echo "JobExitCode=10034" \| tee -a $RUNTIME_AREA/$repo\n'
886	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
887		txt += ' if [ $middleware == OSG ]; then \n'
773	–	txt += ' echo "Remove working directory: $WORKING_DIR"\n'
888		txt += ' cd $RUNTIME_AREA\n'
889	+	txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
890	+	txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
891		txt += ' /bin/rm -rf $WORKING_DIR\n'
892		txt += ' if [ -d $WORKING_DIR ] ;then\n'
893	<	txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
894	<	txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
895	<	txt += ' echo "JobExitCode=10018" \| tee -a $RUNTIME_AREA/$repo\n'
896	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
781	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
782	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
783	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
893	>	txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
894	>	txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
895	>	txt += ' echo "JobExitCode=10018" \| tee -a $RUNTIME_AREA/$repo\n'
896	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
897		txt += ' fi\n'
898		txt += ' fi \n'
899	<	txt += ' exit 1 \n'
899	>	txt += ' exit 1 \n'
900		txt += 'fi \n'
788	–	txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
901		txt += 'cd '+self.version+'\n'
902	+	########## FEDE FOR DBS2 ######################
903	+	txt += 'SOFTWARE_DIR=`pwd`\n'
904	+	txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
905	+	###############################################
906		### needed grep for bug in scramv1 ###
907		txt += scram+' runtime -sh\n'
908		txt += 'eval `'+scram+' runtime -sh \| grep -v SCRAMRT_LSB_JOBNAME`\n'
#	Line 796 \| Line 912 \| class Cmssw(JobType):
912		txt += "\n"
913		txt += "## number of arguments (first argument always jobnumber)\n"
914		txt += "\n"
799	–	# txt += "narg=$#\n"
915		txt += "if [ $nargs -lt 2 ]\n"
916		txt += "then\n"
917		txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
918		txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
919		txt += ' echo "JobExitCode=50113" \| tee -a $RUNTIME_AREA/$repo\n'
920		txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
806	–	txt += ' rm -f $RUNTIME_AREA/$repo \n'
807	–	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
808	–	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
809	–	## OLI_Daniele
921		txt += ' if [ $middleware == OSG ]; then \n'
811	–	txt += ' echo "Remove working directory: $WORKING_DIR"\n'
922		txt += ' cd $RUNTIME_AREA\n'
923	+	txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
924	+	txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
925		txt += ' /bin/rm -rf $WORKING_DIR\n'
926		txt += ' if [ -d $WORKING_DIR ] ;then\n'
927	<	txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
928	<	txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
929	<	txt += ' echo "JobExitCode=50114" \| tee -a $RUNTIME_AREA/$repo\n'
930	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
819	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
820	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
821	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
927	>	txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
928	>	txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
929	>	txt += ' echo "JobExitCode=50114" \| tee -a $RUNTIME_AREA/$repo\n'
930	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
931		txt += ' fi\n'
932		txt += ' fi \n'
933		txt += " exit 1\n"
#	Line 827 \| Line 936 \| class Cmssw(JobType):
936
937		# Prepare job-specific part
938		job = common.job_list[nj]
939	+	### FEDE FOR DBS OUTPUT PUBLICATION
940	+	if (self.datasetPath):
941	+	txt += '\n'
942	+	txt += 'DatasetPath='+self.datasetPath+'\n'
943	+
944	+	datasetpath_split = self.datasetPath.split("/")
945	+
946	+	txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
947	+	txt += 'DataTier='+datasetpath_split[2]+'\n'
948	+	txt += 'ApplicationFamily=cmsRun\n'
949	+
950	+	else:
951	+	txt += 'DatasetPath=MCDataTier\n'
952	+	txt += 'PrimaryDataset=null\n'
953	+	txt += 'DataTier=null\n'
954	+	txt += 'ApplicationFamily=MCDataTier\n'
955		if self.pset != None: #CarlosDaniele
956		pset = os.path.basename(job.configFilename())
957		txt += '\n'
958	+	txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
959		if (self.datasetPath): # standard job
834	–	#txt += 'InputFiles=$2\n'
960		txt += 'InputFiles=${args[1]}\n'
961		txt += 'MaxEvents=${args[2]}\n'
962		txt += 'SkipEvents=${args[3]}\n'
963		txt += 'echo "Inputfiles:<$InputFiles>"\n'
964	<	txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
964	>	txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
965		txt += 'echo "MaxEvents:<$MaxEvents>"\n'
966	<	txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
966	>	txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
967		txt += 'echo "SkipEvents:<$SkipEvents>"\n'
968	<	txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
968	>	txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
969		else: # pythia like job
970	<	if (self.sourceSeed):
971	<	txt += 'FirstRun=${args[1]}\n'
970	>	seedIndex=1
971	>	if (self.firstRun):
972	>	txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
973		txt += 'echo "FirstRun: <$FirstRun>"\n'
974	<	txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" $RUNTIME_AREA/'+pset+' > tmp_1.cfg\n'
975	<	else:
976	<	txt += '# Copy untouched pset\n'
851	<	txt += 'cp $RUNTIME_AREA/'+pset+' tmp_1.cfg\n'
974	>	txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
975	>	seedIndex=seedIndex+1
976	>
977		if (self.sourceSeed):
978	<	# txt += 'Seed=$2\n'
979	<	txt += 'Seed=${args[2]}\n'
980	<	txt += 'echo "Seed: <$Seed>"\n'
981	<	txt += 'sed "s#\<INPUT\>#$Seed#" tmp_1.cfg > tmp_2.cfg\n'
978	>	txt += 'Seed=${args['+str(seedIndex)+']}\n'
979	>	txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
980	>	seedIndex=seedIndex+1
981	>	## the following seeds are not always present
982		if (self.sourceSeedVtx):
983	<	# txt += 'VtxSeed=$3\n'
859	<	txt += 'VtxSeed=${args[3]}\n'
983	>	txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
984		txt += 'echo "VtxSeed: <$VtxSeed>"\n'
985	<	txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp_2.cfg > pset.cfg\n'
986	<	else:
987	<	txt += 'mv tmp_2.cfg pset.cfg\n'
988	<	else:
989	<	txt += 'mv tmp_1.cfg pset.cfg\n'
990	<	# txt += '# Copy untouched pset\n'
991	<	# txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
992	<
985	>	txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
986	>	seedIndex += 1
987	>	if (self.sourceSeedG4):
988	>	txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
989	>	txt += 'echo "G4Seed: <$G4Seed>"\n'
990	>	txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
991	>	seedIndex += 1
992	>	if (self.sourceSeedMix):
993	>	txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
994	>	txt += 'echo "MixSeed: <$mixSeed>"\n'
995	>	txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
996	>	seedIndex += 1
997	>	pass
998	>	pass
999	>	txt += 'mv -f '+pset+' pset.cfg\n'
1000
1001		if len(self.additional_inbox_files) > 0:
1002	<	for file in self.additional_inbox_files:
1003	<	relFile = file.split("/")[-1]
1004	<	txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
1005	<	txt += ' cp $RUNTIME_AREA/'+relFile+' .\n'
875	<	txt += ' chmod +x '+relFile+'\n'
876	<	txt += 'fi\n'
877	<	pass
1002	>	txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1003	>	txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1004	>	txt += 'fi\n'
1005	>	pass
1006
1007		if self.pset != None: #CarlosDaniele
880	–	txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
881	–
1008		txt += '\n'
1009		txt += 'echo "*** cat pset.cfg *******"\n'
1010		txt += 'cat pset.cfg\n'
1011		txt += 'echo "**** end pset.cfg ******"\n'
1012		txt += '\n'
1013	<	# txt += 'echo "*** cat pset1.cfg *******"\n'
1014	<	# txt += 'cat pset1.cfg\n'
1015	<	# txt += 'echo "**** end pset1.cfg ******"\n'
1013	>	### FEDE FOR DBS OUTPUT PUBLICATION
1014	>	txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1015	>	txt += 'echo "PSETHASH = $PSETHASH" \n'
1016	>	##############
1017	>	txt += '\n'
1018		return txt
1019
1020		def wsBuildExe(self, nj=0):
#	Line 898 \| Line 1026 \| class Cmssw(JobType):
1026		txt = ""
1027
1028		if os.path.isfile(self.tgzNameWithPath):
1029	<	txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1029	>	txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1030		txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1031		txt += 'untar_status=$? \n'
1032		txt += 'if [ $untar_status -ne 0 ]; then \n'
#	Line 906 \| Line 1034 \| class Cmssw(JobType):
1034		txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
1035		txt += ' echo "JobExitCode=$untar_status" \| tee -a $RUNTIME_AREA/$repo\n'
1036		txt += ' if [ $middleware == OSG ]; then \n'
909	–	txt += ' echo "Remove working directory: $WORKING_DIR"\n'
1037		txt += ' cd $RUNTIME_AREA\n'
1038	+	txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1039	+	txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
1040		txt += ' /bin/rm -rf $WORKING_DIR\n'
1041		txt += ' if [ -d $WORKING_DIR ] ;then\n'
1042		txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1043		txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
1044		txt += ' echo "JobExitCode=50999" \| tee -a $RUNTIME_AREA/$repo\n'
1045		txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
917	–	txt += ' rm -f $RUNTIME_AREA/$repo \n'
918	–	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
919	–	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1046		txt += ' fi\n'
1047		txt += ' fi \n'
1048		txt += ' \n'
#	Line 925 \| Line 1051 \| class Cmssw(JobType):
1051		txt += ' echo "Successful untar" \n'
1052		txt += 'fi \n'
1053		txt += '\n'
1054	<	txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1054	>	txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
1055		txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1056	<	txt += ' export PYTHONPATH=ProdAgentApi\n'
1056	>	#### FEDE FOR DBS OUTPUT PUBLICATION
1057	>	txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1058		txt += 'else\n'
1059	<	txt += ' export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1059	>	txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1060	>	txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1061	>	###################
1062		txt += 'fi\n'
1063		txt += '\n'
1064
1065		pass
1066	<
1066	>
1067		return txt
1068
1069		def modifySteeringCards(self, nj):
1070		"""
1071	<	modify the card provided by the user,
1071	>	modify the card provided by the user,
1072		writing a new card into share dir
1073		"""
1074	<
1074	>
1075		def executableName(self):
1076	<	if self.pset == None: #CarlosDaniele
1076	>	if self.scriptExe: #CarlosDaniele
1077		return "sh "
1078		else:
1079		return self.executable
1080
1081		def executableArgs(self):
1082	<	if self.pset == None:#CarlosDaniele
1082	>	if self.scriptExe:#CarlosDaniele
1083		return self.scriptExe + " $NJob"
1084	<	else:
1085	<	return " -p pset.cfg"
1084	>	else:
1085	>	# if >= CMSSW_1_5_X, add -e
1086	>	version_array = self.scram.getSWVersion().split('_')
1087	>	major = 0
1088	>	minor = 0
1089	>	try:
1090	>	major = int(version_array[1])
1091	>	minor = int(version_array[2])
1092	>	except:
1093	>	msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1094	>	raise CrabException(msg)
1095	>	if major >= 1 and minor >= 5 :
1096	>	return " -e -p pset.cfg"
1097	>	else:
1098	>	return " -p pset.cfg"
1099
1100		def inputSandbox(self, nj):
1101		"""
#	Line 968 \| Line 1110 \| class Cmssw(JobType):
1110		if os.path.isfile(self.MLtgzfile):
1111		inp_box.append(self.MLtgzfile)
1112		## config
1113	<	if not self.pset is None: #CarlosDaniele
1113	>	if not self.pset is None:
1114		inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1115		## additional input files
1116	<	#for file in self.additional_inbox_files:
1117	<	# inp_box.append(common.work_space.cwdDir()+file)
1116	>	tgz = self.additionalInputFileTgz()
1117	>	inp_box.append(tgz)
1118		return inp_box
1119
1120		def outputSandbox(self, nj):
#	Line 983 \| Line 1125 \| class Cmssw(JobType):
1125
1126		## User Declared output files
1127		for out in (self.output_file+self.output_file_sandbox):
1128	<	n_out = nj + 1
1128	>	n_out = nj + 1
1129		out_box.append(self.numberFile_(out,str(n_out)))
1130		return out_box
1131
#	Line 999 \| Line 1141 \| class Cmssw(JobType):
1141		"""
1142
1143		txt = '\n'
1144	<	txt += '# directory content\n'
1144	>	txt += 'echo" >>> directory content:"\n'
1145		txt += 'ls \n'
1146	+	txt = '\n'
1147
1148	<	for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1148	>	txt += 'output_exit_status=0\n'
1149	>
1150	>	for fileWithSuffix in (self.output_file_sandbox):
1151		output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1152		txt += '\n'
1153		txt += '# check output file\n'
1154	<	txt += 'ls '+fileWithSuffix+'\n'
1155	<	txt += 'ls_result=$?\n'
1156	<	txt += 'if [ $ls_result -ne 0 ] ; then\n'
1157	<	txt += ' echo "ERROR: Problem with output file"\n'
1154	>	txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1155	>	txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1156	>	txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1157	>	txt += 'else\n'
1158	>	txt += ' exit_status=60302\n'
1159	>	txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1160		if common.scheduler.boss_scheduler_name == 'condor_g':
1161		txt += ' if [ $middleware == OSG ]; then \n'
1162		txt += ' echo "prepare dummy output file"\n'
1163		txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1164		txt += ' fi \n'
1018	–	txt += 'else\n'
1019	–	txt += ' cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1165		txt += 'fi\n'
1021	–
1022	–	txt += 'cd $RUNTIME_AREA\n'
1023	–	txt += 'cd $RUNTIME_AREA\n'
1024	–	### OLI_DANIELE
1025	–	txt += 'if [ $middleware == OSG ]; then\n'
1026	–	txt += ' cd $RUNTIME_AREA\n'
1027	–	txt += ' echo "Remove working directory: $WORKING_DIR"\n'
1028	–	txt += ' /bin/rm -rf $WORKING_DIR\n'
1029	–	txt += ' if [ -d $WORKING_DIR ] ;then\n'
1030	–	txt += ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1031	–	txt += ' echo "JOB_EXIT_STATUS = 60999"\n'
1032	–	txt += ' echo "JobExitCode=60999" \| tee -a $RUNTIME_AREA/$repo\n'
1033	–	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1034	–	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1035	–	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1036	–	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1037	–	txt += ' fi\n'
1038	–	txt += 'fi\n'
1039	–	txt += '\n'
1166
1167	<	file_list = ''
1042	<	## Add to filelist only files to be possibly copied to SE
1043	<	for fileWithSuffix in self.output_file:
1167	>	for fileWithSuffix in (self.output_file):
1168		output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1169	<	file_list=file_list+output_file_num+' '
1170	<	file_list=file_list[:-1]
1171	<	txt += 'file_list="'+file_list+'"\n'
1169	>	txt += '\n'
1170	>	txt += '# check output file\n'
1171	>	txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1172	>	txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1173	>	txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1174	>	txt += 'else\n'
1175	>	txt += ' exit_status=60302\n'
1176	>	txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1177	>	txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
1178	>	txt += ' output_exit_status=$exit_status\n'
1179	>	if common.scheduler.boss_scheduler_name == 'condor_g':
1180	>	txt += ' if [ $middleware == OSG ]; then \n'
1181	>	txt += ' echo "prepare dummy output file"\n'
1182	>	txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1183	>	txt += ' fi \n'
1184	>	txt += 'fi\n'
1185	>	file_list = []
1186	>	for fileWithSuffix in (self.output_file):
1187	>	file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1188
1189	+	txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1190	+	txt += 'cd $RUNTIME_AREA\n'
1191	+	txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1192		return txt
1193
1194		def numberFile_(self, file, txt):
#	Line 1056 \| Line 1199 \| class Cmssw(JobType):
1199		# take away last extension
1200		name = p[0]
1201		for x in p[1:-1]:
1202	<	name=name+"."+x
1202	>	name=name+"."+x
1203		# add "_txt"
1204		if len(p)>1:
1205	<	ext = p[len(p)-1]
1206	<	result = name + '_' + txt + "." + ext
1205	>	ext = p[len(p)-1]
1206	>	result = name + '_' + txt + "." + ext
1207		else:
1208	<	result = name + '_' + txt
1209	<
1208	>	result = name + '_' + txt
1209	>
1210		return result
1211
1212		def getRequirements(self, nj=[]):
1213		"""
1214	<	return job requirements to add to jdl files
1214	>	return job requirements to add to jdl files
1215		"""
1216		req = ''
1217		if self.version:
1218		req='Member("VO-cms-' + \
1219		self.version + \
1220		'", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1221	+	## SL add requirement for OS version only if SL4
1222	+	#reSL4 = re.compile( r'slc4' )
1223	+	if self.executable_arch: # and reSL4.search(self.executable_arch):
1224	+	req+=' && Member("VO-cms-' + \
1225	+	self.executable_arch + \
1226	+	'", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1227
1228		req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1229
#	Line 1084 \| Line 1233 \| class Cmssw(JobType):
1233		""" return the config filename """
1234		return self.name()+'.cfg'
1235
1087	–	### OLI_DANIELE
1236		def wsSetupCMSOSGEnvironment_(self):
1237		"""
1238		Returns part of a job script which is prepares
1239		the execution environment and which is common for all CMS jobs.
1240		"""
1241	<	txt = '\n'
1242	<	txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
1243	<	txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1244	<	txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1245	<	txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1098	<	txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1241	>	txt = ' echo ">>> setup CMS OSG environment:"\n'
1242	>	txt += ' echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1243	>	txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1244	>	txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1245	>	txt += ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1246		txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1247	<	txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1248	<	txt += ' else\n'
1249	<	txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1250	<	txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1251	<	txt += ' echo "JobExitCode=10020" \| tee -a $RUNTIME_AREA/$repo\n'
1252	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1106	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1107	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1108	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1109	<	txt += ' exit 1\n'
1247	>	txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1248	>	txt += ' else\n'
1249	>	txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1250	>	txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1251	>	txt += ' echo "JobExitCode=10020" \| tee -a $RUNTIME_AREA/$repo\n'
1252	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1253		txt += '\n'
1254	<	txt += ' echo "Remove working directory: $WORKING_DIR"\n'
1255	<	txt += ' cd $RUNTIME_AREA\n'
1256	<	txt += ' /bin/rm -rf $WORKING_DIR\n'
1257	<	txt += ' if [ -d $WORKING_DIR ] ;then\n'
1258	<	txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1259	<	txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1260	<	txt += ' echo "JobExitCode=10017" \| tee -a $RUNTIME_AREA/$repo\n'
1261	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1262	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1263	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1121	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1122	<	txt += ' fi\n'
1254	>	txt += ' cd $RUNTIME_AREA\n'
1255	>	txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1256	>	txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
1257	>	txt += ' /bin/rm -rf $WORKING_DIR\n'
1258	>	txt += ' if [ -d $WORKING_DIR ] ;then\n'
1259	>	txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1260	>	txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1261	>	txt += ' echo "JobExitCode=10017" \| tee -a $RUNTIME_AREA/$repo\n'
1262	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1263	>	txt += ' fi\n'
1264		txt += '\n'
1265	<	txt += ' exit 1\n'
1266	<	txt += ' fi\n'
1265	>	txt += ' exit 1\n'
1266	>	txt += ' fi\n'
1267		txt += '\n'
1268	<	txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1269	<	txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'
1268	>	txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1269	>	txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1270
1271		return txt
1272	<
1272	>
1273		### OLI_DANIELE
1274		def wsSetupCMSLCGEnvironment_(self):
1275		"""
1276		Returns part of a job script which is prepares
1277		the execution environment and which is common for all CMS jobs.
1278		"""
1279	<	txt = ' \n'
1280	<	txt += ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n'
1281	<	txt += ' if [ ! $VO_CMS_SW_DIR ] ;then\n'
1282	<	txt += ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1283	<	txt += ' echo "JOB_EXIT_STATUS = 10031" \n'
1284	<	txt += ' echo "JobExitCode=10031" \| tee -a $RUNTIME_AREA/$repo\n'
1285	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1286	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1287	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1288	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1289	<	txt += ' exit 1\n'
1290	<	txt += ' else\n'
1291	<	txt += ' echo "Sourcing environment... "\n'
1292	<	txt += ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1293	<	txt += ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1294	<	txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1295	<	txt += ' echo "JobExitCode=10020" \| tee -a $RUNTIME_AREA/$repo\n'
1296	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1297	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1298	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1299	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1300	<	txt += ' exit 1\n'
1301	<	txt += ' fi\n'
1302	<	txt += ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1303	<	txt += ' source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1304	<	txt += ' result=$?\n'
1305	<	txt += ' if [ $result -ne 0 ]; then\n'
1306	<	txt += ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1307	<	txt += ' echo "JOB_EXIT_STATUS = 10032"\n'
1308	<	txt += ' echo "JobExitCode=10032" \| tee -a $RUNTIME_AREA/$repo\n'
1309	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1310	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1311	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1312	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1313	<	txt += ' exit 1\n'
1314	<	txt += ' fi\n'
1315	<	txt += ' fi\n'
1316	<	txt += ' \n'
1317	<	txt += ' string=`cat /etc/redhat-release`\n'
1318	<	txt += ' echo $string\n'
1319	<	txt += ' if [[ $string = alhalla ]]; then\n'
1320	<	txt += ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1321	<	txt += ' elif [[ $string = Enterprise ]] \|\| [[ $string = cientific ]]; then\n'
1322	<	txt += ' export SCRAM_ARCH=slc3_ia32_gcc323\n'
1323	<	txt += ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1324	<	txt += ' else\n'
1325	<	txt += ' echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1326	<	txt += ' echo "JOB_EXIT_STATUS = 10033"\n'
1327	<	txt += ' echo "JobExitCode=10033" \| tee -a $RUNTIME_AREA/$repo\n'
1328	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1329	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1330	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1331	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1332	<	txt += ' exit 1\n'
1333	<	txt += ' fi\n'
1334	<	txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1335	<	txt += ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1279	>	txt = ' echo ">>> setup CMS LCG environment:"\n'
1280	>	txt += ' echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1281	>	txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1282	>	txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
1283	>	txt += ' if [ ! $VO_CMS_SW_DIR ] ;then\n'
1284	>	txt += ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1285	>	txt += ' echo "JOB_EXIT_STATUS = 10031" \n'
1286	>	txt += ' echo "JobExitCode=10031" \| tee -a $RUNTIME_AREA/$repo\n'
1287	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1288	>	txt += ' exit 1\n'
1289	>	txt += ' else\n'
1290	>	txt += ' echo "Sourcing environment... "\n'
1291	>	txt += ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1292	>	txt += ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1293	>	txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1294	>	txt += ' echo "JobExitCode=10020" \| tee -a $RUNTIME_AREA/$repo\n'
1295	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1296	>	txt += ' exit 1\n'
1297	>	txt += ' fi\n'
1298	>	txt += ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1299	>	txt += ' source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1300	>	txt += ' result=$?\n'
1301	>	txt += ' if [ $result -ne 0 ]; then\n'
1302	>	txt += ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1303	>	txt += ' echo "JOB_EXIT_STATUS = 10032"\n'
1304	>	txt += ' echo "JobExitCode=10032" \| tee -a $RUNTIME_AREA/$repo\n'
1305	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1306	>	txt += ' exit 1\n'
1307	>	txt += ' fi\n'
1308	>	txt += ' fi\n'
1309	>	txt += ' \n'
1310	>	txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1311	>	return txt
1312	>
1313	>	### FEDE FOR DBS OUTPUT PUBLICATION
1314	>	def modifyReport(self, nj):
1315	>	"""
1316	>	insert the part of the script that modifies the FrameworkJob Report
1317	>	"""
1318	>
1319	>	txt = ''
1320	>	try:
1321	>	publish_data = int(self.cfg_params['USER.publish_data'])
1322	>	except KeyError:
1323	>	publish_data = 0
1324	>	if (publish_data == 1):
1325	>	txt += 'echo ">>> Modify Job Report:" \n'
1326	>	################ FEDE FOR DBS2 #############################################
1327	>	txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1328	>	#############################################################################
1329	>
1330	>	txt += 'if [ -z "$SE" ]; then\n'
1331	>	txt += ' SE="" \n'
1332	>	txt += 'fi \n'
1333	>	txt += 'if [ -z "$SE_PATH" ]; then\n'
1334	>	txt += ' SE_PATH="" \n'
1335	>	txt += 'fi \n'
1336	>	txt += 'echo "SE = $SE"\n'
1337	>	txt += 'echo "SE_PATH = $SE_PATH"\n'
1338	>
1339	>	processedDataset = self.cfg_params['USER.publish_data_name']
1340	>	txt += 'ProcessedDataset='+processedDataset+'\n'
1341	>	#### LFN=/store/user/<user>/processedDataset_PSETHASH
1342	>	txt += 'if [ "$SE_PATH" == "" ]; then\n'
1343	>	#### FEDE: added slash in LFN ##############
1344	>	txt += ' FOR_LFN=/copy_problems/ \n'
1345	>	txt += 'else \n'
1346	>	txt += ' tmp=`echo $SE_PATH \| awk -F \'store\' \'{print$2}\'` \n'
1347	>	##### FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1348	>	txt += ' FOR_LFN=/store$tmp \n'
1349	>	txt += 'fi \n'
1350	>	txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1351	>	txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1352	>	txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1353	>	#txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1354	>	txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1355	>	txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1356	>	#txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1357	>
1358	>	txt += 'modifyReport_result=$?\n'
1359	>	txt += 'echo modifyReport_result = $modifyReport_result\n'
1360	>	txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1361	>	txt += ' exit_status=1\n'
1362	>	txt += ' echo "ERROR: Problem with ModifyJobReport"\n'
1363	>	txt += 'else\n'
1364	>	txt += ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1365	>	txt += 'fi\n'
1366	>	else:
1367	>	txt += 'echo "no data publication required"\n'
1368	>	return txt
1369	>
1370	>	def cleanEnv(self):
1371	>	txt = ''
1372	>	txt += 'if [ $middleware == OSG ]; then\n'
1373	>	txt += ' cd $RUNTIME_AREA\n'
1374	>	txt += ' echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1375	>	txt += ' echo ">>> Remove working directory: $WORKING_DIR"\n'
1376	>	txt += ' /bin/rm -rf $WORKING_DIR\n'
1377	>	txt += ' if [ -d $WORKING_DIR ] ;then\n'
1378	>	txt += ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1379	>	txt += ' echo "JOB_EXIT_STATUS = 60999"\n'
1380	>	txt += ' echo "JobExitCode=60999" \| tee -a $RUNTIME_AREA/$repo\n'
1381	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1382	>	txt += ' fi\n'
1383	>	txt += 'fi\n'
1384	>	txt += '\n'
1385		return txt
1386
1387		def setParam_(self, param, value):
#	Line 1202 \| Line 1392 \| class Cmssw(JobType):
1392
1393		def setTaskid_(self):
1394		self._taskId = self.cfg_params['taskId']
1395	<
1395	>
1396		def getTaskid(self):
1397		return self._taskId
1398
1209	–	#######################################################################
1399		def uniquelist(self, old):
1400		"""
1401		remove duplicates from a list
#	Line 1215 \| Line 1404 \| class Cmssw(JobType):
1404		for e in old:
1405		nd[e]=0
1406		return nd.keys()
1407	+
1408	+
1409	+	def checkOut(self, limit):
1410	+	"""
1411	+	check the dimension of the output files
1412	+	"""
1413	+	txt += 'echo ">>> Starting output sandbox limit check :"\n'
1414	+	allOutFiles = ""
1415	+	listOutFiles = []
1416	+	for fileOut in (self.output_file+self.output_file_sandbox):
1417	+	if fileOut.find('crab_fjr') == -1:
1418	+	allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1419	+	listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1420	+	txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1421	+	txt += 'ls -gGhrta;\n'
1422	+	txt += 'sum=0;\n'
1423	+	txt += 'for file in '+str(allOutFiles)+' ; do\n'
1424	+	txt += ' if [ -e $file ]; then\n'
1425	+	txt += ' tt=`ls -gGrta $file \| awk \'{ print $3 }\'`\n'
1426	+	txt += ' sum=`expr $sum + $tt`\n'
1427	+	txt += ' else\n'
1428	+	txt += ' echo "WARNING: output file $file not found!"\n'
1429	+	txt += ' fi\n'
1430	+	txt += 'done\n'
1431	+	txt += 'echo "Total Output dimension: $sum";\n'
1432	+	txt += 'limit='+str(limit)+';\n'
1433	+	txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1434	+	txt += 'if [ $limit -lt $sum ]; then\n'
1435	+	txt += ' echo "WARNING: output files have to big size - something will be lost;"\n'
1436	+	txt += ' echo " checking the output file sizes..."\n'
1437	+	"""
1438	+	txt += ' dim=0;\n'
1439	+	txt += ' exclude=0;\n'
1440	+	txt += ' for files in '+str(allOutFiles)+' ; do\n'
1441	+	txt += ' sumTemp=0;\n'
1442	+	txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1443	+	txt += ' if [ $file != $file2 ]; then\n'
1444	+	txt += ' tt=`ls -gGrta $file2 \| awk \'{ print $3 }\';`\n'
1445	+	txt += ' sumTemp=`expr $sumTemp + $tt`;\n'
1446	+	txt += ' fi\n'
1447	+	txt += ' done\n'
1448	+	txt += ' if [ $sumTemp -lt $limit ]; then\n'
1449	+	txt += ' if [ $dim -lt $sumTemp ]; then\n'
1450	+	txt += ' dim=$sumTemp;\n'
1451	+	txt += ' exclude=$file;\n'
1452	+	txt += ' fi\n'
1453	+	txt += ' fi\n'
1454	+	txt += ' done\n'
1455	+	txt += ' echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1456	+	"""
1457	+	txt += ' tot=0;\n'
1458	+	txt += ' for file2 in '+str(allOutFiles)+' ; do\n'
1459	+	txt += ' tt=`ls -gGrta $file2 \| awk \'{ print $3 }\';`\n'
1460	+	txt += ' tot=`expr $tot + $tt`;\n'
1461	+	txt += ' if [ $limit -lt $tot ]; then\n'
1462	+	txt += ' tot=`expr $tot - $tt`;\n'
1463	+	txt += ' fileLast=$file;\n'
1464	+	txt += ' break;\n'
1465	+	txt += ' fi\n'
1466	+	txt += ' done\n'
1467	+	txt += ' echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1468	+	txt += ' flag=0;\n'
1469	+	txt += ' for filess in '+str(allOutFiles)+' ; do\n'
1470	+	txt += ' if [ $fileLast = $filess ]; then\n'
1471	+	txt += ' flag=1;\n'
1472	+	txt += ' fi\n'
1473	+	txt += ' if [ $flag -eq 1 ]; then\n'
1474	+	txt += ' rm -f $filess;\n'
1475	+	txt += ' fi\n'
1476	+	txt += ' done\n'
1477	+	txt += ' ls -agGhrt;\n'
1478	+	txt += ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1479	+	txt += ' echo "JOB_EXIT_STATUS = 70000";\n'
1480	+	txt += ' exit_status=70000;\n'
1481	+	txt += 'else'
1482	+	txt += ' echo "Total Output dimension $sum is fine.";\n'
1483	+	txt += 'fi\n'
1484	+	txt += 'echo "Ending output sandbox limit check"\n'
1485	+	return txt

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing COMP/CRAB/python/cms_cmssw.py (file contents): Revision 1.64 by slacapra, Thu Jan 18 18:29:51 2007 UTC vs. Revision 1.137 by slacapra, Fri Nov 16 11:09:31 2007 UTC

Diff Legend

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.64 by slacapra, Thu Jan 18 18:29:51 2007 UTC vs.
Revision 1.137 by slacapra, Fri Nov 16 11:09:31 2007 UTC