[ViewVC] Diff of: cvsroot/COMP/CRAB/python/cms_cmssw.py

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.35 by gutsche, Thu Aug 3 22:44:40 2006 UTC vs.
Revision 1.132 by ewv, Mon Oct 29 21:03:11 2007 UTC

#	Line 2 \| Line 2 \| from JobType import JobType
2		from crab_logger import Logger
3		from crab_exceptions import *
4		from crab_util import *
5	<	import math
5	>	from BlackWhiteListParser import BlackWhiteListParser
6		import common
7	–	import PsetManipulator
8	–
9	–	import DBSInfo_EDM
10	–	import DataDiscovery_EDM
11	–	import DataLocation_EDM
7		import Scram
8
9	<	import os, string, re
9	>	import os, string, glob
10
11		class Cmssw(JobType):
12	<	def __init__(self, cfg_params):
12	>	def __init__(self, cfg_params, ncjobs):
13		JobType.__init__(self, 'CMSSW')
14		common.logger.debug(3,'CMSSW::__init__')
15
21	–	self.analisys_common_info = {}
22	–	# Marco.
16		self._params = {}
17		self.cfg_params = cfg_params
18	+
19	+	# init BlackWhiteListParser
20	+	self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21	+
22	+	try:
23	+	self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24	+	except KeyError:
25	+	self.MaxTarBallSize = 9.5
26	+
27	+	# number of jobs requested to be created, limit obj splitting
28	+	self.ncjobs = ncjobs
29	+
30		log = common.logger
31	<
31	>
32		self.scram = Scram.Scram(cfg_params)
28	–	scramArea = ''
33		self.additional_inbox_files = []
34		self.scriptExe = ''
35		self.executable = ''
36	+	self.executable_arch = self.scram.getArch()
37		self.tgz_name = 'default.tgz'
38	+	self.additional_tgz_name = 'additional.tgz'
39	+	self.scriptName = 'CMSSW.sh'
40	+	self.pset = '' #scrip use case Da
41	+	self.datasetPath = '' #scrip use case Da
42
43	+	# set FJR file name
44	+	self.fjrFileName = 'crab_fjr.xml'
45
46		self.version = self.scram.getSWVersion()
47	+
48	+	#
49	+	# Try to block creation in case of arch/version mismatch
50	+	#
51	+
52	+	a = string.split(self.version, "_")
53	+
54	+	if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55	+	msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56	+	raise CrabException(msg)
57	+	if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58	+	msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59	+	raise CrabException(msg)
60	+
61	+	common.taskDB.setDict('codeVersion',self.version)
62		self.setParam_('application', self.version)
37	–	common.analisys_common_info['sw_version'] = self.version
38	–	### FEDE
39	–	common.analisys_common_info['copy_input_data'] = 0
40	–	common.analisys_common_info['events_management'] = 1
63
64		### collect Data cards
65	+
66	+	## get DBS mode
67	+	try:
68	+	self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69	+	except KeyError:
70	+	self.use_dbs_1 = 0
71	+
72		try:
73		tmp = cfg_params['CMSSW.datasetpath']
74		log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
#	Line 50 \| Line 79 \| class Cmssw(JobType):
79		self.datasetPath = tmp
80		self.selectNoInput = 0
81		except KeyError:
82	<	msg = "Error: datasetpath not defined "
82	>	msg = "Error: datasetpath not defined "
83		raise CrabException(msg)
84
85		# ML monitoring
#	Line 59 \| Line 88 \| class Cmssw(JobType):
88		self.setParam_('dataset', 'None')
89		self.setParam_('owner', 'None')
90		else:
91	<	datasetpath_split = self.datasetPath.split("/")
92	<	self.setParam_('dataset', datasetpath_split[1])
93	<	self.setParam_('owner', datasetpath_split[-1])
91	>	try:
92	>	datasetpath_split = self.datasetPath.split("/")
93	>	# standard style
94	>	self.setParam_('datasetFull', self.datasetPath)
95	>	if self.use_dbs_1 == 1 :
96	>	self.setParam_('dataset', datasetpath_split[1])
97	>	self.setParam_('owner', datasetpath_split[-1])
98	>	else:
99	>	self.setParam_('dataset', datasetpath_split[1])
100	>	self.setParam_('owner', datasetpath_split[2])
101	>	except:
102	>	self.setParam_('dataset', self.datasetPath)
103	>	self.setParam_('owner', self.datasetPath)
104
105		self.setTaskid_()
106		self.setParam_('taskId', self.cfg_params['taskId'])
#	Line 85 \| Line 124 \| class Cmssw(JobType):
124		try:
125		self.pset = cfg_params['CMSSW.pset']
126		log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127	<	if (not os.path.exists(self.pset)):
128	<	raise CrabException("User defined PSet file "+self.pset+" does not exist")
127	>	if self.pset.lower() != 'none' :
128	>	if (not os.path.exists(self.pset)):
129	>	raise CrabException("User defined PSet file "+self.pset+" does not exist")
130	>	else:
131	>	self.pset = None
132		except KeyError:
133		raise CrabException("PSet file missing. Cannot run cmsRun ")
134
135		# output files
136	+	## stuff which must be returned always via sandbox
137	+	self.output_file_sandbox = []
138	+
139	+	# add fjr report by default via sandbox
140	+	self.output_file_sandbox.append(self.fjrFileName)
141	+
142	+	# other output files to be returned via sandbox or copied to SE
143		try:
144		self.output_file = []
96	–
145		tmp = cfg_params['CMSSW.output_file']
146		if tmp != '':
147		tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
#	Line 103 \| Line 151 \| class Cmssw(JobType):
151		self.output_file.append(tmp)
152		pass
153		else:
154	<	log.message("No output file defined: only stdout/err will be available")
154	>	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155		pass
156		pass
157		except KeyError:
158	<	log.message("No output file defined: only stdout/err will be available")
158	>	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159		pass
160
161		# script_exe file as additional file in inputSandbox
162		try:
163		self.scriptExe = cfg_params['USER.script_exe']
116	–	self.additional_inbox_files.append(self.scriptExe)
164		if self.scriptExe != '':
165		if not os.path.isfile(self.scriptExe):
166	<	msg ="WARNING. file "+self.scriptExe+" not found"
166	>	msg ="ERROR. file "+self.scriptExe+" not found"
167		raise CrabException(msg)
168	+	self.additional_inbox_files.append(string.strip(self.scriptExe))
169		except KeyError:
170	<	pass
171	<
170	>	self.scriptExe = ''
171	>
172	>	#CarlosDaniele
173	>	if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174	>	msg ="Error. script_exe not defined"
175	>	raise CrabException(msg)
176	>
177		## additional input files
178		try:
179		tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180		for tmp in tmpAddFiles:
181	<	if not os.path.exists(tmp):
182	<	raise CrabException("Additional input file not found: "+tmp)
183	<	self.additional_inbox_files.append(string.strip(tmp))
181	>	tmp = string.strip(tmp)
182	>	dirname = ''
183	>	if not tmp[0]=="/": dirname = "."
184	>	files = []
185	>	if string.find(tmp,"*")>-1:
186	>	files = glob.glob(os.path.join(dirname, tmp))
187	>	if len(files)==0:
188	>	raise CrabException("No additional input file found with this pattern: "+tmp)
189	>	else:
190	>	files.append(tmp)
191	>	for file in files:
192	>	if not os.path.exists(file):
193	>	raise CrabException("Additional input file not found: "+file)
194	>	pass
195	>	# fname = string.split(file, '/')[-1]
196	>	# storedFile = common.work_space.pathForTgz()+'share/'+fname
197	>	# shutil.copyfile(file, storedFile)
198	>	self.additional_inbox_files.append(string.strip(file))
199		pass
200		pass
201	+	common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202		except KeyError:
203		pass
204
#	Line 147 \| Line 216 \| class Cmssw(JobType):
216		except KeyError:
217		self.eventsPerJob = -1
218		self.selectEventsPerJob = 0
219	<
219	>
220		## number of jobs
221		try:
222		self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
#	Line 163 \| Line 232 \| class Cmssw(JobType):
232		self.total_number_of_events = 0
233		self.selectTotalNumberEvents = 0
234
235	<	if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
236	<	msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
237	<	raise CrabException(msg)
235	>	if self.pset != None: #CarlosDaniele
236	>	if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237	>	msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238	>	raise CrabException(msg)
239	>	else:
240	>	if (self.selectNumberOfJobs == 0):
241	>	msg = 'Must specify number_of_jobs.'
242	>	raise CrabException(msg)
243
244		## source seed for pythia
245		try:
#	Line 180 \| Line 254 \| class Cmssw(JobType):
254		self.sourceSeedVtx = None
255		common.logger.debug(5,"No vertex seed given")
256
257	<	self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
257	>	try:
258	>	self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259	>	except KeyError:
260	>	self.sourceSeedG4 = None
261	>	common.logger.debug(5,"No g4 sim hits seed given")
262	>
263	>	try:
264	>	self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265	>	except KeyError:
266	>	self.sourceSeedMix = None
267	>	common.logger.debug(5,"No mix seed given")
268	>
269	>	try:
270	>	self.firstRun = int(cfg_params['CMSSW.first_run'])
271	>	except KeyError:
272	>	self.firstRun = None
273	>	common.logger.debug(5,"No first run given")
274	>	if self.pset != None: #CarlosDaniele
275	>	import PsetManipulator as pp
276	>	PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
277
278		#DBSDLS-start
279	<	## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
279	>	## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
280		self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py)
281		self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script)
282		self.jobDestination=[] # Site destination(s) for each job (list of lists)
#	Line 192 \| Line 285 \| class Cmssw(JobType):
285		blockSites = {}
286		if self.datasetPath:
287		blockSites = self.DataDiscoveryAndLocation(cfg_params)
288	<	#DBSDLS-end
288	>	#DBSDLS-end
289
290		self.tgzNameWithPath = self.getTarBall(self.executable)
291	<
291	>
292		## Select Splitting
293	<	if self.selectNoInput: self.jobSplittingNoInput()
294	<	else: self.jobSplittingByBlocks(blockSites)
293	>	if self.selectNoInput:
294	>	if self.pset == None: #CarlosDaniele
295	>	self.jobSplittingForScript()
296	>	else:
297	>	self.jobSplittingNoInput()
298	>	else:
299	>	self.jobSplittingByBlocks(blockSites)
300
301		# modify Pset
302	<	try:
303	<	if (self.datasetPath): # standard job
304	<	# always process all events in a file
305	<	self.PsetEdit.maxEvent("-1")
306	<	self.PsetEdit.inputModule("INPUT")
307	<
308	<	else: # pythia like job
309	<	self.PsetEdit.maxEvent(self.eventsPerJob)
310	<	if (self.sourceSeed) :
311	<	self.PsetEdit.pythiaSeed("INPUT")
312	<	if (self.sourceSeedVtx) :
313	<	self.PsetEdit.pythiaSeedVtx("INPUTVTX")
314	<	self.PsetEdit.psetWriter(self.configFilename())
315	<	except:
316	<	msg='Error while manipuliating ParameterSet: exiting...'
317	<	raise CrabException(msg)
302	>	if self.pset != None: #CarlosDaniele
303	>	try:
304	>	if (self.datasetPath): # standard job
305	>	# allow to processa a fraction of events in a file
306	>	PsetEdit.inputModule("INPUTFILE")
307	>	PsetEdit.maxEvent(0)
308	>	PsetEdit.skipEvent(0)
309	>	else: # pythia like job
310	>	PsetEdit.maxEvent(self.eventsPerJob)
311	>	if (self.firstRun):
312	>	PsetEdit.pythiaFirstRun(0) #First Run
313	>	if (self.sourceSeed) :
314	>	PsetEdit.pythiaSeed(0)
315	>	if (self.sourceSeedVtx) :
316	>	PsetEdit.vtxSeed(0)
317	>	if (self.sourceSeedG4) :
318	>	PsetEdit.g4Seed(0)
319	>	if (self.sourceSeedMix) :
320	>	PsetEdit.mixSeed(0)
321	>	# add FrameworkJobReport to parameter-set
322	>	PsetEdit.addCrabFJR(self.fjrFileName)
323	>	PsetEdit.psetWriter(self.configFilename())
324	>	except:
325	>	msg='Error while manipuliating ParameterSet: exiting...'
326	>	raise CrabException(msg)
327
328		def DataDiscoveryAndLocation(self, cfg_params):
329
330	+	import DataDiscovery
331	+	import DataDiscovery_DBS2
332	+	import DataLocation
333		common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
334
335		datasetPath=self.datasetPath
336
227	–	## TODO
228	–	dataTiersList = ""
229	–	dataTiers = dataTiersList.split(',')
230	–
337		## Contact the DBS
338	+	common.logger.message("Contacting Data Discovery Services ...")
339		try:
340	<	self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
340	>
341	>	if self.use_dbs_1 == 1 :
342	>	self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
343	>	else :
344	>	self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
345		self.pubdata.fetchDBSInfo()
346
347	<	except DataDiscovery_EDM.NotExistingDatasetError, ex :
347	>	except DataDiscovery.NotExistingDatasetError, ex :
348		msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
349		raise CrabException(msg)
350	<
240	<	except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
350	>	except DataDiscovery.NoDataTierinProvenanceError, ex :
351		msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
352		raise CrabException(msg)
353	<	except DataDiscovery_EDM.DataDiscoveryError, ex:
354	<	msg = 'ERROR ***: failed Data Discovery in DBS %s'%ex.getErrorMessage()
353	>	except DataDiscovery.DataDiscoveryError, ex:
354	>	msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
355	>	raise CrabException(msg)
356	>	except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
357	>	msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
358	>	raise CrabException(msg)
359	>	except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
360	>	msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
361	>	raise CrabException(msg)
362	>	except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
363	>	msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
364		raise CrabException(msg)
246	–
247	–	## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner)
248	–	## self.DBSPaths=self.pubdata.getDBSPaths()
249	–	common.logger.message("Required data are :"+self.datasetPath)
365
366		self.filesbyblock=self.pubdata.getFiles()
367	<	self.eventsbyblock=self.pubdata.getEVC()
368	<	## SL we probably don't need this
254	<	self.files = self.filesbyblock.values()
367	>	self.eventsbyblock=self.pubdata.getEventsPerBlock()
368	>	self.eventsbyfile=self.pubdata.getEventsPerFile()
369
370		## get max number of events
371	<	self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
258	<	common.logger.message("\nThe number of available events is %s"%self.maxEvents)
371	>	self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
372
373		## Contact the DLS and build a list of sites hosting the fileblocks
374		try:
375	<	dataloc=DataLocation_EDM.DataLocation_EDM(self.filesbyblock.keys(),cfg_params)
375	>	dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
376		dataloc.fetchDLSInfo()
377	<	except DataLocation_EDM.DataLocationError , ex:
377	>	except DataLocation.DataLocationError , ex:
378		msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
379		raise CrabException(msg)
380	<
380	>
381
382		sites = dataloc.getSites()
383		allSites = []
384		listSites = sites.values()
385	<	for list in listSites:
386	<	for oneSite in list:
385	>	for listSite in listSites:
386	>	for oneSite in listSite:
387		allSites.append(oneSite)
388		allSites = self.uniquelist(allSites)
389
390	<	common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
391	<	common.logger.debug(6, "List of Sites: "+str(allSites))
392	<	self.setParam_('TargetCE', ','.join(sites))
390	>	# screen output
391	>	common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
392	>
393		return sites
394	<
394	>
395		def jobSplittingByBlocks(self, blockSites):
396		"""
397		Perform job splitting. Jobs run over an integer number of files
#	Line 311 \| Line 424 \| class Cmssw(JobType):
424		else:
425		eventsRemaining = totalEventsRequested
426
427	+	# If user requested more events per job than are in the dataset
428	+	if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
429	+	eventsPerJobRequested = self.maxEvents
430	+
431		# For user info at end
432		totalEventCount = 0
433
#	Line 320 \| Line 437 \| class Cmssw(JobType):
437		if (self.selectNumberOfJobs):
438		common.logger.message("May not create the exact number_of_jobs requested.")
439
440	+	if ( self.ncjobs == 'all' ) :
441	+	totalNumberOfJobs = 999999999
442	+	else :
443	+	totalNumberOfJobs = self.ncjobs
444	+
445	+
446		blocks = blockSites.keys()
447		blockCount = 0
448		# Backup variable in case self.maxEvents counted events in a non-included block
#	Line 328 \| Line 451 \| class Cmssw(JobType):
451		jobCount = 0
452		list_of_lists = []
453
454	+	# list tracking which jobs are in which jobs belong to which block
455	+	jobsOfBlock = {}
456	+
457		# ---- Iterate over the blocks in the dataset until ---- #
458		# ---- we've met the requested total # of events ---- #
459	<	while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) ):
459	>	while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
460		block = blocks[blockCount]
461	<
462	<
463	<	evInBlock = self.eventsbyblock[block]
464	<	common.logger.debug(5,'Events in Block File '+str(evInBlock))
465	<
466	<	#Correct - switch to this when DBS up
467	<	#numEventsInBlock = self.eventsbyblock[block]
468	<	numEventsInBlock = evInBlock
469	<
470	<	files = self.filesbyblock[block]
471	<	numFilesInBlock = len(files)
472	<	if (numFilesInBlock <= 0):
473	<	continue
474	<	fileCount = 0
475	<
476	<	# ---- New block => New job ---- #
477	<	parString = "\\{"
478	<	jobEventCount = 0
479	<
480	<	# ---- Iterate over the files in the block until we've met the requested ---- #
481	<	# ---- total # of events or we've gone over all the files in this block ---- #
482	<	while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) ):
483	<	file = files[fileCount]
484	<	fileCount = fileCount + 1
485	<	#numEventsInFile = numberEventsInFile(file)
486	<	# !!!!!!!!!!!!!!!!! Need to get the # of events in each file. !!!!!!!!!!!!!!!!!!!!!!!!!
487	<	# For now, I'm assuming that all files in a block
488	<	# have the same number of events
489	<	numEventsInFile = numEventsInBlock/numFilesInBlock
490	<	common.logger.debug(5,"Estimated # of events in the file: "+str(numEventsInFile))
491	<	numEventsInFile = int(numEventsInFile)
492	<	common.logger.debug(5,"After rounding down: "+str(numEventsInFile))
493	<	# Add file to current job
494	<	parString += '\\\"' + file + '\\\"\,'
495	<	jobEventCount = jobEventCount + numEventsInFile
496	<	totalEventCount = totalEventCount + numEventsInFile
497	<	eventsRemaining = eventsRemaining - numEventsInFile
498	<	if (jobEventCount >= eventsPerJobRequested):
499	<	# ---- This job has at least CMSSW.events_per_job => End of job ---- #
500	<	# Don't need the last \,
501	<	fullString = parString[:-2]
502	<	fullString += '\\}'
503	<	list_of_lists.append([fullString])
504	<	common.logger.message("Job "+str(jobCount+1)+" can run over approximately "+str(jobEventCount)+" events.")
505	<
506	<	#self.jobDestination[jobCount] = blockSites[block]
507	<	self.jobDestination.append(blockSites[block])
508	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
509	<	if ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) ):
510	<	# ---- Still need CMSSW.total_number_of_events ---- #
511	<	# ---- and not about to jump into a new block ---- #
512	<	# ---- => New job ---- #
513	<	parString = "\\{"
514	<	jobEventCount = 0
461	>	blockCount += 1
462	>	if block not in jobsOfBlock.keys() :
463	>	jobsOfBlock[block] = []
464	>
465	>	if self.eventsbyblock.has_key(block) :
466	>	numEventsInBlock = self.eventsbyblock[block]
467	>	common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
468	>
469	>	files = self.filesbyblock[block]
470	>	numFilesInBlock = len(files)
471	>	if (numFilesInBlock <= 0):
472	>	continue
473	>	fileCount = 0
474	>
475	>	# ---- New block => New job ---- #
476	>	parString = ""
477	>	# counter for number of events in files currently worked on
478	>	filesEventCount = 0
479	>	# flag if next while loop should touch new file
480	>	newFile = 1
481	>	# job event counter
482	>	jobSkipEventCount = 0
483	>
484	>	# ---- Iterate over the files in the block until we've met the requested ---- #
485	>	# ---- total # of events or we've gone over all the files in this block ---- #
486	>	while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
487	>	file = files[fileCount]
488	>	if newFile :
489	>	try:
490	>	numEventsInFile = self.eventsbyfile[file]
491	>	common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
492	>	# increase filesEventCount
493	>	filesEventCount += numEventsInFile
494	>	# Add file to current job
495	>	parString += '\\\"' + file + '\\\"\,'
496	>	newFile = 0
497	>	except KeyError:
498	>	common.logger.message("File "+str(file)+" has unknown number of events: skipping")
499	>
500	>
501	>	# if less events in file remain than eventsPerJobRequested
502	>	if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
503	>	# if last file in block
504	>	if ( fileCount == numFilesInBlock-1 ) :
505	>	# end job using last file, use remaining events in block
506	>	# close job and touch new file
507	>	fullString = parString[:-2]
508	>	list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509	>	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510	>	self.jobDestination.append(blockSites[block])
511	>	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512	>	# fill jobs of block dictionary
513	>	jobsOfBlock[block].append(jobCount+1)
514	>	# reset counter
515	>	jobCount = jobCount + 1
516	>	totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517	>	eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518	>	jobSkipEventCount = 0
519	>	# reset file
520	>	parString = ""
521	>	filesEventCount = 0
522	>	newFile = 1
523	>	fileCount += 1
524	>	else :
525	>	# go to next file
526	>	newFile = 1
527	>	fileCount += 1
528	>	# if events in file equal to eventsPerJobRequested
529	>	elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530	>	# close job and touch new file
531	>	fullString = parString[:-2]
532	>	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
533	>	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
534	>	self.jobDestination.append(blockSites[block])
535	>	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
536	>	jobsOfBlock[block].append(jobCount+1)
537	>	# reset counter
538	>	jobCount = jobCount + 1
539	>	totalEventCount = totalEventCount + eventsPerJobRequested
540	>	eventsRemaining = eventsRemaining - eventsPerJobRequested
541	>	jobSkipEventCount = 0
542	>	# reset file
543	>	parString = ""
544	>	filesEventCount = 0
545	>	newFile = 1
546	>	fileCount += 1
547	>
548	>	# if more events in file remain than eventsPerJobRequested
549	>	else :
550	>	# close job but don't touch new file
551	>	fullString = parString[:-2]
552	>	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
553	>	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
554	>	self.jobDestination.append(blockSites[block])
555	>	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
556	>	jobsOfBlock[block].append(jobCount+1)
557	>	# increase counter
558		jobCount = jobCount + 1
559	+	totalEventCount = totalEventCount + eventsPerJobRequested
560	+	eventsRemaining = eventsRemaining - eventsPerJobRequested
561	+	# calculate skip events for last file
562	+	# use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
563	+	jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
564	+	# remove all but the last file
565	+	filesEventCount = self.eventsbyfile[file]
566	+	parString = ""
567	+	parString += '\\\"' + file + '\\\"\,'
568		pass # END if
569	<	pass # END if
392	<	pass # END while (iterate over files in the block)
393	<	if (jobEventCount < eventsPerJobRequested):
394	<	# ---- Job ending prematurely due to end of block => End of job ---- #
395	<	# Don't need the last \,
396	<	fullString = parString[:-2]
397	<	fullString += '\\}'
398	<	list_of_lists.append([fullString])
399	<	common.logger.message("Job "+str(jobCount+1)+" can run over approximately "+str(jobEventCount)+" events.")
400	<	#self.jobDestination[jobCount] = blockSites[block]
401	<	self.jobDestination.append(blockSites[block])
402	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
403	<	pass # END if
404	<	blockCount = blockCount + 1
405	<	jobCount = jobCount + 1
569	>	pass # END while (iterate over files in the block)
570		pass # END while (iterate over blocks in the dataset)
571	<	self.total_number_of_jobs = jobCount
572	<	if (eventsRemaining > 0):
571	>	self.ncjobs = self.total_number_of_jobs = jobCount
572	>	if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
573		common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
574	<	common.logger.message("\n"+str(jobCount)+" job(s) can run on approximately "+str(totalEventCount)+" events.\n")
575	<
574	>	common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575	>
576	>	# screen output
577	>	screenOutput = "List of jobs and available destination sites:\n\n"
578	>
579	>	# keep trace of block with no sites to print a warning at the end
580	>	noSiteBlock = []
581	>	bloskNoSite = []
582	>
583	>	blockCounter = 0
584	>	for block in blocks:
585	>	if block in jobsOfBlock.keys() :
586	>	blockCounter += 1
587	>	screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
588	>	if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
589	>	noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
590	>	bloskNoSite.append( blockCounter )
591	>
592	>	common.logger.message(screenOutput)
593	>	if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
594	>	msg = 'WARNING: No sites are hosting any part of data for block:\n '
595	>	virgola = ""
596	>	if len(bloskNoSite) > 1:
597	>	virgola = ","
598	>	for block in bloskNoSite:
599	>	msg += ' ' + str(block) + virgola
600	>	msg += '\n Related jobs:\n '
601	>	virgola = ""
602	>	if len(noSiteBlock) > 1:
603	>	virgola = ","
604	>	for range_jobs in noSiteBlock:
605	>	msg += str(range_jobs) + virgola
606	>	msg += '\n will not be submitted and this block of data can not be analyzed!\n'
607	>	common.logger.message(msg)
608	>
609		self.list_of_args = list_of_lists
610		return
611
#	Line 417 \| Line 614 \| class Cmssw(JobType):
614		Perform job splitting based on number of event per job
615		"""
616		common.logger.debug(5,'Splitting per events')
617	<	common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
618	<	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
619	<	common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
617	>
618	>	if (self.selectEventsPerJob):
619	>	common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
620	>	if (self.selectNumberOfJobs):
621	>	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
622	>	if (self.selectTotalNumberEvents):
623	>	common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
624
625		if (self.total_number_of_events < 0):
626		msg='Cannot split jobs per Events with "-1" as total number of events'
627		raise CrabException(msg)
628
629		if (self.selectEventsPerJob):
630	<	self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
630	>	if (self.selectTotalNumberEvents):
631	>	self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
632	>	elif(self.selectNumberOfJobs) :
633	>	self.total_number_of_jobs =self.theNumberOfJobs
634	>	self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
635	>
636		elif (self.selectNumberOfJobs) :
637		self.total_number_of_jobs = self.theNumberOfJobs
638		self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
#	Line 446 \| Line 652 \| class Cmssw(JobType):
652		self.list_of_args = []
653		for i in range(self.total_number_of_jobs):
654		## Since there is no input, any site is good
655	<	self.jobDestination.append(["Any"])
655	>	# self.jobDestination.append(["Any"])
656	>	self.jobDestination.append([""]) #must be empty to write correctly the xml
657	>	args=[]
658	>	if (self.firstRun):
659	>	## pythia first run
660	>	#self.list_of_args.append([(str(self.firstRun)+str(i))])
661	>	args.append(str(self.firstRun)+str(i))
662	>	else:
663	>	## no first run
664	>	#self.list_of_args.append([str(i)])
665	>	args.append(str(i))
666		if (self.sourceSeed):
667	+	args.append(str(self.sourceSeed)+str(i))
668		if (self.sourceSeedVtx):
669	<	## pythia + vtx random seed
670	<	self.list_of_args.append([
671	<	str(self.sourceSeed)+str(i),
672	<	str(self.sourceSeedVtx)+str(i)
673	<	])
674	<	else:
675	<	## only pythia random seed
676	<	self.list_of_args.append([(str(self.sourceSeed)+str(i))])
677	<	else:
678	<	## no random seed
679	<	self.list_of_args.append([str(i)])
680	<	#print self.list_of_args
669	>	## + vtx random seed
670	>	args.append(str(self.sourceSeedVtx)+str(i))
671	>	if (self.sourceSeedG4):
672	>	## + G4 random seed
673	>	args.append(str(self.sourceSeedG4)+str(i))
674	>	if (self.sourceSeedMix):
675	>	## + Mix random seed
676	>	args.append(str(self.sourceSeedMix)+str(i))
677	>	pass
678	>	pass
679	>	self.list_of_args.append(args)
680	>	pass
681	>
682	>	# print self.list_of_args
683	>
684	>	return
685	>
686	>
687	>	def jobSplittingForScript(self):#CarlosDaniele
688	>	"""
689	>	Perform job splitting based on number of job
690	>	"""
691	>	common.logger.debug(5,'Splitting per job')
692	>	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
693	>
694	>	self.total_number_of_jobs = self.theNumberOfJobs
695	>
696	>	common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
697	>
698	>	common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
699
700	+	# argument is seed number.$i
701	+	self.list_of_args = []
702	+	for i in range(self.total_number_of_jobs):
703	+	## Since there is no input, any site is good
704	+	# self.jobDestination.append(["Any"])
705	+	self.jobDestination.append([""])
706	+	## no random seed
707	+	self.list_of_args.append([str(i)])
708		return
709
710		def split(self, jobParams):
711	<
711	>
712		common.jobDB.load()
713		#### Fabio
714		njobs = self.total_number_of_jobs
#	Line 473 \| Line 716 \| class Cmssw(JobType):
716		# create the empty structure
717		for i in range(njobs):
718		jobParams.append("")
719	<
719	>
720		for job in range(njobs):
721		jobParams[job] = arglist[job]
722		# print str(arglist[job])
#	Line 484 \| Line 727 \| class Cmssw(JobType):
727
728		common.jobDB.save()
729		return
730	<
730	>
731		def getJobTypeArguments(self, nj, sched):
732		result = ''
733		for i in common.jobDB.arguments(nj):
734		result=result+str(i)+" "
735		return result
736	<
736	>
737		def numberOfJobs(self):
738		# Fabio
739		return self.total_number_of_jobs
#	Line 499 \| Line 742 \| class Cmssw(JobType):
742		"""
743		Return the TarBall with lib and exe
744		"""
745	<
745	>
746		# if it exist, just return it
747	<	self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
747	>	#
748	>	# Marco. Let's start to use relative path for Boss XML files
749	>	#
750	>	self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
751		if os.path.exists(self.tgzNameWithPath):
752		return self.tgzNameWithPath
753
#	Line 515 \| Line 761 \| class Cmssw(JobType):
761		# First of all declare the user Scram area
762		swArea = self.scram.getSWArea_()
763		#print "swArea = ", swArea
764	<	swVersion = self.scram.getSWVersion()
765	<	#print "swVersion = ", swVersion
764	>	# swVersion = self.scram.getSWVersion()
765	>	# print "swVersion = ", swVersion
766		swReleaseTop = self.scram.getReleaseTop_()
767		#print "swReleaseTop = ", swReleaseTop
768	<
768	>
769		## check if working area is release top
770		if swReleaseTop == '' or swArea == swReleaseTop:
771		return
772
773	<	filesToBeTarred = []
774	<	## First find the executable
775	<	if (self.executable != ''):
776	<	exeWithPath = self.scram.findFile_(executable)
777	<	# print exeWithPath
778	<	if ( not exeWithPath ):
779	<	raise CrabException('User executable '+executable+' not found')
780	<
781	<	## then check if it's private or not
782	<	if exeWithPath.find(swReleaseTop) == -1:
783	<	# the exe is private, so we must ship
784	<	common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
785	<	path = swArea+'/'
786	<	exe = string.replace(exeWithPath, path,'')
787	<	filesToBeTarred.append(exe)
788	<	pass
789	<	else:
790	<	# the exe is from release, we'll find it on WN
791	<	pass
792	<
793	<	## Now get the libraries: only those in local working area
794	<	libDir = 'lib'
795	<	lib = swArea+'/' +libDir
796	<	common.logger.debug(5,"lib "+lib+" to be tarred")
797	<	if os.path.exists(lib):
798	<	filesToBeTarred.append(libDir)
799	<
800	<	## Now check if module dir is present
801	<	moduleDir = 'module'
802	<	if os.path.isdir(swArea+'/'+moduleDir):
803	<	filesToBeTarred.append(moduleDir)
804	<
805	<	## Now check if the Data dir is present
806	<	dataDir = 'src/Data/'
807	<	if os.path.isdir(swArea+'/'+dataDir):
808	<	filesToBeTarred.append(dataDir)
809	<
810	<	## Create the tar-ball
811	<	if len(filesToBeTarred)>0:
812	<	cwd = os.getcwd()
813	<	os.chdir(swArea)
814	<	tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
815	<	for line in filesToBeTarred:
816	<	tarcmd = tarcmd + line + ' '
817	<	cout = runCommand(tarcmd)
818	<	if not cout:
819	<	raise CrabException('Could not create tar-ball')
820	<	os.chdir(cwd)
821	<	else:
822	<	common.logger.debug(5,"No files to be to be tarred")
823	<
773	>	import tarfile
774	>	try: # create tar ball
775	>	tar = tarfile.open(self.tgzNameWithPath, "w:gz")
776	>	## First find the executable
777	>	if (self.executable != ''):
778	>	exeWithPath = self.scram.findFile_(executable)
779	>	if ( not exeWithPath ):
780	>	raise CrabException('User executable '+executable+' not found')
781	>
782	>	## then check if it's private or not
783	>	if exeWithPath.find(swReleaseTop) == -1:
784	>	# the exe is private, so we must ship
785	>	common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
786	>	path = swArea+'/'
787	>	# distinguish case when script is in user project area or given by full path somewhere else
788	>	if exeWithPath.find(path) >= 0 :
789	>	exe = string.replace(exeWithPath, path,'')
790	>	tar.add(path+exe,exe)
791	>	else :
792	>	tar.add(exeWithPath,os.path.basename(executable))
793	>	pass
794	>	else:
795	>	# the exe is from release, we'll find it on WN
796	>	pass
797	>
798	>	## Now get the libraries: only those in local working area
799	>	libDir = 'lib'
800	>	lib = swArea+'/' +libDir
801	>	common.logger.debug(5,"lib "+lib+" to be tarred")
802	>	if os.path.exists(lib):
803	>	tar.add(lib,libDir)
804	>
805	>	## Now check if module dir is present
806	>	moduleDir = 'module'
807	>	module = swArea + '/' + moduleDir
808	>	if os.path.isdir(module):
809	>	tar.add(module,moduleDir)
810	>
811	>	## Now check if any data dir(s) is present
812	>	swAreaLen=len(swArea)
813	>	for root, dirs, files in os.walk(swArea):
814	>	if "data" in dirs:
815	>	common.logger.debug(5,"data "+root+"/data"+" to be tarred")
816	>	tar.add(root+"/data",root[swAreaLen:]+"/data")
817	>
818	>	## Add ProdAgent dir to tar
819	>	paDir = 'ProdAgentApi'
820	>	pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
821	>	if os.path.isdir(pa):
822	>	tar.add(pa,paDir)
823	>
824	>	### FEDE FOR DBS PUBLICATION
825	>	## Add PRODCOMMON dir to tar
826	>	prodcommonDir = 'ProdCommon'
827	>	prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
828	>	if os.path.isdir(prodcommonPath):
829	>	tar.add(prodcommonPath,prodcommonDir)
830	>	#############################
831	>
832	>	common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
833	>	tar.close()
834	>	except :
835	>	raise CrabException('Could not create tar-ball')
836	>
837	>	## check for tarball size
838	>	tarballinfo = os.stat(self.tgzNameWithPath)
839	>	if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
840	>	raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
841	>
842	>	## create tar-ball with ML stuff
843	>	self.MLtgzfile = common.work_space.pathForTgz()+'share/MLfiles.tgz'
844	>	try:
845	>	tar = tarfile.open(self.MLtgzfile, "w:gz")
846	>	path=os.environ['CRABDIR'] + '/python/'
847	>	for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
848	>	tar.add(path+file,file)
849	>	common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
850	>	tar.close()
851	>	except :
852	>	raise CrabException('Could not create ML files tar-ball')
853	>
854		return
855	<
855	>
856	>	def additionalInputFileTgz(self):
857	>	"""
858	>	Put all additional files into a tar ball and return its name
859	>	"""
860	>	import tarfile
861	>	tarName= common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
862	>	tar = tarfile.open(tarName, "w:gz")
863	>	for file in self.additional_inbox_files:
864	>	tar.add(file,string.split(file,'/')[-1])
865	>	common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
866	>	tar.close()
867	>	return tarName
868	>
869		def wsSetupEnvironment(self, nj):
870		"""
871		Returns part of a job script which prepares
872		the execution environment for the job 'nj'.
873		"""
874		# Prepare JobType-independent part
875	<	txt = ''
876	<
875	>	txt = ''
876	>
877		## OLI_Daniele at this level middleware already known
878
879	<	txt += 'if [ $middleware == LCG ]; then \n'
879	>	txt += 'if [ $middleware == LCG ]; then \n'
880	>	txt += ' echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
881	>	txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
882	>	txt += ' export BUILD_ARCH='+self.executable_arch+'\n'
883		txt += self.wsSetupCMSLCGEnvironment_()
884		txt += 'elif [ $middleware == OSG ]; then\n'
885	<	txt += ' time=`date -u +"%s"`\n'
886	<	txt += ' WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
595	<	txt += ' echo "Creating working directory: $WORKING_DIR"\n'
596	<	txt += ' /bin/mkdir -p $WORKING_DIR\n'
597	<	txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
885	>	txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
886	>	txt += ' if [ ! $? == 0 ] ;then\n'
887		txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
888	<	txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
889	<	txt += ' echo "JobExitCode=10016" \| tee -a $RUNTIME_AREA/$repo\n'
890	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
888	>	txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
889	>	txt += ' echo "JobExitCode=10016" \| tee -a $RUNTIME_AREA/$repo\n'
890	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
891		txt += ' rm -f $RUNTIME_AREA/$repo \n'
892		txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
893		txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
894		txt += ' exit 1\n'
895		txt += ' fi\n'
896	+	txt += ' echo "Created working directory: $WORKING_DIR"\n'
897		txt += '\n'
898		txt += ' echo "Change to working directory: $WORKING_DIR"\n'
899		txt += ' cd $WORKING_DIR\n'
900	<	txt += self.wsSetupCMSOSGEnvironment_()
900	>	txt += self.wsSetupCMSOSGEnvironment_()
901	>	txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
902	>	txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
903		txt += 'fi\n'
904
905		# Prepare JobType-specific part
#	Line 630 \| Line 922 \| class Cmssw(JobType):
922		txt += ' cd $RUNTIME_AREA\n'
923		txt += ' /bin/rm -rf $WORKING_DIR\n'
924		txt += ' if [ -d $WORKING_DIR ] ;then\n'
925	<	txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
926	<	txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
927	<	txt += ' echo "JobExitCode=10018" \| tee -a $RUNTIME_AREA/$repo\n'
928	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
925	>	txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
926	>	txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
927	>	txt += ' echo "JobExitCode=10018" \| tee -a $RUNTIME_AREA/$repo\n'
928	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
929		txt += ' rm -f $RUNTIME_AREA/$repo \n'
930		txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
931		txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
#	Line 643 \| Line 935 \| class Cmssw(JobType):
935		txt += 'fi \n'
936		txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
937		txt += 'cd '+self.version+'\n'
938	+	########## FEDE FOR DBS2 ######################
939	+	txt += 'SOFTWARE_DIR=`pwd`\n'
940	+	txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
941	+	###############################################
942		### needed grep for bug in scramv1 ###
943	+	txt += scram+' runtime -sh\n'
944		txt += 'eval `'+scram+' runtime -sh \| grep -v SCRAMRT_LSB_JOBNAME`\n'
945	+	txt += 'echo $PATH\n'
946
947		# Handle the arguments:
948		txt += "\n"
#	Line 666 \| Line 964 \| class Cmssw(JobType):
964		txt += ' cd $RUNTIME_AREA\n'
965		txt += ' /bin/rm -rf $WORKING_DIR\n'
966		txt += ' if [ -d $WORKING_DIR ] ;then\n'
967	<	txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
968	<	txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
969	<	txt += ' echo "JobExitCode=50114" \| tee -a $RUNTIME_AREA/$repo\n'
970	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
967	>	txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
968	>	txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
969	>	txt += ' echo "JobExitCode=50114" \| tee -a $RUNTIME_AREA/$repo\n'
970	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
971		txt += ' rm -f $RUNTIME_AREA/$repo \n'
972		txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
973		txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
#	Line 681 \| Line 979 \| class Cmssw(JobType):
979
980		# Prepare job-specific part
981		job = common.job_list[nj]
982	<	pset = os.path.basename(job.configFilename())
983	<	txt += '\n'
984	<	if (self.datasetPath): # standard job
985	<	#txt += 'InputFiles=$2\n'
986	<	txt += 'InputFiles=${args[1]}\n'
987	<	txt += 'echo "Inputfiles:<$InputFiles>"\n'
690	<	txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
691	<	else: # pythia like job
692	<	if (self.sourceSeed):
693	<	# txt += 'Seed=$2\n'
694	<	txt += 'Seed=${args[1]}\n'
695	<	txt += 'echo "Seed: <$Seed>"\n'
696	<	txt += 'sed "s#\<INPUT\>#$Seed#" $RUNTIME_AREA/'+pset+' > tmp.cfg\n'
697	<	if (self.sourceSeedVtx):
698	<	# txt += 'VtxSeed=$3\n'
699	<	txt += 'VtxSeed=${args[2]}\n'
700	<	txt += 'echo "VtxSeed: <$VtxSeed>"\n'
701	<	txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp.cfg > pset.cfg\n'
702	<	else:
703	<	txt += 'mv tmp.cfg pset.cfg\n'
704	<	else:
705	<	txt += '# Copy untouched pset\n'
706	<	txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
982	>	### FEDE FOR DBS OUTPUT PUBLICATION
983	>	if (self.datasetPath):
984	>	txt += '\n'
985	>	txt += 'DatasetPath='+self.datasetPath+'\n'
986	>
987	>	datasetpath_split = self.datasetPath.split("/")
988
989	+	txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
990	+	txt += 'DataTier='+datasetpath_split[2]+'\n'
991	+	#txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
992	+	txt += 'ApplicationFamily=cmsRun\n'
993	+
994	+	else:
995	+	txt += 'DatasetPath=MCDataTier\n'
996	+	txt += 'PrimaryDataset=null\n'
997	+	txt += 'DataTier=null\n'
998	+	#txt += 'ProcessedDataset=null\n'
999	+	txt += 'ApplicationFamily=MCDataTier\n'
1000	+	if self.pset != None: #CarlosDaniele
1001	+	pset = os.path.basename(job.configFilename())
1002	+	txt += '\n'
1003	+	txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
1004	+	if (self.datasetPath): # standard job
1005	+	#txt += 'InputFiles=$2\n'
1006	+	txt += 'InputFiles=${args[1]}\n'
1007	+	txt += 'MaxEvents=${args[2]}\n'
1008	+	txt += 'SkipEvents=${args[3]}\n'
1009	+	txt += 'echo "Inputfiles:<$InputFiles>"\n'
1010	+	txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1011	+	txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1012	+	txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1013	+	txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1014	+	txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1015	+	else: # pythia like job
1016	+	seedIndex=1
1017	+	if (self.firstRun):
1018	+	txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1019	+	txt += 'echo "FirstRun: <$FirstRun>"\n'
1020	+	txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1021	+	seedIndex=seedIndex+1
1022	+
1023	+	if (self.sourceSeed):
1024	+	txt += 'Seed=${args['+str(seedIndex)+']}\n'
1025	+	txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1026	+	seedIndex=seedIndex+1
1027	+	## the following seeds are not always present
1028	+	if (self.sourceSeedVtx):
1029	+	txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1030	+	txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1031	+	txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1032	+	seedIndex += 1
1033	+	if (self.sourceSeedG4):
1034	+	txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1035	+	txt += 'echo "G4Seed: <$G4Seed>"\n'
1036	+	txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1037	+	seedIndex += 1
1038	+	if (self.sourceSeedMix):
1039	+	txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1040	+	txt += 'echo "MixSeed: <$mixSeed>"\n'
1041	+	txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1042	+	seedIndex += 1
1043	+	pass
1044	+	pass
1045	+	txt += 'mv -f '+pset+' pset.cfg\n'
1046
1047		if len(self.additional_inbox_files) > 0:
1048	<	for file in self.additional_inbox_files:
1049	<	relFile = file.split("/")[-1]
1050	<	txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
1051	<	txt += ' cp $RUNTIME_AREA/'+relFile+' .\n'
714	<	txt += ' chmod +x '+relFile+'\n'
715	<	txt += 'fi\n'
716	<	pass
1048	>	txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1049	>	txt += ' tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1050	>	txt += 'fi\n'
1051	>	pass
1052
1053	<	txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1053	>	if self.pset != None: #CarlosDaniele
1054	>	txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1055
1056	<	txt += '\n'
1057	<	txt += 'echo "*** cat pset.cfg *******"\n'
1058	<	txt += 'cat pset.cfg\n'
1059	<	txt += 'echo "**** end pset.cfg ******"\n'
1060	<	txt += '\n'
1061	<	# txt += 'echo "*** cat pset1.cfg *******"\n'
1062	<	# txt += 'cat pset1.cfg\n'
1063	<	# txt += 'echo "**** end pset1.cfg ******"\n'
1056	>	txt += '\n'
1057	>	txt += 'echo "*** cat pset.cfg *******"\n'
1058	>	txt += 'cat pset.cfg\n'
1059	>	txt += 'echo "**** end pset.cfg ******"\n'
1060	>	txt += '\n'
1061	>	### FEDE FOR DBS OUTPUT PUBLICATION
1062	>	txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1063	>	txt += 'echo "PSETHASH = $PSETHASH" \n'
1064	>	##############
1065	>	txt += '\n'
1066	>	# txt += 'echo "*** cat pset1.cfg *******"\n'
1067	>	# txt += 'cat pset1.cfg\n'
1068	>	# txt += 'echo "**** end pset1.cfg ******"\n'
1069		return txt
1070
1071	<	def wsBuildExe(self, nj):
1071	>	def wsBuildExe(self, nj=0):
1072		"""
1073		Put in the script the commands to build an executable
1074		or a library.
#	Line 762 \| Line 1103 \| class Cmssw(JobType):
1103		txt += 'else \n'
1104		txt += ' echo "Successful untar" \n'
1105		txt += 'fi \n'
1106	+	txt += '\n'
1107	+	txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1108	+	txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1109	+	#### FEDE FOR DBS OUTPUT PUBLICATION
1110	+	txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1111	+	#txt += ' export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1112	+	#txt += ' export PYTHONPATH=ProdAgentApi\n'
1113	+	txt += 'else\n'
1114	+	txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1115	+	#txt += ' export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1116	+	#txt += ' export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1117	+	txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1118	+	###################
1119	+	txt += 'fi\n'
1120	+	txt += '\n'
1121	+
1122		pass
1123	<
1123	>
1124		return txt
1125
1126		def modifySteeringCards(self, nj):
1127		"""
1128	<	modify the card provided by the user,
1128	>	modify the card provided by the user,
1129		writing a new card into share dir
1130		"""
1131	<
1131	>
1132		def executableName(self):
1133	<	return self.executable
1133	>	if self.scriptExe: #CarlosDaniele
1134	>	return "sh "
1135	>	else:
1136	>	return self.executable
1137
1138		def executableArgs(self):
1139	<	return " -p pset.cfg"
1139	>	if self.scriptExe:#CarlosDaniele
1140	>	return self.scriptExe + " $NJob"
1141	>	else:
1142	>	# if >= CMSSW_1_5_X, add -e
1143	>	version_array = self.scram.getSWVersion().split('_')
1144	>	major = 0
1145	>	minor = 0
1146	>	try:
1147	>	major = int(version_array[1])
1148	>	minor = int(version_array[2])
1149	>	except:
1150	>	msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1151	>	raise CrabException(msg)
1152	>	if major >= 1 and minor >= 5 :
1153	>	return " -e -p pset.cfg"
1154	>	else:
1155	>	return " -p pset.cfg"
1156
1157		def inputSandbox(self, nj):
1158		"""
1159		Returns a list of filenames to be put in JDL input sandbox.
1160		"""
1161		inp_box = []
1162	<	# dict added to delete duplicate from input sandbox file list
1163	<	seen = {}
1162	>	# # dict added to delete duplicate from input sandbox file list
1163	>	# seen = {}
1164		## code
1165		if os.path.isfile(self.tgzNameWithPath):
1166		inp_box.append(self.tgzNameWithPath)
1167	+	if os.path.isfile(self.MLtgzfile):
1168	+	inp_box.append(self.MLtgzfile)
1169		## config
1170	<	inp_box.append(common.job_list[nj].configFilename())
1170	>	if not self.pset is None:
1171	>	inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1172		## additional input files
1173	<	#for file in self.additional_inbox_files:
1174	<	# inp_box.append(common.work_space.cwdDir()+file)
1173	>	tgz = self.additionalInputFileTgz()
1174	>	inp_box.append(tgz)
1175		return inp_box
1176
1177		def outputSandbox(self, nj):
#	Line 801 \| Line 1180 \| class Cmssw(JobType):
1180		"""
1181		out_box = []
1182
804	–	stdout=common.job_list[nj].stdout()
805	–	stderr=common.job_list[nj].stderr()
806	–
1183		## User Declared output files
1184	<	for out in self.output_file:
1185	<	n_out = nj + 1
1184	>	for out in (self.output_file+self.output_file_sandbox):
1185	>	n_out = nj + 1
1186		out_box.append(self.numberFile_(out,str(n_out)))
1187		return out_box
812	–	return []
1188
1189		def prepareSteeringCards(self):
1190		"""
#	Line 825 \| Line 1200 \| class Cmssw(JobType):
1200		txt = '\n'
1201		txt += '# directory content\n'
1202		txt += 'ls \n'
1203	<	file_list = ''
1204	<	for fileWithSuffix in self.output_file:
1203	>
1204	>	txt += 'output_exit_status=0\n'
1205	>
1206	>	for fileWithSuffix in (self.output_file_sandbox):
1207		output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
831	–	file_list=file_list+output_file_num+' '
1208		txt += '\n'
1209		txt += '# check output file\n'
1210	<	txt += 'ls '+fileWithSuffix+'\n'
1211	<	txt += 'ls_result=$?\n'
1212	<	#txt += 'exe_result=$?\n'
1213	<	txt += 'if [ $ls_result -ne 0 ] ; then\n'
1214	<	txt += ' echo "ERROR: Problem with output file"\n'
1215	<	#txt += ' echo "JOB_EXIT_STATUS = $exe_result"\n'
840	<	#txt += ' echo "JobExitCode=60302" \| tee -a $RUNTIME_AREA/$repo\n'
841	<	#txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
842	<	### OLI_DANIELE
1210	>	txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1211	>	txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1212	>	txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1213	>	txt += 'else\n'
1214	>	txt += ' exit_status=60302\n'
1215	>	txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1216		if common.scheduler.boss_scheduler_name == 'condor_g':
1217		txt += ' if [ $middleware == OSG ]; then \n'
1218		txt += ' echo "prepare dummy output file"\n'
1219		txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1220		txt += ' fi \n'
1221	+	txt += 'fi\n'
1222	+
1223	+	for fileWithSuffix in (self.output_file):
1224	+	output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1225	+	txt += '\n'
1226	+	txt += '# check output file\n'
1227	+	txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1228	+	txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1229	+	txt += ' cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1230		txt += 'else\n'
1231	<	txt += ' cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1231	>	txt += ' exit_status=60302\n'
1232	>	txt += ' echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1233	>	txt += ' echo "JOB_EXIT_STATUS = $exit_status"\n'
1234	>	txt += ' output_exit_status=$exit_status\n'
1235	>	if common.scheduler.boss_scheduler_name == 'condor_g':
1236	>	txt += ' if [ $middleware == OSG ]; then \n'
1237	>	txt += ' echo "prepare dummy output file"\n'
1238	>	txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1239	>	txt += ' fi \n'
1240		txt += 'fi\n'
1241	<
1242	<	txt += 'cd $RUNTIME_AREA\n'
1243	<	file_list=file_list[:-1]
1244	<	txt += 'file_list="'+file_list+'"\n'
1241	>	file_list = []
1242	>	for fileWithSuffix in (self.output_file):
1243	>	file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1244	>
1245	>	txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1246		txt += 'cd $RUNTIME_AREA\n'
856	–	### OLI_DANIELE
857	–	txt += 'if [ $middleware == OSG ]; then\n'
858	–	txt += ' cd $RUNTIME_AREA\n'
859	–	txt += ' echo "Remove working directory: $WORKING_DIR"\n'
860	–	txt += ' /bin/rm -rf $WORKING_DIR\n'
861	–	txt += ' if [ -d $WORKING_DIR ] ;then\n'
862	–	txt += ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
863	–	txt += ' echo "JOB_EXIT_STATUS = 60999"\n'
864	–	txt += ' echo "JobExitCode=60999" \| tee -a $RUNTIME_AREA/$repo\n'
865	–	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
866	–	txt += ' rm -f $RUNTIME_AREA/$repo \n'
867	–	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
868	–	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
869	–	txt += ' fi\n'
870	–	txt += 'fi\n'
871	–	txt += '\n'
1247		return txt
1248
1249		def numberFile_(self, file, txt):
#	Line 879 \| Line 1254 \| class Cmssw(JobType):
1254		# take away last extension
1255		name = p[0]
1256		for x in p[1:-1]:
1257	<	name=name+"."+x
1257	>	name=name+"."+x
1258		# add "_txt"
1259		if len(p)>1:
1260	<	ext = p[len(p)-1]
1261	<	#result = name + '_' + str(txt) + "." + ext
887	<	result = name + '_' + txt + "." + ext
1260	>	ext = p[len(p)-1]
1261	>	result = name + '_' + txt + "." + ext
1262		else:
1263	<	#result = name + '_' + str(txt)
1264	<	result = name + '_' + txt
891	<
1263	>	result = name + '_' + txt
1264	>
1265		return result
1266
1267	<	def getRequirements(self, nj):
1267	>	def getRequirements(self, nj=[]):
1268		"""
1269	<	return job requirements to add to jdl files
1269	>	return job requirements to add to jdl files
1270		"""
1271		req = ''
1272	<	if common.analisys_common_info['sw_version']:
1272	>	if self.version:
1273		req='Member("VO-cms-' + \
1274	<	common.analisys_common_info['sw_version'] + \
1274	>	self.version + \
1275	>	'", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1276	>	## SL add requirement for OS version only if SL4
1277	>	#reSL4 = re.compile( r'slc4' )
1278	>	if self.executable_arch: # and reSL4.search(self.executable_arch):
1279	>	req+=' && Member("VO-cms-' + \
1280	>	self.executable_arch + \
1281		'", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1282
1283		req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1284
906	–	## here we should get the requirement for job nj
907	–	sites = common.jobDB.destination(nj)
908	–
909	–	# check for "Any" site, in case no requirement for site
910	–	if len(sites)>0 and sites[0]!="Any":
911	–	req = req + ' && anyMatch(other.storage.CloseSEs, ('
912	–	for site in sites:
913	–	#req = req + 'other.GlueCEInfoHostName == "' + site + '" \|\| '
914	–	req = req + 'target.GlueSEUniqueID=="' + site + '" \|\| '
915	–	pass
916	–	# remove last \|\|
917	–	req = req[0:-4]
918	–	req = req + '))'
919	–
1285		return req
1286
1287		def configFilename(self):
#	Line 933 \| Line 1298 \| class Cmssw(JobType):
1298		txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n'
1299		txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1300		txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1301	+	txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1302		txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1303	<	txt += ' elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1304	<	txt += ' # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1305	<	txt += ' source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1303	>	txt += ' elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1304	>	txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1305	>	txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1306	>	txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1307		txt += ' else\n'
1308	<	txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1308	>	txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1309		txt += ' echo "JOB_EXIT_STATUS = 10020"\n'
1310		txt += ' echo "JobExitCode=10020" \| tee -a $RUNTIME_AREA/$repo\n'
1311		txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
#	Line 951 \| Line 1318 \| class Cmssw(JobType):
1318		txt += ' cd $RUNTIME_AREA\n'
1319		txt += ' /bin/rm -rf $WORKING_DIR\n'
1320		txt += ' if [ -d $WORKING_DIR ] ;then\n'
1321	<	txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1322	<	txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1323	<	txt += ' echo "JobExitCode=10017" \| tee -a $RUNTIME_AREA/$repo\n'
1324	<	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1325	<	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1326	<	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1327	<	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1321	>	txt += ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1322	>	txt += ' echo "JOB_EXIT_STATUS = 10017"\n'
1323	>	txt += ' echo "JobExitCode=10017" \| tee -a $RUNTIME_AREA/$repo\n'
1324	>	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1325	>	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1326	>	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1327	>	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1328		txt += ' fi\n'
1329		txt += '\n'
1330		txt += ' exit 1\n'
#	Line 967 \| Line 1334 \| class Cmssw(JobType):
1334		txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n'
1335
1336		return txt
1337	<
1337	>
1338		### OLI_DANIELE
1339		def wsSetupCMSLCGEnvironment_(self):
1340		"""
#	Line 1012 \| Line 1379 \| class Cmssw(JobType):
1379		txt += ' fi\n'
1380		txt += ' fi\n'
1381		txt += ' \n'
1015	–	txt += ' string=`cat /etc/redhat-release`\n'
1016	–	txt += ' echo $string\n'
1017	–	txt += ' if [[ $string = *alhalla* ]]; then\n'
1018	–	txt += ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1019	–	txt += ' elif [[ $string = *Enterprise* ]] \|\| [[ $string = *cientific* ]]; then\n'
1020	–	txt += ' export SCRAM_ARCH=slc3_ia32_gcc323\n'
1021	–	txt += ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1022	–	txt += ' else\n'
1023	–	txt += ' echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1024	–	txt += ' echo "JOB_EXIT_STATUS = 10033"\n'
1025	–	txt += ' echo "JobExitCode=10033" \| tee -a $RUNTIME_AREA/$repo\n'
1026	–	txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
1027	–	txt += ' rm -f $RUNTIME_AREA/$repo \n'
1028	–	txt += ' echo "MonitorJobID=`echo $MonitorJobID`" \| tee -a $RUNTIME_AREA/$repo \n'
1029	–	txt += ' echo "MonitorID=`echo $MonitorID`" \| tee -a $RUNTIME_AREA/$repo\n'
1030	–	txt += ' exit 1\n'
1031	–	txt += ' fi\n'
1382		txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1383		txt += ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1384		return txt
1385
1386	+	### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    Build the shell fragment that rewrites the FrameworkJob Report.

    When cfg_params['USER.publish_data'] is 1 the fragment makes
    ModifyJobReport.py executable, derives FOR_LFN from $SE_PATH, runs the
    script on crab_fjr_$NJob.xml and, on success, moves
    NewFrameworkJobReport.xml over it; on failure it sets exit_status=1.
    In every other case the fragment only echoes that no publication is
    required.

    nj -- kept for interface compatibility; it is not used here.

    Returns the fragment as a string.  Raises KeyError when publication is
    enabled but 'USER.publish_data_name' is missing from cfg_params.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']

    # The same command line is first echoed (for the job log) and then run.
    modify_cmd = ('$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py'
                  ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier'
                  ' $ProcessedDataset $ApplicationFamily $executable'
                  ' $CMSSW_VERSION $PSETHASH $SE $SE_PATH')

    pieces = [
        'echo "Modify Job Report" \n',
        'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n',
        # Make sure SE and SE_PATH are defined before they are used below.
        'if [ -z "$SE" ]; then\n',
        ' SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        ' SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + processedDataset + '\n',
        'if [ "$SE_PATH" == "" ]; then\n',
        ' FOR_LFN=/copy_problems/ \n',
        'else \n',
        # A real shell pipe is required here: an escaped "\|" would be handed
        # to echo as a literal argument and awk would never run.
        ' tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n',
        # NOTE: the "store" prefix is hardcoded (flagged by the original author).
        ' FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + modify_cmd + '"\n',
        modify_cmd + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' exit_status=1\n',
        ' echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(pieces)
1454	+
def cleanEnv(self):
    """
    Build the shell fragment that cleans the worker node on OSG.

    The fragment runs only when $middleware is OSG: it changes to
    $RUNTIME_AREA and removes $WORKING_DIR.  If the directory still exists
    afterwards it reports exit code 60999, writes it to $RUNTIME_AREA/$repo,
    calls dumpStatus, removes the file and then writes the monitor IDs to it.

    Returns the fragment as a string; no instance state is read.
    """
    pieces = [
        'if [ $middleware == OSG ]; then\n',
        ' cd $RUNTIME_AREA\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        ' echo "JOB_EXIT_STATUS = 60999"\n',
        # The three tee lines need a real pipe: an escaped "\|" would make
        # echo print the rest of the line instead of appending to the file.
        ' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' rm -f $RUNTIME_AREA/$repo \n',
        ' echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        ' echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        ' fi\n',
        'fi\n',
        '\n',
    ]
    return ''.join(pieces)
1474	+
def setParam_(self, param, value):
    """
    Store value under key param in the job-type parameter dictionary.
    """
    self._params.update({param: value})
1477
#	Line 1041 \| Line 1480 \| class Cmssw(JobType):
1480
def setTaskid_(self):
    """
    Copy the 'taskId' entry of cfg_params into self._taskId.

    Raises KeyError when cfg_params has no 'taskId' entry.
    """
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
1483	<
1483	>
def getTaskid(self):
    """
    Return the task id held in self._taskId.
    """
    task_id = self._taskId
    return task_id
1486
1048	–	#######################################################################
1487		def uniquelist(self, old):
1488		"""
1489		remove duplicates from a list
#	Line 1054 \| Line 1492 \| class Cmssw(JobType):
1492		for e in old:
1493		nd[e]=0
1494		return nd.keys()
1495	+
1496	+
def checkOut(self, limit):
    """
    Build the shell fragment that checks the output sandbox size.

    The fragment sums the sizes of all output files (every entry of
    self.output_file and self.output_file_sandbox whose name does not
    contain 'crab_fjr', numbered through self.numberFile_ with '$NJob').
    If the sum exceeds limit, it finds the first file that makes the running
    total exceed the limit, removes that file and every file listed after it,
    and sets exit_status=70000.

    limit -- threshold written verbatim into the script as "limit=<limit>";
             the script compares it with integer tests (-lt).

    Returns the fragment as a string.
    """
    numbered = [self.numberFile_(name, '$NJob')
                for name in (self.output_file + self.output_file_sandbox)
                if name.find('crab_fjr') == -1]
    # Each name is preceded by one blank, exactly as the shell loops expect.
    allOutFiles = ''.join([' ' + name for name in numbered])

    pieces = [
        'echo "*****************************************"\n',
        'echo " Starting output sandbox limit check "\n',
        'echo "*****************************************"\n',
        'echo "OUTPUT files: ' + allOutFiles + '";\n',
        'ls -gGhrta;\n',
        # First pass: total size of the files that actually exist.
        'sum=0;\n',
        'for file in ' + allOutFiles + ' ; do\n',
        ' if [ -e $file ]; then\n',
        # With -g -G the owner and group columns are dropped, so the size is
        # the third field.  The pipe must be a real "|" for awk to run.
        ' tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n',
        ' sum=`expr $sum + $tt`\n',
        ' else\n',
        ' echo "WARNING: output file $file not found!"\n',
        ' fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "OUTPUT FILES LIMIT SET TO: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        ' echo "WARNING: output files have to big size - something will be lost;"\n',
        ' echo " checking the output file sizes..."\n',
        # Second pass: find the first file that pushes the total over limit.
        ' tot=0;\n',
        ' for file2 in ' + allOutFiles + ' ; do\n',
        ' tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n',
        ' tot=`expr $tot + $tt`;\n',
        ' if [ $limit -lt $tot ]; then\n',
        ' tot=`expr $tot - $tt`;\n',
        # The loop variable here is file2; $file is left over from the first
        # pass and always names the last file of the list.
        ' fileLast=$file2;\n',
        ' break;\n',
        ' fi\n',
        ' done\n',
        ' echo "Dimension calculated: $tot"; echo "First file to exclude: $file2";\n',
        # Third pass: drop fileLast and everything listed after it.
        ' flag=0;\n',
        ' for filess in ' + allOutFiles + ' ; do\n',
        ' if [ $fileLast = $filess ]; then\n',
        ' flag=1;\n',
        ' fi\n',
        ' if [ $flag -eq 1 ]; then\n',
        ' rm -f $filess;\n',
        ' fi\n',
        ' done\n',
        ' ls -agGhrt;\n',
        ' echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n',
        ' echo "JOB_EXIT_STATUS = 70000";\n',
        ' exit_status=70000;\n',
        'else\n',
        ' echo "Total Output dimension $sum is fine.";\n',
        'fi\n',
        'echo "*****************************************"\n',
        'echo "* Ending output sandbox limit check *"\n',
        'echo "*****************************************"\n',
    ]
    return ''.join(pieces)

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines
>	Changed lines

Comparing COMP/CRAB/python/cms_cmssw.py (file contents): Revision 1.35 by gutsche, Thu Aug 3 22:44:40 2006 UTC vs. Revision 1.132 by ewv, Mon Oct 29 21:03:11 2007 UTC

Diff Legend

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.35 by gutsche, Thu Aug 3 22:44:40 2006 UTC vs.
Revision 1.132 by ewv, Mon Oct 29 21:03:11 2007 UTC