1 |
|
from JobType import JobType |
2 |
– |
from crab_logger import Logger |
2 |
|
from crab_exceptions import * |
3 |
|
from crab_util import * |
5 |
– |
from BlackWhiteListParser import SEBlackWhiteListParser |
4 |
|
import common |
5 |
|
import Scram |
6 |
+ |
from Splitter import JobSplitter |
7 |
|
|
8 |
+ |
from IMProv.IMProvNode import IMProvNode |
9 |
|
import os, string, glob |
10 |
|
|
11 |
|
class Cmssw(JobType): |
12 |
|
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew): |
13 |
|
JobType.__init__(self, 'CMSSW') |
14 |
< |
common.logger.debug(3,'CMSSW::__init__') |
14 |
> |
common.logger.debug('CMSSW::__init__') |
15 |
|
self.skip_blocks = skip_blocks |
16 |
< |
|
17 |
< |
self.argsList = [] |
16 |
> |
self.argsList = 1 |
17 |
|
|
18 |
|
self._params = {} |
19 |
|
self.cfg_params = cfg_params |
21 |
– |
# init BlackWhiteListParser |
22 |
– |
self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params) |
20 |
|
|
21 |
|
### Temporary patch to automatically skip the ISB size check: |
22 |
|
server=self.cfg_params.get('CRAB.server_name',None) |
23 |
< |
size = 9.5 |
24 |
< |
if server: size = 99999 |
23 |
> |
size = 9.5 |
24 |
> |
if server or common.scheduler.name().upper() in ['LSF','CAF']: size = 99999 |
25 |
|
### D.S. |
26 |
< |
self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',size)) |
26 |
> |
self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size)) |
27 |
|
|
28 |
|
# number of jobs requested to be created, limit obj splitting |
29 |
|
self.ncjobs = ncjobs |
30 |
|
|
34 |
– |
log = common.logger |
35 |
– |
|
31 |
|
self.scram = Scram.Scram(cfg_params) |
32 |
|
self.additional_inbox_files = [] |
33 |
|
self.scriptExe = '' |
34 |
|
self.executable = '' |
35 |
|
self.executable_arch = self.scram.getArch() |
36 |
< |
self.tgz_name = 'default.tgz' |
36 |
> |
self.tgz_name = 'default.tar.gz' |
37 |
> |
self.tar_name = 'default.tar' |
38 |
|
self.scriptName = 'CMSSW.sh' |
39 |
|
self.pset = '' |
40 |
|
self.datasetPath = '' |
41 |
|
|
42 |
+ |
self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name |
43 |
|
# set FJR file name |
44 |
|
self.fjrFileName = 'crab_fjr.xml' |
45 |
|
|
46 |
|
self.version = self.scram.getSWVersion() |
47 |
+ |
common.logger.log(10-1,"CMSSW version is: "+str(self.version)) |
48 |
+ |
|
49 |
|
version_array = self.version.split('_') |
50 |
|
self.CMSSW_major = 0 |
51 |
|
self.CMSSW_minor = 0 |
58 |
|
msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!" |
59 |
|
raise CrabException(msg) |
60 |
|
|
61 |
+ |
if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5): |
62 |
+ |
msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version." |
63 |
+ |
raise CrabException(msg) |
64 |
+ |
""" |
65 |
+ |
As CMSSW versions are dropped we can drop more code: |
66 |
+ |
1.X dropped: drop support for running .cfg on WN |
67 |
+ |
2.0 dropped: drop all support for cfg here and in writeCfg |
68 |
+ |
2.0 dropped: Recheck the random number seed support |
69 |
+ |
""" |
70 |
+ |
|
71 |
|
### collect Data cards |
72 |
|
|
64 |
– |
if not cfg_params.has_key('CMSSW.datasetpath'): |
65 |
– |
msg = "Error: datasetpath not defined " |
66 |
– |
raise CrabException(msg) |
73 |
|
|
74 |
|
### Temporary: added to remove input file control in the case of PU |
75 |
|
self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None) |
76 |
|
|
77 |
|
tmp = cfg_params['CMSSW.datasetpath'] |
78 |
< |
log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp) |
78 |
> |
common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp) |
79 |
|
|
80 |
|
if tmp =='': |
81 |
|
msg = "Error: datasetpath not defined " |
88 |
|
self.selectNoInput = 0 |
89 |
|
|
90 |
|
self.dataTiers = [] |
91 |
< |
self.debugWrap = '' |
92 |
< |
self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False) |
93 |
< |
if self.debug_wrapper: self.debugWrap='--debug' |
91 |
> |
|
92 |
> |
self.debugWrap='' |
93 |
> |
self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0)) |
94 |
> |
if self.debug_wrapper == 1: self.debugWrap='--debug' |
95 |
> |
|
96 |
|
## now the application |
97 |
+ |
self.managedGenerators = ['madgraph', 'comphep', 'lhe'] |
98 |
+ |
self.generator = cfg_params.get('CMSSW.generator','pythia').lower() |
99 |
|
self.executable = cfg_params.get('CMSSW.executable','cmsRun') |
100 |
< |
log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable) |
100 |
> |
common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable) |
101 |
|
|
102 |
|
if not cfg_params.has_key('CMSSW.pset'): |
103 |
|
raise CrabException("PSet file missing. Cannot run cmsRun ") |
104 |
|
self.pset = cfg_params['CMSSW.pset'] |
105 |
< |
log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset) |
105 |
> |
common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset) |
106 |
|
if self.pset.lower() != 'none' : |
107 |
|
if (not os.path.exists(self.pset)): |
108 |
|
raise CrabException("User defined PSet file "+self.pset+" does not exist") |
134 |
|
raise CrabException(msg) |
135 |
|
self.additional_inbox_files.append(string.strip(self.scriptExe)) |
136 |
|
|
137 |
+ |
self.AdditionalArgs = cfg_params.get('USER.script_arguments',None) |
138 |
+ |
if self.AdditionalArgs : self.AdditionalArgs = string.replace(self.AdditionalArgs,',',' ') |
139 |
+ |
|
140 |
|
if self.datasetPath == None and self.pset == None and self.scriptExe == '' : |
141 |
|
msg ="Error. script_exe not defined" |
142 |
|
raise CrabException(msg) |
143 |
|
|
144 |
|
# use parent files... |
145 |
< |
self.useParent = self.cfg_params.get('CMSSW.use_parent',False) |
145 |
> |
self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0)) |
146 |
|
|
147 |
|
## additional input files |
148 |
|
if cfg_params.has_key('USER.additional_input_files'): |
165 |
|
self.additional_inbox_files.append(string.strip(file)) |
166 |
|
pass |
167 |
|
pass |
168 |
< |
common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files)) |
168 |
> |
common.logger.debug("Additional input files: "+str(self.additional_inbox_files)) |
169 |
|
pass |
170 |
|
|
158 |
– |
## Events per job |
159 |
– |
if cfg_params.has_key('CMSSW.events_per_job'): |
160 |
– |
self.eventsPerJob =int( cfg_params['CMSSW.events_per_job']) |
161 |
– |
self.selectEventsPerJob = 1 |
162 |
– |
else: |
163 |
– |
self.eventsPerJob = -1 |
164 |
– |
self.selectEventsPerJob = 0 |
165 |
– |
|
166 |
– |
## number of jobs |
167 |
– |
if cfg_params.has_key('CMSSW.number_of_jobs'): |
168 |
– |
self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs']) |
169 |
– |
self.selectNumberOfJobs = 1 |
170 |
– |
else: |
171 |
– |
self.theNumberOfJobs = 0 |
172 |
– |
self.selectNumberOfJobs = 0 |
173 |
– |
|
174 |
– |
if cfg_params.has_key('CMSSW.total_number_of_events'): |
175 |
– |
self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events']) |
176 |
– |
self.selectTotalNumberEvents = 1 |
177 |
– |
if self.selectNumberOfJobs == 1: |
178 |
– |
if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs): |
179 |
– |
msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs ' |
180 |
– |
raise CrabException(msg) |
181 |
– |
else: |
182 |
– |
self.total_number_of_events = 0 |
183 |
– |
self.selectTotalNumberEvents = 0 |
184 |
– |
|
185 |
– |
if self.pset != None: |
186 |
– |
if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ): |
187 |
– |
msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.' |
188 |
– |
raise CrabException(msg) |
189 |
– |
else: |
190 |
– |
if (self.selectNumberOfJobs == 0): |
191 |
– |
msg = 'Must specify number_of_jobs.' |
192 |
– |
raise CrabException(msg) |
171 |
|
|
172 |
|
## New method of dealing with seeds |
173 |
|
self.incrementSeeds = [] |
183 |
|
tmp.strip() |
184 |
|
self.incrementSeeds.append(tmp) |
185 |
|
|
208 |
– |
## FUTURE: Can remove in CRAB 2.4.0 |
209 |
– |
self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None) |
210 |
– |
self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None) |
211 |
– |
self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None) |
212 |
– |
self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None) |
213 |
– |
if self.sourceSeed or self.sourceSeedVtx or self.sourceSeedG4 or self.sourceSeedMix: |
214 |
– |
msg = 'pythia_seed, vtx_seed, g4_seed, and mix_seed are no longer valid settings. You must use increment_seeds or preserve_seeds' |
215 |
– |
raise CrabException(msg) |
216 |
– |
|
186 |
|
self.firstRun = cfg_params.get('CMSSW.first_run',None) |
187 |
|
|
188 |
|
# Copy/return |
189 |
|
self.copy_data = int(cfg_params.get('USER.copy_data',0)) |
190 |
|
self.return_data = int(cfg_params.get('USER.return_data',0)) |
191 |
|
|
192 |
+ |
self.conf = {} |
193 |
+ |
self.conf['pubdata'] = None |
194 |
+ |
# number of jobs requested to be created, limit obj splitting DD |
195 |
|
#DBSDLS-start |
196 |
|
## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code |
197 |
|
self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py) |
203 |
|
if self.datasetPath: |
204 |
|
blockSites = self.DataDiscoveryAndLocation(cfg_params) |
205 |
|
#DBSDLS-end |
206 |
+ |
self.conf['blockSites']=blockSites |
207 |
|
|
208 |
|
## Select Splitting |
209 |
+ |
splitByRun = int(cfg_params.get('CMSSW.split_by_run',0)) |
210 |
+ |
|
211 |
|
if self.selectNoInput: |
212 |
|
if self.pset == None: |
213 |
< |
self.jobSplittingForScript() |
213 |
> |
self.algo = 'ForScript' |
214 |
|
else: |
215 |
< |
self.jobSplittingNoInput() |
216 |
< |
elif (cfg_params.get('CMSSW.noblockboundary',0)): |
217 |
< |
self.jobSplittingNoBlockBoundary(blockSites) |
215 |
> |
self.algo = 'NoInput' |
216 |
> |
self.conf['managedGenerators']=self.managedGenerators |
217 |
> |
self.conf['generator']=self.generator |
218 |
> |
elif splitByRun ==1: |
219 |
> |
self.algo = 'RunBased' |
220 |
|
else: |
221 |
< |
self.jobSplittingByBlocks(blockSites) |
221 |
> |
self.algo = 'EventBased' |
222 |
> |
|
223 |
> |
# self.algo = 'LumiBased' |
224 |
> |
splitter = JobSplitter(self.cfg_params,self.conf) |
225 |
> |
self.dict = splitter.Algos()[self.algo]() |
226 |
|
|
227 |
+ |
self.argsFile= '%s/arguments.xml'%common.work_space.shareDir() |
228 |
+ |
self.rootArgsFilename= 'arguments' |
229 |
|
# modify Pset only the first time |
230 |
< |
if isNew: |
231 |
< |
if self.pset != None: |
232 |
< |
import PsetManipulator as pp |
233 |
< |
PsetEdit = pp.PsetManipulator(self.pset) |
234 |
< |
try: |
235 |
< |
# Add FrameworkJobReport to parameter-set, set max events. |
236 |
< |
# Reset later for data jobs by writeCFG which does all modifications |
237 |
< |
PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5 |
238 |
< |
PsetEdit.maxEvent(self.eventsPerJob) |
239 |
< |
PsetEdit.psetWriter(self.configFilename()) |
240 |
< |
## If present, add TFileService to output files |
241 |
< |
if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)): |
242 |
< |
tfsOutput = PsetEdit.getTFileService() |
243 |
< |
if tfsOutput: |
244 |
< |
if tfsOutput in self.output_file: |
245 |
< |
common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files") |
246 |
< |
else: |
247 |
< |
outfileflag = True #output found |
248 |
< |
self.output_file.append(tfsOutput) |
249 |
< |
common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)") |
250 |
< |
pass |
251 |
< |
pass |
252 |
< |
## If present and requested, add PoolOutputModule to output files |
253 |
< |
if int(cfg_params.get('CMSSW.get_edm_output',0)): |
254 |
< |
edmOutput = PsetEdit.getPoolOutputModule() |
255 |
< |
if edmOutput: |
256 |
< |
if edmOutput in self.output_file: |
257 |
< |
common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files") |
258 |
< |
else: |
259 |
< |
self.output_file.append(edmOutput) |
260 |
< |
common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)") |
261 |
< |
pass |
262 |
< |
pass |
263 |
< |
except CrabException: |
264 |
< |
msg='Error while manipulating ParameterSet: exiting...' |
265 |
< |
raise CrabException(msg) |
266 |
< |
## Prepare inputSandbox TarBall (only the first time) |
267 |
< |
self.tgzNameWithPath = self.getTarBall(self.executable) |
230 |
> |
if (isNew and self.pset != None): self.ModifyPset() |
231 |
> |
|
232 |
> |
## Prepare inputSandbox TarBall (only the first time) |
233 |
> |
self.tarNameWithPath = self.getTarBall(self.executable) |
234 |
> |
|
235 |
> |
|
236 |
> |
def ModifyPset(self): |
237 |
> |
import PsetManipulator as pp |
238 |
> |
PsetEdit = pp.PsetManipulator(self.pset) |
239 |
> |
try: |
240 |
> |
# Add FrameworkJobReport to parameter-set, set max events. |
241 |
> |
# Reset later for data jobs by writeCFG which does all modifications |
242 |
> |
PsetEdit.maxEvent(1) |
243 |
> |
PsetEdit.skipEvent(0) |
244 |
> |
PsetEdit.psetWriter(self.configFilename()) |
245 |
> |
## If present, add TFileService to output files |
246 |
> |
if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)): |
247 |
> |
tfsOutput = PsetEdit.getTFileService() |
248 |
> |
if tfsOutput: |
249 |
> |
if tfsOutput in self.output_file: |
250 |
> |
common.logger.debug("Output from TFileService "+tfsOutput+" already in output files") |
251 |
> |
else: |
252 |
> |
outfileflag = True #output found |
253 |
> |
self.output_file.append(tfsOutput) |
254 |
> |
common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files") |
255 |
> |
pass |
256 |
> |
pass |
257 |
> |
## If present and requested, add PoolOutputModule to output files |
258 |
> |
edmOutput = PsetEdit.getPoolOutputModule() |
259 |
> |
if int(self.cfg_params.get('CMSSW.get_edm_output',0)): |
260 |
> |
if edmOutput: |
261 |
> |
if edmOutput in self.output_file: |
262 |
> |
common.logger.debug("Output from PoolOutputModule "+edmOutput+" already in output files") |
263 |
> |
else: |
264 |
> |
self.output_file.append(edmOutput) |
265 |
> |
common.logger.info("Adding "+edmOutput+" (from PoolOutputModule) to list of output files") |
266 |
> |
pass |
267 |
> |
pass |
268 |
> |
# not required: check anyhow if present, to avoid accidental T2 overload |
269 |
> |
else: |
270 |
> |
if edmOutput and (edmOutput not in self.output_file): |
271 |
> |
msg = "ERROR: a PoolOutputModule is present in your ParameteSet %s \n"%self.pset |
272 |
> |
msg +=" but the file produced ( %s ) is not in the list of output files\n"%edmOutput |
273 |
> |
msg += "WARNING: please remove it. If you want to keep it, add the file to output_files or use CMSSW.get_edm_output\n" |
274 |
> |
if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)): |
275 |
> |
msg +=" CMSSW.ignore_edm_output==True : Hope you know what you are doing...\n" |
276 |
> |
common.logger.info(msg) |
277 |
> |
else: |
278 |
> |
raise CrabException(msg) |
279 |
> |
pass |
280 |
> |
pass |
281 |
> |
|
282 |
> |
if (PsetEdit.getBadFilesSetting()): |
283 |
> |
msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified." |
284 |
> |
common.logger.info(msg) |
285 |
> |
|
286 |
> |
except CrabException, msg: |
287 |
> |
common.logger.info(str(msg)) |
288 |
> |
msg='Error while manipulating ParameterSet (see previous message, if any): exiting...' |
289 |
> |
raise CrabException(msg) |
290 |
> |
|
291 |
|
|
292 |
|
def DataDiscoveryAndLocation(self, cfg_params): |
293 |
|
|
294 |
|
import DataDiscovery |
295 |
|
import DataLocation |
296 |
< |
common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()") |
296 |
> |
common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()") |
297 |
|
|
298 |
|
datasetPath=self.datasetPath |
299 |
|
|
300 |
|
## Contact the DBS |
301 |
< |
common.logger.message("Contacting Data Discovery Services ...") |
301 |
> |
common.logger.info("Contacting Data Discovery Services ...") |
302 |
|
try: |
303 |
|
self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks) |
304 |
|
self.pubdata.fetchDBSInfo() |
314 |
|
raise CrabException(msg) |
315 |
|
|
316 |
|
self.filesbyblock=self.pubdata.getFiles() |
317 |
< |
self.eventsbyblock=self.pubdata.getEventsPerBlock() |
318 |
< |
self.eventsbyfile=self.pubdata.getEventsPerFile() |
313 |
< |
self.parentFiles=self.pubdata.getParent() |
317 |
> |
#print self.filesbyblock |
318 |
> |
self.conf['pubdata']=self.pubdata |
319 |
|
|
320 |
|
## get max number of events |
321 |
|
self.maxEvents=self.pubdata.getMaxEvents() |
324 |
|
try: |
325 |
|
dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params) |
326 |
|
dataloc.fetchDLSInfo() |
327 |
+ |
|
328 |
|
except DataLocation.DataLocationError , ex: |
329 |
|
msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage() |
330 |
|
raise CrabException(msg) |
331 |
|
|
332 |
|
|
333 |
< |
sites = dataloc.getSites() |
333 |
> |
unsorted_sites = dataloc.getSites() |
334 |
> |
sites = self.filesbyblock.fromkeys(self.filesbyblock,'') |
335 |
> |
for lfn in self.filesbyblock.keys(): |
336 |
> |
if unsorted_sites.has_key(lfn): |
337 |
> |
sites[lfn]=unsorted_sites[lfn] |
338 |
> |
else: |
339 |
> |
sites[lfn]=[] |
340 |
> |
|
341 |
> |
if len(sites)==0: |
342 |
> |
msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath |
343 |
> |
msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n" |
344 |
> |
msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n" |
345 |
> |
raise CrabException(msg) |
346 |
> |
|
347 |
|
allSites = [] |
348 |
|
listSites = sites.values() |
349 |
|
for listSite in listSites: |
350 |
|
for oneSite in listSite: |
351 |
|
allSites.append(oneSite) |
352 |
< |
allSites = self.uniquelist(allSites) |
334 |
< |
|
335 |
< |
# screen output |
336 |
< |
common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n") |
352 |
> |
allSites = [it for i, it in enumerate(allSites) if it not in allSites[:i]]  # keep each site once, preserving order
353 |
|
|
338 |
– |
return sites |
339 |
– |
|
340 |
– |
def jobSplittingByBlocks(self, blockSites): |
341 |
– |
""" |
342 |
– |
Perform job splitting. Jobs run over an integer number of files |
343 |
– |
and no more than one block. |
344 |
– |
ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values |
345 |
– |
REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs, |
346 |
– |
self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs, |
347 |
– |
self.maxEvents, self.filesbyblock |
348 |
– |
SETS: self.jobDestination - Site destination(s) for each job (a list of lists) |
349 |
– |
self.total_number_of_jobs - Total # of jobs |
350 |
– |
self.list_of_args - File(s) job will run on (a list of lists) |
351 |
– |
""" |
352 |
– |
|
353 |
– |
# ---- Handle the possible job splitting configurations ---- # |
354 |
– |
if (self.selectTotalNumberEvents): |
355 |
– |
totalEventsRequested = self.total_number_of_events |
356 |
– |
if (self.selectEventsPerJob): |
357 |
– |
eventsPerJobRequested = self.eventsPerJob |
358 |
– |
if (self.selectNumberOfJobs): |
359 |
– |
totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob |
360 |
– |
|
361 |
– |
# If user requested all the events in the dataset |
362 |
– |
if (totalEventsRequested == -1): |
363 |
– |
eventsRemaining=self.maxEvents |
364 |
– |
# If user requested more events than are in the dataset |
365 |
– |
elif (totalEventsRequested > self.maxEvents): |
366 |
– |
eventsRemaining = self.maxEvents |
367 |
– |
common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.") |
368 |
– |
# If user requested less events than are in the dataset |
369 |
– |
else: |
370 |
– |
eventsRemaining = totalEventsRequested |
371 |
– |
|
372 |
– |
# If user requested more events per job than are in the dataset |
373 |
– |
if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents): |
374 |
– |
eventsPerJobRequested = self.maxEvents |
375 |
– |
|
376 |
– |
# For user info at end |
377 |
– |
totalEventCount = 0 |
378 |
– |
|
379 |
– |
if (self.selectTotalNumberEvents and self.selectNumberOfJobs): |
380 |
– |
eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs) |
381 |
– |
|
382 |
– |
if (self.selectNumberOfJobs): |
383 |
– |
common.logger.message("May not create the exact number_of_jobs requested.") |
384 |
– |
|
385 |
– |
if ( self.ncjobs == 'all' ) : |
386 |
– |
totalNumberOfJobs = 999999999 |
387 |
– |
else : |
388 |
– |
totalNumberOfJobs = self.ncjobs |
389 |
– |
|
390 |
– |
blocks = blockSites.keys() |
391 |
– |
blockCount = 0 |
392 |
– |
# Backup variable in case self.maxEvents counted events in a non-included block |
393 |
– |
numBlocksInDataset = len(blocks) |
394 |
– |
|
395 |
– |
jobCount = 0 |
396 |
– |
list_of_lists = [] |
397 |
– |
|
398 |
– |
# list tracking which jobs belong to which block
399 |
– |
jobsOfBlock = {} |
400 |
– |
|
401 |
– |
# ---- Iterate over the blocks in the dataset until ---- # |
402 |
– |
# ---- we've met the requested total # of events ---- # |
403 |
– |
while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)): |
404 |
– |
block = blocks[blockCount] |
405 |
– |
blockCount += 1 |
406 |
– |
if block not in jobsOfBlock.keys() : |
407 |
– |
jobsOfBlock[block] = [] |
408 |
– |
|
409 |
– |
if self.eventsbyblock.has_key(block) : |
410 |
– |
numEventsInBlock = self.eventsbyblock[block] |
411 |
– |
common.logger.debug(5,'Events in Block File '+str(numEventsInBlock)) |
412 |
– |
|
413 |
– |
files = self.filesbyblock[block] |
414 |
– |
numFilesInBlock = len(files) |
415 |
– |
if (numFilesInBlock <= 0): |
416 |
– |
continue |
417 |
– |
fileCount = 0 |
418 |
– |
|
419 |
– |
# ---- New block => New job ---- # |
420 |
– |
parString = "" |
421 |
– |
# counter for number of events in files currently worked on |
422 |
– |
filesEventCount = 0 |
423 |
– |
# flag if next while loop should touch new file |
424 |
– |
newFile = 1 |
425 |
– |
# job event counter |
426 |
– |
jobSkipEventCount = 0 |
427 |
– |
|
428 |
– |
# ---- Iterate over the files in the block until we've met the requested ---- # |
429 |
– |
# ---- total # of events or we've gone over all the files in this block ---- # |
430 |
– |
pString='' |
431 |
– |
while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ): |
432 |
– |
file = files[fileCount] |
433 |
– |
if self.useParent: |
434 |
– |
parent = self.parentFiles[file] |
435 |
– |
for f in parent : |
436 |
– |
pString += '\\\"' + f + '\\\"\,' |
437 |
– |
common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent)) |
438 |
– |
common.logger.write("File "+str(file)+" has the following parents: "+str(parent)) |
439 |
– |
if newFile : |
440 |
– |
try: |
441 |
– |
numEventsInFile = self.eventsbyfile[file] |
442 |
– |
common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events") |
443 |
– |
# increase filesEventCount |
444 |
– |
filesEventCount += numEventsInFile |
445 |
– |
# Add file to current job |
446 |
– |
parString += '\\\"' + file + '\\\"\,' |
447 |
– |
newFile = 0 |
448 |
– |
except KeyError: |
449 |
– |
common.logger.message("File "+str(file)+" has unknown number of events: skipping") |
450 |
– |
|
451 |
– |
eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining) |
452 |
– |
# if less events in file remain than eventsPerJobRequested |
453 |
– |
if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested): |
454 |
– |
# if last file in block |
455 |
– |
if ( fileCount == numFilesInBlock-1 ) : |
456 |
– |
# end job using last file, use remaining events in block |
457 |
– |
# close job and touch new file |
458 |
– |
fullString = parString[:-2] |
459 |
– |
if self.useParent: |
460 |
– |
fullParentString = pString[:-2] |
461 |
– |
list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)]) |
462 |
– |
else: |
463 |
– |
list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)]) |
464 |
– |
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).") |
465 |
– |
self.jobDestination.append(blockSites[block]) |
466 |
– |
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
467 |
– |
# fill jobs of block dictionary |
468 |
– |
jobsOfBlock[block].append(jobCount+1) |
469 |
– |
# reset counter |
470 |
– |
jobCount = jobCount + 1 |
471 |
– |
totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount |
472 |
– |
eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount |
473 |
– |
jobSkipEventCount = 0 |
474 |
– |
# reset file |
475 |
– |
pString = "" |
476 |
– |
parString = "" |
477 |
– |
filesEventCount = 0 |
478 |
– |
newFile = 1 |
479 |
– |
fileCount += 1 |
480 |
– |
else : |
481 |
– |
# go to next file |
482 |
– |
newFile = 1 |
483 |
– |
fileCount += 1 |
484 |
– |
# if events in file equal to eventsPerJobRequested |
485 |
– |
elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) : |
486 |
– |
# close job and touch new file |
487 |
– |
fullString = parString[:-2] |
488 |
– |
if self.useParent: |
489 |
– |
fullParentString = pString[:-2] |
490 |
– |
list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
491 |
– |
else: |
492 |
– |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
493 |
– |
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.") |
494 |
– |
self.jobDestination.append(blockSites[block]) |
495 |
– |
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
496 |
– |
jobsOfBlock[block].append(jobCount+1) |
497 |
– |
# reset counter |
498 |
– |
jobCount = jobCount + 1 |
499 |
– |
totalEventCount = totalEventCount + eventsPerJobRequested |
500 |
– |
eventsRemaining = eventsRemaining - eventsPerJobRequested |
501 |
– |
jobSkipEventCount = 0 |
502 |
– |
# reset file |
503 |
– |
pString = "" |
504 |
– |
parString = "" |
505 |
– |
filesEventCount = 0 |
506 |
– |
newFile = 1 |
507 |
– |
fileCount += 1 |
508 |
– |
|
509 |
– |
# if more events in file remain than eventsPerJobRequested |
510 |
– |
else : |
511 |
– |
# close job but don't touch new file |
512 |
– |
fullString = parString[:-2] |
513 |
– |
if self.useParent: |
514 |
– |
fullParentString = pString[:-2] |
515 |
– |
list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
516 |
– |
else: |
517 |
– |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
518 |
– |
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.") |
519 |
– |
self.jobDestination.append(blockSites[block]) |
520 |
– |
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
521 |
– |
jobsOfBlock[block].append(jobCount+1) |
522 |
– |
# increase counter |
523 |
– |
jobCount = jobCount + 1 |
524 |
– |
totalEventCount = totalEventCount + eventsPerJobRequested |
525 |
– |
eventsRemaining = eventsRemaining - eventsPerJobRequested |
526 |
– |
# calculate skip events for last file |
527 |
– |
# use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest |
528 |
– |
jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file]) |
529 |
– |
# remove all but the last file |
530 |
– |
filesEventCount = self.eventsbyfile[file] |
531 |
– |
if self.useParent: |
532 |
– |
for f in parent : pString += '\\\"' + f + '\\\"\,' |
533 |
– |
parString = '\\\"' + file + '\\\"\,' |
534 |
– |
pass # END if |
535 |
– |
pass # END while (iterate over files in the block) |
536 |
– |
pass # END while (iterate over blocks in the dataset) |
537 |
– |
self.ncjobs = self.total_number_of_jobs = jobCount |
538 |
– |
if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ): |
539 |
– |
common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.") |
540 |
– |
common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n") |
354 |
|
|
355 |
|
# screen output |
356 |
< |
screenOutput = "List of jobs and available destination sites:\n\n" |
544 |
< |
|
545 |
< |
# keep trace of block with no sites to print a warning at the end |
546 |
< |
noSiteBlock = [] |
547 |
< |
bloskNoSite = [] |
548 |
< |
|
549 |
< |
blockCounter = 0 |
550 |
< |
for block in blocks: |
551 |
< |
if block in jobsOfBlock.keys() : |
552 |
< |
blockCounter += 1 |
553 |
< |
screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]), |
554 |
< |
','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block))) |
555 |
< |
if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0: |
556 |
< |
noSiteBlock.append( spanRanges(jobsOfBlock[block]) ) |
557 |
< |
bloskNoSite.append( blockCounter ) |
558 |
< |
|
559 |
< |
common.logger.message(screenOutput) |
560 |
< |
if len(noSiteBlock) > 0 and len(bloskNoSite) > 0: |
561 |
< |
msg = 'WARNING: No sites are hosting any part of data for block:\n ' |
562 |
< |
virgola = "" |
563 |
< |
if len(bloskNoSite) > 1: |
564 |
< |
virgola = "," |
565 |
< |
for block in bloskNoSite: |
566 |
< |
msg += ' ' + str(block) + virgola |
567 |
< |
msg += '\n Related jobs:\n ' |
568 |
< |
virgola = "" |
569 |
< |
if len(noSiteBlock) > 1: |
570 |
< |
virgola = "," |
571 |
< |
for range_jobs in noSiteBlock: |
572 |
< |
msg += str(range_jobs) + virgola |
573 |
< |
msg += '\n will not be submitted and this block of data can not be analyzed!\n' |
574 |
< |
if self.cfg_params.has_key('EDG.se_white_list'): |
575 |
< |
msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n' |
576 |
< |
msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n' |
577 |
< |
msg += 'Please check if the dataset is available at this site!)\n' |
578 |
< |
if self.cfg_params.has_key('EDG.ce_white_list'): |
579 |
< |
msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n' |
580 |
< |
msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n' |
581 |
< |
msg += 'Please check if the dataset is available at this site!)\n' |
582 |
< |
|
583 |
< |
common.logger.message(msg) |
584 |
< |
|
585 |
< |
self.list_of_args = list_of_lists |
586 |
< |
return |
587 |
< |
|
588 |
< |
def jobSplittingNoBlockBoundary(self,blockSites): |
589 |
< |
""" |
590 |
< |
""" |
591 |
< |
# ---- Handle the possible job splitting configurations ---- # |
592 |
< |
if (self.selectTotalNumberEvents): |
593 |
< |
totalEventsRequested = self.total_number_of_events |
594 |
< |
if (self.selectEventsPerJob): |
595 |
< |
eventsPerJobRequested = self.eventsPerJob |
596 |
< |
if (self.selectNumberOfJobs): |
597 |
< |
totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob |
598 |
< |
|
599 |
< |
# If user requested all the events in the dataset |
600 |
< |
if (totalEventsRequested == -1): |
601 |
< |
eventsRemaining=self.maxEvents |
602 |
< |
# If user requested more events than are in the dataset |
603 |
< |
elif (totalEventsRequested > self.maxEvents): |
604 |
< |
eventsRemaining = self.maxEvents |
605 |
< |
common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.") |
606 |
< |
# If user requested less events than are in the dataset |
607 |
< |
else: |
608 |
< |
eventsRemaining = totalEventsRequested |
609 |
< |
|
610 |
< |
# If user requested more events per job than are in the dataset |
611 |
< |
if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents): |
612 |
< |
eventsPerJobRequested = self.maxEvents |
613 |
< |
|
614 |
< |
# For user info at end |
615 |
< |
totalEventCount = 0 |
616 |
< |
|
617 |
< |
if (self.selectTotalNumberEvents and self.selectNumberOfJobs): |
618 |
< |
eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs) |
619 |
< |
|
620 |
< |
if (self.selectNumberOfJobs): |
621 |
< |
common.logger.message("May not create the exact number_of_jobs requested.") |
622 |
< |
|
623 |
< |
if ( self.ncjobs == 'all' ) : |
624 |
< |
totalNumberOfJobs = 999999999 |
625 |
< |
else : |
626 |
< |
totalNumberOfJobs = self.ncjobs |
627 |
< |
|
628 |
< |
blocks = blockSites.keys() |
629 |
< |
blockCount = 0 |
630 |
< |
# Backup variable in case self.maxEvents counted events in a non-included block |
631 |
< |
numBlocksInDataset = len(blocks) |
632 |
< |
|
633 |
< |
jobCount = 0 |
634 |
< |
list_of_lists = [] |
635 |
< |
|
636 |
< |
#AF |
637 |
< |
#AF do not reset input files and event count on block boundary |
638 |
< |
#AF |
639 |
< |
parString="" |
640 |
< |
filesEventCount = 0 |
641 |
< |
#AF |
642 |
< |
|
643 |
< |
# list tracking which jobs belong to which block
644 |
< |
jobsOfBlock = {} |
645 |
< |
while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)): |
646 |
< |
block = blocks[blockCount] |
647 |
< |
blockCount += 1 |
648 |
< |
if block not in jobsOfBlock.keys() : |
649 |
< |
jobsOfBlock[block] = [] |
650 |
< |
|
651 |
< |
if self.eventsbyblock.has_key(block) : |
652 |
< |
numEventsInBlock = self.eventsbyblock[block] |
653 |
< |
common.logger.debug(5,'Events in Block File '+str(numEventsInBlock)) |
654 |
< |
files = self.filesbyblock[block] |
655 |
< |
numFilesInBlock = len(files) |
656 |
< |
if (numFilesInBlock <= 0): |
657 |
< |
continue |
658 |
< |
fileCount = 0 |
659 |
< |
#AF |
660 |
< |
#AF do not reset input files and event count of block boundary |
661 |
< |
#AF |
662 |
< |
## ---- New block => New job ---- # |
663 |
< |
#parString = "" |
664 |
< |
# counter for number of events in files currently worked on |
665 |
< |
#filesEventCount = 0 |
666 |
< |
#AF |
667 |
< |
# flag if next while loop should touch new file |
668 |
< |
newFile = 1 |
669 |
< |
# job event counter |
670 |
< |
jobSkipEventCount = 0 |
671 |
< |
|
672 |
< |
# ---- Iterate over the files in the block until we've met the requested ---- # |
673 |
< |
# ---- total # of events or we've gone over all the files in this block ---- # |
674 |
< |
pString='' |
675 |
< |
while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ): |
676 |
< |
file = files[fileCount] |
677 |
< |
if self.useParent: |
678 |
< |
parent = self.parentFiles[file] |
679 |
< |
for f in parent : |
680 |
< |
pString += '\\\"' + f + '\\\"\,' |
681 |
< |
common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent)) |
682 |
< |
common.logger.write("File "+str(file)+" has the following parents: "+str(parent)) |
683 |
< |
if newFile : |
684 |
< |
try: |
685 |
< |
numEventsInFile = self.eventsbyfile[file] |
686 |
< |
common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events") |
687 |
< |
# increase filesEventCount |
688 |
< |
filesEventCount += numEventsInFile |
689 |
< |
# Add file to current job |
690 |
< |
parString += '\\\"' + file + '\\\"\,' |
691 |
< |
newFile = 0 |
692 |
< |
except KeyError: |
693 |
< |
common.logger.message("File "+str(file)+" has unknown number of events: skipping") |
694 |
< |
eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining) |
695 |
< |
#common.logger.message("AF filesEventCount %s - jobSkipEventCount %s "%(filesEventCount,jobSkipEventCount)) |
696 |
< |
# if less events in file remain than eventsPerJobRequested |
697 |
< |
if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested): |
698 |
< |
#AF |
699 |
< |
#AF skip fileboundary part |
700 |
< |
#AF |
701 |
< |
# go to next file |
702 |
< |
newFile = 1 |
703 |
< |
fileCount += 1 |
704 |
< |
# if events in file equal to eventsPerJobRequested |
705 |
< |
elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) : |
706 |
< |
# close job and touch new file |
707 |
< |
fullString = parString[:-2] |
708 |
< |
if self.useParent: |
709 |
< |
fullParentString = pString[:-2] |
710 |
< |
list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
711 |
< |
else: |
712 |
< |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
713 |
< |
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.") |
714 |
< |
self.jobDestination.append(blockSites[block]) |
715 |
< |
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
716 |
< |
jobsOfBlock[block].append(jobCount+1) |
717 |
< |
# reset counter |
718 |
< |
jobCount = jobCount + 1 |
719 |
< |
totalEventCount = totalEventCount + eventsPerJobRequested |
720 |
< |
eventsRemaining = eventsRemaining - eventsPerJobRequested |
721 |
< |
jobSkipEventCount = 0 |
722 |
< |
# reset file |
723 |
< |
pString = "" |
724 |
< |
parString = "" |
725 |
< |
filesEventCount = 0 |
726 |
< |
newFile = 1 |
727 |
< |
fileCount += 1 |
356 |
> |
common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n") |
357 |
|
|
358 |
< |
# if more events in file remain than eventsPerJobRequested |
730 |
< |
else : |
731 |
< |
# close job but don't touch new file |
732 |
< |
fullString = parString[:-2] |
733 |
< |
if self.useParent: |
734 |
< |
fullParentString = pString[:-2] |
735 |
< |
list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
736 |
< |
else: |
737 |
< |
list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)]) |
738 |
< |
common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.") |
739 |
< |
self.jobDestination.append(blockSites[block]) |
740 |
< |
common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount])) |
741 |
< |
jobsOfBlock[block].append(jobCount+1) |
742 |
< |
# increase counter |
743 |
< |
jobCount = jobCount + 1 |
744 |
< |
totalEventCount = totalEventCount + eventsPerJobRequested |
745 |
< |
eventsRemaining = eventsRemaining - eventsPerJobRequested |
746 |
< |
# calculate skip events for last file |
747 |
< |
# use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest |
748 |
< |
jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file]) |
749 |
< |
# remove all but the last file |
750 |
< |
filesEventCount = self.eventsbyfile[file] |
751 |
< |
if self.useParent: |
752 |
< |
for f in parent : pString += '\\\"' + f + '\\\"\,' |
753 |
< |
parString = '\\\"' + file + '\\\"\,' |
754 |
< |
pass # END if |
755 |
< |
pass # END while (iterate over files in the block) |
756 |
< |
pass # END while (iterate over blocks in the dataset) |
757 |
< |
self.ncjobs = self.total_number_of_jobs = jobCount |
758 |
< |
if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ): |
759 |
< |
common.logger.message("eventsRemaining "+str(eventsRemaining)) |
760 |
< |
common.logger.message("jobCount "+str(jobCount)) |
761 |
< |
common.logger.message(" totalNumberOfJobs "+str(totalNumberOfJobs)) |
762 |
< |
common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.") |
763 |
< |
common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n") |
764 |
< |
|
765 |
< |
# screen output |
766 |
< |
screenOutput = "List of jobs and available destination sites:\n\n" |
767 |
< |
|
768 |
< |
#AF |
769 |
< |
#AF skip check on block with no sites |
770 |
< |
#AF |
771 |
< |
self.list_of_args = list_of_lists |
772 |
< |
|
773 |
< |
return |
774 |
< |
|
775 |
< |
|
776 |
< |
|
777 |
< |
def jobSplittingNoInput(self): |
778 |
< |
""" |
779 |
< |
Perform job splitting based on number of event per job |
780 |
< |
""" |
781 |
< |
common.logger.debug(5,'Splitting per events') |
782 |
< |
|
783 |
< |
if (self.selectEventsPerJob): |
784 |
< |
common.logger.message('Required '+str(self.eventsPerJob)+' events per job ') |
785 |
< |
if (self.selectNumberOfJobs): |
786 |
< |
common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ') |
787 |
< |
if (self.selectTotalNumberEvents): |
788 |
< |
common.logger.message('Required '+str(self.total_number_of_events)+' events in total ') |
789 |
< |
|
790 |
< |
if (self.total_number_of_events < 0): |
791 |
< |
msg='Cannot split jobs per Events with "-1" as total number of events' |
792 |
< |
raise CrabException(msg) |
793 |
< |
|
794 |
< |
if (self.selectEventsPerJob): |
795 |
< |
if (self.selectTotalNumberEvents): |
796 |
< |
self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob) |
797 |
< |
elif(self.selectNumberOfJobs) : |
798 |
< |
self.total_number_of_jobs =self.theNumberOfJobs |
799 |
< |
self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob) |
800 |
< |
|
801 |
< |
elif (self.selectNumberOfJobs) : |
802 |
< |
self.total_number_of_jobs = self.theNumberOfJobs |
803 |
< |
self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs) |
804 |
< |
|
805 |
< |
common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs)) |
806 |
< |
|
807 |
< |
# is there any remainder? |
808 |
< |
check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob) |
809 |
< |
|
810 |
< |
common.logger.debug(5,'Check '+str(check)) |
811 |
< |
|
812 |
< |
common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events') |
813 |
< |
if check > 0: |
814 |
< |
common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob)) |
815 |
< |
|
816 |
< |
# argument is seed number.$i |
817 |
< |
self.list_of_args = [] |
818 |
< |
for i in range(self.total_number_of_jobs): |
819 |
< |
## Since there is no input, any site is good |
820 |
< |
self.jobDestination.append([""]) #must be empty to write correctly the xml |
821 |
< |
args=[] |
822 |
< |
if (self.firstRun): |
823 |
< |
## pythia first run |
824 |
< |
args.append(str(self.firstRun)+str(i)) |
825 |
< |
self.list_of_args.append(args) |
826 |
< |
|
827 |
< |
return |
828 |
< |
|
829 |
< |
|
830 |
< |
def jobSplittingForScript(self): |
831 |
< |
""" |
832 |
< |
Perform job splitting based on number of job |
833 |
< |
""" |
834 |
< |
common.logger.debug(5,'Splitting per job') |
835 |
< |
common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ') |
836 |
< |
|
837 |
< |
self.total_number_of_jobs = self.theNumberOfJobs |
358 |
> |
return sites |
359 |
|
|
839 |
– |
common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs)) |
360 |
|
|
361 |
< |
common.logger.message(str(self.total_number_of_jobs)+' jobs can be created') |
361 |
> |
def split(self, jobParams,firstJobID): |
362 |
|
|
363 |
< |
# argument is seed number.$i |
364 |
< |
self.list_of_args = [] |
365 |
< |
for i in range(self.total_number_of_jobs): |
846 |
< |
self.jobDestination.append([""]) |
847 |
< |
self.list_of_args.append([str(i)]) |
848 |
< |
return |
363 |
> |
jobParams = self.dict['args'] |
364 |
> |
njobs = self.dict['njobs'] |
365 |
> |
self.jobDestination = self.dict['jobDestination'] |
366 |
|
|
367 |
< |
def split(self, jobParams,firstJobID): |
367 |
> |
if njobs==0: |
368 |
> |
raise CrabException("Ask to split "+str(njobs)+" jobs: aborting") |
369 |
|
|
852 |
– |
njobs = self.total_number_of_jobs |
853 |
– |
arglist = self.list_of_args |
370 |
|
# create the empty structure |
371 |
|
for i in range(njobs): |
372 |
|
jobParams.append("") |
373 |
|
|
374 |
|
listID=[] |
375 |
|
listField=[] |
376 |
+ |
listDictions=[] |
377 |
+ |
exist= os.path.exists(self.argsFile) |
378 |
|
for id in range(njobs): |
379 |
|
job = id + int(firstJobID) |
862 |
– |
jobParams[id] = arglist[id] |
380 |
|
listID.append(job+1) |
381 |
|
job_ToSave ={} |
382 |
|
concString = ' ' |
383 |
|
argu='' |
384 |
+ |
str_argu = str(job+1) |
385 |
|
if len(jobParams[id]): |
386 |
< |
argu += concString.join(jobParams[id] ) |
387 |
< |
job_ToSave['arguments']= str(job+1)+' '+argu |
386 |
> |
argu = {'JobID': job+1} |
387 |
> |
for i in range(len(jobParams[id])): |
388 |
> |
argu[self.dict['params'][i]]=jobParams[id][i] |
389 |
> |
# just for debug |
390 |
> |
str_argu += concString.join(jobParams[id]) |
391 |
> |
if argu != '': listDictions.append(argu) |
392 |
> |
job_ToSave['arguments']= str(job+1) |
393 |
|
job_ToSave['dlsDestination']= self.jobDestination[id] |
394 |
|
listField.append(job_ToSave) |
395 |
< |
msg="Job "+str(job)+" Arguments: "+str(job+1)+" "+argu+"\n" \ |
396 |
< |
+" Destination: "+str(self.jobDestination[id]) |
397 |
< |
common.logger.debug(5,msg) |
395 |
> |
from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap |
396 |
> |
cms_se = CmsSEMap() |
397 |
> |
msg="Job %s Arguments: %s\n"%(str(job+1),str_argu) |
398 |
> |
msg+="\t Destination: %s "%(str(self.jobDestination[id])) |
399 |
> |
SEDestination = [cms_se[dest] for dest in self.jobDestination[id]] |
400 |
> |
msg+="\t CMSDestination: %s "%(str(SEDestination)) |
401 |
> |
common.logger.log(10-1,msg) |
402 |
> |
# write xml |
403 |
> |
if len(listDictions): |
404 |
> |
if exist==False: self.CreateXML() |
405 |
> |
self.addEntry(listDictions) |
406 |
> |
self.addXMLfile() |
407 |
|
common._db.updateJob_(listID,listField) |
408 |
< |
self.argsList = (len(jobParams[0])+1) |
408 |
> |
self.zipTarFile() |
409 |
> |
return |
410 |
> |
|
411 |
> |
def addXMLfile(self): |
412 |
|
|
413 |
+ |
import tarfile |
414 |
+ |
try: |
415 |
+ |
tar = tarfile.open(self.tarNameWithPath, "a") |
416 |
+ |
tar.add(self.argsFile, os.path.basename(self.argsFile)) |
417 |
+ |
tar.close() |
418 |
+ |
except IOError, exc: |
419 |
+ |
msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath) |
420 |
+ |
msg += str(exc) |
421 |
+ |
raise CrabException(msg) |
422 |
+ |
except tarfile.TarError, exc: |
423 |
+ |
msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath) |
424 |
+ |
msg += str(exc) |
425 |
+ |
raise CrabException(msg) |
426 |
+ |
|
427 |
+ |
def CreateXML(self): |
428 |
+ |
""" |
429 |
+ |
""" |
430 |
+ |
result = IMProvNode( self.rootArgsFilename ) |
431 |
+ |
outfile = file( self.argsFile, 'w').write(str(result)) |
432 |
+ |
return |
433 |
+ |
|
434 |
+ |
def addEntry(self, listDictions): |
435 |
+ |
""" |
436 |
+ |
_addEntry_ |
437 |
+ |
|
438 |
+ |
add an entry to the xml file |
439 |
+ |
""" |
440 |
+ |
from IMProv.IMProvLoader import loadIMProvFile |
441 |
+ |
## load xml |
442 |
+ |
improvDoc = loadIMProvFile(self.argsFile) |
443 |
+ |
entrname= 'Job' |
444 |
+ |
for dictions in listDictions: |
445 |
+ |
report = IMProvNode(entrname , None, **dictions) |
446 |
+ |
improvDoc.addNode(report) |
447 |
+ |
outfile = file( self.argsFile, 'w').write(str(improvDoc)) |
448 |
|
return |
449 |
|
|
450 |
|
def numberOfJobs(self): |
451 |
< |
return self.total_number_of_jobs |
451 |
> |
return self.dict['njobs'] |
452 |
|
|
453 |
|
def getTarBall(self, exe): |
454 |
|
""" |
455 |
|
Return the TarBall with lib and exe |
456 |
|
""" |
457 |
< |
self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name |
458 |
< |
if os.path.exists(self.tgzNameWithPath): |
459 |
< |
return self.tgzNameWithPath |
457 |
> |
self.tarNameWithPath = common.work_space.pathForTgz()+self.tar_name |
458 |
> |
if os.path.exists(self.tarNameWithPath): |
459 |
> |
return self.tarNameWithPath |
460 |
|
|
461 |
|
# Prepare a tar gzipped file with user binaries. |
462 |
|
self.buildTar_(exe) |
463 |
|
|
464 |
< |
return string.strip(self.tgzNameWithPath) |
464 |
> |
return string.strip(self.tarNameWithPath) |
465 |
|
|
466 |
|
def buildTar_(self, executable): |
467 |
|
|
471 |
|
|
472 |
|
## check if working area is release top |
473 |
|
if swReleaseTop == '' or swArea == swReleaseTop: |
474 |
< |
common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop) |
474 |
> |
common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop) |
475 |
|
return |
476 |
|
|
477 |
|
import tarfile |
478 |
|
try: # create tar ball |
479 |
< |
tar = tarfile.open(self.tgzNameWithPath, "w:gz") |
479 |
> |
#tar = tarfile.open(self.tgzNameWithPath, "w:gz") |
480 |
> |
tar = tarfile.open(self.tarNameWithPath, "w") |
481 |
|
## First find the executable |
482 |
|
if (self.executable != ''): |
483 |
|
exeWithPath = self.scram.findFile_(executable) |
487 |
|
## then check if it's private or not |
488 |
|
if exeWithPath.find(swReleaseTop) == -1: |
489 |
|
# the exe is private, so we must ship |
490 |
< |
common.logger.debug(5,"Exe "+exeWithPath+" to be tarred") |
490 |
> |
common.logger.debug("Exe "+exeWithPath+" to be tarred") |
491 |
|
path = swArea+'/' |
492 |
|
# distinguish case when script is in user project area or given by full path somewhere else |
493 |
|
if exeWithPath.find(path) >= 0 : |
501 |
|
pass |
502 |
|
|
503 |
|
## Now get the libraries: only those in local working area |
504 |
+ |
tar.dereference=True |
505 |
|
libDir = 'lib' |
506 |
|
lib = swArea+'/' +libDir |
507 |
< |
common.logger.debug(5,"lib "+lib+" to be tarred") |
507 |
> |
common.logger.debug("lib "+lib+" to be tarred") |
508 |
|
if os.path.exists(lib): |
509 |
|
tar.add(lib,libDir) |
510 |
|
|
513 |
|
module = swArea + '/' + moduleDir |
514 |
|
if os.path.isdir(module): |
515 |
|
tar.add(module,moduleDir) |
516 |
+ |
tar.dereference=False |
517 |
|
|
518 |
|
## Now check if any data dir(s) is present |
519 |
|
self.dataExist = False |
527 |
|
todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)] |
528 |
|
if name == 'data': |
529 |
|
self.dataExist=True |
530 |
< |
common.logger.debug(5,"data "+entry+" to be tarred") |
530 |
> |
common.logger.debug("data "+entry+" to be tarred") |
531 |
|
tar.add(swArea+"/src/"+entry,"src/"+entry) |
532 |
|
pass |
533 |
|
pass |
536 |
|
if not self.pset is None: |
537 |
|
cfg_file = common.work_space.jobDir()+self.configFilename() |
538 |
|
tar.add(cfg_file,self.configFilename()) |
966 |
– |
common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
539 |
|
|
540 |
+ |
try: |
541 |
+ |
crab_cfg_file = common.work_space.shareDir()+'/crab.cfg' |
542 |
+ |
tar.add(crab_cfg_file,'crab.cfg') |
543 |
+ |
except: |
544 |
+ |
pass |
545 |
|
|
546 |
|
## Add ProdCommon dir to tar |
547 |
|
prodcommonDir = './' |
548 |
|
prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/' |
549 |
|
neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \ |
550 |
< |
'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage'] |
550 |
> |
'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \ |
551 |
> |
'WMCore/__init__.py','WMCore/Algorithms'] |
552 |
|
for file in neededStuff: |
553 |
|
tar.add(prodcommonPath+file,prodcommonDir+file) |
976 |
– |
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
554 |
|
|
555 |
|
##### ML stuff |
556 |
|
ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py'] |
557 |
|
path=os.environ['CRABDIR'] + '/python/' |
558 |
|
for file in ML_file_list: |
559 |
|
tar.add(path+file,file) |
983 |
– |
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
560 |
|
|
561 |
|
##### Utils |
562 |
|
Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py'] |
563 |
|
for file in Utils_file_list: |
564 |
|
tar.add(path+file,file) |
989 |
– |
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
565 |
|
|
566 |
|
##### AdditionalFiles |
567 |
+ |
tar.dereference=True |
568 |
|
for file in self.additional_inbox_files: |
569 |
|
tar.add(file,string.split(file,'/')[-1]) |
570 |
< |
common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames())) |
570 |
> |
tar.dereference=False |
571 |
> |
common.logger.log(10-1,"Files in "+self.tarNameWithPath+" : "+str(tar.getnames())) |
572 |
|
|
573 |
|
tar.close() |
574 |
|
except IOError, exc: |
575 |
< |
common.logger.write(str(exc)) |
576 |
< |
raise CrabException('Could not create tar-ball '+self.tgzNameWithPath) |
575 |
> |
msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath |
576 |
> |
msg += str(exc) |
577 |
> |
raise CrabException(msg) |
578 |
|
except tarfile.TarError, exc: |
579 |
< |
common.logger.write(str(exc)) |
580 |
< |
raise CrabException('Could not create tar-ball '+self.tgzNameWithPath) |
579 |
> |
msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath |
580 |
> |
msg += str(exc) |
581 |
> |
raise CrabException(msg) |
582 |
> |
|
583 |
> |
def zipTarFile(self): |
584 |
> |
|
585 |
> |
cmd = "gzip -c %s > %s "%(self.tarNameWithPath,self.tgzNameWithPath) |
586 |
> |
res=runCommand(cmd) |
587 |
|
|
1004 |
– |
## check for tarball size |
588 |
|
tarballinfo = os.stat(self.tgzNameWithPath) |
589 |
|
if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) : |
590 |
|
msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \ |
591 |
< |
+'MB input sandbox limit \n' |
591 |
> |
+'MB input sandbox limit \n' |
592 |
|
msg += ' and not supported by the direct GRID submission system.\n' |
593 |
|
msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n' |
594 |
|
msg += ' For further information please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
601 |
|
Returns part of a job script which prepares |
602 |
|
the execution environment for the job 'nj'. |
603 |
|
""" |
604 |
+ |
# FUTURE: Drop support for .cfg when possible |
605 |
|
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
606 |
|
psetName = 'pset.py' |
607 |
|
else: |
609 |
|
# Prepare JobType-independent part |
610 |
|
txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n' |
611 |
|
txt += 'echo ">>> setup environment"\n' |
612 |
< |
txt += 'if [ $middleware == LCG ]; then \n' |
612 |
> |
txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n' |
613 |
|
txt += self.wsSetupCMSLCGEnvironment_() |
614 |
|
txt += 'elif [ $middleware == OSG ]; then\n' |
615 |
|
txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n' |
624 |
|
txt += ' cd $WORKING_DIR\n' |
625 |
|
txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n' |
626 |
|
txt += self.wsSetupCMSOSGEnvironment_() |
627 |
+ |
#Setup SGE Environment |
628 |
+ |
txt += 'elif [ $middleware == SGE ]; then\n' |
629 |
+ |
txt += self.wsSetupCMSLCGEnvironment_() |
630 |
+ |
|
631 |
+ |
txt += 'elif [ $middleware == ARC ]; then\n' |
632 |
+ |
txt += self.wsSetupCMSLCGEnvironment_() |
633 |
+ |
|
634 |
|
txt += 'fi\n' |
635 |
|
|
636 |   | # Prepare JobType-specific part
646 |   | txt += ' func_exit\n'
647 |   | txt += 'fi \n'
648 |   | txt += 'cd '+self.version+'\n'
649 | < | txt += 'SOFTWARE_DIR=`pwd`\n'
649 | > | txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
650 |   | txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
651 |   | txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
652 |   | txt += 'if [ $? != 0 ] ; then\n'
680 |   | txt += 'ApplicationFamily=cmsRun\n'
681 |   |
682 |   | else:
683 | < | self.primaryDataset = 'null'
683 | > | self.primaryDataset = 'null'
684 |   | txt += 'DatasetPath=MCDataTier\n'
685 |   | txt += 'PrimaryDataset=null\n'
686 |   | txt += 'DataTier=null\n'
689 |   | pset = os.path.basename(job.configFilename())
690 |   | txt += '\n'
691 |   | txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
1101 | – | if (self.datasetPath): # standard job
1102 | – | txt += 'InputFiles=${args[1]}; export InputFiles\n'
1103 | – | if (self.useParent):
1104 | – | txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
1105 | – | txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
1106 | – | txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
1107 | – | else:
1108 | – | txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
1109 | – | txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
1110 | – | txt += 'echo "Inputfiles:<$InputFiles>"\n'
1111 | – | if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
1112 | – | txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1113 | – | txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1114 | – | else: # pythia like job
1115 | – | txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
1116 | – | txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
1117 | – | txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
1118 | – | txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
1119 | – | if (self.firstRun):
1120 | – | txt += 'FirstRun=${args[1]}; export FirstRun\n'
1121 | – | txt += 'echo "FirstRun: <$FirstRun>"\n'
1122 | – |
1123 | – | txt += 'mv -f ' + pset + ' ' + psetName + '\n'
692 |   |
693 | + | txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
694 | + | txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
695 | + | txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
696 | + | txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
697 |   |
698 | < | if self.pset != None:
699 | < | # FUTURE: Can simply for 2_1_x and higher
1128 | < | txt += '\n'
1129 | < | if self.debug_wrapper==True:
1130 | < | txt += 'echo "***** cat ' + psetName + ' *********"\n'
1131 | < | txt += 'cat ' + psetName + '\n'
1132 | < | txt += 'echo "****** end ' + psetName + ' ********"\n'
1133 | < | txt += '\n'
1134 | < | if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1135 | < | txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
1136 | < | else:
1137 | < | txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
1138 | < | txt += 'echo "PSETHASH = $PSETHASH" \n'
698 | > | txt += 'mv -f ' + pset + ' ' + psetName + '\n'
699 | > | else:
700 |   | txt += '\n'
701 | + | txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
702 | + |
703 |   | return txt
704 |   |
705 |   | def wsUntarSoftware(self, nj=0):
712 |   |
713 |   | if os.path.isfile(self.tgzNameWithPath):
714 |   | txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
715 | < | txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
716 | < | if self.debug_wrapper:
715 | > | txt += 'tar zxvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
716 | > | if self.debug_wrapper==1 :
717 |   | txt += 'ls -Al \n'
718 |   | txt += 'untar_status=$? \n'
719 |   | txt += 'if [ $untar_status -ne 0 ]; then \n'
767 |   | txt += 'fi\n'
768 |   | txt += '\n'
769 |   |
770 | + | if self.pset != None:
771 | + | # FUTURE: Drop support for .cfg when possible
772 | + | if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
773 | + | psetName = 'pset.py'
774 | + | else:
775 | + | psetName = 'pset.cfg'
776 | + | # FUTURE: Can simply for 2_1_x and higher
777 | + | txt += '\n'
778 | + | if self.debug_wrapper == 1:
779 | + | txt += 'echo "***** cat ' + psetName + ' *********"\n'
780 | + | txt += 'cat ' + psetName + '\n'
781 | + | txt += 'echo "****** end ' + psetName + ' ********"\n'
782 | + | txt += '\n'
783 | + | txt += 'echo "***********************" \n'
784 | + | txt += 'which edmConfigHash \n'
785 | + | txt += 'echo "***********************" \n'
786 | + | if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
787 | + | txt += 'edmConfigHash ' + psetName + ' \n'
788 | + | txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
789 | + | else:
790 | + | txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
791 | + | txt += 'echo "PSETHASH = $PSETHASH" \n'
792 | + | #### FEDE temporary fix for noEdm files #####
793 | + | txt += 'if [ -z "$PSETHASH" ]; then \n'
794 | + | txt += ' export PSETHASH=null\n'
795 | + | txt += 'fi \n'
796 | + | #############################################
797 | + | txt += '\n'
798 |   | return txt
799 |   |
800 |   |
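The PSETHASH logic that this hunk moves into wsUntarSoftware reduces to a version-dependent edmConfigHash call plus a fallback for non-EDM jobs. A standalone sketch (function name is illustrative, not class code):

def pset_hash_snippet(cmssw_major, cmssw_minor, pset_name):
    # Shell fragment that computes the config hash on the worker node.
    if (cmssw_major >= 2 and cmssw_minor >= 1) or cmssw_major >= 3:
        # CMSSW >= 2_1_x: edmConfigHash takes the python config as an argument
        txt = 'PSETHASH=`edmConfigHash %s`\n' % pset_name
    else:
        # older releases read the .cfg configuration from stdin
        txt = 'PSETHASH=`edmConfigHash < %s`\n' % pset_name
    # temporary fix for noEdm files: make sure PSETHASH is never left empty
    txt += 'if [ -z "$PSETHASH" ]; then\n'
    txt += '  export PSETHASH=null\n'
    txt += 'fi\n'
    return txt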
806 |   |
807 |   | def executableArgs(self):
808 |   | # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
809 | < | if self.scriptExe:#CarlosDaniele
810 | < | return self.scriptExe + " $NJob"
809 | > | if self.scriptExe:
810 | > | return self.scriptExe + " $NJob $AdditionalArgs"
811 |   | else:
812 |   | ex_args = ""
813 | < | # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
814 | < | # Framework job report
1224 | < | if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1225 | < | ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1226 | < | # Type of config file
813 | > | ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
814 | > | # Type of config file depends on CMSSW version
815 |   | if self.CMSSW_major >= 2 :
816 |   | ex_args += " -p pset.py"
817 |   | else:
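For reference, the reworked executableArgs amounts to: scriptExe jobs now also receive $AdditionalArgs, the framework job report is always requested (the old CMSSW version test around -j is gone), and the -p flag follows the CMSSW version. A standalone sketch; the .cfg branch is an assumption, since the else above is truncated in this hunk:

def executable_args(script_exe, cmssw_major):
    # Mirrors the rewritten executableArgs(); plain function for illustration only.
    if script_exe:
        return script_exe + " $NJob $AdditionalArgs"
    ex_args = " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"   # always ask cmsRun for a FJR
    if cmssw_major >= 2:
        ex_args += " -p pset.py"
    else:
        ex_args += " -p pset.cfg"   # assumed: older releases keep the .cfg name
    return ex_args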
849 |   | txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
850 |   | txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
851 |   | txt += 'echo ">>> current directory content:"\n'
852 | < | if self.debug_wrapper:
852 | > | if self.debug_wrapper==1:
853 |   | txt += 'ls -Al\n'
854 |   | txt += '\n'
855 |   |
881 |   | txt += '\n'
882 |   | txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
883 |   | txt += 'echo ">>> current directory content:"\n'
884 | < | if self.debug_wrapper:
884 | > | if self.debug_wrapper==1:
885 |   | txt += 'ls -Al\n'
886 |   | txt += '\n'
887 |   | txt += 'cd $RUNTIME_AREA\n'
978 |   | """
979 |   | insert the part of the script that modifies the FrameworkJob Report
980 |   | """
981 | < | txt = '\n#Written by cms_cmssw::wsModifyReport\n'
981 | > |
982 | > | txt = ''
983 |   | publish_data = int(self.cfg_params.get('USER.publish_data',0))
984 | < | if (publish_data == 1):
984 | > | #if (publish_data == 1):
985 | > | if (self.copy_data == 1):
986 | > | txt = '\n#Written by cms_cmssw::wsModifyReport\n'
987 | > | publish_data = int(self.cfg_params.get('USER.publish_data',0))
988 | > |
989 |   |
990 |   | txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n'
991 | < | txt += ' FOR_LFN=$LFNBaseName/${PSETHASH}/\n'
991 | > | txt += ' FOR_LFN=$LFNBaseName\n'
992 |   | txt += 'else\n'
993 |   | txt += ' FOR_LFN=/copy_problems/ \n'
1401 | – | txt += ' SE=""\n'
1402 | – | txt += ' SE_PATH=""\n'
994 |   | txt += 'fi\n'
995 |   |
996 |   | txt += 'echo ">>> Modify Job Report:" \n'
997 |   | txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1407 | – | txt += 'ProcessedDataset= $procDataset \n'
1408 | – | txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
998 |   | txt += 'echo "SE = $SE"\n'
999 |   | txt += 'echo "SE_PATH = $SE_PATH"\n'
1000 |   | txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1001 |   | txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1002 | < | args = '$RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' \
1003 | < | '$User -$ProcessedDataset-$PSETHASH $ApplicationFamily '+ \
1004 | < | ' $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'
1002 | > |
1003 | > |
1004 | > | args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH'
1005 | > | if (publish_data == 1):
1006 | > | processedDataset = self.cfg_params['USER.publish_data_name']
1007 | > | txt += 'ProcessedDataset='+processedDataset+'\n'
1008 | > | txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1009 | > | args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'
1010 | > |
1011 |   | txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n'
1012 |   | txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n'
1013 |   | txt += 'modifyReport_result=$?\n'
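The call to ModifyJobReport.py switches from a fixed positional argument list to labelled key/value pairs, and the user dataset label is appended only when publication is requested. A condensed sketch of how the string is assembled (standalone function for illustration; the $VARIABLES are expanded later by the job wrapper, not by Python):

def modify_report_args(publish_data, processed_dataset=None):
    # Key/value style arguments for ModifyJobReport.py, as built in the hunk above.
    args = ('fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN '
            'PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily '
            'ApplicationName $executable cmssw_version $CMSSW_VERSION '
            'psethash $PSETHASH se_name $SE se_path $SE_PATH')
    if publish_data == 1 and processed_dataset:
        # processed_dataset comes from USER.publish_data_name in crab.cfg
        args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'
    return args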
1030 |   | txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1031 |   | txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1032 |   | txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1033 | < | if self.debug_wrapper :
1033 | > | if self.debug_wrapper==1 :
1034 |   | txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1035 |   | txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1036 |   | txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
1047 |   | txt += ' fi\n'
1048 |   | #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1049 |   | txt += ' if [ $executable_exit_status -eq 0 ];then\n'
1050 | < | txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
1051 | < | if (self.datasetPath and not (self.dataset_pu or self.useParent)) :
1050 | > | txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
1051 | > | ## This cannot more work given the changes on the Job argumentsJob
1052 | > | """
1053 | > | if (self.datasetPath and not (self.dataset_pu or self.useParent==1)) :
1054 |   | # VERIFY PROCESSED DATA
1055 | < | txt += ' echo ">>> Verify list of processed files:"\n'
1056 | < | txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1057 | < | txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1058 | < | txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
1059 | < | txt += ' mv tmp.txt input-files.txt\n'
1060 | < | txt += ' echo "cat input-files.txt"\n'
1061 | < | txt += ' echo "----------------------"\n'
1062 | < | txt += ' cat input-files.txt\n'
1063 | < | txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
1064 | < | txt += ' mv tmp.txt processed-files.txt\n'
1065 | < | txt += ' echo "----------------------"\n'
1066 | < | txt += ' echo "cat processed-files.txt"\n'
1067 | < | txt += ' echo "----------------------"\n'
1068 | < | txt += ' cat processed-files.txt\n'
1069 | < | txt += ' echo "----------------------"\n'
1070 | < | txt += ' diff -q input-files.txt processed-files.txt\n'
1071 | < | txt += ' fileverify_status=$?\n'
1072 | < | txt += ' if [ $fileverify_status -ne 0 ]; then\n'
1073 | < | txt += ' executable_exit_status=30001\n'
1074 | < | txt += ' echo "ERROR ==> not all input files processed"\n'
1075 | < | txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1076 | < | txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
1077 | < | txt += ' fi\n'
1078 | < | txt += ' elif [ $executable_exit_status -ne 0 ] || [ $executable_exit_status -ne 50015 ] || [ $executable_exit_status -ne 50017 ];then\n'
1482 | < | txt += ' echo ">>> Executable failed $executable_exit_status"\n'
1483 | < | txt += ' func_exit\n'
1055 | > | txt += ' echo ">>> Verify list of processed files:"\n'
1056 | > | txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1057 | > | txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1058 | > | txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
1059 | > | txt += ' mv tmp.txt input-files.txt\n'
1060 | > | txt += ' echo "cat input-files.txt"\n'
1061 | > | txt += ' echo "----------------------"\n'
1062 | > | txt += ' cat input-files.txt\n'
1063 | > | txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
1064 | > | txt += ' mv tmp.txt processed-files.txt\n'
1065 | > | txt += ' echo "----------------------"\n'
1066 | > | txt += ' echo "cat processed-files.txt"\n'
1067 | > | txt += ' echo "----------------------"\n'
1068 | > | txt += ' cat processed-files.txt\n'
1069 | > | txt += ' echo "----------------------"\n'
1070 | > | txt += ' diff -qbB input-files.txt processed-files.txt\n'
1071 | > | txt += ' fileverify_status=$?\n'
1072 | > | txt += ' if [ $fileverify_status -ne 0 ]; then\n'
1073 | > | txt += ' executable_exit_status=30001\n'
1074 | > | txt += ' echo "ERROR ==> not all input files processed"\n'
1075 | > | txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1076 | > | txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
1077 | > | txt += ' fi\n'
1078 | > | """
1079 |   | txt += ' fi\n'
1485 | – | txt += '\n'
1080 |   | txt += 'else\n'
1081 |   | txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1082 |   | txt += 'fi\n'
1083 |   | txt += '\n'
1084 | + | txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
1085 | + | txt += ' echo ">>> Executable failed $executable_exit_status"\n'
1086 | + | txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1087 | + | txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1088 | + | txt += ' job_exit_code=$executable_exit_status\n'
1089 | + | txt += ' func_exit\n'
1090 | + | txt += 'fi\n\n'
1091 |   | txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1092 |   | txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1093 |   | txt += 'job_exit_code=$executable_exit_status\n'
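The block added at lines 1084-1090 is the new catch-all: once the FJR has been parsed, any exit status other than 0 or the specially handled 50115, 50117 and 30001 sends the wrapper through func_exit. Expressed as a plain predicate (a sketch, not wrapper code):

def should_abort(executable_exit_status):
    # Exit codes the wrapper keeps handling further down instead of aborting:
    # 0 (success), 50115/50117 (FJR-related statuses), 30001 (input-file check).
    handled = (0, 50115, 50117, 30001)
    return executable_exit_status not in handled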
1100 |   | def getParams(self):
1101 |   | return self._params
1102 |   |
1103 | < | def uniquelist(self, old):
1503 | < | """
1504 | < | remove duplicates from a list
1505 | < | """
1506 | < | nd={}
1507 | < | for e in old:
1508 | < | nd[e]=0
1509 | < | return nd.keys()
1510 | < |
1511 | < | def outList(self):
1103 | > | def outList(self,list=False):
1104 |   | """
1105 |   | check the dimension of the output files
1106 |   | """
1109 |   | listOutFiles = []
1110 |   | stdout = 'CMSSW_$NJob.stdout'
1111 |   | stderr = 'CMSSW_$NJob.stderr'
1112 | + | if len(self.output_file) <= 0:
1113 | + | msg ="WARNING: no output files name have been defined!!\n"
1114 | + | msg+="\tno output files will be reported back/staged\n"
1115 | + | common.logger.info(msg)
1116 |   | if (self.return_data == 1):
1117 |   | for file in (self.output_file+self.output_file_sandbox):
1118 |   | listOutFiles.append(numberFile(file, '$NJob'))
1126 |   | txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1127 |   | txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1128 |   | txt += 'export filesToCheck\n'
1129 | + |
1130 | + | if list : return self.output_file
1131 |   | return txt
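Finally, outList() gains a list flag: by default it still returns the shell fragment that defines filesToCheck, while outList(list=True) returns the bare self.output_file list before any per-job renaming. A usage sketch, where jobtype stands for an instance of this Cmssw class:

# Hypothetical usage of the extended outList()
wrapper_txt  = jobtype.outList()           # shell snippet exporting filesToCheck
user_outputs = jobtype.outList(list=True)  # plain list of configured output files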