[ViewVC] Diff of: cvsroot/COMP/CRAB/python/Splitter.py

Comparing COMP/CRAB/python/Splitter.py (file contents):
Revision 1.1 by spiga, Wed Feb 4 14:24:07 2009 UTC vs.
Revision 1.24 by spiga, Tue Jul 21 16:18:09 2009 UTC

#	Line 1 \| Line 1
1		import common
2	–	from crab_logger import Logger
2		from crab_exceptions import *
3		from crab_util import *
4		from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
#	Line 7 \| Line 6 \| from WMCore.SiteScreening.BlackWhiteList
6		class JobSplitter:
7		def __init__( self, cfg_params, args ):
8		self.cfg_params = cfg_params
9	<	self.blockSites = args['blockSites']
11	<	self.pubdata = args['pubdata']
9	>	self.args=args
10		#self.maxEvents
13	–	self.jobDestination=[] # Site destination(s) for each job (list of lists)
11		# init BlackWhiteListParser
12	<	seWhiteList = cfg_params.get('EDG.se_white_list',[])
13	<	seBlackList = cfg_params.get('EDG.se_black_list',[])
14	<	self.blackWhiteListParser = SEBlackWhiteListParser(seWhiteList, seBlackList, common.logger)
12	>	self.seWhiteList = cfg_params.get('GRID.se_white_list',[])
13	>	seBlackList = cfg_params.get('GRID.se_black_list',[])
14	>	self.blackWhiteListParser = SEBlackWhiteListParser(self.seWhiteList, seBlackList, common.logger())
15
16
17		def checkUserSettings(self):
#	Line 46 \| Line 43 \| class JobSplitter:
43		self.selectTotalNumberEvents = 0
44
45
46	+	def ComputeSubBlockSites( self, blockSites ):
47	+	"""
48	+	"""
49	+	sub_blockSites = {}
50	+	for k,v in blockSites.iteritems():
51	+	sites=self.blackWhiteListParser.checkWhiteList(v)
52	+	if sites : sub_blockSites[k]=v
53	+	if len(sub_blockSites) < 1:
54	+	msg = 'WARNING: the sites %s is not hosting any part of data.'%self.seWhiteList
55	+	raise CrabException(msg)
56	+	return sub_blockSites
57	+
58		########################################################################
59		def jobSplittingByEvent( self ):
60		"""
#	Line 55 \| Line 64 \| class JobSplitter:
64		REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
65		self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
66		self.maxEvents, self.filesbyblock
67	<	SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
67	>	SETS: jobDestination - Site destination(s) for each job (a list of lists)
68		self.total_number_of_jobs - Total # of jobs
69		self.list_of_args - File(s) job will run on (a list of lists)
70		"""
71
72	+	jobDestination=[]
73		self.checkUserSettings()
74		if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
75		msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
76		raise CrabException(msg)
67	–
68	–	self.filesbyblock=self.pubdata.getFiles()
77
78	<	self.eventsbyblock=self.pubdata.getEventsPerBlock()
79	<	self.eventsbyfile=self.pubdata.getEventsPerFile()
80	<	self.parentFiles=self.pubdata.getParent()
78	>	blockSites = self.args['blockSites']
79	>	pubdata = self.args['pubdata']
80	>	filesbyblock=pubdata.getFiles()
81	>
82	>	self.eventsbyblock=pubdata.getEventsPerBlock()
83	>	self.eventsbyfile=pubdata.getEventsPerFile()
84	>	self.parentFiles=pubdata.getParent()
85
86		## get max number of events
87	<	self.maxEvents=self.pubdata.getMaxEvents()
87	>	self.maxEvents=pubdata.getMaxEvents()
88
89		self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
90		noBboundary = int(self.cfg_params.get('CMSSW.no_block_boundary',0))
91
92	+	if noBboundary == 1:
93	+	if self.total_number_of_events== -1:
94	+	msg = 'You are selecting no_block_boundary=1 which does not allow to set total_number_of_events=-1\n'
95	+	msg +='\tYou shoud get the number of event from DBS web interface and use it for your configuration.'
96	+	raise CrabException(msg)
97	+	if len(self.seWhiteList.split(',')) != 1:
98	+	msg = 'You are selecting no_block_boundary=1 which requires to choose one and only one site.\n'
99	+	msg += "\tPlease set se_white_list with the site's storage element name."
100	+	raise CrabException(msg)
101	+	blockSites = self.ComputeSubBlockSites(blockSites)
102	+
103		# ---- Handle the possible job splitting configurations ---- #
104		if (self.selectTotalNumberEvents):
105		totalEventsRequested = self.total_number_of_events
#	Line 91 \| Line 114 \| class JobSplitter:
114		# If user requested more events than are in the dataset
115		elif (totalEventsRequested > self.maxEvents):
116		eventsRemaining = self.maxEvents
117	<	common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
117	>	common.logger.info("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
118		# If user requested less events than are in the dataset
119		else:
120		eventsRemaining = totalEventsRequested
#	Line 107 \| Line 130 \| class JobSplitter:
130		eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
131
132		if (self.selectNumberOfJobs):
133	<	common.logger.message("May not create the exact number_of_jobs requested.")
133	>	common.logger.info("May not create the exact number_of_jobs requested.")
134
135		# old... to remove Daniele
136		totalNumberOfJobs = 999999999
137
138	<	blocks = self.blockSites.keys()
138	>	blocks = blockSites.keys()
139		blockCount = 0
140		# Backup variable in case self.maxEvents counted events in a non-included block
141		numBlocksInDataset = len(blocks)
#	Line 124 \| Line 147 \| class JobSplitter:
147		jobsOfBlock = {}
148
149		parString = ""
150	+	pString = ""
151		filesEventCount = 0
152	+	msg=''
153
154		# ---- Iterate over the blocks in the dataset until ---- #
155		# ---- we've met the requested total # of events ---- #
#	Line 136 \| Line 161 \| class JobSplitter:
161
162		if self.eventsbyblock.has_key(block) :
163		numEventsInBlock = self.eventsbyblock[block]
164	<	common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
164	>	common.logger.debug('Events in Block File '+str(numEventsInBlock))
165
166	<	files = self.filesbyblock[block]
166	>	files = filesbyblock[block]
167		numFilesInBlock = len(files)
168		if (numFilesInBlock <= 0):
169		continue
#	Line 146 \| Line 171 \| class JobSplitter:
171		if noBboundary == 0: # DD
172		# ---- New block => New job ---- #
173		parString = ""
174	+	pString=""
175		# counter for number of events in files currently worked on
176		filesEventCount = 0
177		# flag if next while loop should touch new file
#	Line 155 \| Line 181 \| class JobSplitter:
181
182		# ---- Iterate over the files in the block until we've met the requested ---- #
183		# ---- total # of events or we've gone over all the files in this block ---- #
184	<	pString=''
184	>	msg='\n'
185		while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
186		file = files[fileCount]
187		if self.useParent==1:
188		parent = self.parentFiles[file]
189	<	for f in parent :
164	<	pString += '\\\"' + f + '\\\"\,'
165	<	common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
166	<	common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
189	>	common.logger.log(10-1, "File "+str(file)+" has the following parents: "+str(parent))
190		if newFile :
191		try:
192		numEventsInFile = self.eventsbyfile[file]
193	<	common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
193	>	common.logger.log(10-1, "File "+str(file)+" has "+str(numEventsInFile)+" events")
194		# increase filesEventCount
195		filesEventCount += numEventsInFile
196		# Add file to current job
197	<	parString += '\\\"' + file + '\\\"\,'
197	>	parString += file + ','
198	>	if self.useParent==1:
199	>	for f in parent :
200	>	pString += f + ','
201		newFile = 0
202		except KeyError:
203	<	common.logger.message("File "+str(file)+" has unknown number of events: skipping")
203	>	common.logger.info("File "+str(file)+" has unknown number of events: skipping")
204
205		eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
206		# if less events in file remain than eventsPerJobRequested
#	Line 187 \| Line 213 \| class JobSplitter:
213		if ( fileCount == numFilesInBlock-1 ) :
214		# end job using last file, use remaining events in block
215		# close job and touch new file
216	<	fullString = parString[:-2]
216	>	fullString = parString[:-1]
217		if self.useParent==1:
218	<	fullParentString = pString[:-2]
218	>	fullParentString = pString[:-1]
219		list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
220		else:
221		list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
222	<	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
223	<	self.jobDestination.append(self.blockSites[block])
224	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
222	>	msg += "Job %s can run over %s events (last file in block).\n"%(str(jobCount+1), str(filesEventCount - jobSkipEventCount))
223	>	jobDestination.append(blockSites[block])
224	>	msg += "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount])))
225		# fill jobs of block dictionary
226		jobsOfBlock[block].append(jobCount+1)
227		# reset counter
#	Line 216 \| Line 242 \| class JobSplitter:
242		# if events in file equal to eventsPerJobRequested
243		elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
244		# close job and touch new file
245	<	fullString = parString[:-2]
245	>	fullString = parString[:-1]
246		if self.useParent==1:
247	<	fullParentString = pString[:-2]
247	>	fullParentString = pString[:-1]
248		list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
249		else:
250		list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
251	<	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
252	<	self.jobDestination.append(self.blockSites[block])
253	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
251	>	msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested))
252	>	jobDestination.append(blockSites[block])
253	>	msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount])))
254		jobsOfBlock[block].append(jobCount+1)
255		# reset counter
256		jobCount = jobCount + 1
#	Line 241 \| Line 267 \| class JobSplitter:
267		# if more events in file remain than eventsPerJobRequested
268		else :
269		# close job but don't touch new file
270	<	fullString = parString[:-2]
270	>	fullString = parString[:-1]
271		if self.useParent==1:
272	<	fullParentString = pString[:-2]
272	>	fullParentString = pString[:-1]
273		list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
274		else:
275		list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
276	<	common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
277	<	self.jobDestination.append(self.blockSites[block])
278	<	common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
276	>	msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested))
277	>	jobDestination.append(blockSites[block])
278	>	msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount])))
279		jobsOfBlock[block].append(jobCount+1)
280		# increase counter
281		jobCount = jobCount + 1
#	Line 260 \| Line 286 \| class JobSplitter:
286		jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
287		# remove all but the last file
288		filesEventCount = self.eventsbyfile[file]
289	+	pString_tmp=''
290		if self.useParent==1:
291	<	for f in parent : pString += '\\\"' + f + '\\\"\,'
292	<	parString = '\\\"' + file + '\\\"\,'
291	>	for f in parent : pString_tmp += f + ','
292	>	pString = pString_tmp
293	>	parString = file + ','
294		pass # END if
295		pass # END while (iterate over files in the block)
296		pass # END while (iterate over blocks in the dataset)
297	+	common.logger.debug(msg)
298		self.ncjobs = self.total_number_of_jobs = jobCount
299		if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
300	<	common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
301	<	common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
302	<
300	>	common.logger.info("Could not run on all requested events because some blocks not hosted at allowed sites.")
301	>	common.logger.info(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
302	>
303		# skip check on block with no sites DD
304	<	if noBboundary == 0 : self.checkBlockNoSite(blocks,jobsOfBlock)
304	>	if noBboundary == 0 : self.checkBlockNoSite(blocks,jobsOfBlock,blockSites)
305
306		# prepare dict output
307		dictOut = {}
308	+	dictOut['params']= ['InputFiles','MaxEvents','SkipEvents']
309	+	if self.useParent: dictOut['params']= ['InputFiles','ParentFiles','MaxEvents','SkipEvents']
310		dictOut['args'] = list_of_lists
311	<	dictOut['jobDestination'] = self.jobDestination
311	>	dictOut['jobDestination'] = jobDestination
312		dictOut['njobs']=self.total_number_of_jobs
313
314		return dictOut
315
316		# keep trace of block with no sites to print a warning at the end
317
318	<	def checkBlockNoSite(self,blocks,jobsOfBlock):
318	>	def checkBlockNoSite(self,blocks,jobsOfBlock,blockSites):
319		# screen output
320		screenOutput = "List of jobs and available destination sites:\n\n"
321		noSiteBlock = []
322		bloskNoSite = []
323	+	allBlock = []
324
325		blockCounter = 0
326		for block in blocks:
327		if block in jobsOfBlock.keys() :
328		blockCounter += 1
329	+	allBlock.append( blockCounter )
330	+	sites=self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],[block]),[block])
331		screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
332	<	','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(self.blockSites[block],block),block)))
333	<	if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(self.blockSites[block],block),block)) == 0:
332	>	', '.join(SE2CMS(sites)))
333	>	if len(sites) == 0:
334		noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
335		bloskNoSite.append( blockCounter )
336
337	<	common.logger.message(screenOutput)
337	>	common.logger.info(screenOutput)
338		if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
339		msg = 'WARNING: No sites are hosting any part of data for block:\n '
340		virgola = ""
#	Line 315 \| Line 349 \| class JobSplitter:
349		for range_jobs in noSiteBlock:
350		msg += str(range_jobs) + virgola
351		msg += '\n will not be submitted and this block of data can not be analyzed!\n'
352	<	if self.cfg_params.has_key('EDG.se_white_list'):
353	<	msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
352	>	if self.cfg_params.has_key('GRID.se_white_list'):
353	>	msg += 'WARNING: SE White List: '+self.cfg_params['GRID.se_white_list']+'\n'
354		msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
355		msg += 'Please check if the dataset is available at this site!)\n'
356	<	if self.cfg_params.has_key('EDG.ce_white_list'):
357	<	msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
356	>	if self.cfg_params.has_key('GRID.ce_white_list'):
357	>	msg += 'WARNING: CE White List: '+self.cfg_params['GRID.ce_white_list']+'\n'
358		msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
359		msg += 'Please check if the dataset is available at this site!)\n'
360
361	<	common.logger.message(msg)
361	>	common.logger.info(msg)
362	>
363	>	if bloskNoSite == allBlock:
364	>	raise CrabException('No jobs created')
365
366		return
367
368
369		########################################################################
370	<	def jobSplittingByRun(self):
370	>	def jobSplittingByRun(self):
371		"""
372		"""
373	<	from sets import Set
373	>	from sets import Set
374		from WMCore.JobSplitting.RunBased import RunBased
375		from WMCore.DataStructs.Workflow import Workflow
376		from WMCore.DataStructs.File import File
377		from WMCore.DataStructs.Fileset import Fileset
378		from WMCore.DataStructs.Subscription import Subscription
379		from WMCore.JobSplitting.SplitterFactory import SplitterFactory
380	<	from WMCore.DataStructs.Run import Run
380	>	from WMCore.DataStructs.Run import Run
381
382		self.checkUserSettings()
383	+	blockSites = self.args['blockSites']
384	+	pubdata = self.args['pubdata']
385
386		if self.selectNumberOfJobs == 0 :
387		self.theNumberOfJobs = 9999999
388		blocks = {}
389	<	runList = []
389	>	runList = []
390		thefiles = Fileset(name='FilesToSplit')
391	<	fileList = self.pubdata.getListFiles()
391	>	fileList = pubdata.getListFiles()
392		for f in fileList:
354	–	# print f
393		block = f['Block']['Name']
394	<	# if not blocks.has_key(block):
395	<	# blocks[block] = reader.listFileBlockLocation(block)
358	<	try:
359	<	f['Block']['StorageElementList'].extend(self.blockSites[block])
394	>	try:
395	>	f['Block']['StorageElementList'].extend(blockSites[block])
396		except:
397		continue
398		wmbsFile = File(f['LogicalFileName'])
399	<	[ wmbsFile['locations'].add(x) for x in self.blockSites[block] ]
399	>	[ wmbsFile['locations'].add(x) for x in blockSites[block] ]
400		wmbsFile['block'] = block
401		runNum = f['RunsList'][0]['RunNumber']
402	<	runList.append(runNum)
402	>	runList.append(runNum)
403		myRun = Run(runNumber=runNum)
404		wmbsFile.addRun( myRun )
405		thefiles.addFile(
406		wmbsFile
407		)
408	<
408	>
409		work = Workflow()
410		subs = Subscription(
411		fileset = thefiles,
#	Line 378 \| Line 414 \| class JobSplitter:
414		type = "Processing")
415		splitter = SplitterFactory()
416		jobfactory = splitter(subs)
417	<
418	<	#loop over all runs
417	>
418	>	#loop over all runs
419		set = Set(runList)
420		list_of_lists = []
421		jobDestination = []
386	–
422		count = 0
423	<	for i in list(set):
423	>	for jobGroup in jobfactory():
424		if count < self.theNumberOfJobs:
425	<	res = self.getJobInfo(jobfactory())
426	<	parString = ''
425	>	res = self.getJobInfo(jobGroup)
426	>	parString = ''
427		for file in res['lfns']:
428	<	parString += '\\\"' + file + '\\\"\,'
429	<	fullString = parString[:-2]
430	<	list_of_lists.append([fullString,str(-1),str(0)])
431	<	# sto assumendo che i jobs siano tutti locati insieme
432	<	jobDestination.append(res['locations'])
428	>	parString += file + ','
429	>	fullString = parString[:-1]
430	>	list_of_lists.append([fullString,str(-1),str(0)])
431	>	#need to check single file location
432	>	jobDestination.append(res['locations'])
433		count +=1
399	–	#print jobDestination
434		# prepare dict output
435		dictOut = {}
436	+	dictOut['params']= ['InputFiles','MaxEvents','SkipEvents']
437		dictOut['args'] = list_of_lists
438		dictOut['jobDestination'] = jobDestination
439		dictOut['njobs']=count
#	Line 407 \| Line 442 \| class JobSplitter:
442
443		def getJobInfo( self,jobGroup ):
444		res = {}
445	<	lfns = []
446	<	locations = []
445	>	lfns = []
446	>	locations = []
447		tmp_check=0
448		for job in jobGroup.jobs:
449		for file in job.getFiles():
450	<	lfns.append(file['lfn'])
450	>	lfns.append(file['lfn'])
451		for loc in file['locations']:
452		if tmp_check < 1 :
453		locations.append(loc)
454	<	tmp_check = tmp_check + 1
455	<	### qui va messo il check per la locations
456	<	res['lfns'] = lfns
457	<	res['locations'] = locations
458	<	return res
459	<
454	>	tmp_check = tmp_check + 1
455	>	### qui va messo il check per la locations
456	>	res['lfns'] = lfns
457	>	res['locations'] = locations
458	>	return res
459	>
460		########################################################################
461	<	def jobSplittingNoInput(self):
461	>	def prepareSplittingNoInput(self):
462		"""
428	–	Perform job splitting based on number of event per job
463		"""
430	–	common.logger.debug(5,'Splitting per events')
431	–
464		if (self.selectEventsPerJob):
465	<	common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
465	>	common.logger.info('Required '+str(self.eventsPerJob)+' events per job ')
466		if (self.selectNumberOfJobs):
467	<	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
467	>	common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
468		if (self.selectTotalNumberEvents):
469	<	common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
469	>	common.logger.info('Required '+str(self.total_number_of_events)+' events in total ')
470
471		if (self.total_number_of_events < 0):
472		msg='Cannot split jobs per Events with "-1" as total number of events'
#	Line 451 \| Line 483 \| class JobSplitter:
483		self.total_number_of_jobs = self.theNumberOfJobs
484		self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
485
486	<	common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
486	>
487	>	def jobSplittingNoInput(self):
488	>	"""
489	>	Perform job splitting based on number of event per job
490	>	"""
491	>	common.logger.debug('Splitting per events')
492	>	self.checkUserSettings()
493	>	jobDestination=[]
494	>	if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
495	>	msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
496	>	raise CrabException(msg)
497	>
498	>	managedGenerators =self.args['managedGenerators']
499	>	generator = self.args['generator']
500	>	firstRun = self.cfg_params.get('CMSSW.first_run',None)
501	>
502	>	self.prepareSplittingNoInput()
503	>
504	>	common.logger.debug('N jobs '+str(self.total_number_of_jobs))
505
506		# is there any remainder?
507		check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
508
509	<	common.logger.debug(5,'Check '+str(check))
509	>	common.logger.debug('Check '+str(check))
510
511	<	common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
511	>	common.logger.info(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
512		if check > 0:
513	<	common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
513	>	common.logger.info('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
514
515		# argument is seed number.$i
516		self.list_of_args = []
517		for i in range(self.total_number_of_jobs):
518		## Since there is no input, any site is good
519	<	self.jobDestination.append([""]) #must be empty to write correctly the xml
519	>	jobDestination.append([""]) #must be empty to write correctly the xml
520		args=[]
521	<	if (self.firstRun):
521	>	if (firstRun):
522		## pythia first run
523	<	args.append(str(self.firstRun)+str(i))
524	<	if (self.generator in self.managedGenerators):
525	<	if (self.generator == 'comphep' and i == 0):
523	>	args.append(str(firstRun)+str(i))
524	>	if (generator in managedGenerators):
525	>	args.append(generator)
526	>	if (generator == 'comphep' and i == 0):
527		# COMPHEP is brain-dead and wants event #'s like 1,100,200,300
528		args.append('1')
529	<	else:
529	>	else:
530		args.append(str(i*self.eventsPerJob))
531	+	args.append(str(self.eventsPerJob))
532		self.list_of_args.append(args)
533		# prepare dict output
534	+
535		dictOut = {}
536	+	dictOut['params'] = ['MaxEvents']
537	+	if (firstRun):
538	+	dictOut['params'] = ['FirstRun','MaxEvents']
539	+	if ( generator in managedGenerators ) :
540	+	dictOut['params'] = ['FirstRun', 'Generator', 'FirstEvent', 'MaxEvents']
541	+	else:
542	+	if (generator in managedGenerators) :
543	+	dictOut['params'] = ['Generator', 'FirstEvent', 'MaxEvents']
544		dictOut['args'] = self.list_of_args
545	<	dictOut['jobDestination'] = self.jobDestination
545	>	dictOut['jobDestination'] = jobDestination
546		dictOut['njobs']=self.total_number_of_jobs
547
548		return dictOut
#	Line 492 \| Line 553 \| class JobSplitter:
553		Perform job splitting based on number of job
554		"""
555		self.checkUserSettings()
556	<	if (self.selectnumberofjobs == 0):
556	>	if (self.selectNumberOfJobs == 0):
557		msg = 'must specify number_of_jobs.'
558		raise crabexception(msg)
559	+	jobDestination = []
560	+	common.logger.debug('Splitting per job')
561	+	common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
562
563	<	common.logger.debug(5,'Splitting per job')
500	<	common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
563	>	# self.total_number_of_jobs = self.theNumberOfJobs
564
565	<	self.total_number_of_jobs = self.theNumberOfJobs
565	>	self.prepareSplittingNoInput()
566
567	<	common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs))
567	>	common.logger.debug('N jobs '+str(self.total_number_of_jobs))
568
569	<	common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
569	>	common.logger.info(str(self.total_number_of_jobs)+' jobs can be created')
570
571		# argument is seed number.$i
572		self.list_of_args = []
573		for i in range(self.total_number_of_jobs):
574	<	self.jobDestination.append([""])
575	<	self.list_of_args.append([str(i)])
574	>	args=[]
575	>	jobDestination.append([""])
576	>	if self.eventsPerJob != 0 :
577	>	args.append(str(self.eventsPerJob))
578	>	self.list_of_args.append(args)
579
580		# prepare dict output
581		dictOut = {}
582	<	dictOut['args'] = self.list_of_args
583	<	dictOut['jobDestination'] = []
582	>	dictOut['params'] = ['MaxEvents']
583	>	dictOut['args'] = self.list_of_args
584	>	dictOut['jobDestination'] = jobDestination
585		dictOut['njobs']=self.total_number_of_jobs
586		return dictOut
520	–
587
588	<	def jobSplittingByLumi(self):
588	>
589	>	def jobSplittingByLumi(self):
590		"""
591		"""
592		return
#	Line 527 \| Line 594 \| class JobSplitter:
594		"""
595		Define key splittingType matrix
596		"""
597	<	SplitAlogs = {
598	<	'EventBased' : self.jobSplittingByEvent,
597	>	SplitAlogs = {
598	>	'EventBased' : self.jobSplittingByEvent,
599		'RunBased' : self.jobSplittingByRun,
600	<	'LumiBased' : self.jobSplittingByLumi,
601	<	'NoInput' : self.jobSplittingNoInput,
600	>	'LumiBased' : self.jobSplittingByLumi,
601	>	'NoInput' : self.jobSplittingNoInput,
602		'ForScript' : self.jobSplittingForScript
603	<	}
603	>	}
604		return SplitAlogs
605

Diff Legend

-–
+Removed lines
-+
+Added lines
-<
+Changed lines
->
+Changed lines

Comparing COMP/CRAB/python/Splitter.py (file contents): Revision 1.1 by spiga, Wed Feb 4 14:24:07 2009 UTC vs. Revision 1.24 by spiga, Tue Jul 21 16:18:09 2009 UTC

Diff Legend

Comparing COMP/CRAB/python/Splitter.py (file contents):
Revision 1.1 by spiga, Wed Feb 4 14:24:07 2009 UTC vs.
Revision 1.24 by spiga, Tue Jul 21 16:18:09 2009 UTC