ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/Splitter.py
Revision: 1.25
Committed: Tue Jul 21 20:42:13 2009 UTC (15 years, 9 months ago) by spiga
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_6_1_pre4, CRAB_2_6_1_pre3, CRAB_2_6_1_pre2, CRAB_2_6_1
Changes since 1.24: +9 -9 lines
Log Message:
minor fix. Print out formatter

File Contents

# User Rev Content
1 spiga 1.1 import common
2     from crab_exceptions import *
3     from crab_util import *
4     from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
5    
class JobSplitter:
    """
    Split a task into jobs according to the user configuration.

    Every jobSplitting* method (except the jobSplittingByLumi stub) returns
    a dictionary with the keys:
        'params'         - names of the per-job arguments
        'args'           - one list of argument values per job
        'jobDestination' - one list of sites per job
        'njobs'          - number of jobs created
    Algos() maps the splitting type names onto these methods.
    """

    def __init__(self, cfg_params, args):
        """
        ARGUMENTS: cfg_params: dictionary-like user configuration
                   args: dictionary with the inputs read by the splitting
                         methods ('blockSites', 'pubdata',
                         'managedGenerators', 'generator')
        """
        self.cfg_params = cfg_params
        self.args = args
        # init BlackWhiteListParser
        self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
        seBlackList = cfg_params.get('GRID.se_black_list', [])
        self.blackWhiteListParser = SEBlackWhiteListParser(self.seWhiteList, seBlackList, common.logger())

    def checkUserSettings(self):
        """
        Read the three splitting options from cfg_params.

        SETS: self.eventsPerJob (default -1), self.selectEventsPerJob
              self.theNumberOfJobs (default 0), self.selectNumberOfJobs
              self.total_number_of_events (default 0), self.selectTotalNumberEvents
              Each select* flag is 1 when the option is configured, else 0.
        RAISES: CrabException when total_number_of_events (other than -1)
                is smaller than number_of_jobs.
        """
        cfg = self.cfg_params

        ## Events per job
        if 'CMSSW.events_per_job' in cfg:
            self.eventsPerJob = int(cfg['CMSSW.events_per_job'])
            self.selectEventsPerJob = 1
        else:
            self.eventsPerJob = -1
            self.selectEventsPerJob = 0

        ## number of jobs
        if 'CMSSW.number_of_jobs' in cfg:
            self.theNumberOfJobs = int(cfg['CMSSW.number_of_jobs'])
            self.selectNumberOfJobs = 1
        else:
            self.theNumberOfJobs = 0
            self.selectNumberOfJobs = 0

        ## total number of events
        if 'CMSSW.total_number_of_events' in cfg:
            self.total_number_of_events = int(cfg['CMSSW.total_number_of_events'])
            self.selectTotalNumberEvents = 1
            if self.selectNumberOfJobs == 1:
                if (self.total_number_of_events != -1) and self.total_number_of_events < self.theNumberOfJobs:
                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
                    raise CrabException(msg)
        else:
            self.total_number_of_events = 0
            self.selectTotalNumberEvents = 0

    def ComputeSubBlockSites(self, blockSites):
        """
        Keep only the blocks for which the white list check returns sites.

        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
        RETURNS: dictionary with the retained blocks; each value is the
                 unfiltered site list received in input.
        RAISES: CrabException when no block is retained.
        """
        sub_blockSites = {}
        for block, sites in blockSites.items():
            if self.blackWhiteListParser.checkWhiteList(sites):
                sub_blockSites[block] = sites
        if len(sub_blockSites) < 1:
            msg = 'WARNING: the sites %s is not hosting any part of data.'%self.seWhiteList
            raise CrabException(msg)
        return sub_blockSites

    def _jobArguments(self, inputFiles, parentFiles, maxEvents, skipEvents):
        """
        Build the argument list of one event-based job.

        inputFiles and parentFiles are comma-terminated strings; the
        trailing comma is dropped. The parent files are included only
        when self.useParent == 1.
        """
        if self.useParent == 1:
            return [inputFiles[:-1], parentFiles[:-1], str(maxEvents), str(skipEvents)]
        return [inputFiles[:-1], str(maxEvents), str(skipEvents)]

    ########################################################################
    def jobSplittingByEvent(self):
        """
        Perform job splitting by events. Jobs run over an integer number of
        files and, unless CMSSW.no_block_boundary is 1, no more than one block.

        READS: self.args['blockSites']: dictionary with blocks as keys and list of host sites as values
               self.args['pubdata']: object providing getFiles(), getEventsPerBlock(),
                                     getEventsPerFile(), getParent() and getMaxEvents()
               cfg_params 'CMSSW.use_parent' and 'CMSSW.no_block_boundary'
        SETS:  self.eventsbyblock, self.eventsbyfile, self.parentFiles,
               self.maxEvents, self.useParent
               self.ncjobs, self.total_number_of_jobs - Total # of jobs
        RETURNS: dictionary with 'params', 'args' (a list of lists),
                 'jobDestination' (a list of lists) and 'njobs'
        RAISES: CrabException on inconsistent splitting settings
        """
        jobDestination = []
        self.checkUserSettings()
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)

        blockSites = self.args['blockSites']
        pubdata = self.args['pubdata']
        filesbyblock = pubdata.getFiles()

        self.eventsbyblock = pubdata.getEventsPerBlock()
        self.eventsbyfile = pubdata.getEventsPerFile()
        self.parentFiles = pubdata.getParent()

        ## get max number of events
        self.maxEvents = pubdata.getMaxEvents()

        self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))
        noBboundary = int(self.cfg_params.get('CMSSW.no_block_boundary', 0))

        if noBboundary == 1:
            if self.total_number_of_events == -1:
                msg = 'You are selecting no_block_boundary=1 which does not allow to set total_number_of_events=-1\n'
                msg += '\tYou shoud get the number of event from DBS web interface and use it for your configuration.'
                raise CrabException(msg)
            if len(self.seWhiteList) == 0 or len(self.seWhiteList.split(',')) != 1:
                msg = 'You are selecting no_block_boundary=1 which requires to choose one and only one site.\n'
                msg += "\tPlease set se_white_list with the site's storage element name."
                raise CrabException(msg)
            blockSites = self.ComputeSubBlockSites(blockSites)

        # ---- Handle the possible job splitting configurations ---- #
        if self.selectTotalNumberEvents:
            totalEventsRequested = self.total_number_of_events
        if self.selectEventsPerJob:
            eventsPerJobRequested = self.eventsPerJob
            if self.selectNumberOfJobs:
                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob

        # If user requested all the events in the dataset
        if totalEventsRequested == -1:
            eventsRemaining = self.maxEvents
        # If user requested more events than are in the dataset
        elif totalEventsRequested > self.maxEvents:
            eventsRemaining = self.maxEvents
            # report the effective request: self.total_number_of_events is 0
            # when the total comes from number_of_jobs * events_per_job
            common.logger.info("Requested "+str(totalEventsRequested)+ " events, but only "+str(self.maxEvents)+" events are available.")
        # If user requested less events than are in the dataset
        else:
            eventsRemaining = totalEventsRequested

        # If user requested more events per job than are in the dataset
        if self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents:
            eventsPerJobRequested = self.maxEvents

        # For user info at end
        totalEventCount = 0

        if self.selectTotalNumberEvents and self.selectNumberOfJobs:
            if self.theNumberOfJobs == 0:
                raise CrabException('number_of_jobs must be different from 0.')
            eventsPerJobRequested = eventsRemaining // self.theNumberOfJobs

        if self.selectNumberOfJobs:
            common.logger.info("May not create the exact number_of_jobs requested.")

        # With a non-positive number of events per job the file loop below
        # would keep creating jobs without consuming any event.
        if eventsRemaining > 0 and eventsPerJobRequested <= 0:
            msg = 'Cannot split by event: the resulting number of events per job is %s.\n' % eventsPerJobRequested
            msg += '\tPlease check events_per_job, number_of_jobs and total_number_of_events.'
            raise CrabException(msg)

        # old... to remove Daniele
        totalNumberOfJobs = 999999999

        blocks = list(blockSites.keys())
        blockCount = 0
        # Backup variable in case self.maxEvents counted events in a non-included block
        numBlocksInDataset = len(blocks)

        jobCount = 0
        list_of_lists = []

        # dictionary tracking which jobs belong to which block
        jobsOfBlock = {}

        # comma-terminated lists of input files / parent files of the job being built
        parString = ""
        pString = ""
        filesEventCount = 0
        # per-job debug lines, accumulated over all the blocks
        jobReport = []

        # ---- Iterate over the blocks in the dataset until ---- #
        # ---- we've met the requested total # of events    ---- #
        while (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs):
            block = blocks[blockCount]
            blockCount += 1
            if block not in jobsOfBlock:
                jobsOfBlock[block] = []

            if block not in self.eventsbyblock:
                continue
            numEventsInBlock = self.eventsbyblock[block]
            common.logger.debug('Events in Block File '+str(numEventsInBlock))

            files = filesbyblock[block]
            numFilesInBlock = len(files)
            if numFilesInBlock <= 0:
                continue
            fileCount = 0
            if noBboundary == 0: # DD
                # ---- New block => New job ---- #
                parString = ""
                pString = ""
                # counter for number of events in files currently worked on
                filesEventCount = 0
            # flag if next while loop should touch new file
            newFile = 1
            # job event counter
            jobSkipEventCount = 0

            # ---- Iterate over the files in the block until we've met the requested ---- #
            # ---- total # of events or we've gone over all the files in this block  ---- #
            while (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs):
                lfn = files[fileCount]
                if self.useParent == 1:
                    parent = self.parentFiles[lfn]
                    common.logger.log(10-1, "File "+str(lfn)+" has the following parents: "+str(parent))
                if newFile:
                    try:
                        numEventsInFile = self.eventsbyfile[lfn]
                    except KeyError:
                        common.logger.info("File "+str(lfn)+" has unknown number of events: skipping")
                    else:
                        common.logger.log(10-1, "File "+str(lfn)+" has "+str(numEventsInFile)+" events")
                        # increase filesEventCount
                        filesEventCount += numEventsInFile
                        # Add file to current job
                        parString += lfn + ','
                        if self.useParent == 1:
                            for f in parent:
                                pString += f + ','
                        newFile = 0

                eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
                # events still unused in the files collected for the current job
                eventsAvailable = filesEventCount - jobSkipEventCount
                jobNumber = jobCount + 1

                # if less events in file remain than eventsPerJobRequested
                if eventsAvailable < eventsPerJobRequested:
                    # with no_block_boundary=1 the job is never closed here:
                    # it goes on with the next file, possibly of the next block
                    if noBboundary != 1 and fileCount == numFilesInBlock - 1:
                        # last file in block: end job using it, with the
                        # remaining events in block (MaxEvents = -1)
                        list_of_lists.append(self._jobArguments(parString, pString, -1, jobSkipEventCount))
                        jobReport.append("Job %s can run over %s events (last file in block).\n" % (jobNumber, eventsAvailable))
                        jobDestination.append(blockSites[block])
                        jobReport.append("Job %s Destination: %s\n" % (jobNumber, str(SE2CMS(blockSites[block]))))
                        # fill jobs of block dictionary
                        jobsOfBlock[block].append(jobNumber)
                        # reset counter
                        jobCount = jobNumber
                        totalEventCount = totalEventCount + eventsAvailable
                        eventsRemaining = eventsRemaining - eventsAvailable
                        jobSkipEventCount = 0
                        # reset file
                        pString = ""
                        parString = ""
                        filesEventCount = 0
                    # go to next file
                    newFile = 1
                    fileCount += 1
                # if events in file equal to eventsPerJobRequested
                elif eventsAvailable == eventsPerJobRequested:
                    # close job and touch new file
                    list_of_lists.append(self._jobArguments(parString, pString, eventsPerJobRequested, jobSkipEventCount))
                    jobReport.append("Job %s can run over %s events.\n" % (jobNumber, eventsPerJobRequested))
                    jobDestination.append(blockSites[block])
                    jobReport.append("Job %s Destination: %s\n" % (jobNumber, str(SE2CMS(blockSites[block]))))
                    jobsOfBlock[block].append(jobNumber)
                    # reset counter
                    jobCount = jobNumber
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    jobSkipEventCount = 0
                    # reset file
                    pString = ""
                    parString = ""
                    filesEventCount = 0
                    newFile = 1
                    fileCount += 1
                # if more events in file remain than eventsPerJobRequested
                else:
                    # close job but don't touch new file
                    list_of_lists.append(self._jobArguments(parString, pString, eventsPerJobRequested, jobSkipEventCount))
                    jobReport.append("Job %s can run over %s events.\n" % (jobNumber, eventsPerJobRequested))
                    jobDestination.append(blockSites[block])
                    jobReport.append("Job %s Destination: %s\n" % (jobNumber, str(SE2CMS(blockSites[block]))))
                    jobsOfBlock[block].append(jobNumber)
                    # increase counter
                    jobCount = jobNumber
                    totalEventCount = totalEventCount + eventsPerJobRequested
                    eventsRemaining = eventsRemaining - eventsPerJobRequested
                    # calculate skip events for last file
                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
                    eventsInLastFile = self.eventsbyfile[lfn]
                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - eventsInLastFile)
                    # remove all but the last file
                    filesEventCount = eventsInLastFile
                    pString = ''
                    if self.useParent == 1:
                        for f in parent:
                            pString += f + ','
                    parString = lfn + ','

        msg = ''.join(jobReport)
        if msg:
            msg = '\n' + msg
        common.logger.debug(msg)
        self.ncjobs = self.total_number_of_jobs = jobCount
        if eventsRemaining > 0 and jobCount < totalNumberOfJobs:
            common.logger.info("Could not run on all requested events because some blocks not hosted at allowed sites.")
        common.logger.info(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")

        # skip check on block with no sites  DD
        if noBboundary == 0:
            self.checkBlockNoSite(blocks, jobsOfBlock, blockSites)

        # prepare dict output
        dictOut = {}
        dictOut['params'] = ['InputFiles', 'MaxEvents', 'SkipEvents']
        # same condition used when the job arguments are built
        if self.useParent == 1:
            dictOut['params'] = ['InputFiles', 'ParentFiles', 'MaxEvents', 'SkipEvents']
        dictOut['args'] = list_of_lists
        dictOut['jobDestination'] = jobDestination
        dictOut['njobs'] = self.total_number_of_jobs

        return dictOut

    # keep trace of block with no sites to print a warning at the end

    def checkBlockNoSite(self, blocks, jobsOfBlock, blockSites):
        """
        Log, for every block with an entry in jobsOfBlock, its jobs and the
        sites left after the black and white list checks, and warn about
        the blocks left with no site.

        ARGUMENTS: blocks: sequence of block names
                   jobsOfBlock: dictionary block -> list of job numbers
                   blockSites: dictionary block -> list of host sites
        RAISES: CrabException('No jobs created') when the blocks without
                sites are all the blocks considered (also when there is none).
        """
        # screen output
        screenOutput = "List of jobs and available destination sites:\n\n"
        noSiteBlock = []   # job ranges of the blocks without sites
        bloskNoSite = []   # counters of the blocks without sites
        allBlock = []      # counters of all the blocks considered

        parser = self.blackWhiteListParser
        blockCounter = 0
        for block in blocks:
            if block not in jobsOfBlock:
                continue
            blockCounter += 1
            allBlock.append(blockCounter)
            sites = parser.checkWhiteList(parser.checkBlackList(blockSites[block], [block]), [block])
            jobRange = spanRanges(jobsOfBlock[block])
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter, jobRange,
                                                                   ', '.join(SE2CMS(sites)))
            if len(sites) == 0:
                noSiteBlock.append(jobRange)
                bloskNoSite.append(blockCounter)

        common.logger.info(screenOutput)
        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
            msg = 'WARNING: No sites are hosting any part of data for block:\n '
            # join instead of appending a separator after each item,
            # which left a trailing comma
            msg += ' ' + ', '.join([str(counter) for counter in bloskNoSite])
            msg += '\n\t\tRelated jobs:\n '
            msg += ','.join([str(jobRange) for jobRange in noSiteBlock])
            msg += '\n\t\twill not be submitted and this block of data can not be analyzed!\n'
            if 'GRID.se_white_list' in self.cfg_params:
                msg += '\tWARNING: SE White List: '+self.cfg_params['GRID.se_white_list']+'\n'
                msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
                # newline added so that a following CE warning starts on its own line
                msg += '\tPlease check if the dataset is available at this site!)\n'
            if 'GRID.ce_white_list' in self.cfg_params:
                msg += '\tWARNING: CE White List: '+self.cfg_params['GRID.ce_white_list']+'\n'
                msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
                msg += '\tPlease check if the dataset is available at this site!)\n'

            common.logger.info(msg)

        if bloskNoSite == allBlock:
            raise CrabException('No jobs created')

        return

    ########################################################################
    def jobSplittingByRun(self):
        """
        Perform job splitting with the WMCore 'RunBased' algorithm: one job
        per job group returned by the WMCore job factory, limited to
        number_of_jobs when that option is configured.

        READS: self.args['blockSites'], self.args['pubdata'] (getListFiles())
        SETS:  self.theNumberOfJobs to 9999999 when number_of_jobs is not configured
        RETURNS: dictionary with 'params', 'args', 'jobDestination' and 'njobs';
                 every job gets MaxEvents -1 and SkipEvents 0.
        """
        # NOTE(review): RunBased is not referenced by name below; the import
        # is kept in case loading the module matters - TODO confirm
        from WMCore.JobSplitting.RunBased import RunBased
        from WMCore.DataStructs.Workflow import Workflow
        from WMCore.DataStructs.File import File
        from WMCore.DataStructs.Fileset import Fileset
        from WMCore.DataStructs.Subscription import Subscription
        from WMCore.JobSplitting.SplitterFactory import SplitterFactory
        from WMCore.DataStructs.Run import Run

        self.checkUserSettings()
        blockSites = self.args['blockSites']
        pubdata = self.args['pubdata']

        if self.selectNumberOfJobs == 0:
            self.theNumberOfJobs = 9999999

        thefiles = Fileset(name='FilesToSplit')
        for f in pubdata.getListFiles():
            block = f['Block']['Name']
            try:
                f['Block']['StorageElementList'].extend(blockSites[block])
            except (KeyError, AttributeError, TypeError):
                # best effort: files whose block has no usable site
                # information are left out of the splitting
                continue
            wmbsFile = File(f['LogicalFileName'])
            for site in blockSites[block]:
                wmbsFile['locations'].add(site)
            wmbsFile['block'] = block
            # only the first run of the file is registered
            runNum = f['RunsList'][0]['RunNumber']
            wmbsFile.addRun(Run(runNumber=runNum))
            thefiles.addFile(wmbsFile)

        work = Workflow()
        subs = Subscription(
            fileset = thefiles,
            workflow = work,
            split_algo = 'RunBased',
            type = "Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        # loop over the job groups until the requested number of jobs is reached
        list_of_lists = []
        jobDestination = []
        count = 0
        for jobGroup in jobfactory():
            if count >= self.theNumberOfJobs:
                break
            res = self.getJobInfo(jobGroup)
            list_of_lists.append([','.join(res['lfns']), str(-1), str(0)])
            #need to check single file location
            jobDestination.append(res['locations'])
            count += 1

        # prepare dict output
        dictOut = {}
        dictOut['params'] = ['InputFiles', 'MaxEvents', 'SkipEvents']
        dictOut['args'] = list_of_lists
        dictOut['jobDestination'] = jobDestination
        dictOut['njobs'] = count

        return dictOut

    def getJobInfo(self, jobGroup):
        """
        Collect the files and the destination of a job group.

        ARGUMENT: jobGroup: object with a 'jobs' attribute; every job
                  provides getFiles(), returning items with the keys
                  'lfn' and 'locations'
        RETURNS: dictionary with
                 'lfns'      - the lfn of every file of every job
                 'locations' - the locations of the first file only
        """
        lfns = []
        locations = []
        firstFile = True
        for job in jobGroup.jobs:
            for jobFile in job.getFiles():
                lfns.append(jobFile['lfn'])
                if firstFile:
                    locations.extend(jobFile['locations'])
                    firstFile = False
                ### the check on the locations should go here
        return {'lfns': lfns, 'locations': locations}

    ########################################################################
    def prepareSplittingNoInput(self):
        """
        Derive the missing splitting quantity from the configured ones.
        checkUserSettings() must have been called before.

        SETS: self.total_number_of_jobs, plus self.total_number_of_events
              or self.eventsPerJob depending on the options configured.
              Nothing is set when only total_number_of_events or only
              events_per_job is configured.
        RAISES: CrabException when total_number_of_events is negative or
                when the quantity used as divisor is 0.
        """
        if self.selectEventsPerJob:
            common.logger.info('Required '+str(self.eventsPerJob)+' events per job ')
        if self.selectNumberOfJobs:
            common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
        if self.selectTotalNumberEvents:
            common.logger.info('Required '+str(self.total_number_of_events)+' events in total ')

        if self.total_number_of_events < 0:
            msg = 'Cannot split jobs per Events with "-1" as total number of events'
            raise CrabException(msg)

        if self.selectEventsPerJob:
            if self.selectTotalNumberEvents:
                if self.eventsPerJob == 0:
                    raise CrabException('events_per_job must be different from 0.')
                self.total_number_of_jobs = self.total_number_of_events // self.eventsPerJob
            elif self.selectNumberOfJobs:
                self.total_number_of_jobs = self.theNumberOfJobs
                self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)

        elif self.selectNumberOfJobs:
            if self.theNumberOfJobs == 0:
                raise CrabException('number_of_jobs must be different from 0.')
            self.total_number_of_jobs = self.theNumberOfJobs
            self.eventsPerJob = self.total_number_of_events // self.total_number_of_jobs

    def jobSplittingNoInput(self):
        """
        Perform job splitting based on number of event per job, for tasks
        without input data.

        READS: self.args['managedGenerators'], self.args['generator'],
               cfg_params 'CMSSW.first_run'
        SETS:  self.list_of_args - argument list of every job
        RETURNS: dictionary with 'params', 'args', 'jobDestination' and 'njobs'
        RAISES: CrabException unless exactly two splitting options are configured
        """
        common.logger.debug('Splitting per events')
        self.checkUserSettings()
        jobDestination = []
        if (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2:
            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
            raise CrabException(msg)

        managedGenerators = self.args['managedGenerators']
        generator = self.args['generator']
        firstRun = self.cfg_params.get('CMSSW.first_run', None)
        managed = generator in managedGenerators

        self.prepareSplittingNoInput()

        common.logger.debug('N jobs '+str(self.total_number_of_jobs))

        # is there any remainder?
        eventsCovered = int(self.total_number_of_jobs)*self.eventsPerJob
        check = int(self.total_number_of_events) - eventsCovered

        common.logger.debug('Check '+str(check))

        common.logger.info(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
        if check > 0:
            common.logger.info('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(eventsCovered))

        self.list_of_args = []
        for i in range(self.total_number_of_jobs):
            ## Since there is no input, any site is good
            jobDestination.append([""]) #must be empty to write correctly the xml
            args = []
            if firstRun:
                ## pythia first run: first_run with the job index appended as text
                args.append(str(firstRun)+str(i))
            if managed:
                args.append(generator)
                if generator == 'comphep' and i == 0:
                    # COMPHEP is brain-dead and wants event #'s like 1,100,200,300
                    args.append('1')
                else:
                    args.append(str(i*self.eventsPerJob))
            args.append(str(self.eventsPerJob))
            self.list_of_args.append(args)

        # prepare dict output: the names follow the order used for args above
        params = []
        if firstRun:
            params.append('FirstRun')
        if managed:
            params.extend(['Generator', 'FirstEvent'])
        params.append('MaxEvents')

        dictOut = {}
        dictOut['params'] = params
        dictOut['args'] = self.list_of_args
        dictOut['jobDestination'] = jobDestination
        dictOut['njobs'] = self.total_number_of_jobs

        return dictOut

    def jobSplittingForScript(self):
        """
        Perform job splitting based on number of job

        SETS:  self.list_of_args - argument list of every job: the events
               per job, or an empty list when that number is 0
        RETURNS: dictionary with 'params', 'args', 'jobDestination' and 'njobs'
        RAISES: CrabException when number_of_jobs is not configured
        """
        self.checkUserSettings()
        if self.selectNumberOfJobs == 0:
            msg = 'must specify number_of_jobs.'
            raise CrabException(msg)
        jobDestination = []
        common.logger.debug('Splitting per job')
        common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')

        self.prepareSplittingNoInput()

        common.logger.debug('N jobs '+str(self.total_number_of_jobs))

        common.logger.info(str(self.total_number_of_jobs)+' jobs can be created')

        self.list_of_args = []
        for i in range(self.total_number_of_jobs):
            args = []
            jobDestination.append([""])
            if self.eventsPerJob != 0:
                args.append(str(self.eventsPerJob))
            self.list_of_args.append(args)

        # prepare dict output
        dictOut = {}
        dictOut['params'] = ['MaxEvents']
        dictOut['args'] = self.list_of_args
        dictOut['jobDestination'] = jobDestination
        dictOut['njobs'] = self.total_number_of_jobs
        return dictOut

    def jobSplittingByLumi(self):
        """
        Placeholder: no splitting is performed and None is returned.
        """
        return

    def Algos(self):
        """
        Define key splittingType matrix

        RETURNS: dictionary mapping every splitting type name onto the
                 bound method implementing it
        """
        splitAlgos = {
            'EventBased' : self.jobSplittingByEvent,
            'RunBased'   : self.jobSplittingByRun,
            'LumiBased'  : self.jobSplittingByLumi,
            'NoInput'    : self.jobSplittingNoInput,
            'ForScript'  : self.jobSplittingForScript,
        }
        return splitAlgos
605