ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/Splitter.py
(Generate patch)

Comparing COMP/CRAB/python/Splitter.py (file contents):
Revision 1.1 by spiga, Wed Feb 4 14:24:07 2009 UTC vs.
Revision 1.24 by spiga, Tue Jul 21 16:18:09 2009 UTC

# Line 1 | Line 1
1   import common
2 from crab_logger import Logger
2   from crab_exceptions import *
3   from crab_util import *
4   from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
# Line 7 | Line 6 | from WMCore.SiteScreening.BlackWhiteList
6   class JobSplitter:
7      def __init__( self, cfg_params,  args ):
8          self.cfg_params = cfg_params
9 <        self.blockSites = args['blockSites']
11 <        self.pubdata = args['pubdata']
9 >        self.args=args
10          #self.maxEvents
13        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
11          # init BlackWhiteListParser
12 <        seWhiteList = cfg_params.get('EDG.se_white_list',[])
13 <        seBlackList = cfg_params.get('EDG.se_black_list',[])
14 <        self.blackWhiteListParser = SEBlackWhiteListParser(seWhiteList, seBlackList, common.logger)
12 >        self.seWhiteList = cfg_params.get('GRID.se_white_list',[])
13 >        seBlackList = cfg_params.get('GRID.se_black_list',[])
14 >        self.blackWhiteListParser = SEBlackWhiteListParser(self.seWhiteList, seBlackList, common.logger())
15  
16  
17      def checkUserSettings(self):
# Line 46 | Line 43 | class JobSplitter:
43              self.selectTotalNumberEvents = 0
44  
45  
46 +    def ComputeSubBlockSites( self, blockSites ):
47 +        """
48 +        """
49 +        sub_blockSites = {}
50 +        for k,v in blockSites.iteritems():
51 +            sites=self.blackWhiteListParser.checkWhiteList(v)
52 +            if sites : sub_blockSites[k]=v
53 +        if len(sub_blockSites) < 1:
54 +            msg = 'WARNING: the sites %s is not hosting any part of data.'%self.seWhiteList
55 +            raise CrabException(msg)
56 +        return sub_blockSites
57 +
58   ########################################################################
59      def jobSplittingByEvent( self ):
60          """
# Line 55 | Line 64 | class JobSplitter:
64          REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
65                    self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
66                    self.maxEvents, self.filesbyblock
67 <        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
67 >        SETS: jobDestination - Site destination(s) for each job (a list of lists)
68                self.total_number_of_jobs - Total # of jobs
69                self.list_of_args - File(s) job will run on (a list of lists)
70          """
71  
72 +        jobDestination=[]
73          self.checkUserSettings()
74          if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
75              msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
76              raise CrabException(msg)
67
68        self.filesbyblock=self.pubdata.getFiles()
77  
78 <        self.eventsbyblock=self.pubdata.getEventsPerBlock()
79 <        self.eventsbyfile=self.pubdata.getEventsPerFile()
80 <        self.parentFiles=self.pubdata.getParent()
78 >        blockSites = self.args['blockSites']
79 >        pubdata = self.args['pubdata']
80 >        filesbyblock=pubdata.getFiles()
81 >
82 >        self.eventsbyblock=pubdata.getEventsPerBlock()
83 >        self.eventsbyfile=pubdata.getEventsPerFile()
84 >        self.parentFiles=pubdata.getParent()
85  
86          ## get max number of events
87 <        self.maxEvents=self.pubdata.getMaxEvents()
87 >        self.maxEvents=pubdata.getMaxEvents()
88  
89          self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
90          noBboundary = int(self.cfg_params.get('CMSSW.no_block_boundary',0))
91  
92 +        if noBboundary == 1:
93 +            if self.total_number_of_events== -1:
94 +                msg = 'You are selecting no_block_boundary=1 which does not allow to set total_number_of_events=-1\n'
95 +                msg +='\tYou shoud get the number of event from DBS web interface and use it for your configuration.'                    
96 +                raise CrabException(msg)
97 +            if len(self.seWhiteList.split(',')) != 1:
98 +                msg = 'You are selecting no_block_boundary=1 which requires to choose one and only one site.\n'
99 +                msg += "\tPlease set se_white_list with the site's storage element name."
100 +                raise  CrabException(msg)  
101 +            blockSites = self.ComputeSubBlockSites(blockSites)    
102 +
103          # ---- Handle the possible job splitting configurations ---- #
104          if (self.selectTotalNumberEvents):
105              totalEventsRequested = self.total_number_of_events
# Line 91 | Line 114 | class JobSplitter:
114          # If user requested more events than are in the dataset
115          elif (totalEventsRequested > self.maxEvents):
116              eventsRemaining = self.maxEvents
117 <            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
117 >            common.logger.info("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
118          # If user requested less events than are in the dataset
119          else:
120              eventsRemaining = totalEventsRequested
# Line 107 | Line 130 | class JobSplitter:
130              eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
131  
132          if (self.selectNumberOfJobs):
133 <            common.logger.message("May not create the exact number_of_jobs requested.")
133 >            common.logger.info("May not create the exact number_of_jobs requested.")
134  
135          # old... to remove Daniele
136          totalNumberOfJobs = 999999999
137  
138 <        blocks = self.blockSites.keys()
138 >        blocks = blockSites.keys()
139          blockCount = 0
140          # Backup variable in case self.maxEvents counted events in a non-included block
141          numBlocksInDataset = len(blocks)
# Line 124 | Line 147 | class JobSplitter:
147          jobsOfBlock = {}
148  
149          parString = ""
150 +        pString = ""
151          filesEventCount = 0
152 +        msg=''
153  
154          # ---- Iterate over the blocks in the dataset until ---- #
155          # ---- we've met the requested total # of events    ---- #
# Line 136 | Line 161 | class JobSplitter:
161  
162              if self.eventsbyblock.has_key(block) :
163                  numEventsInBlock = self.eventsbyblock[block]
164 <                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
164 >                common.logger.debug('Events in Block File '+str(numEventsInBlock))
165  
166 <                files = self.filesbyblock[block]
166 >                files = filesbyblock[block]
167                  numFilesInBlock = len(files)
168                  if (numFilesInBlock <= 0):
169                      continue
# Line 146 | Line 171 | class JobSplitter:
171                  if noBboundary == 0: # DD
172                      # ---- New block => New job ---- #
173                      parString = ""
174 +                    pString=""
175                      # counter for number of events in files currently worked on
176                      filesEventCount = 0
177                  # flag if next while loop should touch new file
# Line 155 | Line 181 | class JobSplitter:
181  
182                  # ---- Iterate over the files in the block until we've met the requested ---- #
183                  # ---- total # of events or we've gone over all the files in this block  ---- #
184 <                pString=''
184 >                msg='\n'
185                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
186                      file = files[fileCount]
187                      if self.useParent==1:
188                          parent = self.parentFiles[file]
189 <                        for f in parent :
164 <                            pString += '\\\"' + f + '\\\"\,'
165 <                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
166 <                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
189 >                        common.logger.log(10-1, "File "+str(file)+" has the following parents: "+str(parent))
190                      if newFile :
191                          try:
192                              numEventsInFile = self.eventsbyfile[file]
193 <                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
193 >                            common.logger.log(10-1, "File "+str(file)+" has "+str(numEventsInFile)+" events")
194                              # increase filesEventCount
195                              filesEventCount += numEventsInFile
196                              # Add file to current job
197 <                            parString += '\\\"' + file + '\\\"\,'
197 >                            parString +=  file + ','
198 >                            if self.useParent==1:
199 >                                for f in parent :
200 >                                    pString += f  + ','
201                              newFile = 0
202                          except KeyError:
203 <                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
203 >                            common.logger.info("File "+str(file)+" has unknown number of events: skipping")
204  
205                      eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
206                      # if less events in file remain than eventsPerJobRequested
# Line 187 | Line 213 | class JobSplitter:
213                              if ( fileCount == numFilesInBlock-1 ) :
214                                  # end job using last file, use remaining events in block
215                                  # close job and touch new file
216 <                                fullString = parString[:-2]
216 >                                fullString = parString[:-1]
217                                  if self.useParent==1:
218 <                                    fullParentString = pString[:-2]
218 >                                    fullParentString = pString[:-1]
219                                      list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
220                                  else:
221                                      list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
222 <                                common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
223 <                                self.jobDestination.append(self.blockSites[block])
224 <                                common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
222 >                                msg += "Job %s can run over %s  events (last file in block).\n"%(str(jobCount+1), str(filesEventCount - jobSkipEventCount))
223 >                                jobDestination.append(blockSites[block])
224 >                                msg += "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount])))
225                                  # fill jobs of block dictionary
226                                  jobsOfBlock[block].append(jobCount+1)
227                                  # reset counter
# Line 216 | Line 242 | class JobSplitter:
242                      # if events in file equal to eventsPerJobRequested
243                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
244                          # close job and touch new file
245 <                        fullString = parString[:-2]
245 >                        fullString = parString[:-1]
246                          if self.useParent==1:
247 <                            fullParentString = pString[:-2]
247 >                            fullParentString = pString[:-1]
248                              list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
249                          else:
250                              list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
251 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
252 <                        self.jobDestination.append(self.blockSites[block])
253 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
251 >                        msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested))
252 >                        jobDestination.append(blockSites[block])
253 >                        msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount])))
254                          jobsOfBlock[block].append(jobCount+1)
255                          # reset counter
256                          jobCount = jobCount + 1
# Line 241 | Line 267 | class JobSplitter:
267                      # if more events in file remain than eventsPerJobRequested
268                      else :
269                          # close job but don't touch new file
270 <                        fullString = parString[:-2]
270 >                        fullString = parString[:-1]
271                          if self.useParent==1:
272 <                            fullParentString = pString[:-2]
272 >                            fullParentString = pString[:-1]
273                              list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
274                          else:
275                              list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
276 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
277 <                        self.jobDestination.append(self.blockSites[block])
278 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
276 >                        msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested))
277 >                        jobDestination.append(blockSites[block])
278 >                        msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount])))
279                          jobsOfBlock[block].append(jobCount+1)
280                          # increase counter
281                          jobCount = jobCount + 1
# Line 260 | Line 286 | class JobSplitter:
286                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
287                          # remove all but the last file
288                          filesEventCount = self.eventsbyfile[file]
289 +                        pString_tmp=''
290                          if self.useParent==1:
291 <                            for f in parent : pString += '\\\"' + f + '\\\"\,'
292 <                        parString = '\\\"' + file + '\\\"\,'
291 >                            for f in parent : pString_tmp +=  f + ','
292 >                        pString =  pString_tmp
293 >                        parString =  file + ','
294                      pass # END if
295                  pass # END while (iterate over files in the block)
296          pass # END while (iterate over blocks in the dataset)
297 +        common.logger.debug(msg)
298          self.ncjobs = self.total_number_of_jobs = jobCount
299          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
300 <            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
301 <        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
302 <
300 >            common.logger.info("Could not run on all requested events because some blocks not hosted at allowed sites.")
301 >        common.logger.info(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
302 >
303          # skip check on  block with no sites  DD
304 <        if noBboundary == 0 : self.checkBlockNoSite(blocks,jobsOfBlock)
304 >        if noBboundary == 0 : self.checkBlockNoSite(blocks,jobsOfBlock,blockSites)
305  
306         # prepare dict output
307          dictOut = {}
308 +        dictOut['params']= ['InputFiles','MaxEvents','SkipEvents']
309 +        if self.useParent: dictOut['params']= ['InputFiles','ParentFiles','MaxEvents','SkipEvents']
310          dictOut['args'] = list_of_lists
311 <        dictOut['jobDestination'] = self.jobDestination
311 >        dictOut['jobDestination'] = jobDestination
312          dictOut['njobs']=self.total_number_of_jobs
313  
314          return dictOut
315  
316          # keep trace of block with no sites to print a warning at the end
317  
318 <    def checkBlockNoSite(self,blocks,jobsOfBlock):  
318 >    def checkBlockNoSite(self,blocks,jobsOfBlock,blockSites):
319          # screen output
320          screenOutput = "List of jobs and available destination sites:\n\n"
321          noSiteBlock = []
322          bloskNoSite = []
323 +        allBlock = []
324  
325          blockCounter = 0
326          for block in blocks:
327              if block in jobsOfBlock.keys() :
328                  blockCounter += 1
329 +                allBlock.append( blockCounter )
330 +                sites=self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],[block]),[block])
331                  screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
332 <                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(self.blockSites[block],block),block)))
333 <                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(self.blockSites[block],block),block)) == 0:
332 >                    ', '.join(SE2CMS(sites)))
333 >                if len(sites) == 0:
334                      noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
335                      bloskNoSite.append( blockCounter )
336  
337 <        common.logger.message(screenOutput)
337 >        common.logger.info(screenOutput)
338          if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
339              msg = 'WARNING: No sites are hosting any part of data for block:\n                '
340              virgola = ""
# Line 315 | Line 349 | class JobSplitter:
349              for range_jobs in noSiteBlock:
350                  msg += str(range_jobs) + virgola
351              msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
352 <            if self.cfg_params.has_key('EDG.se_white_list'):
353 <                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
352 >            if self.cfg_params.has_key('GRID.se_white_list'):
353 >                msg += 'WARNING: SE White List: '+self.cfg_params['GRID.se_white_list']+'\n'
354                  msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
355                  msg += 'Please check if the dataset is available at this site!)\n'
356 <            if self.cfg_params.has_key('EDG.ce_white_list'):
357 <                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
356 >            if self.cfg_params.has_key('GRID.ce_white_list'):
357 >                msg += 'WARNING: CE White List: '+self.cfg_params['GRID.ce_white_list']+'\n'
358                  msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
359                  msg += 'Please check if the dataset is available at this site!)\n'
360  
361 <            common.logger.message(msg)
361 >            common.logger.info(msg)
362 >
363 >        if bloskNoSite == allBlock:
364 >            raise CrabException('No jobs created')
365  
366          return
367  
368  
369   ########################################################################
370 <    def jobSplittingByRun(self):
370 >    def jobSplittingByRun(self):
371          """
372          """
373 <        from sets import Set  
373 >        from sets import Set
374          from WMCore.JobSplitting.RunBased import RunBased
375          from WMCore.DataStructs.Workflow import Workflow
376          from WMCore.DataStructs.File import File
377          from WMCore.DataStructs.Fileset import Fileset
378          from WMCore.DataStructs.Subscription import Subscription
379          from WMCore.JobSplitting.SplitterFactory import SplitterFactory
380 <        from WMCore.DataStructs.Run import Run
380 >        from WMCore.DataStructs.Run import Run
381  
382          self.checkUserSettings()
383 +        blockSites = self.args['blockSites']
384 +        pubdata = self.args['pubdata']
385  
386          if self.selectNumberOfJobs == 0 :
387              self.theNumberOfJobs = 9999999
388          blocks = {}
389 <        runList = []
389 >        runList = []
390          thefiles = Fileset(name='FilesToSplit')
391 <        fileList = self.pubdata.getListFiles()
391 >        fileList = pubdata.getListFiles()
392          for f in fileList:
354           # print f
393              block = f['Block']['Name']
394 <          #  if not blocks.has_key(block):
395 <          #      blocks[block] = reader.listFileBlockLocation(block)
358 <            try:
359 <                f['Block']['StorageElementList'].extend(self.blockSites[block])
394 >            try:
395 >                f['Block']['StorageElementList'].extend(blockSites[block])
396              except:
397                  continue
398              wmbsFile = File(f['LogicalFileName'])
399 <            [ wmbsFile['locations'].add(x) for x in self.blockSites[block] ]
399 >            [ wmbsFile['locations'].add(x) for x in blockSites[block] ]
400              wmbsFile['block'] = block
401              runNum = f['RunsList'][0]['RunNumber']
402 <            runList.append(runNum)
402 >            runList.append(runNum)
403              myRun = Run(runNumber=runNum)
404              wmbsFile.addRun( myRun )
405              thefiles.addFile(
406                  wmbsFile
407                  )
408 <
408 >
409          work = Workflow()
410          subs = Subscription(
411          fileset = thefiles,
# Line 378 | Line 414 | class JobSplitter:
414          type = "Processing")
415          splitter = SplitterFactory()
416          jobfactory = splitter(subs)
417 <        
418 <        #loop over all runs
417 >
418 >        #loop over all runs
419          set = Set(runList)
420          list_of_lists = []
421          jobDestination = []
386
422          count = 0
423 <        for i in list(set):
423 >        for jobGroup in  jobfactory():
424              if count <  self.theNumberOfJobs:
425 <                res = self.getJobInfo(jobfactory())
426 <                parString = ''
425 >                res = self.getJobInfo(jobGroup)
426 >                parString = ''
427                  for file in res['lfns']:
428 <                    parString += '\\\"' + file + '\\\"\,'
429 <                fullString = parString[:-2]
430 <                list_of_lists.append([fullString,str(-1),str(0)])    
431 <                # sto assumendo che i jobs siano tutti locati insieme
432 <                jobDestination.append(res['locations'])  
428 >                    parString += file + ','
429 >                fullString = parString[:-1]
430 >                list_of_lists.append([fullString,str(-1),str(0)])
431 >                #need to check single file location
432 >                jobDestination.append(res['locations'])
433                  count +=1
399        #print jobDestination
434         # prepare dict output
435          dictOut = {}
436 +        dictOut['params']= ['InputFiles','MaxEvents','SkipEvents']
437          dictOut['args'] = list_of_lists
438          dictOut['jobDestination'] = jobDestination
439          dictOut['njobs']=count
# Line 407 | Line 442 | class JobSplitter:
442  
443      def getJobInfo( self,jobGroup ):
444          res = {}
445 <        lfns = []        
446 <        locations = []        
445 >        lfns = []
446 >        locations = []
447          tmp_check=0
448          for job in jobGroup.jobs:
449              for file in job.getFiles():
450 <                lfns.append(file['lfn'])
450 >                lfns.append(file['lfn'])
451                  for loc in file['locations']:
452                      if tmp_check < 1 :
453                          locations.append(loc)
454 <                    tmp_check = tmp_check + 1
455 <                ### qui va messo il check per la locations
456 <        res['lfns'] = lfns
457 <        res['locations'] = locations
458 <        return res                
459 <      
454 >                tmp_check = tmp_check + 1
455 >                ### qui va messo il check per la locations
456 >        res['lfns'] = lfns
457 >        res['locations'] = locations
458 >        return res
459 >
460   ########################################################################
461 <    def jobSplittingNoInput(self):
461 >    def prepareSplittingNoInput(self):
462          """
428        Perform job splitting based on number of event per job
463          """
430        common.logger.debug(5,'Splitting per events')
431
464          if (self.selectEventsPerJob):
465 <            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
465 >            common.logger.info('Required '+str(self.eventsPerJob)+' events per job ')
466          if (self.selectNumberOfJobs):
467 <            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
467 >            common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
468          if (self.selectTotalNumberEvents):
469 <            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
469 >            common.logger.info('Required '+str(self.total_number_of_events)+' events in total ')
470  
471          if (self.total_number_of_events < 0):
472              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 451 | Line 483 | class JobSplitter:
483              self.total_number_of_jobs = self.theNumberOfJobs
484              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
485  
486 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
486 >
487 >    def jobSplittingNoInput(self):
488 >        """
489 >        Perform job splitting based on number of event per job
490 >        """
491 >        common.logger.debug('Splitting per events')
492 >        self.checkUserSettings()
493 >        jobDestination=[]
494 >        if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
495 >            msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
496 >            raise CrabException(msg)
497 >
498 >        managedGenerators =self.args['managedGenerators']
499 >        generator = self.args['generator']
500 >        firstRun = self.cfg_params.get('CMSSW.first_run',None)
501 >
502 >        self.prepareSplittingNoInput()
503 >
504 >        common.logger.debug('N jobs  '+str(self.total_number_of_jobs))
505  
506          # is there any remainder?
507          check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
508  
509 <        common.logger.debug(5,'Check  '+str(check))
509 >        common.logger.debug('Check  '+str(check))
510  
511 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
511 >        common.logger.info(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
512          if check > 0:
513 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
513 >            common.logger.info('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
514  
515          # argument is seed number.$i
516          self.list_of_args = []
517          for i in range(self.total_number_of_jobs):
518              ## Since there is no input, any site is good
519 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
519 >            jobDestination.append([""]) #must be empty to write correctly the xml
520              args=[]
521 <            if (self.firstRun):
521 >            if (firstRun):
522                  ## pythia first run
523 <                args.append(str(self.firstRun)+str(i))
524 <            if (self.generator in self.managedGenerators):
525 <                if (self.generator == 'comphep' and i == 0):
523 >                args.append(str(firstRun)+str(i))
524 >            if (generator in managedGenerators):
525 >               args.append(generator)
526 >               if (generator == 'comphep' and i == 0):
527                      # COMPHEP is brain-dead and wants event #'s like 1,100,200,300
528                      args.append('1')
529 <                else:
529 >               else:
530                      args.append(str(i*self.eventsPerJob))
531 +            args.append(str(self.eventsPerJob))
532              self.list_of_args.append(args)
533         # prepare dict output
534 +
535          dictOut = {}
536 +        dictOut['params'] = ['MaxEvents']
537 +        if (firstRun):
538 +            dictOut['params'] = ['FirstRun','MaxEvents']
539 +            if ( generator in managedGenerators ) :
540 +                dictOut['params'] = ['FirstRun', 'Generator', 'FirstEvent', 'MaxEvents']
541 +        else:
542 +            if (generator in managedGenerators) :
543 +                dictOut['params'] = ['Generator', 'FirstEvent', 'MaxEvents']
544          dictOut['args'] = self.list_of_args
545 <        dictOut['jobDestination'] = self.jobDestination
545 >        dictOut['jobDestination'] = jobDestination
546          dictOut['njobs']=self.total_number_of_jobs
547  
548          return dictOut
# Line 492 | Line 553 | class JobSplitter:
553          Perform job splitting based on number of job
554          """
555          self.checkUserSettings()
556 <        if (self.selectnumberofjobs == 0):
556 >        if (self.selectNumberOfJobs == 0):
557              msg = 'must specify  number_of_jobs.'
558              raise crabexception(msg)
559 +        jobDestination = []
560 +        common.logger.debug('Splitting per job')
561 +        common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
562  
563 <        common.logger.debug(5,'Splitting per job')
500 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
563 > #        self.total_number_of_jobs = self.theNumberOfJobs
564  
565 <        self.total_number_of_jobs = self.theNumberOfJobs
565 >        self.prepareSplittingNoInput()
566  
567 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
567 >        common.logger.debug('N jobs  '+str(self.total_number_of_jobs))
568  
569 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
569 >        common.logger.info(str(self.total_number_of_jobs)+' jobs can be created')
570  
571          # argument is seed number.$i
572          self.list_of_args = []
573          for i in range(self.total_number_of_jobs):
574 <            self.jobDestination.append([""])
575 <            self.list_of_args.append([str(i)])
574 >            args=[]
575 >            jobDestination.append([""])
576 >            if self.eventsPerJob != 0 :
577 >                args.append(str(self.eventsPerJob))
578 >                self.list_of_args.append(args)
579  
580         # prepare dict output
581          dictOut = {}
582 <        dictOut['args'] = self.list_of_args
583 <        dictOut['jobDestination'] = []
582 >        dictOut['params'] = ['MaxEvents']
583 >        dictOut['args'] =  self.list_of_args
584 >        dictOut['jobDestination'] = jobDestination
585          dictOut['njobs']=self.total_number_of_jobs
586          return dictOut
520
587  
588 <    def jobSplittingByLumi(self):
588 >
589 >    def jobSplittingByLumi(self):
590          """
591          """
592          return
# Line 527 | Line 594 | class JobSplitter:
594          """
595          Define key splittingType matrix
596          """
597 <        SplitAlogs = {
598 <                     'EventBased'           : self.jobSplittingByEvent,
597 >        SplitAlogs = {
598 >                     'EventBased'           : self.jobSplittingByEvent,
599                       'RunBased'             : self.jobSplittingByRun,
600 <                     'LumiBased'            : self.jobSplittingByLumi,
601 <                     'NoInput'              : self.jobSplittingNoInput,
600 >                     'LumiBased'            : self.jobSplittingByLumi,
601 >                     'NoInput'              : self.jobSplittingNoInput,
602                       'ForScript'            : self.jobSplittingForScript
603 <                     }  
603 >                     }
604          return SplitAlogs
605  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines