ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/Splitter.py
(Generate patch)

Comparing COMP/CRAB/python/Splitter.py (file contents):
Revision 1.12 by spiga, Sun May 10 13:42:57 2009 UTC vs.
Revision 1.19 by slacapra, Wed Jun 10 11:31:33 2009 UTC

# Line 1 | Line 1
1   import common
2 from crab_logger import Logger
2   from crab_exceptions import *
3   from crab_util import *
4   from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
# Line 10 | Line 9 | class JobSplitter:
9          self.args=args
10          #self.maxEvents
11          # init BlackWhiteListParser
12 <        seWhiteList = cfg_params.get('EDG.se_white_list',[])
13 <        seBlackList = cfg_params.get('EDG.se_black_list',[])
14 <        self.blackWhiteListParser = SEBlackWhiteListParser(seWhiteList, seBlackList, common.logger)
12 >        seWhiteList = cfg_params.get('GRID.se_white_list',[])
13 >        seBlackList = cfg_params.get('GRID.se_black_list',[])
14 >        self.blackWhiteListParser = SEBlackWhiteListParser(seWhiteList, seBlackList, common.logger())
15  
16  
17      def checkUserSettings(self):
# Line 92 | Line 91 | class JobSplitter:
91          # If user requested more events than are in the dataset
92          elif (totalEventsRequested > self.maxEvents):
93              eventsRemaining = self.maxEvents
94 <            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
94 >            common.logger.info("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
95          # If user requested less events than are in the dataset
96          else:
97              eventsRemaining = totalEventsRequested
# Line 108 | Line 107 | class JobSplitter:
107              eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
108  
109          if (self.selectNumberOfJobs):
110 <            common.logger.message("May not create the exact number_of_jobs requested.")
110 >            common.logger.info("May not create the exact number_of_jobs requested.")
111  
112          # old... to remove Daniele
113          totalNumberOfJobs = 999999999
# Line 125 | Line 124 | class JobSplitter:
124          jobsOfBlock = {}
125  
126          parString = ""
127 +        pString = ""
128          filesEventCount = 0
129  
130          # ---- Iterate over the blocks in the dataset until ---- #
# Line 137 | Line 137 | class JobSplitter:
137  
138              if self.eventsbyblock.has_key(block) :
139                  numEventsInBlock = self.eventsbyblock[block]
140 <                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
140 >                common.logger.debug('Events in Block File '+str(numEventsInBlock))
141  
142                  files = filesbyblock[block]
143                  numFilesInBlock = len(files)
# Line 147 | Line 147 | class JobSplitter:
147                  if noBboundary == 0: # DD
148                      # ---- New block => New job ---- #
149                      parString = ""
150 +                    pString=""
151                      # counter for number of events in files currently worked on
152                      filesEventCount = 0
153                  # flag if next while loop should touch new file
# Line 156 | Line 157 | class JobSplitter:
157  
158                  # ---- Iterate over the files in the block until we've met the requested ---- #
159                  # ---- total # of events or we've gone over all the files in this block  ---- #
160 <                pString=''
160 >                msg='\n'
161                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
162                      file = files[fileCount]
163                      if self.useParent==1:
164                          parent = self.parentFiles[file]
165 <                        for f in parent :
165 <                            pString +=  f + ','
166 <                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
167 <                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
165 >                        common.logger.log(10-1, "File "+str(file)+" has the following parents: "+str(parent))
166                      if newFile :
167                          try:
168                              numEventsInFile = self.eventsbyfile[file]
169 <                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
169 >                            common.logger.log(10-1, "File "+str(file)+" has "+str(numEventsInFile)+" events")
170                              # increase filesEventCount
171                              filesEventCount += numEventsInFile
172                              # Add file to current job
173                              parString +=  file + ','
174 +                            if self.useParent==1:
175 +                                for f in parent :
176 +                                    pString += f  + ','
177                              newFile = 0
178                          except KeyError:
179 <                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
179 >                            common.logger.info("File "+str(file)+" has unknown number of events: skipping")
180  
181                      eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
182                      # if less events in file remain than eventsPerJobRequested
# Line 194 | Line 195 | class JobSplitter:
195                                      list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
196                                  else:
197                                      list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
198 <                                common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
198 >                                msg += "Job %s can run over %s  events (last file in block).\n"%(str(jobCount+1), str(filesEventCount - jobSkipEventCount))
199                                  jobDestination.append(blockSites[block])
200 <                                common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(jobDestination[jobCount]))
200 >                                msg += "Job %s Destination: %s\n"%(str(jobCount+1),str(destinationCMS(jobDestination[jobCount])))
201 >                                from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
202 >                                cms_se = CmsSEMap()
203 >                                SEDestination = [cms_se[dest] for dest in jobDestination[jobCount]]
204 >                                msg+="\t  CMSDestination: %s "%(str(SEDestination))
205                                  # fill jobs of block dictionary
206                                  jobsOfBlock[block].append(jobCount+1)
207                                  # reset counter
# Line 223 | Line 228 | class JobSplitter:
228                              list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
229                          else:
230                              list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
231 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
231 >                        msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested))
232                          jobDestination.append(blockSites[block])
233 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(jobDestination[jobCount]))
233 >                        msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(destinationCMS(jobDestination[jobCount])))
234                          jobsOfBlock[block].append(jobCount+1)
235                          # reset counter
236                          jobCount = jobCount + 1
# Line 248 | Line 253 | class JobSplitter:
253                              list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
254                          else:
255                              list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
256 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
256 >                        msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested))
257                          jobDestination.append(blockSites[block])
258 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(jobDestination[jobCount]))
258 >                        msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(destinationCMS(jobDestination[jobCount])))
259                          jobsOfBlock[block].append(jobCount+1)
260                          # increase counter
261                          jobCount = jobCount + 1
# Line 261 | Line 266 | class JobSplitter:
266                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
267                          # remove all but the last file
268                          filesEventCount = self.eventsbyfile[file]
269 +                        pString_tmp=''
270                          if self.useParent==1:
271 <                            for f in parent : pString +=  f + ','
271 >                            for f in parent : pString_tmp +=  f + ','
272 >                        pString =  pString_tmp
273                          parString =  file + ','
274                      pass # END if
275                  pass # END while (iterate over files in the block)
276          pass # END while (iterate over blocks in the dataset)
277 +        common.logger.debug(msg)
278          self.ncjobs = self.total_number_of_jobs = jobCount
279          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
280 <            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
281 <        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
280 >            common.logger.info("Could not run on all requested events because some blocks not hosted at allowed sites.")
281 >        common.logger.info(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
282  
283          # skip check on  block with no sites  DD
284          if noBboundary == 0 : self.checkBlockNoSite(blocks,jobsOfBlock,blockSites)
# Line 299 | Line 307 | class JobSplitter:
307              if block in jobsOfBlock.keys() :
308                  blockCounter += 1
309                  allBlock.append( blockCounter )
310 +                sites=self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],[block]),[block])
311                  screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
312 <                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],[block]),[block])))
313 <                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],[block]),[block])) == 0:
312 >                    ', '.join(destinationCMS(sites)))
313 >                if len(sites) == 0:
314                      noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
315                      bloskNoSite.append( blockCounter )
316  
317 <        common.logger.message(screenOutput)
317 >        common.logger.info(screenOutput)
318          if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
319              msg = 'WARNING: No sites are hosting any part of data for block:\n                '
320              virgola = ""
# Line 320 | Line 329 | class JobSplitter:
329              for range_jobs in noSiteBlock:
330                  msg += str(range_jobs) + virgola
331              msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
332 <            if self.cfg_params.has_key('EDG.se_white_list'):
333 <                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
332 >            if self.cfg_params.has_key('GRID.se_white_list'):
333 >                msg += 'WARNING: SE White List: '+self.cfg_params['GRID.se_white_list']+'\n'
334                  msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
335                  msg += 'Please check if the dataset is available at this site!)\n'
336 <            if self.cfg_params.has_key('EDG.ce_white_list'):
337 <                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
336 >            if self.cfg_params.has_key('GRID.ce_white_list'):
337 >                msg += 'WARNING: CE White List: '+self.cfg_params['GRID.ce_white_list']+'\n'
338                  msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
339                  msg += 'Please check if the dataset is available at this site!)\n'
340  
341 <            common.logger.message(msg)
341 >            common.logger.info(msg)
342  
343          if bloskNoSite == allBlock:
344              raise CrabException('No jobs created')
# Line 390 | Line 399 | class JobSplitter:
399          set = Set(runList)
400          list_of_lists = []
401          jobDestination = []
393
402          count = 0
403 <        for i in list(set):
403 >        for jobGroup in  jobfactory():
404              if count <  self.theNumberOfJobs:
405 <                res = self.getJobInfo(jobfactory())
405 >                res = self.getJobInfo(jobGroup)
406                  parString = ''
407                  for file in res['lfns']:
408                      parString += file + ','
# Line 434 | Line 442 | class JobSplitter:
442          """
443          Perform job splitting based on number of event per job
444          """
445 <        common.logger.debug(5,'Splitting per events')
445 >        common.logger.debug('Splitting per events')
446          self.checkUserSettings()
447          jobDestination=[]
448          if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
# Line 446 | Line 454 | class JobSplitter:
454          firstRun = self.cfg_params.get('CMSSW.first_run',None)
455  
456          if (self.selectEventsPerJob):
457 <            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
457 >            common.logger.info('Required '+str(self.eventsPerJob)+' events per job ')
458          if (self.selectNumberOfJobs):
459 <            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
459 >            common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
460          if (self.selectTotalNumberEvents):
461 <            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
461 >            common.logger.info('Required '+str(self.total_number_of_events)+' events in total ')
462  
463          if (self.total_number_of_events < 0):
464              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 467 | Line 475 | class JobSplitter:
475              self.total_number_of_jobs = self.theNumberOfJobs
476              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
477  
478 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
478 >        common.logger.debug('N jobs  '+str(self.total_number_of_jobs))
479  
480          # is there any remainder?
481          check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
482  
483 <        common.logger.debug(5,'Check  '+str(check))
483 >        common.logger.debug('Check  '+str(check))
484  
485 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
485 >        common.logger.info(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
486          if check > 0:
487 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
487 >            common.logger.info('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
488  
489          # argument is seed number.$i
490          self.list_of_args = []
# Line 520 | Line 528 | class JobSplitter:
528              msg = 'must specify  number_of_jobs.'
529              raise crabexception(msg)
530          jobDestination = []
531 <        common.logger.debug(5,'Splitting per job')
532 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
531 >        common.logger.debug('Splitting per job')
532 >        common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')
533  
534          self.total_number_of_jobs = self.theNumberOfJobs
535  
536 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
536 >        common.logger.debug('N jobs  '+str(self.total_number_of_jobs))
537  
538 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
538 >        common.logger.info(str(self.total_number_of_jobs)+' jobs can be created')
539  
540          # argument is seed number.$i
541          #self.list_of_args = []

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines