ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.92 by gutsche, Tue Jun 19 16:21:49 2007 UTC vs.
Revision 1.132 by ewv, Mon Oct 29 21:03:11 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8  
9 < import os, string, re, shutil, glob
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12      def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
15        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18  
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22          try:
23              self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24          except KeyError:
# Line 25 | Line 28 | class Cmssw(JobType):
28          self.ncjobs = ncjobs
29  
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36          self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39          self.scriptName = 'CMSSW.sh'
40 <        self.pset = ''      #scrip use case Da  
40 >        self.pset = ''      #scrip use case Da
41          self.datasetPath = '' #scrip use case Da
42  
43          # set FJR file name
44          self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 +            raise CrabException(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +
61          common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
63  
# Line 50 | Line 68 | class Cmssw(JobType):
68              self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69          except KeyError:
70              self.use_dbs_1 = 0
71 <            
71 >
72          try:
73              tmp =  cfg_params['CMSSW.datasetpath']
74              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 61 | Line 79 | class Cmssw(JobType):
79                  self.datasetPath = tmp
80                  self.selectNoInput = 0
81          except KeyError:
82 <            msg = "Error: datasetpath not defined "  
82 >            msg = "Error: datasetpath not defined "
83              raise CrabException(msg)
84  
85          # ML monitoring
# Line 73 | Line 91 | class Cmssw(JobType):
91              try:
92                  datasetpath_split = self.datasetPath.split("/")
93                  # standard style
94 +                self.setParam_('datasetFull', self.datasetPath)
95                  if self.use_dbs_1 == 1 :
96                      self.setParam_('dataset', datasetpath_split[1])
97                      self.setParam_('owner', datasetpath_split[-1])
# Line 82 | Line 101 | class Cmssw(JobType):
101              except:
102                  self.setParam_('dataset', self.datasetPath)
103                  self.setParam_('owner', self.datasetPath)
104 <                
104 >
105          self.setTaskid_()
106          self.setParam_('taskId', self.cfg_params['taskId'])
107  
# Line 105 | Line 124 | class Cmssw(JobType):
124          try:
125              self.pset = cfg_params['CMSSW.pset']
126              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if self.pset.lower() != 'none' :
127 >            if self.pset.lower() != 'none' :
128                  if (not os.path.exists(self.pset)):
129                      raise CrabException("User defined PSet file "+self.pset+" does not exist")
130              else:
# Line 173 | Line 192 | class Cmssw(JobType):
192                      if not os.path.exists(file):
193                          raise CrabException("Additional input file not found: "+file)
194                      pass
195 <                    fname = string.split(file, '/')[-1]
196 <                    storedFile = common.work_space.pathForTgz()+'share/'+fname
197 <                    shutil.copyfile(file, storedFile)
198 <                    self.additional_inbox_files.append(string.strip(storedFile))
195 >                    # fname = string.split(file, '/')[-1]
196 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 >                    # shutil.copyfile(file, storedFile)
198 >                    self.additional_inbox_files.append(string.strip(file))
199                  pass
200              pass
201              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
# Line 197 | Line 216 | class Cmssw(JobType):
216          except KeyError:
217              self.eventsPerJob = -1
218              self.selectEventsPerJob = 0
219 <    
219 >
220          ## number of jobs
221          try:
222              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
# Line 213 | Line 232 | class Cmssw(JobType):
232              self.total_number_of_events = 0
233              self.selectTotalNumberEvents = 0
234  
235 <        if self.pset != None: #CarlosDaniele
235 >        if self.pset != None: #CarlosDaniele
236               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238                   raise CrabException(msg)
# Line 253 | Line 272 | class Cmssw(JobType):
272              self.firstRun = None
273              common.logger.debug(5,"No first run given")
274          if self.pset != None: #CarlosDaniele
275 <            import PsetManipulator  
276 <            PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
275 >            import PsetManipulator as pp
276 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
277  
278          #DBSDLS-start
279 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
279 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
280          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
281          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
282          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 266 | Line 285 | class Cmssw(JobType):
285          blockSites = {}
286          if self.datasetPath:
287              blockSites = self.DataDiscoveryAndLocation(cfg_params)
288 <        #DBSDLS-end          
288 >        #DBSDLS-end
289  
290          self.tgzNameWithPath = self.getTarBall(self.executable)
291 <    
291 >
292          ## Select Splitting
293 <        if self.selectNoInput:
293 >        if self.selectNoInput:
294              if self.pset == None: #CarlosDaniele
295                  self.jobSplittingForScript()
296              else:
# Line 284 | Line 303 | class Cmssw(JobType):
303              try:
304                  if (self.datasetPath): # standard job
305                      # allow processing a fraction of the events in a file
306 <                    PsetEdit.inputModule("INPUT")
307 <                    PsetEdit.maxEvent("INPUTMAXEVENTS")
308 <                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
306 >                    PsetEdit.inputModule("INPUTFILE")
307 >                    PsetEdit.maxEvent(0)
308 >                    PsetEdit.skipEvent(0)
309                  else:  # pythia like job
310                      PsetEdit.maxEvent(self.eventsPerJob)
311                      if (self.firstRun):
312 <                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
312 >                        PsetEdit.pythiaFirstRun(0)  #First Run
313                      if (self.sourceSeed) :
314 <                        PsetEdit.pythiaSeed("INPUT")
314 >                        PsetEdit.pythiaSeed(0)
315                          if (self.sourceSeedVtx) :
316 <                            PsetEdit.vtxSeed("INPUTVTX")
316 >                            PsetEdit.vtxSeed(0)
317                          if (self.sourceSeedG4) :
318 <                            self.PsetEdit.g4Seed("INPUTG4")
318 >                            PsetEdit.g4Seed(0)
319                          if (self.sourceSeedMix) :
320 <                            self.PsetEdit.mixSeed("INPUTMIX")
320 >                            PsetEdit.mixSeed(0)
321                  # add FrameworkJobReport to parameter-set
322                  PsetEdit.addCrabFJR(self.fjrFileName)
323                  PsetEdit.psetWriter(self.configFilename())
# Line 349 | Line 368 | class Cmssw(JobType):
368          self.eventsbyfile=self.pubdata.getEventsPerFile()
369  
370          ## get max number of events
371 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
371 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
372  
373          ## Contact the DLS and build a list of sites hosting the fileblocks
374          try:
# Line 358 | Line 377 | class Cmssw(JobType):
377          except DataLocation.DataLocationError , ex:
378              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
379              raise CrabException(msg)
380 <        
380 >
381  
382          sites = dataloc.getSites()
383          allSites = []
# Line 372 | Line 391 | class Cmssw(JobType):
391          common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
392  
393          return sites
394 <    
394 >
395      def jobSplittingByBlocks(self, blockSites):
396          """
397          Perform job splitting. Jobs run over an integer number of files
# Line 422 | Line 441 | class Cmssw(JobType):
441              totalNumberOfJobs = 999999999
442          else :
443              totalNumberOfJobs = self.ncjobs
444 <            
444 >
445  
446          blocks = blockSites.keys()
447          blockCount = 0
# Line 440 | Line 459 | class Cmssw(JobType):
459          while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
460              block = blocks[blockCount]
461              blockCount += 1
462 <            
462 >            if block not in jobsOfBlock.keys() :
463 >                jobsOfBlock[block] = []
464 >
465              if self.eventsbyblock.has_key(block) :
466                  numEventsInBlock = self.eventsbyblock[block]
467                  common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
468 <            
468 >
469                  files = self.filesbyblock[block]
470                  numFilesInBlock = len(files)
471                  if (numFilesInBlock <= 0):
# Line 452 | Line 473 | class Cmssw(JobType):
473                  fileCount = 0
474  
475                  # ---- New block => New job ---- #
476 <                parString = "\\{"
476 >                parString = ""
477                  # counter for number of events in files currently worked on
478                  filesEventCount = 0
479                  # flag if next while loop should touch new file
480                  newFile = 1
481                  # job event counter
482                  jobSkipEventCount = 0
483 <            
483 >
484                  # ---- Iterate over the files in the block until we've met the requested ---- #
485                  # ---- total # of events or we've gone over all the files in this block  ---- #
486                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
# Line 475 | Line 496 | class Cmssw(JobType):
496                              newFile = 0
497                          except KeyError:
498                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
499 <                        
499 >
500  
501                      # if less events in file remain than eventsPerJobRequested
502                      if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
# Line 484 | Line 505 | class Cmssw(JobType):
505                              # end job using last file, use remaining events in block
506                              # close job and touch new file
507                              fullString = parString[:-2]
487                            fullString += '\\}'
508                              list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510                              self.jobDestination.append(blockSites[block])
511                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512                              # fill jobs of block dictionary
513 <                            if block in jobsOfBlock.keys() :
494 <                                jobsOfBlock[block].append(jobCount+1)
495 <                            else:
496 <                                jobsOfBlock[block] = [jobCount+1]
513 >                            jobsOfBlock[block].append(jobCount+1)
514                              # reset counter
515                              jobCount = jobCount + 1
516                              totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518                              jobSkipEventCount = 0
519                              # reset file
520 <                            parString = "\\{"
520 >                            parString = ""
521                              filesEventCount = 0
522                              newFile = 1
523                              fileCount += 1
# Line 512 | Line 529 | class Cmssw(JobType):
529                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530                          # close job and touch new file
531                          fullString = parString[:-2]
515                        fullString += '\\}'
532                          list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
533                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
534                          self.jobDestination.append(blockSites[block])
535                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
536 <                        if block in jobsOfBlock.keys() :
521 <                            jobsOfBlock[block].append(jobCount+1)
522 <                        else:
523 <                            jobsOfBlock[block] = [jobCount+1]
536 >                        jobsOfBlock[block].append(jobCount+1)
537                          # reset counter
538                          jobCount = jobCount + 1
539                          totalEventCount = totalEventCount + eventsPerJobRequested
540                          eventsRemaining = eventsRemaining - eventsPerJobRequested
541                          jobSkipEventCount = 0
542                          # reset file
543 <                        parString = "\\{"
543 >                        parString = ""
544                          filesEventCount = 0
545                          newFile = 1
546                          fileCount += 1
547 <                        
547 >
548                      # if more events in file remain than eventsPerJobRequested
549                      else :
550                          # close job but don't touch new file
551                          fullString = parString[:-2]
539                        fullString += '\\}'
552                          list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
553                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
554                          self.jobDestination.append(blockSites[block])
555                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
556 <                        if block in jobsOfBlock.keys() :
545 <                            jobsOfBlock[block].append(jobCount+1)
546 <                        else:
547 <                            jobsOfBlock[block] = [jobCount+1]
556 >                        jobsOfBlock[block].append(jobCount+1)
557                          # increase counter
558                          jobCount = jobCount + 1
559                          totalEventCount = totalEventCount + eventsPerJobRequested
# Line 554 | Line 563 | class Cmssw(JobType):
563                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
564                          # remove all but the last file
565                          filesEventCount = self.eventsbyfile[file]
566 <                        parString = "\\{"
566 >                        parString = ""
567                          parString += '\\\"' + file + '\\\"\,'
568                      pass # END if
569                  pass # END while (iterate over files in the block)
# Line 563 | Line 572 | class Cmssw(JobType):
572          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
573              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
574          common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575 <        
575 >
576          # screen output
577          screenOutput = "List of jobs and available destination sites:\n\n"
578  
579 +        # keep track of blocks with no sites so a warning can be printed at the end
580 +        noSiteBlock = []
581 +        bloskNoSite = []
582 +
583          blockCounter = 0
584 <        for block in jobsOfBlock.keys():
585 <            blockCounter += 1
586 <            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(blockSites[block]))
584 >        for block in blocks:
585 >            if block in jobsOfBlock.keys() :
586 >                blockCounter += 1
587 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
588 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
589 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
590 >                    bloskNoSite.append( blockCounter )
591  
592          common.logger.message(screenOutput)
593 +        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
594 +            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
595 +            virgola = ""
596 +            if len(bloskNoSite) > 1:
597 +                virgola = ","
598 +            for block in bloskNoSite:
599 +                msg += ' ' + str(block) + virgola
600 +            msg += '\n               Related jobs:\n                 '
601 +            virgola = ""
602 +            if len(noSiteBlock) > 1:
603 +                virgola = ","
604 +            for range_jobs in noSiteBlock:
605 +                msg += str(range_jobs) + virgola
606 +            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
607 +            common.logger.message(msg)
608  
609          self.list_of_args = list_of_lists
610          return
# Line 582 | Line 614 | class Cmssw(JobType):
614          Perform job splitting based on number of event per job
615          """
616          common.logger.debug(5,'Splitting per events')
617 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
618 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
619 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
617 >
618 >        if (self.selectEventsPerJob):
619 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
620 >        if (self.selectNumberOfJobs):
621 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
622 >        if (self.selectTotalNumberEvents):
623 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
624  
625          if (self.total_number_of_events < 0):
626              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 593 | Line 629 | class Cmssw(JobType):
629          if (self.selectEventsPerJob):
630              if (self.selectTotalNumberEvents):
631                  self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
632 <            elif(self.selectNumberOfJobs) :  
632 >            elif(self.selectNumberOfJobs) :
633                  self.total_number_of_jobs =self.theNumberOfJobs
634 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
634 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
635  
636          elif (self.selectNumberOfJobs) :
637              self.total_number_of_jobs = self.theNumberOfJobs
638              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
639 <
639 >
640          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
641  
642          # is there any remainder?
# Line 617 | Line 653 | class Cmssw(JobType):
653          for i in range(self.total_number_of_jobs):
654              ## Since there is no input, any site is good
655             # self.jobDestination.append(["Any"])
656 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
656 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
657              args=[]
658              if (self.firstRun):
659                      ## pythia first run
# Line 635 | Line 671 | class Cmssw(JobType):
671                  if (self.sourceSeedG4):
672                      ## + G4 random seed
673                      args.append(str(self.sourceSeedG4)+str(i))
674 <                if (self.sourceSeedMix):    
674 >                if (self.sourceSeedMix):
675                      ## + Mix random seed
676                      args.append(str(self.sourceSeedMix)+str(i))
677                  pass
678              pass
679              self.list_of_args.append(args)
680          pass
681 <            
681 >
682          # print self.list_of_args
683  
684          return
# Line 672 | Line 708 | class Cmssw(JobType):
708          return
709  
710      def split(self, jobParams):
711 <
711 >
712          common.jobDB.load()
713          #### Fabio
714          njobs = self.total_number_of_jobs
# Line 680 | Line 716 | class Cmssw(JobType):
716          # create the empty structure
717          for i in range(njobs):
718              jobParams.append("")
719 <        
719 >
720          for job in range(njobs):
721              jobParams[job] = arglist[job]
722              # print str(arglist[job])
# Line 691 | Line 727 | class Cmssw(JobType):
727  
728          common.jobDB.save()
729          return
730 <    
730 >
731      def getJobTypeArguments(self, nj, sched):
732          result = ''
733          for i in common.jobDB.arguments(nj):
734              result=result+str(i)+" "
735          return result
736 <  
736 >
737      def numberOfJobs(self):
738          # Fabio
739          return self.total_number_of_jobs
# Line 706 | Line 742 | class Cmssw(JobType):
742          """
743          Return the TarBall with lib and exe
744          """
745 <        
745 >
746          # if it exists, just return it
747          #
748          # Marco. Let's start to use relative path for Boss XML files
# Line 729 | Line 765 | class Cmssw(JobType):
765          # print "swVersion = ", swVersion
766          swReleaseTop = self.scram.getReleaseTop_()
767          #print "swReleaseTop = ", swReleaseTop
768 <        
768 >
769          ## check if working area is release top
770          if swReleaseTop == '' or swArea == swReleaseTop:
771              return
# Line 742 | Line 778 | class Cmssw(JobType):
778                  exeWithPath = self.scram.findFile_(executable)
779                  if ( not exeWithPath ):
780                      raise CrabException('User executable '+executable+' not found')
781 <    
781 >
782                  ## then check if it's private or not
783                  if exeWithPath.find(swReleaseTop) == -1:
784                      # the exe is private, so we must ship
# Line 751 | Line 787 | class Cmssw(JobType):
787                      # distinguish case when script is in user project area or given by full path somewhere else
788                      if exeWithPath.find(path) >= 0 :
789                          exe = string.replace(exeWithPath, path,'')
790 <                        tar.add(path+exe,os.path.basename(executable))
790 >                        tar.add(path+exe,exe)
791                      else :
792                          tar.add(exeWithPath,os.path.basename(executable))
793                      pass
794                  else:
795                      # the exe is from release, we'll find it on WN
796                      pass
797 <    
797 >
798              ## Now get the libraries: only those in local working area
799              libDir = 'lib'
800              lib = swArea+'/' +libDir
801              common.logger.debug(5,"lib "+lib+" to be tarred")
802              if os.path.exists(lib):
803                  tar.add(lib,libDir)
804 <    
804 >
805              ## Now check if module dir is present
806              moduleDir = 'module'
807              module = swArea + '/' + moduleDir
# Line 784 | Line 820 | class Cmssw(JobType):
820              pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
821              if os.path.isdir(pa):
822                  tar.add(pa,paDir)
823 <        
823 >
824 >            ### FEDE FOR DBS PUBLICATION
825 >            ## Add PRODCOMMON dir to tar
826 >            prodcommonDir = 'ProdCommon'
827 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
828 >            if os.path.isdir(prodcommonPath):
829 >                tar.add(prodcommonPath,prodcommonDir)
830 >            #############################
831 >
832              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
833              tar.close()
834          except :
# Line 796 | Line 840 | class Cmssw(JobType):
840              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
841  
842          ## create tar-ball with ML stuff
843 <        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
843 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
844          try:
845              tar = tarfile.open(self.MLtgzfile, "w:gz")
846              path=os.environ['CRABDIR'] + '/python/'
# Line 806 | Line 850 | class Cmssw(JobType):
850              tar.close()
851          except :
852              raise CrabException('Could not create ML files tar-ball')
853 <        
853 >
854          return
855 <        
855 >
856 >    def additionalInputFileTgz(self):
857 >        """
858 >        Put all additional files into a tar ball and return its name
859 >        """
860 >        import tarfile
861 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
862 >        tar = tarfile.open(tarName, "w:gz")
863 >        for file in self.additional_inbox_files:
864 >            tar.add(file,string.split(file,'/')[-1])
865 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
866 >        tar.close()
867 >        return tarName
868 >
869      def wsSetupEnvironment(self, nj):
870          """
871          Returns part of a job script which prepares
872          the execution environment for the job 'nj'.
873          """
874          # Prepare JobType-independent part
875 <        txt = ''
876 <  
875 >        txt = ''
876 >
877          ## OLI_Daniele at this level  middleware already known
878  
879 <        txt += 'if [ $middleware == LCG ]; then \n'
879 >        txt += 'if [ $middleware == LCG ]; then \n'
880 >        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
881 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
882 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
883          txt += self.wsSetupCMSLCGEnvironment_()
884          txt += 'elif [ $middleware == OSG ]; then\n'
885          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
886 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
827 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
886 >        txt += '    if [ ! $? == 0 ] ;then\n'
887          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
888          txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
889          txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
# Line 834 | Line 893 | class Cmssw(JobType):
893          txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
894          txt += '        exit 1\n'
895          txt += '    fi\n'
896 +        txt += '    echo "Created working directory: $WORKING_DIR"\n'
897          txt += '\n'
898          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
899          txt += '    cd $WORKING_DIR\n'
900 <        txt += self.wsSetupCMSOSGEnvironment_()
900 >        txt += self.wsSetupCMSOSGEnvironment_()
901 >        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
902 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
903          txt += 'fi\n'
904  
905          # Prepare JobType-specific part
906          scram = self.scram.commandName()
907          txt += '\n\n'
908          txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
847        txt += 'echo "Setting SCRAM_ARCH='+self.executable_arch+'"\n'
848        txt += 'export SCRAM_ARCH='+self.executable_arch+'\n'
909          txt += scram+' project CMSSW '+self.version+'\n'
910          txt += 'status=$?\n'
911          txt += 'if [ $status != 0 ] ; then\n'
# Line 862 | Line 922 | class Cmssw(JobType):
922          txt += '        cd $RUNTIME_AREA\n'
923          txt += '        /bin/rm -rf $WORKING_DIR\n'
924          txt += '        if [ -d $WORKING_DIR ] ;then\n'
925 <        txt += '        echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
926 <        txt += '        echo "JOB_EXIT_STATUS = 10018"\n'
927 <        txt += '        echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
928 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
925 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
926 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
927 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
928 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
929          txt += '            rm -f $RUNTIME_AREA/$repo \n'
930          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
931          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 875 | Line 935 | class Cmssw(JobType):
935          txt += 'fi \n'
936          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
937          txt += 'cd '+self.version+'\n'
938 +        ########## FEDE FOR DBS2 ######################
939 +        txt += 'SOFTWARE_DIR=`pwd`\n'
940 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
941 +        ###############################################
942          ### needed grep for bug in scramv1 ###
943          txt += scram+' runtime -sh\n'
944          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
# Line 900 | Line 964 | class Cmssw(JobType):
964          txt += '        cd $RUNTIME_AREA\n'
965          txt += '        /bin/rm -rf $WORKING_DIR\n'
966          txt += '        if [ -d $WORKING_DIR ] ;then\n'
967 <        txt += '        echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
968 <        txt += '        echo "JOB_EXIT_STATUS = 50114"\n'
969 <        txt += '        echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
970 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
967 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
968 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
969 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
970 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
971          txt += '            rm -f $RUNTIME_AREA/$repo \n'
972          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
973          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 915 | Line 979 | class Cmssw(JobType):
979  
980          # Prepare job-specific part
981          job = common.job_list[nj]
982 +        ### FEDE FOR DBS OUTPUT PUBLICATION
983 +        if (self.datasetPath):
984 +            txt += '\n'
985 +            txt += 'DatasetPath='+self.datasetPath+'\n'
986 +
987 +            datasetpath_split = self.datasetPath.split("/")
988 +
989 +            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
990 +            txt += 'DataTier='+datasetpath_split[2]+'\n'
991 +            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
992 +            txt += 'ApplicationFamily=cmsRun\n'
993 +
994 +        else:
995 +            txt += 'DatasetPath=MCDataTier\n'
996 +            txt += 'PrimaryDataset=null\n'
997 +            txt += 'DataTier=null\n'
998 +            #txt += 'ProcessedDataset=null\n'
999 +            txt += 'ApplicationFamily=MCDataTier\n'
1000          if self.pset != None: #CarlosDaniele
1001              pset = os.path.basename(job.configFilename())
1002              txt += '\n'
1003 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1004              if (self.datasetPath): # standard job
1005                  #txt += 'InputFiles=$2\n'
1006                  txt += 'InputFiles=${args[1]}\n'
1007                  txt += 'MaxEvents=${args[2]}\n'
1008                  txt += 'SkipEvents=${args[3]}\n'
1009                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
1010 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1010 >                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1011                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1012 <                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1012 >                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1013                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1014 <                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1014 >                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1015              else:  # pythia like job
1016                  seedIndex=1
1017                  if (self.firstRun):
1018                      txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1019                      txt += 'echo "FirstRun: <$FirstRun>"\n'
1020 <                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1020 >                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1021                      seedIndex=seedIndex+1
1022  
1023                  if (self.sourceSeed):
1024                      txt += 'Seed=${args['+str(seedIndex)+']}\n'
1025 <                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1025 >                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1026                      seedIndex=seedIndex+1
1027                      ## the following seeds are not always present
1028                      if (self.sourceSeedVtx):
1029                          txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1030                          txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1031 <                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1031 >                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1032                          seedIndex += 1
1033                      if (self.sourceSeedG4):
1034                          txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1035                          txt += 'echo "G4Seed: <$G4Seed>"\n'
1036 <                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1036 >                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1037                          seedIndex += 1
1038                      if (self.sourceSeedMix):
1039                          txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1040                          txt += 'echo "MixSeed: <$mixSeed>"\n'
1041 <                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1041 >                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1042                          seedIndex += 1
1043                      pass
1044                  pass
1045              txt += 'mv -f '+pset+' pset.cfg\n'
1046  
1047          if len(self.additional_inbox_files) > 0:
1048 <            for file in self.additional_inbox_files:
1049 <                relFile = file.split("/")[-1]
1050 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
1051 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
969 <                txt += '   chmod +x '+relFile+'\n'
970 <                txt += 'fi\n'
971 <            pass
1048 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1049 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1050 >            txt += 'fi\n'
1051 >            pass
1052  
1053          if self.pset != None: #CarlosDaniele
1054              txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1055 <        
1055 >
1056              txt += '\n'
1057              txt += 'echo "***** cat pset.cfg *********"\n'
1058              txt += 'cat pset.cfg\n'
1059              txt += 'echo "****** end pset.cfg ********"\n'
1060              txt += '\n'
1061 +            ### FEDE FOR DBS OUTPUT PUBLICATION
1062 +            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1063 +            txt += 'echo "PSETHASH = $PSETHASH" \n'
1064 +            ##############
1065 +            txt += '\n'
1066 +            # txt += 'echo "***** cat pset1.cfg *********"\n'
1067 +            # txt += 'cat pset1.cfg\n'
1068 +            # txt += 'echo "****** end pset1.cfg ********"\n'
1069          return txt
1070  
1071      def wsBuildExe(self, nj=0):
# Line 1016 | Line 1104 | class Cmssw(JobType):
1104              txt += '   echo "Successful untar" \n'
1105              txt += 'fi \n'
1106              txt += '\n'
1107 <            txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1107 >            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1108              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1109 <            txt += '   export PYTHONPATH=ProdAgentApi\n'
1109 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1110 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1111 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1112 >            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1113              txt += 'else\n'
1114 <            txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1114 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1115 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1116 >            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1117 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1118 >            ###################
1119              txt += 'fi\n'
1120              txt += '\n'
1121  
1122              pass
1123 <        
1123 >
1124          return txt
1125  
1126      def modifySteeringCards(self, nj):
1127          """
1128 <        modify the card provided by the user,
1128 >        modify the card provided by the user,
1129          writing a new card into share dir
1130          """
1131 <        
1131 >
1132      def executableName(self):
1133          if self.scriptExe: #CarlosDaniele
1134              return "sh "
# Line 1043 | Line 1138 | class Cmssw(JobType):
1138      def executableArgs(self):
1139          if self.scriptExe:#CarlosDaniele
1140              return   self.scriptExe + " $NJob"
1141 <        else:
1142 <            return " -p pset.cfg"
1141 >        else:
1142 >            # if >= CMSSW_1_5_X, add -e
1143 >            version_array = self.scram.getSWVersion().split('_')
1144 >            major = 0
1145 >            minor = 0
1146 >            try:
1147 >                major = int(version_array[1])
1148 >                minor = int(version_array[2])
1149 >            except:
1150 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1151 >                raise CrabException(msg)
1152 >            if major >= 1 and minor >= 5 :
1153 >                return " -e -p pset.cfg"
1154 >            else:
1155 >                return " -p pset.cfg"
1156  
1157      def inputSandbox(self, nj):
1158          """
# Line 1062 | Line 1170 | class Cmssw(JobType):
1170          if not self.pset is None:
1171              inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1172          ## additional input files
1173 <        for file in self.additional_inbox_files:
1174 <            inp_box.append(file)
1173 >        tgz = self.additionalInputFileTgz()
1174 >        inp_box.append(tgz)
1175          return inp_box
1176  
1177      def outputSandbox(self, nj):
# Line 1074 | Line 1182 | class Cmssw(JobType):
1182  
1183          ## User Declared output files
1184          for out in (self.output_file+self.output_file_sandbox):
1185 <            n_out = nj + 1
1185 >            n_out = nj + 1
1186              out_box.append(self.numberFile_(out,str(n_out)))
1187          return out_box
1188  
# Line 1093 | Line 1201 | class Cmssw(JobType):
1201          txt += '# directory content\n'
1202          txt += 'ls \n'
1203  
1204 <        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1204 >        txt += 'output_exit_status=0\n'
1205 >
1206 >        for fileWithSuffix in (self.output_file_sandbox):
1207              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1208              txt += '\n'
1209              txt += '# check output file\n'
1210 <            txt += 'ls '+fileWithSuffix+'\n'
1211 <            txt += 'ls_result=$?\n'
1212 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1213 <            #txt += '   JOB_EXIT_STATUS=60302\n'
1214 <            ### FEDE
1215 <            txt += '   exit_status=60302\n'
1106 <            ####
1107 <            txt += '   echo "ERROR: Problem with output file"\n'
1210 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1211 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1212 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1213 >            txt += 'else\n'
1214 >            txt += '    exit_status=60302\n'
1215 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1216              if common.scheduler.boss_scheduler_name == 'condor_g':
1217                  txt += '    if [ $middleware == OSG ]; then \n'
1218                  txt += '        echo "prepare dummy output file"\n'
1219                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1220                  txt += '    fi \n'
1113            txt += 'else\n'
1114            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1221              txt += 'fi\n'
1116      
1117        txt += 'cd $RUNTIME_AREA\n'
1118        txt += 'cd $RUNTIME_AREA\n'
1119        ### OLI_DANIELE
1120        txt += 'if [ $middleware == OSG ]; then\n'  
1121        txt += '    cd $RUNTIME_AREA\n'
1122        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1123        txt += '    /bin/rm -rf $WORKING_DIR\n'
1124        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1125        txt += '    echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1126        txt += '    echo "JOB_EXIT_STATUS = 60999"\n'
1127        txt += '    echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1128        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
1129        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1130        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1131        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1132        txt += '    fi\n'
1133        txt += 'fi\n'
1134        txt += '\n'
1222  
1223 <        file_list = ''
1137 <        ## Add to filelist only files to be possibly copied to SE
1138 <        for fileWithSuffix in self.output_file:
1223 >        for fileWithSuffix in (self.output_file):
1224              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1225 <            file_list=file_list+output_file_num+' '
1226 <        file_list=file_list[:-1]
1227 <        txt += 'file_list="'+file_list+'"\n'
1225 >            txt += '\n'
1226 >            txt += '# check output file\n'
1227 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1228 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1229 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1230 >            txt += 'else\n'
1231 >            txt += '    exit_status=60302\n'
1232 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1233 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1234 >            txt += '    output_exit_status=$exit_status\n'
1235 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1236 >                txt += '    if [ $middleware == OSG ]; then \n'
1237 >                txt += '        echo "prepare dummy output file"\n'
1238 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1239 >                txt += '    fi \n'
1240 >            txt += 'fi\n'
1241 >        file_list = []
1242 >        for fileWithSuffix in (self.output_file):
1243 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1244  
1245 +        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1246 +        txt += 'cd $RUNTIME_AREA\n'
1247          return txt
1248  
1249      def numberFile_(self, file, txt):
# Line 1158 | Line 1261 | class Cmssw(JobType):
1261              result = name + '_' + txt + "." + ext
1262          else:
1263              result = name + '_' + txt
1264 <        
1264 >
1265          return result
1266  
1267      def getRequirements(self, nj=[]):
1268          """
1269 <        return job requirements to add to jdl files
1269 >        return job requirements to add to jdl files
1270          """
1271          req = ''
1272          if self.version:
1273              req='Member("VO-cms-' + \
1274                   self.version + \
1275                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1276 <        # if self.executable_arch:
1277 <        #     req='Member("VO-cms-' + \
1278 <        #          self.executable_arch + \
1279 <        #          '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1276 >        ## SL add requirement for OS version only if SL4
1277 >        #reSL4 = re.compile( r'slc4' )
1278 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1279 >            req+=' && Member("VO-cms-' + \
1280 >                 self.executable_arch + \
1281 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1282  
1283          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1284  
# Line 1213 | Line 1318 | class Cmssw(JobType):
1318          txt += '       cd $RUNTIME_AREA\n'
1319          txt += '       /bin/rm -rf $WORKING_DIR\n'
1320          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1321 <        txt += '        echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1322 <        txt += '        echo "JOB_EXIT_STATUS = 10017"\n'
1323 <        txt += '        echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1324 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1325 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1326 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1327 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1321 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1322 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1323 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1324 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1325 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1326 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1327 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1328          txt += '       fi\n'
1329          txt += '\n'
1330          txt += '       exit 1\n'
# Line 1229 | Line 1334 | class Cmssw(JobType):
1334          txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1335  
1336          return txt
1337 <
1337 >
1338      ### OLI_DANIELE
1339      def wsSetupCMSLCGEnvironment_(self):
1340          """
# Line 1278 | Line 1383 | class Cmssw(JobType):
1383          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1384          return txt
1385  
1386 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1387 +    def modifyReport(self, nj):
1388 +        """
1389 +        insert the part of the script that modifies the FrameworkJob Report
1390 +        """
1391 +
1392 +        txt = ''
1393 +        try:
1394 +            publish_data = int(self.cfg_params['USER.publish_data'])
1395 +        except KeyError:
1396 +            publish_data = 0
1397 +        if (publish_data == 1):
1398 +            txt += 'echo "Modify Job Report" \n'
1399 +            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1400 +            ################ FEDE FOR DBS2 #############################################
1401 +            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1402 +            #############################################################################
1403 +            #try:
1404 +            #    publish_data = int(self.cfg_params['USER.publish_data'])
1405 +            #except KeyError:
1406 +            #    publish_data = 0
1407 +
1408 +            txt += 'if [ -z "$SE" ]; then\n'
1409 +            txt += '    SE="" \n'
1410 +            txt += 'fi \n'
1411 +            txt += 'if [ -z "$SE_PATH" ]; then\n'
1412 +            txt += '    SE_PATH="" \n'
1413 +            txt += 'fi \n'
1414 +            txt += 'echo "SE = $SE"\n'
1415 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1416 +
1417 +        #if (publish_data == 1):
1418 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1419 +            processedDataset = self.cfg_params['USER.publish_data_name']
1420 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1421 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1422 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1423 +            #### FEDE: added slash in LFN ##############
1424 +            txt += '    FOR_LFN=/copy_problems/ \n'
1425 +            txt += 'else \n'
1426 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1427 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1428 +            txt += '    FOR_LFN=/store$tmp \n'
1429 +            txt += 'fi \n'
1430 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1431 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1432 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1433 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1434 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1435 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1436 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1437 +
1438 +            txt += 'modifyReport_result=$?\n'
1439 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1440 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1441 +            txt += '    exit_status=1\n'
1442 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1443 +            txt += 'else\n'
1444 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1445 +            txt += 'fi\n'
1446 +        else:
1447 +            txt += 'echo "no data publication required"\n'
1448 +            #txt += 'ProcessedDataset=no_data_to_publish \n'
1449 +            #### FEDE: added slash in LFN ##############
1450 +            #txt += 'FOR_LFN=/local/ \n'
1451 +            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1452 +            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1453 +        return txt
1454 +
1455 +    def cleanEnv(self):
1456 +        ### OLI_DANIELE
1457 +        txt = ''
1458 +        txt += 'if [ $middleware == OSG ]; then\n'
1459 +        txt += '    cd $RUNTIME_AREA\n'
1460 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1461 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1462 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1463 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1464 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1465 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1466 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1467 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1468 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1469 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1470 +        txt += '    fi\n'
1471 +        txt += 'fi\n'
1472 +        txt += '\n'
1473 +        return txt
1474 +
1475      def setParam_(self, param, value):
1476          self._params[param] = value
1477  
# Line 1286 | Line 1480 | class Cmssw(JobType):
1480  
1481      def setTaskid_(self):
1482          self._taskId = self.cfg_params['taskId']
1483 <        
1483 >
1484      def getTaskid(self):
1485          return self._taskId
1486  
1293 #######################################################################
1487      def uniquelist(self, old):
1488          """
1489          remove duplicates from a list
# Line 1299 | Line 1492 | class Cmssw(JobType):
1492          for e in old:
1493              nd[e]=0
1494          return nd.keys()
1495 +
1496 +
1497 +    def checkOut(self, limit):
1498 +        """
1499 +        check the dimension of the output files
1500 +        """
1501 +        txt = 'echo "*****************************************"\n'
1502 +        txt += 'echo "** Starting output sandbox limit check **"\n'
1503 +        txt += 'echo "*****************************************"\n'
1504 +        allOutFiles = ""
1505 +        listOutFiles = []
1506 +        for fileOut in (self.output_file+self.output_file_sandbox):
1507 +             if fileOut.find('crab_fjr') == -1:
1508 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1509 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1510 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1511 +        txt += 'ls -gGhrta;\n'
1512 +        txt += 'sum=0;\n'
1513 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1514 +        txt += '    if [ -e $file ]; then\n'
1515 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1516 +        txt += '        sum=`expr $sum + $tt`\n'
1517 +        txt += '    else\n'
1518 +        txt += '        echo "WARNING: output file $file not found!"\n'
1519 +        txt += '    fi\n'
1520 +        txt += 'done\n'
1521 +        txt += 'echo "Total Output dimension: $sum";\n'
1522 +        txt += 'limit='+str(limit)+';\n'
1523 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1524 +        txt += 'if [ $limit -lt $sum ]; then\n'
1525 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1526 +        txt += '    echo "         checking the output file sizes..."\n'
1527 +        """
1528 +        txt += '    dim=0;\n'
1529 +        txt += '    exclude=0;\n'
1530 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1531 +        txt += '        sumTemp=0;\n'
1532 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1533 +        txt += '            if [ $file != $file2 ]; then\n'
1534 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1535 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1536 +        txt += '            fi\n'
1537 +        txt += '        done\n'
1538 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1539 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1540 +        txt += '                dim=$sumTemp;\n'
1541 +        txt += '                exclude=$file;\n'
1542 +        txt += '            fi\n'
1543 +        txt += '        fi\n'
1544 +        txt += '    done\n'
1545 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1546 +        """
1547 +        txt += '    tot=0;\n'
1548 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1549 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1550 +        txt += '        tot=`expr $tot + $tt`;\n'
1551 +        txt += '        if [ $limit -lt $tot ]; then\n'
1552 +        txt += '            tot=`expr $tot - $tt`;\n'
1553 +        txt += '            fileLast=$file;\n'
1554 +        txt += '            break;\n'
1555 +        txt += '        fi\n'
1556 +        txt += '    done\n'
1557 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1558 +        txt += '    flag=0;\n'
1559 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1560 +        txt += '        if [ $fileLast = $filess ]; then\n'
1561 +        txt += '            flag=1;\n'
1562 +        txt += '        fi\n'
1563 +        txt += '        if [ $flag -eq 1 ]; then\n'
1564 +        txt += '            rm -f $filess;\n'
1565 +        txt += '        fi\n'
1566 +        txt += '    done\n'
1567 +        txt += '    ls -agGhrt;\n'
1568 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1569 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1570 +        txt += '    exit_status=70000;\n'
1571 +        txt += 'else'
1572 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1573 +        txt += 'fi\n'
1574 +        txt += 'echo "*****************************************"\n'
1575 +        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1576 +        txt += 'echo "*****************************************"\n'
1577 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines