
Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.7 by gutsche, Tue Jun 13 20:43:00 2006 UTC vs.
Revision 1.244 by spiga, Thu Sep 25 15:08:01 2008 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import SEBlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DBSInfo_EDM
9 import DataDiscovery_EDM
10 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15 +        self.skip_blocks = skip_blocks
16 +
17 +        self.argsList = []
18  
20        self.analisys_common_info = {}
21        # Marco.
19          self._params = {}
20          self.cfg_params = cfg_params
21 +        # init BlackWhiteListParser
22 +        self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)
23 +
24 +        ### Temporary patch to automatically skip the ISB size check:
25 +        server=self.cfg_params.get('CRAB.server_name',None)
26 +        size = 9.5
27 +        if server: size = 99999
28 +        ### D.S.
29 +        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',size))
30 +
31 +        # number of jobs requested to be created, limit obj splitting
32 +        self.ncjobs = ncjobs
33  
34          log = common.logger
35 <        
35 >
36          self.scram = Scram.Scram(cfg_params)
28        scramArea = ''
37          self.additional_inbox_files = []
38          self.scriptExe = ''
39          self.executable = ''
40 +        self.executable_arch = self.scram.getArch()
41          self.tgz_name = 'default.tgz'
42 +        self.scriptName = 'CMSSW.sh'
43 +        self.pset = ''
44 +        self.datasetPath = ''
45  
46 +        # set FJR file name
47 +        self.fjrFileName = 'crab_fjr.xml'
48  
49          self.version = self.scram.getSWVersion()
50 <        self.setParam_('application', self.version)
51 <        common.analisys_common_info['sw_version'] = self.version
52 <        ### FEDE
53 <        common.analisys_common_info['copy_input_data'] = 0
40 <        common.analisys_common_info['events_management'] = 1
41 <
42 <        ### collect Data cards
50 >        version_array = self.version.split('_')
51 >        self.CMSSW_major = 0
52 >        self.CMSSW_minor = 0
53 >        self.CMSSW_patch = 0
54          try:
55 <         #   self.owner = cfg_params['CMSSW.owner']
56 <         #   log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
57 <         #   self.dataset = cfg_params['CMSSW.dataset']
58 <            self.datasetPath = cfg_params['CMSSW.datasetpath']
59 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+self.datasetPath)
49 <        except KeyError:
50 <        #    msg = "Error: owner and/or dataset not defined "
51 <            msg = "Error: datasetpath not defined "  
55 >            self.CMSSW_major = int(version_array[1])
56 >            self.CMSSW_minor = int(version_array[2])
57 >            self.CMSSW_patch = int(version_array[3])
58 >        except:
59 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
60              raise CrabException(msg)
61  
62 <        # ML monitoring
55 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
56 <        datasetpath_split = self.datasetPath.split("/")
57 <        self.setParam_('dataset', datasetpath_split[1])
58 <        self.setParam_('owner', datasetpath_split[-1])
62 >        ### collect Data cards
63  
64 +        if not cfg_params.has_key('CMSSW.datasetpath'):
65 +            msg = "Error: datasetpath not defined "
66 +            raise CrabException(msg)
67  
68 +        ### Temporary: added to remove input file control in the case of PU
69 +        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
70  
71 +        tmp =  cfg_params['CMSSW.datasetpath']
72 +        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
73  
74 <        self.dataTiers = []
75 < #       try:
76 < #           tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
77 < #           for tmp in tmpDataTiers:
78 < #               tmp=string.strip(tmp)
79 < #               self.dataTiers.append(tmp)
80 < #               pass
81 < #           pass
82 < #       except KeyError:
72 < #           pass
73 < #       log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
74 >        if tmp =='':
75 >            msg = "Error: datasetpath not defined "
76 >            raise CrabException(msg)
77 >        elif string.lower(tmp)=='none':
78 >            self.datasetPath = None
79 >            self.selectNoInput = 1
80 >        else:
81 >            self.datasetPath = tmp
82 >            self.selectNoInput = 0
83  
84 +        self.dataTiers = []
85 +        self.debugWrap = ''
86 +        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
87 +        if self.debug_wrapper: self.debugWrap='--debug'
88          ## now the application
89 <        try:
90 <            self.executable = cfg_params['CMSSW.executable']
78 <            self.setParam_('exe', self.executable)
79 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
80 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
81 <            log.debug(3,msg)
82 <        except KeyError:
83 <            self.executable = 'cmsRun'
84 <            self.setParam_('exe', self.executable)
85 <            msg = "User executable not defined. Use cmsRun"
86 <            log.debug(3,msg)
87 <            pass
89 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
90 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
91  
92 <        try:
93 <            self.pset = cfg_params['CMSSW.pset']
94 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
92 >        if not cfg_params.has_key('CMSSW.pset'):
93 >            raise CrabException("PSet file missing. Cannot run cmsRun ")
94 >        self.pset = cfg_params['CMSSW.pset']
95 >        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
96 >        if self.pset.lower() != 'none' :
97              if (not os.path.exists(self.pset)):
98                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
99 <        except KeyError:
100 <            raise CrabException("PSet file missing. Cannot run cmsRun ")
99 >        else:
100 >            self.pset = None
101  
102          # output files
103 <        try:
104 <            self.output_file = []
103 >        ## stuff which must be returned always via sandbox
104 >        self.output_file_sandbox = []
105  
106 <            tmp = cfg_params['CMSSW.output_file']
107 <            if tmp != '':
108 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
109 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
110 <                for tmp in tmpOutFiles:
111 <                    tmp=string.strip(tmp)
112 <                    self.output_file.append(tmp)
113 <                    pass
114 <            else:
115 <                log.message("No output file defined: only stdout/err will be available")
116 <                pass
117 <            pass
113 <        except KeyError:
114 <            log.message("No output file defined: only stdout/err will be available")
115 <            pass
106 >        # add fjr report by default via sandbox
107 >        self.output_file_sandbox.append(self.fjrFileName)
108 >
109 >        # other output files to be returned via sandbox or copied to SE
110 >        outfileflag = False
111 >        self.output_file = []
112 >        tmp = cfg_params.get('CMSSW.output_file',None)
113 >        if tmp :
114 >            self.output_file = [x.strip() for x in tmp.split(',')]
115 >            outfileflag = True #output found
116 >        #else:
117 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
118  
119          # script_exe file as additional file in inputSandbox
120 <        try:
121 <           self.scriptExe = cfg_params['USER.script_exe']
122 <           self.additional_inbox_files.append(self.scriptExe)
123 <        except KeyError:
124 <           pass
125 <        if self.scriptExe != '':
126 <           if os.path.isfile(self.scriptExe):
127 <              pass
128 <           else:
129 <              log.message("WARNING. file "+self.scriptExe+" not found")
130 <              sys.exit()
131 <                  
120 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
121 >        if self.scriptExe :
122 >            if not os.path.isfile(self.scriptExe):
123 >                msg ="ERROR. file "+self.scriptExe+" not found"
124 >                raise CrabException(msg)
125 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
126 >
127 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
128 >            msg ="Error. script_exe  not defined"
129 >            raise CrabException(msg)
130 >
131 >        # use parent files...
132 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
133 >
134          ## additional input files
135 <        try:
136 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
135 >        if cfg_params.has_key('USER.additional_input_files'):
136 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
137              for tmp in tmpAddFiles:
138 <                if not os.path.exists(tmp):
139 <                    raise CrabException("Additional input file not found: "+tmp)
140 <                tmp=string.strip(tmp)
141 <                self.additional_inbox_files.append(tmp)
138 >                tmp = string.strip(tmp)
139 >                dirname = ''
140 >                if not tmp[0]=="/": dirname = "."
141 >                files = []
142 >                if string.find(tmp,"*")>-1:
143 >                    files = glob.glob(os.path.join(dirname, tmp))
144 >                    if len(files)==0:
145 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
146 >                else:
147 >                    files.append(tmp)
148 >                for file in files:
149 >                    if not os.path.exists(file):
150 >                        raise CrabException("Additional input file not found: "+file)
151 >                    pass
152 >                    self.additional_inbox_files.append(string.strip(file))
153                  pass
154              pass
155 <        except KeyError:
156 <            pass
142 <
143 <        try:
144 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
145 <        except KeyError:
146 <            self.filesPerJob = 1
147 <
148 <        ## Max event   will be total_number_of_events ???  Daniele
149 <        try:
150 <            self.maxEv = cfg_params['CMSSW.event_per_job']
151 <        except KeyError:
152 <            self.maxEv = "-1"
153 <        ##  
154 <        try:
155 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
156 <        except KeyError:
157 <            msg = 'Must define total_number_of_events'
158 <            raise CrabException(msg)
159 <        
160 <        CEBlackList = []
161 <        try:
162 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
163 <            for tmp in tmpBad:
164 <                tmp=string.strip(tmp)
165 <                CEBlackList.append(tmp)
166 <        except KeyError:
167 <            pass
155 >            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
156 >        pass
157  
158 <        self.reCEBlackList=[]
159 <        for bad in CEBlackList:
160 <            self.reCEBlackList.append(re.compile( bad ))
158 >        ## Events per job
159 >        if cfg_params.has_key('CMSSW.events_per_job'):
160 >            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
161 >            self.selectEventsPerJob = 1
162 >        else:
163 >            self.eventsPerJob = -1
164 >            self.selectEventsPerJob = 0
165  
166 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
166 >        ## number of jobs
167 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
168 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
169 >            self.selectNumberOfJobs = 1
170 >        else:
171 >            self.theNumberOfJobs = 0
172 >            self.selectNumberOfJobs = 0
173  
174 <        CEWhiteList = []
175 <        try:
176 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
177 <            for tmp in tmpGood:
178 <                tmp=string.strip(tmp)
179 <                CEWhiteList.append(tmp)
180 <        except KeyError:
181 <            pass
174 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
175 >            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
176 >            self.selectTotalNumberEvents = 1
177 >            if self.selectNumberOfJobs  == 1:
178 >                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
179 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
180 >                    raise CrabException(msg)
181 >        else:
182 >            self.total_number_of_events = 0
183 >            self.selectTotalNumberEvents = 0
184  
185 <        #print 'CEWhiteList: ',CEWhiteList
186 <        self.reCEWhiteList=[]
187 <        for Good in CEWhiteList:
188 <            self.reCEWhiteList.append(re.compile( Good ))
185 >        if self.pset != None:
186 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
187 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
188 >                 raise CrabException(msg)
189 >        else:
190 >             if (self.selectNumberOfJobs == 0):
191 >                 msg = 'Must specify  number_of_jobs.'
192 >                 raise CrabException(msg)
193 >
194 >        ## New method of dealing with seeds
195 >        self.incrementSeeds = []
196 >        self.preserveSeeds = []
197 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
198 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
199 >            for tmp in tmpList:
200 >                tmp.strip()
201 >                self.preserveSeeds.append(tmp)
202 >        if cfg_params.has_key('CMSSW.increment_seeds'):
203 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
204 >            for tmp in tmpList:
205 >                tmp.strip()
206 >                self.incrementSeeds.append(tmp)
207 >
208 >        ## FUTURE: Can remove in CRAB 2.4.0
209 >        self.sourceSeed    = cfg_params.get('CMSSW.pythia_seed',None)
210 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
211 >        self.sourceSeedG4  = cfg_params.get('CMSSW.g4_seed',None)
212 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
213 >        if self.sourceSeed or self.sourceSeedVtx or self.sourceSeedG4 or self.sourceSeedMix:
214 >            msg = 'pythia_seed, vtx_seed, g4_seed, and mix_seed are no longer valid settings. You must use increment_seeds or preserve_seeds'
215 >            raise CrabException(msg)
216  
217 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
217 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
218  
219 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
219 >        # Copy/return
220 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
221 >        self.return_data = int(cfg_params.get('USER.return_data',0))
222  
223          #DBSDLS-start
224 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
224 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
225          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
226          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
227 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
228          ## Perform the data location and discovery (based on DBS/DLS)
229 <        self.DataDiscoveryAndLocation(cfg_params)
230 <        #DBSDLS-end          
231 <
232 <        self.tgzNameWithPath = self.getTarBall(self.executable)
233 <
234 <        self.jobSplitting()  #Daniele job Splitting
235 <        self.PsetEdit.maxEvent(self.maxEv) #Daniele  
236 <        self.PsetEdit.inputModule("INPUT") #Daniele  
237 <        self.PsetEdit.psetWriter(self.configFilename())
238 <        
229 >        ## SL: Don't if NONE is specified as input (pythia use case)
230 >        blockSites = {}
231 >        if self.datasetPath:
232 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
233 >        #DBSDLS-end
234 >
235 >        ## Select Splitting
236 >        if self.selectNoInput:
237 >            if self.pset == None:
238 >                self.jobSplittingForScript()
239 >            else:
240 >                self.jobSplittingNoInput()
241 >        elif (cfg_params.get('CMSSW.noblockboundary',0)):
242 >            self.jobSplittingNoBlockBoundary(blockSites)
243 >        else:
244 >            self.jobSplittingByBlocks(blockSites)
245  
246 +        # modify Pset only the first time
247 +        if isNew:
248 +            if self.pset != None:
249 +                import PsetManipulator as pp
250 +                PsetEdit = pp.PsetManipulator(self.pset)
251 +                try:
252 +                    # Add FrameworkJobReport to parameter-set, set max events.
253 +                    # Reset later for data jobs by writeCFG which does all modifications
254 +                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
255 +                    PsetEdit.maxEvent(self.eventsPerJob)
256 +                    PsetEdit.psetWriter(self.configFilename())
257 +                    ## If present, add TFileService to output files
258 +                    if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
259 +                        tfsOutput = PsetEdit.getTFileService()
260 +                        if tfsOutput:
261 +                            if tfsOutput in self.output_file:
262 +                                common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
263 +                            else:
264 +                                outfileflag = True #output found
265 +                                self.output_file.append(tfsOutput)
266 +                                common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
267 +                            pass
268 +                        pass
269 +                    ## If present and requested, add PoolOutputModule to output files
270 +                    if int(cfg_params.get('CMSSW.get_edm_output',0)):
271 +                        edmOutput = PsetEdit.getPoolOutputModule()
272 +                        if edmOutput:
273 +                            if edmOutput in self.output_file:
274 +                                common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
275 +                            else:
276 +                                self.output_file.append(edmOutput)
277 +                                common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
278 +                            pass
279 +                        pass
280 +                except CrabException:
281 +                    msg='Error while manipulating ParameterSet: exiting...'
282 +                    raise CrabException(msg)
283 +            ## Prepare inputSandbox TarBall (only the first time)
284 +            self.tgzNameWithPath = self.getTarBall(self.executable)
285  
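The constructor in the new revision replaces the old files_per_jobs logic with three splitting switches and requires that, when a parameter set is given, exactly two of total_number_of_events, events_per_job and number_of_jobs are set. Below is a minimal standalone sketch of that rule; the helper name check_splitting_params is hypothetical and a plain dictionary stands in for the CRAB config parser.

    def check_splitting_params(cfg, has_pset=True):
        # The three splitting knobs; None means "not set in the crab.cfg".
        total  = cfg.get('CMSSW.total_number_of_events')
        perJob = cfg.get('CMSSW.events_per_job')
        nJobs  = cfg.get('CMSSW.number_of_jobs')
        nSet = sum(x is not None for x in (total, perJob, nJobs))
        if has_pset:
            if nSet != 2:
                raise ValueError('Must define exactly two of total_number_of_events, '
                                 'events_per_job, or number_of_jobs.')
        elif nJobs is None:
            raise ValueError('Must specify number_of_jobs.')
        return total, perJob, nJobs

    # e.g. check_splitting_params({'CMSSW.total_number_of_events': 1000,
    #                              'CMSSW.events_per_job': 250})
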
286      def DataDiscoveryAndLocation(self, cfg_params):
287  
288 +        import DataDiscovery
289 +        import DataLocation
290          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
291  
214        #datasetPath = "/"+self.owner+"/"+self.dataTiers[0]+"/"+self.dataset
215        
292          datasetPath=self.datasetPath
293  
218        ## TODO
219        dataTiersList = ""
220        dataTiers = dataTiersList.split(',')
221
294          ## Contact the DBS
295 +        common.logger.message("Contacting Data Discovery Services ...")
296          try:
297 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
297 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
298              self.pubdata.fetchDBSInfo()
299  
300 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
300 >        except DataDiscovery.NotExistingDatasetError, ex :
301              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
302              raise CrabException(msg)
303 <
231 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
303 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
304              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
305              raise CrabException(msg)
306 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
307 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
306 >        except DataDiscovery.DataDiscoveryError, ex:
307 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
308              raise CrabException(msg)
309  
310 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
311 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
312 <        common.logger.message("Required data are :"+self.datasetPath)
313 <
242 <        filesbyblock=self.pubdata.getFiles()
243 <        self.AllInputFiles=filesbyblock.values()
244 <        self.files = self.AllInputFiles        
245 <
246 <        ## TEMP
247 <    #    self.filesTmp = filesbyblock.values()
248 <    #    self.files = []
249 <    #    locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
250 <    #    locPath=''
251 <    #    tmp = []
252 <    #    for file in self.filesTmp[0]:
253 <    #        tmp.append(locPath+file)
254 <    #    self.files.append(tmp)
255 <        ## END TEMP
310 >        self.filesbyblock=self.pubdata.getFiles()
311 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
312 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
313 >        self.parentFiles=self.pubdata.getParent()
314  
315          ## get max number of events
316 <        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
259 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
260 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
316 >        self.maxEvents=self.pubdata.getMaxEvents()
317  
318          ## Contact the DLS and build a list of sites hosting the fileblocks
319          try:
320 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
320 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
321              dataloc.fetchDLSInfo()
322 <        except DataLocation_EDM.DataLocationError , ex:
322 >        except DataLocation.DataLocationError , ex:
323              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
324              raise CrabException(msg)
325 <        
326 <        allsites=dataloc.getSites()
327 <        common.logger.debug(5,"sites are %s"%allsites)
328 <        sites=self.checkBlackList(allsites)
329 <        common.logger.debug(5,"sites are (after black list) %s"%sites)
330 <        sites=self.checkWhiteList(sites)
331 <        common.logger.debug(5,"sites are (after white list) %s"%sites)
332 <
333 <        if len(sites)==0:
334 <            msg = 'No sites hosting all the needed data! Exiting... '
335 <            raise CrabException(msg)
336 <
337 <        common.logger.message("List of Sites hosting the data : "+str(sites))
338 <        common.logger.debug(6, "List of Sites: "+str(sites))
339 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
340 <        self.setParam_('TargetCE', ','.join(sites))
285 <        return
286 <    
287 <    def jobSplitting(self):
325 >
326 >
327 >        sites = dataloc.getSites()
328 >        allSites = []
329 >        listSites = sites.values()
330 >        for listSite in listSites:
331 >            for oneSite in listSite:
332 >                allSites.append(oneSite)
333 >        allSites = self.uniquelist(allSites)
334 >
335 >        # screen output
336 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
337 >
338 >        return sites
339 >
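DataDiscoveryAndLocation now returns the block-to-sites map from DLS and, for the screen message, flattens it into a duplicate-free list of sites. A small sketch of that flattening step follows; sites_by_block is a hypothetical example in the same shape as the dataloc.getSites() result used above.

    sites_by_block = {
        'block-A': ['T2_IT_Bari', 'T2_DE_DESY'],
        'block-B': ['T2_DE_DESY', 'T1_US_FNAL'],
    }

    all_sites = []
    for site_list in sites_by_block.values():   # one list of hosting sites per block
        for site in site_list:
            all_sites.append(site)

    # drop duplicates, as the uniquelist() helper presumably does
    all_sites = sorted(set(all_sites))
    print(all_sites)   # ['T1_US_FNAL', 'T2_DE_DESY', 'T2_IT_Bari']
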
340 >    def jobSplittingByBlocks(self, blockSites):
341          """
342 <        first implemntation for job splitting  
343 <        """    
344 <      #  print 'eventi totali '+str(self.maxEvents)
345 <      #  print 'eventi totali richiesti dallo user '+str(self.total_number_of_events)
346 <        #print 'files per job '+str(self.filesPerJob)
347 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
348 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
349 <
350 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
351 <        n_tot_files = (len(self.files[0]))
352 <        ## SL: this is wrong if the files have different number of events
353 <        evPerFile = int(self.maxEvents)/n_tot_files
354 <        
355 <        common.logger.debug(5,'Events per File '+str(evPerFile))
356 <
357 <        ## if asked to process all events, do it
358 <        if self.total_number_of_events == -1:
359 <            self.total_number_of_events=self.maxEvents
360 <            self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
361 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
362 <        
363 <        else:
364 <            self.total_number_of_files = int(self.total_number_of_events/evPerFile)
365 <            ## SL: if ask for less event than what is computed to be available on a
366 <            ##     file, process the first file anyhow.
367 <            if self.total_number_of_files == 0:
368 <                self.total_number_of_files = self.total_number_of_files + 1
369 <
370 <            common.logger.debug(5,'N files  '+str(self.total_number_of_files))
318 <
319 <            check = 0
320 <            
321 <            ## Compute the number of jobs
322 <            #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
323 <            self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
324 <            common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
325 <
326 <            ## is there any remainder?
327 <            check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
328 <
329 <            common.logger.debug(5,'Check  '+str(check))
330 <
331 <            if check > 0:
332 <                self.total_number_of_jobs =  self.total_number_of_jobs + 1
333 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
342 >        Perform job splitting. Jobs run over an integer number of files
343 >        and no more than one block.
344 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
345 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
346 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
347 >                  self.maxEvents, self.filesbyblock
348 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
349 >              self.total_number_of_jobs - Total # of jobs
350 >              self.list_of_args - File(s) job will run on (a list of lists)
351 >        """
352 >
353 >        # ---- Handle the possible job splitting configurations ---- #
354 >        if (self.selectTotalNumberEvents):
355 >            totalEventsRequested = self.total_number_of_events
356 >        if (self.selectEventsPerJob):
357 >            eventsPerJobRequested = self.eventsPerJob
358 >            if (self.selectNumberOfJobs):
359 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
360 >
361 >        # If user requested all the events in the dataset
362 >        if (totalEventsRequested == -1):
363 >            eventsRemaining=self.maxEvents
364 >        # If user requested more events than are in the dataset
365 >        elif (totalEventsRequested > self.maxEvents):
366 >            eventsRemaining = self.maxEvents
367 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
368 >        # If user requested less events than are in the dataset
369 >        else:
370 >            eventsRemaining = totalEventsRequested
371  
372 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
373 <            pass
372 >        # If user requested more events per job than are in the dataset
373 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
374 >            eventsPerJobRequested = self.maxEvents
375 >
376 >        # For user info at end
377 >        totalEventCount = 0
378 >
379 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
380 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
381 >
382 >        if (self.selectNumberOfJobs):
383 >            common.logger.message("May not create the exact number_of_jobs requested.")
384 >
385 >        if ( self.ncjobs == 'all' ) :
386 >            totalNumberOfJobs = 999999999
387 >        else :
388 >            totalNumberOfJobs = self.ncjobs
389  
390 +        blocks = blockSites.keys()
391 +        blockCount = 0
392 +        # Backup variable in case self.maxEvents counted events in a non-included block
393 +        numBlocksInDataset = len(blocks)
394 +
395 +        jobCount = 0
396 +        list_of_lists = []
397 +
398 +        # dictionary tracking which jobs belong to which block
399 +        jobsOfBlock = {}
400 +
401 +        # ---- Iterate over the blocks in the dataset until ---- #
402 +        # ---- we've met the requested total # of events    ---- #
403 +        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
404 +            block = blocks[blockCount]
405 +            blockCount += 1
406 +            if block not in jobsOfBlock.keys() :
407 +                jobsOfBlock[block] = []
408 +
409 +            if self.eventsbyblock.has_key(block) :
410 +                numEventsInBlock = self.eventsbyblock[block]
411 +                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
412 +
413 +                files = self.filesbyblock[block]
414 +                numFilesInBlock = len(files)
415 +                if (numFilesInBlock <= 0):
416 +                    continue
417 +                fileCount = 0
418 +
419 +                # ---- New block => New job ---- #
420 +                parString = ""
421 +                # counter for number of events in files currently worked on
422 +                filesEventCount = 0
423 +                # flag if next while loop should touch new file
424 +                newFile = 1
425 +                # job event counter
426 +                jobSkipEventCount = 0
427 +
428 +                # ---- Iterate over the files in the block until we've met the requested ---- #
429 +                # ---- total # of events or we've gone over all the files in this block  ---- #
430 +                pString=''
431 +                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
432 +                    file = files[fileCount]
433 +                    if self.useParent:
434 +                        parent = self.parentFiles[file]
435 +                        for f in parent :
436 +                            pString += '\\\"' + f + '\\\"\,'
437 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
438 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
439 +                    if newFile :
440 +                        try:
441 +                            numEventsInFile = self.eventsbyfile[file]
442 +                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
443 +                            # increase filesEventCount
444 +                            filesEventCount += numEventsInFile
445 +                            # Add file to current job
446 +                            parString += '\\\"' + file + '\\\"\,'
447 +                            newFile = 0
448 +                        except KeyError:
449 +                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
450 +
451 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
452 +                    # if less events in file remain than eventsPerJobRequested
453 +                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
454 +                        # if last file in block
455 +                        if ( fileCount == numFilesInBlock-1 ) :
456 +                            # end job using last file, use remaining events in block
457 +                            # close job and touch new file
458 +                            fullString = parString[:-2]
459 +                            if self.useParent:
460 +                                fullParentString = pString[:-2]
461 +                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
462 +                            else:
463 +                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
464 +                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
465 +                            self.jobDestination.append(blockSites[block])
466 +                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
467 +                            # fill jobs of block dictionary
468 +                            jobsOfBlock[block].append(jobCount+1)
469 +                            # reset counter
470 +                            jobCount = jobCount + 1
471 +                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
472 +                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
473 +                            jobSkipEventCount = 0
474 +                            # reset file
475 +                            pString = ""
476 +                            parString = ""
477 +                            filesEventCount = 0
478 +                            newFile = 1
479 +                            fileCount += 1
480 +                        else :
481 +                            # go to next file
482 +                            newFile = 1
483 +                            fileCount += 1
484 +                    # if events in file equal to eventsPerJobRequested
485 +                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
486 +                        # close job and touch new file
487 +                        fullString = parString[:-2]
488 +                        if self.useParent:
489 +                            fullParentString = pString[:-2]
490 +                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
491 +                        else:
492 +                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
493 +                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
494 +                        self.jobDestination.append(blockSites[block])
495 +                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
496 +                        jobsOfBlock[block].append(jobCount+1)
497 +                        # reset counter
498 +                        jobCount = jobCount + 1
499 +                        totalEventCount = totalEventCount + eventsPerJobRequested
500 +                        eventsRemaining = eventsRemaining - eventsPerJobRequested
501 +                        jobSkipEventCount = 0
502 +                        # reset file
503 +                        pString = ""
504 +                        parString = ""
505 +                        filesEventCount = 0
506 +                        newFile = 1
507 +                        fileCount += 1
508 +
509 +                    # if more events in file remain than eventsPerJobRequested
510 +                    else :
511 +                        # close job but don't touch new file
512 +                        fullString = parString[:-2]
513 +                        if self.useParent:
514 +                            fullParentString = pString[:-2]
515 +                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
516 +                        else:
517 +                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
518 +                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
519 +                        self.jobDestination.append(blockSites[block])
520 +                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
521 +                        jobsOfBlock[block].append(jobCount+1)
522 +                        # increase counter
523 +                        jobCount = jobCount + 1
524 +                        totalEventCount = totalEventCount + eventsPerJobRequested
525 +                        eventsRemaining = eventsRemaining - eventsPerJobRequested
526 +                        # calculate skip events for last file
527 +                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
528 +                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
529 +                        # remove all but the last file
530 +                        filesEventCount = self.eventsbyfile[file]
531 +                        if self.useParent:
532 +                            for f in parent : pString += '\\\"' + f + '\\\"\,'
533 +                        parString = '\\\"' + file + '\\\"\,'
534 +                    pass # END if
535 +                pass # END while (iterate over files in the block)
536 +        pass # END while (iterate over blocks in the dataset)
537 +        self.ncjobs = self.total_number_of_jobs = jobCount
538 +        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
539 +            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
540 +        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
541 +
542 +        # screen output
543 +        screenOutput = "List of jobs and available destination sites:\n\n"
544 +
545 +        # keep track of blocks with no sites, to print a warning at the end
546 +        noSiteBlock = []
547 +        bloskNoSite = []
548 +
549 +        blockCounter = 0
550 +        for block in blocks:
551 +            if block in jobsOfBlock.keys() :
552 +                blockCounter += 1
553 +                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
554 +                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
555 +                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
556 +                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
557 +                    bloskNoSite.append( blockCounter )
558 +
559 +        common.logger.message(screenOutput)
560 +        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
561 +            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
562 +            virgola = ""
563 +            if len(bloskNoSite) > 1:
564 +                virgola = ","
565 +            for block in bloskNoSite:
566 +                msg += ' ' + str(block) + virgola
567 +            msg += '\n               Related jobs:\n                 '
568 +            virgola = ""
569 +            if len(noSiteBlock) > 1:
570 +                virgola = ","
571 +            for range_jobs in noSiteBlock:
572 +                msg += str(range_jobs) + virgola
573 +            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
574 +            if self.cfg_params.has_key('EDG.se_white_list'):
575 +                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
576 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
577 +                msg += 'Please check if the dataset is available at this site!)\n'
578 +            if self.cfg_params.has_key('EDG.ce_white_list'):
579 +                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
580 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
581 +                msg += 'Please check if the dataset is available at this site!)\n'
582 +
583 +            common.logger.message(msg)
584 +
585 +        self.list_of_args = list_of_lists
586 +        return
587 +
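The inner loop of jobSplittingByBlocks packs files into jobs of roughly eventsPerJobRequested events and records, per job, the file list, a maxEvents limit and a skip-events offset. The following self-contained sketch reproduces that bookkeeping for a single block under simplified assumptions (no parent files, no eventsRemaining cap); split_block and its (filename, n_events) input are illustrative, not part of CRAB.

    def split_block(files_with_events, events_per_job):
        """Return (files, max_events, skip_events) triplets for one block,
        mirroring the [fullString, eventsPerJobRequested, jobSkipEventCount]
        arguments assembled above."""
        jobs = []
        current, count, skip = [], 0, 0   # files in the open job, their events, events to skip
        i, new_file = 0, True
        while i < len(files_with_events):
            fname, nevents = files_with_events[i]
            if new_file:
                current.append(fname)
                count += nevents
                new_file = False
            if count - skip < events_per_job:
                if i == len(files_with_events) - 1:
                    # last file of the block: take everything that is left (-1 = no limit)
                    jobs.append((list(current), -1, skip))
                new_file = True
                i += 1
            elif count - skip == events_per_job:
                jobs.append((list(current), events_per_job, skip))
                current, count, skip = [], 0, 0
                new_file = True
                i += 1
            else:
                jobs.append((list(current), events_per_job, skip))
                # keep only the current file; skip the events already handed out
                skip = events_per_job - (count - skip - nevents)
                current, count = [fname], nevents
        return jobs

    # e.g. split_block([('f1.root', 100), ('f2.root', 100)], 150)
    # -> [(['f1.root', 'f2.root'], 150, 0), (['f2.root'], -1, 50)]
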
588 +    def jobSplittingNoBlockBoundary(self,blockSites):
589 +        """
590 +        Like jobSplittingByBlocks, but do not reset file and event counters at block boundaries, so a job may span blocks."""
591 +        # ---- Handle the possible job splitting configurations ---- #
592 +        if (self.selectTotalNumberEvents):
593 +            totalEventsRequested = self.total_number_of_events
594 +        if (self.selectEventsPerJob):
595 +            eventsPerJobRequested = self.eventsPerJob
596 +            if (self.selectNumberOfJobs):
597 +                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
598 +                                                                                                          
599 +        # If user requested all the events in the dataset
600 +        if (totalEventsRequested == -1):
601 +            eventsRemaining=self.maxEvents
602 +        # If user requested more events than are in the dataset
603 +        elif (totalEventsRequested > self.maxEvents):
604 +            eventsRemaining = self.maxEvents
605 +            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
606 +        # If user requested less events than are in the dataset
607 +        else:
608 +            eventsRemaining = totalEventsRequested
609 +                                                                                                          
610 +        # If user requested more events per job than are in the dataset
611 +        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
612 +            eventsPerJobRequested = self.maxEvents
613 +                                                                                                          
614 +        # For user info at end
615 +        totalEventCount = 0
616 +
617 +        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
618 +            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
619 +                                                                                                          
620 +        if (self.selectNumberOfJobs):
621 +            common.logger.message("May not create the exact number_of_jobs requested.")
622 +                                                                                                          
623 +        if ( self.ncjobs == 'all' ) :
624 +            totalNumberOfJobs = 999999999
625 +        else :
626 +            totalNumberOfJobs = self.ncjobs
627 +                                                                                                          
628 +        blocks = blockSites.keys()
629 +        blockCount = 0
630 +        # Backup variable in case self.maxEvents counted events in a non-included block
631 +        numBlocksInDataset = len(blocks)
632 +                                                                                                          
633 +        jobCount = 0
634          list_of_lists = []
339        for i in xrange(0, int(n_tot_files), self.filesPerJob):
340            list_of_lists.append(self.files[0][i: i+self.filesPerJob])
635  
636 <        self.list_of_files = list_of_lists
637 <      
636 >        #AF
637 >        #AF do not reset input files and event count on block boundary
638 >        #AF
639 >        parString=""
640 >        filesEventCount = 0
641 >        #AF
642 >
643 >        # dictionary tracking which jobs belong to which block
644 >        jobsOfBlock = {}
645 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
646 >            block = blocks[blockCount]
647 >            blockCount += 1
648 >            if block not in jobsOfBlock.keys() :
649 >                jobsOfBlock[block] = []
650 >
651 >            if self.eventsbyblock.has_key(block) :
652 >                numEventsInBlock = self.eventsbyblock[block]
653 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
654 >                files = self.filesbyblock[block]
655 >                numFilesInBlock = len(files)
656 >                if (numFilesInBlock <= 0):
657 >                    continue
658 >                fileCount = 0
659 >                #AF
660 >                #AF do not reset input files and event count on block boundary
661 >                #AF
662 >                ## ---- New block => New job ---- #
663 >                #parString = ""
664 >                # counter for number of events in files currently worked on
665 >                #filesEventCount = 0
666 >                #AF
667 >                # flag if next while loop should touch new file
668 >                newFile = 1
669 >                # job event counter
670 >                jobSkipEventCount = 0
671 >
672 >                # ---- Iterate over the files in the block until we've met the requested ---- #
673 >                # ---- total # of events or we've gone over all the files in this block  ---- #
674 >                pString=''
675 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
676 >                    file = files[fileCount]
677 >                    if self.useParent:
678 >                        parent = self.parentFiles[file]
679 >                        for f in parent :
680 >                            pString += '\\\"' + f + '\\\"\,'
681 >                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
682 >                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
683 >                    if newFile :
684 >                        try:
685 >                            numEventsInFile = self.eventsbyfile[file]
686 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
687 >                            # increase filesEventCount
688 >                            filesEventCount += numEventsInFile
689 >                            # Add file to current job
690 >                            parString += '\\\"' + file + '\\\"\,'
691 >                            newFile = 0
692 >                        except KeyError:
693 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
694 >                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
695 >                    #common.logger.message("AF filesEventCount %s - jobSkipEventCount %s "%(filesEventCount,jobSkipEventCount))  
696 >                    # if less events in file remain than eventsPerJobRequested
697 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
698 >                      #AF
699 >                      #AF skip fileboundary part
700 >                      #AF
701 >                            # go to next file
702 >                            newFile = 1
703 >                            fileCount += 1
704 >                    # if events in file equal to eventsPerJobRequested
705 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
706 >                        # close job and touch new file
707 >                        fullString = parString[:-2]
708 >                        if self.useParent:
709 >                            fullParentString = pString[:-2]
710 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
711 >                        else:
712 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
713 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
714 >                        self.jobDestination.append(blockSites[block])
715 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
716 >                        jobsOfBlock[block].append(jobCount+1)
717 >                        # reset counter
718 >                        jobCount = jobCount + 1
719 >                        totalEventCount = totalEventCount + eventsPerJobRequested
720 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
721 >                        jobSkipEventCount = 0
722 >                        # reset file
723 >                        pString = ""
724 >                        parString = ""
725 >                        filesEventCount = 0
726 >                        newFile = 1
727 >                        fileCount += 1
728 >
729 >                    # if more events in file remain than eventsPerJobRequested
730 >                    else :
731 >                        # close job but don't touch new file
732 >                        fullString = parString[:-2]
733 >                        if self.useParent:
734 >                            fullParentString = pString[:-2]
735 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
736 >                        else:
737 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
738 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
739 >                        self.jobDestination.append(blockSites[block])
740 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
741 >                        jobsOfBlock[block].append(jobCount+1)
742 >                        # increase counter
743 >                        jobCount = jobCount + 1
744 >                        totalEventCount = totalEventCount + eventsPerJobRequested
745 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
746 >                        # calculate skip events for last file
747 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
748 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
749 >                        # remove all but the last file
750 >                        filesEventCount = self.eventsbyfile[file]
751 >                        if self.useParent:
752 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
753 >                        parString = '\\\"' + file + '\\\"\,'
754 >                    pass # END if
755 >                pass # END while (iterate over files in the block)
756 >        pass # END while (iterate over blocks in the dataset)
757 >        self.ncjobs = self.total_number_of_jobs = jobCount
758 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
759 >            common.logger.message("eventsRemaining "+str(eventsRemaining))
760 >            common.logger.message("jobCount "+str(jobCount))
761 >            common.logger.message(" totalNumberOfJobs "+str(totalNumberOfJobs))
762 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
763 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
764 >
765 >        # screen output
766 >        screenOutput = "List of jobs and available destination sites:\n\n"
767 >
768 >        #AF
769 >        #AF   skip check on  block with no sites
770 >        #AF
771 >        self.list_of_args = list_of_lists
772 >
773 >        return
774 >
775 >
776 >
777 >    def jobSplittingNoInput(self):
778 >        """
779 >        Perform job splitting based on the number of events per job
780 >        """
781 >        common.logger.debug(5,'Splitting per events')
782 >
783 >        if (self.selectEventsPerJob):
784 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
785 >        if (self.selectNumberOfJobs):
786 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
787 >        if (self.selectTotalNumberEvents):
788 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
789 >
790 >        if (self.total_number_of_events < 0):
791 >            msg='Cannot split jobs per Events with "-1" as total number of events'
792 >            raise CrabException(msg)
793 >
794 >        if (self.selectEventsPerJob):
795 >            if (self.selectTotalNumberEvents):
796 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
797 >            elif(self.selectNumberOfJobs) :
798 >                self.total_number_of_jobs =self.theNumberOfJobs
799 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
800 >
801 >        elif (self.selectNumberOfJobs) :
802 >            self.total_number_of_jobs = self.theNumberOfJobs
803 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
804 >
805 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
806 >
807 >        # is there any remainder?
808 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
809 >
810 >        common.logger.debug(5,'Check  '+str(check))
811 >
812 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each processing '+str(self.eventsPerJob)+' events, for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
813 >        if check > 0:
814 >            common.logger.message('Warning: requested '+str(self.total_number_of_events)+' events, but only '+str(int(self.total_number_of_jobs)*self.eventsPerJob)+' can be processed')
815 >
816 >        # argument is seed number.$i
817 >        self.list_of_args = []
818 >        for i in range(self.total_number_of_jobs):
819 >            ## Since there is no input, any site is good
820 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
821 >            args=[]
822 >            if (self.firstRun):
823 >                ## pythia first run
824 >                args.append(str(self.firstRun)+str(i))
825 >            self.list_of_args.append(args)
826 >
827 >        return
828 >
829 >
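
A minimal, self-contained sketch of the splitting arithmetic used by
jobSplittingNoInput() above; the argument names are illustrative, not CRAB API:

    def split_no_input(total_events=None, events_per_job=None, n_jobs_requested=None):
        # mirror the three cases handled above
        if events_per_job and total_events:
            n_jobs = total_events // events_per_job          # same as the int() cast above
        elif events_per_job and n_jobs_requested:
            n_jobs = n_jobs_requested
            total_events = n_jobs * events_per_job
        else:
            n_jobs = n_jobs_requested
            events_per_job = total_events // n_jobs
        remainder = total_events - n_jobs * events_per_job   # events that cannot be covered
        return n_jobs, events_per_job, remainder

    # 1000 events in chunks of 300 -> 3 jobs, 100 events left over (the "check" warning above)
    print(split_no_input(total_events=1000, events_per_job=300))   # (3, 300, 100)
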
830 >    def jobSplittingForScript(self):
831 >        """
832 >        Perform job splitting based on the number of jobs
833 >        """
834 >        common.logger.debug(5,'Splitting per job')
835 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
836 >
837 >        self.total_number_of_jobs = self.theNumberOfJobs
838 >
839 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
840 >
841 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
842 >
843 >        # argument is seed number.$i
844 >        self.list_of_args = []
845 >        for i in range(self.total_number_of_jobs):
846 >            self.jobDestination.append([""])
847 >            self.list_of_args.append([str(i)])
848          return
849  
850 <    def split(self, jobParams):
851 <
348 <        common.jobDB.load()
349 <        #### Fabio
850 >    def split(self, jobParams,firstJobID):
851 >
852          njobs = self.total_number_of_jobs
853 <        filelist = self.list_of_files
853 >        arglist = self.list_of_args
854          # create the empty structure
855          for i in range(njobs):
856              jobParams.append("")
355        
356        for job in range(njobs):
357            jobParams[job] = filelist[job]
358            common.jobDB.setArguments(job, jobParams[job])
857  
858 <        common.jobDB.save()
858 >        listID=[]
859 >        listField=[]
860 >        for id in range(njobs):
861 >            job = id + int(firstJobID)
862 >            jobParams[id] = arglist[id]
863 >            listID.append(job+1)
864 >            job_ToSave ={}
865 >            concString = ' '
866 >            argu=''
867 >            if len(jobParams[id]):
868 >                argu +=   concString.join(jobParams[id] )
869 >            job_ToSave['arguments']= str(job+1)+' '+argu
870 >            job_ToSave['dlsDestination']= self.jobDestination[id]
871 >            listField.append(job_ToSave)
872 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
873 >            +"                     Destination: "+str(self.jobDestination[id])
874 >            common.logger.debug(5,msg)
875 >        common._db.updateJob_(listID,listField)
876 >        self.argsList = (len(jobParams[0])+1)
877 >
878          return
362    
363    def getJobTypeArguments(self, nj, sched):
364        params = common.jobDB.arguments(nj)
365        #print params
366        parString = "\\{"
367        
368        for i in range(len(params) - 1):
369            parString += '\\\"' + params[i] + '\\\"\,'
370        
371        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
372        return parString
373  
374    def numberOfJobs(self):
375        # Fabio
879  
880 +    def numberOfJobs(self):
881          return self.total_number_of_jobs
378
379
380
381    def checkBlackList(self, allSites):
382        if len(self.reCEBlackList)==0: return allSites
383        sites = []
384        for site in allSites:
385            common.logger.debug(10,'Site '+site)
386            good=1
387            for re in self.reCEBlackList:
388                if re.search(site):
389                    common.logger.message('CE in black list, skipping site '+site)
390                    good=0
391                pass
392            if good: sites.append(site)
393        if len(sites) == 0:
394            common.logger.debug(3,"No sites found after BlackList")
395        return sites
396
397    def checkWhiteList(self, allSites):
398
399        if len(self.reCEWhiteList)==0: return allSites
400        sites = []
401        for site in allSites:
402            good=0
403            for re in self.reCEWhiteList:
404                if re.search(site):
405                    common.logger.debug(5,'CE in white list, adding site '+site)
406                    good=1
407                if not good: continue
408                sites.append(site)
409        if len(sites) == 0:
410            common.logger.message("No sites found after WhiteList\n")
411        else:
412            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
413        return sites
882  
883      def getTarBall(self, exe):
884          """
885          Return the TarBall with lib and exe
886          """
887 <        
420 <        # if it exist, just return it
421 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
887 >        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
888          if os.path.exists(self.tgzNameWithPath):
889              return self.tgzNameWithPath
890  
# Line 431 | Line 897 | class Cmssw(JobType):
897  
898          # First of all declare the user Scram area
899          swArea = self.scram.getSWArea_()
434        #print "swArea = ", swArea
435        swVersion = self.scram.getSWVersion()
436        #print "swVersion = ", swVersion
900          swReleaseTop = self.scram.getReleaseTop_()
901 <        #print "swReleaseTop = ", swReleaseTop
439 <        
901 >
902          ## check if working area is release top
903          if swReleaseTop == '' or swArea == swReleaseTop:
904 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
905              return
906  
907 <        filesToBeTarred = []
908 <        ## First find the executable
909 <        if (self.executable != ''):
910 <            exeWithPath = self.scram.findFile_(executable)
911 < #           print exeWithPath
912 <            if ( not exeWithPath ):
913 <                raise CrabException('User executable '+executable+' not found')
914 <
915 <            ## then check if it's private or not
916 <            if exeWithPath.find(swReleaseTop) == -1:
917 <                # the exe is private, so we must ship
918 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
919 <                path = swArea+'/'
920 <                exe = string.replace(exeWithPath, path,'')
921 <                filesToBeTarred.append(exe)
922 <                pass
923 <            else:
924 <                # the exe is from release, we'll find it on WN
907 >        import tarfile
908 >        try: # create tar ball
909 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
910 >            ## First find the executable
911 >            if (self.executable != ''):
912 >                exeWithPath = self.scram.findFile_(executable)
913 >                if ( not exeWithPath ):
914 >                    raise CrabException('User executable '+executable+' not found')
915 >
916 >                ## then check if it's private or not
917 >                if exeWithPath.find(swReleaseTop) == -1:
918 >                    # the exe is private, so we must ship
919 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
920 >                    path = swArea+'/'
921 >                    # distinguish case when script is in user project area or given by full path somewhere else
922 >                    if exeWithPath.find(path) >= 0 :
923 >                        exe = string.replace(exeWithPath, path,'')
924 >                        tar.add(path+exe,exe)
925 >                    else :
926 >                        tar.add(exeWithPath,os.path.basename(executable))
927 >                    pass
928 >                else:
929 >                    # the exe is from release, we'll find it on WN
930 >                    pass
931 >
932 >            ## Now get the libraries: only those in local working area
933 >            libDir = 'lib'
934 >            lib = swArea+'/' +libDir
935 >            common.logger.debug(5,"lib "+lib+" to be tarred")
936 >            if os.path.exists(lib):
937 >                tar.add(lib,libDir)
938 >
939 >            ## Now check if module dir is present
940 >            moduleDir = 'module'
941 >            module = swArea + '/' + moduleDir
942 >            if os.path.isdir(module):
943 >                tar.add(module,moduleDir)
944 >
945 >            ## Now check if any data dir(s) is present
946 >            self.dataExist = False
947 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
948 >            while len(todo_list):
949 >                entry, name = todo_list.pop()
950 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
951 >                    continue
952 >                if os.path.isdir(swArea+"/src/"+entry):
953 >                    entryPath = entry + '/'
954 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
955 >                    if name == 'data':
956 >                        self.dataExist=True
957 >                        common.logger.debug(5,"data "+entry+" to be tarred")
958 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
959 >                    pass
960                  pass
961 <
962 <        ## Now get the libraries: only those in local working area
963 <        libDir = 'lib'
964 <        lib = swArea+'/' +libDir
965 <        common.logger.debug(5,"lib "+lib+" to be tarred")
966 <        if os.path.exists(lib):
967 <            filesToBeTarred.append(libDir)
968 <
969 <        ## Now check if module dir is present
970 <        moduleDir = 'module'
971 <        if os.path.isdir(swArea+'/'+moduleDir):
972 <            filesToBeTarred.append(moduleDir)
973 <
974 <        ## Now check if the Data dir is present
975 <        dataDir = 'src/Data/'
976 <        if os.path.isdir(swArea+'/'+dataDir):
977 <            filesToBeTarred.append(dataDir)
978 <
979 <        ## Create the tar-ball
980 <        if len(filesToBeTarred)>0:
981 <            cwd = os.getcwd()
982 <            os.chdir(swArea)
983 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
984 <            for line in filesToBeTarred:
985 <                tarcmd = tarcmd + line + ' '
986 <            cout = runCommand(tarcmd)
987 <            if not cout:
988 <                raise CrabException('Could not create tar-ball')
989 <            os.chdir(cwd)
990 <        else:
991 <            common.logger.debug(5,"No files to be to be tarred")
992 <        
993 <        return
994 <        
995 <    def wsSetupEnvironment(self, nj):
961 >
962 >            ### CMSSW ParameterSet
963 >            if not self.pset is None:
964 >                cfg_file = common.work_space.jobDir()+self.configFilename()
965 >                tar.add(cfg_file,self.configFilename())
966 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
967 >
968 >
969 >            ## Add ProdCommon dir to tar
970 >            prodcommonDir = './'
971 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
972 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
973 >                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage']
974 >            for file in neededStuff:
975 >                tar.add(prodcommonPath+file,prodcommonDir+file)
976 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
977 >
978 >            ##### ML stuff
979 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
980 >            path=os.environ['CRABDIR'] + '/python/'
981 >            for file in ML_file_list:
982 >                tar.add(path+file,file)
983 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
984 >
985 >            ##### Utils
986 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
987 >            for file in Utils_file_list:
988 >                tar.add(path+file,file)
989 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
990 >
991 >            ##### AdditionalFiles
992 >            for file in self.additional_inbox_files:
993 >                tar.add(file,string.split(file,'/')[-1])
994 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
995 >
996 >            tar.close()
997 >        except IOError, exc:
998 >            common.logger.write(str(exc))
999 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
1000 >        except tarfile.TarError, exc:
1001 >            common.logger.write(str(exc))
1002 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
1003 >
1004 >        ## check for tarball size
1005 >        tarballinfo = os.stat(self.tgzNameWithPath)
1006 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
1007 >            msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
1008 >               +'MB input sandbox limit \n'
1009 >            msg += '      and not supported by the direct GRID submission system.\n'
1010 >            msg += '      Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
1011 >            msg += '      For further info please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
1012 >            raise CrabException(msg)
1013 >
1014 >        ## create tar-ball with ML stuff
1015 >
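
A stand-alone sketch of the getTarBall() technique above: pack selected
directories into a gzipped tarball with the standard tarfile module and then
enforce a size limit via os.stat(), as done with MaxTarBallSize. The paths,
entries and the 9.5 MB default below are illustrative:

    import os, tarfile

    def make_sandbox(tgz_path, entries, max_mb=9.5):
        tar = tarfile.open(tgz_path, "w:gz")
        try:
            for src, arcname in entries:                     # e.g. ('lib', 'lib'), ('module', 'module')
                if os.path.exists(src):
                    tar.add(src, arcname)
        finally:
            tar.close()
        size_mb = os.stat(tgz_path).st_size / (1024.0 * 1024.0)
        if size_mb > max_mb:
            raise RuntimeError("sandbox %.1f MB exceeds the %.1f MB limit" % (size_mb, max_mb))
        return tgz_path
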
1016 >    def wsSetupEnvironment(self, nj=0):
1017          """
1018          Returns part of a job script which prepares
1019          the execution environment for the job 'nj'.
1020          """
1021 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1022 +            psetName = 'pset.py'
1023 +        else:
1024 +            psetName = 'pset.cfg'
1025          # Prepare JobType-independent part
1026 <        txt = ''
1027 <  
1028 <        ## OLI_Daniele at this level  middleware already known
506 <
507 <        txt += 'if [ $middleware == LCG ]; then \n'
1026 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
1027 >        txt += 'echo ">>> setup environment"\n'
1028 >        txt += 'if [ $middleware == LCG ]; then \n'
1029          txt += self.wsSetupCMSLCGEnvironment_()
1030          txt += 'elif [ $middleware == OSG ]; then\n'
1031 <        txt += '    time=`date -u +"%s"`\n'
1032 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
1033 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
1034 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
1035 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
515 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
516 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
517 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
518 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
519 <        txt += '        exit 1\n'
1031 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
1032 >        txt += '    if [ ! $? == 0 ] ;then\n'
1033 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
1034 >        txt += '        job_exit_code=10016\n'
1035 >        txt += '        func_exit\n'
1036          txt += '    fi\n'
1037 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
1038          txt += '\n'
1039          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
1040          txt += '    cd $WORKING_DIR\n'
1041 <        txt += self.wsSetupCMSOSGEnvironment_()
1041 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
1042 >        txt += self.wsSetupCMSOSGEnvironment_()
1043          txt += 'fi\n'
1044  
1045          # Prepare JobType-specific part
1046          scram = self.scram.commandName()
1047          txt += '\n\n'
1048 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
1048 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
1049 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
1050          txt += scram+' project CMSSW '+self.version+'\n'
1051          txt += 'status=$?\n'
1052          txt += 'if [ $status != 0 ] ; then\n'
1053 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
1054 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
1055 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
537 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
538 <        ## OLI_Daniele
539 <        txt += '    if [ $middleware == OSG ]; then \n'
540 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
541 <        txt += '        cd $RUNTIME_AREA\n'
542 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
543 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
544 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
545 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
546 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
547 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
548 <        txt += '        fi\n'
549 <        txt += '    fi \n'
550 <        txt += '   exit 1 \n'
1053 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
1054 >        txt += '    job_exit_code=10034\n'
1055 >        txt += '    func_exit\n'
1056          txt += 'fi \n'
552        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
1057          txt += 'cd '+self.version+'\n'
1058 <        ### needed grep for bug in scramv1 ###
1058 >        txt += 'SOFTWARE_DIR=`pwd`\n'
1059 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1060          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
1061 <
1061 >        txt += 'if [ $? != 0 ] ; then\n'
1062 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
1063 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
1064 >        txt += '    job_exit_code=10034\n'
1065 >        txt += '    func_exit\n'
1066 >        txt += 'fi \n'
1067          # Handle the arguments:
1068          txt += "\n"
1069          txt += "## number of arguments (first argument always jobnumber)\n"
1070          txt += "\n"
1071 <        txt += "narg=$#\n"
562 <        txt += "if [ $narg -lt 2 ]\n"
1071 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
1072          txt += "then\n"
1073 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
1074 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
1075 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
567 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
568 <        ## OLI_Daniele
569 <        txt += '    if [ $middleware == OSG ]; then \n'
570 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
571 <        txt += '        cd $RUNTIME_AREA\n'
572 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
573 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
574 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
575 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
576 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
577 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
578 <        txt += '        fi\n'
579 <        txt += '    fi \n'
580 <        txt += "    exit 1\n"
1073 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
1074 >        txt += '    job_exit_code=50113\n'
1075 >        txt += "    func_exit\n"
1076          txt += "fi\n"
1077          txt += "\n"
1078  
1079          # Prepare job-specific part
1080          job = common.job_list[nj]
1081 <        pset = os.path.basename(job.configFilename())
1082 <        txt += '\n'
1083 <        txt += 'InputFiles=$2\n'
1084 <        txt += 'echo "<$InputFiles>"\n'
1085 <        #txt += 'echo sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' \n'
591 <        txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
592 <        #txt += 'sed "s#{\'INPUT\'}#${InputFiles}#" $RUNTIME_AREA/'+pset+' > pset1.cfg\n'
1081 >        if (self.datasetPath):
1082 >            self.primaryDataset = self.datasetPath.split("/")[1]
1083 >            DataTier = self.datasetPath.split("/")[2]
1084 >            txt += '\n'
1085 >            txt += 'DatasetPath='+self.datasetPath+'\n'
1086  
1087 <        if len(self.additional_inbox_files) > 0:
1088 <            for file in self.additional_inbox_files:
1089 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
597 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
598 <                txt += '   chmod +x '+file+'\n'
599 <                txt += 'fi\n'
600 <            pass
1087 >            txt += 'PrimaryDataset='+self.primaryDataset +'\n'
1088 >            txt += 'DataTier='+DataTier+'\n'
1089 >            txt += 'ApplicationFamily=cmsRun\n'
1090  
1091 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1091 >        else:
1092 >            self.primaryDataset = 'null'
1093 >            txt += 'DatasetPath=MCDataTier\n'
1094 >            txt += 'PrimaryDataset=null\n'
1095 >            txt += 'DataTier=null\n'
1096 >            txt += 'ApplicationFamily=MCDataTier\n'
1097 >        if self.pset != None:
1098 >            pset = os.path.basename(job.configFilename())
1099 >            txt += '\n'
1100 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1101 >            if (self.datasetPath): # standard job
1102 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
1103 >                if (self.useParent):
1104 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
1105 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
1106 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
1107 >                else:
1108 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
1109 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
1110 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
1111 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
1112 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1113 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1114 >            else:  # pythia like job
1115 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
1116 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
1117 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
1118 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
1119 >                if (self.firstRun):
1120 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
1121 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1122  
1123 <        txt += '\n'
1124 <        txt += 'echo "***** cat pset.cfg *********"\n'
1125 <        txt += 'cat pset.cfg\n'
1126 <        txt += 'echo "****** end pset.cfg ********"\n'
1127 <        txt += '\n'
1128 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
1129 <        # txt += 'cat pset1.cfg\n'
1130 <        # txt += 'echo "****** end pset1.cfg ********"\n'
1123 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
1124 >
1125 >
1126 >        if self.pset != None:
1127 >            # FUTURE: Can simplify for 2_1_x and higher
1128 >            txt += '\n'
1129 >            if self.debug_wrapper==True:
1130 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
1131 >                txt += 'cat ' + psetName + '\n'
1132 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
1133 >                txt += '\n'
1134 >            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1135 >                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
1136 >            else:
1137 >                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
1138 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1139 >            txt += '\n'
1140          return txt
1141  
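
The wrapper above creates its OSG scratch area with /bin/mktemp -d; a rough
Python counterpart of that step (osg_wn_tmp is a placeholder path, and the
error corresponds to the job_exit_code=10016 branch):

    import tempfile

    def make_working_dir(osg_wn_tmp):
        try:
            return tempfile.mkdtemp(prefix='cms_', dir=osg_wn_tmp)   # unique working directory
        except OSError as exc:
            raise RuntimeError('could not create working directory: %s' % exc)
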
1142 <    def wsBuildExe(self, nj):
1142 >    def wsUntarSoftware(self, nj=0):
1143          """
1144          Put in the script the commands to build an executable
1145          or a library.
1146          """
1147  
1148 <        txt = ""
1148 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
1149  
1150          if os.path.isfile(self.tgzNameWithPath):
1151 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1151 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1152              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1153 +            if  self.debug_wrapper:
1154 +                txt += 'ls -Al \n'
1155              txt += 'untar_status=$? \n'
1156              txt += 'if [ $untar_status -ne 0 ]; then \n'
1157 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
1158 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1159 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
630 <            txt += '   if [ $middleware == OSG ]; then \n'
631 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
632 <            txt += '       cd $RUNTIME_AREA\n'
633 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
634 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
635 <            txt += '        echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
636 <            txt += '        echo "JOB_EXIT_STATUS = 50999"\n'
637 <            txt += '        echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
638 <            txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
639 <            txt += '       fi\n'
640 <            txt += '   fi \n'
641 <            txt += '   \n'
642 <            txt += '   exit 1 \n'
1157 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
1158 >            txt += '   job_exit_code=$untar_status\n'
1159 >            txt += '   func_exit\n'
1160              txt += 'else \n'
1161              txt += '   echo "Successful untar" \n'
1162              txt += 'fi \n'
1163 +            txt += '\n'
1164 +            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1165 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1166 +            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1167 +            txt += 'else\n'
1168 +            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1169 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1170 +            txt += 'fi\n'
1171 +            txt += '\n'
1172 +
1173              pass
1174 <        
1174 >
1175          return txt
1176  
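
What the wsUntarSoftware() fragment above does on the worker node, sketched in
Python for clarity (runtime_area and the tarball name are placeholders):

    import os, tarfile

    def untar_sandbox(runtime_area, tgz_name='default.tgz'):
        tar = tarfile.open(os.path.join(runtime_area, tgz_name), 'r:gz')
        try:
            tar.extractall('.')                              # same effect as 'tar xzvf' above
        finally:
            tar.close()
        old = os.environ.get('PYTHONPATH', '')
        os.environ['PYTHONPATH'] = runtime_area + ('/:' + old if old else '/')
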
1177 <    def modifySteeringCards(self, nj):
1177 >    def wsBuildExe(self, nj=0):
1178          """
1179 <        modify the card provided by the user,
1180 <        writing a new card into share dir
1179 >        Put in the script the commands to build an executable
1180 >        or a library.
1181          """
1182 <        
1182 >
1183 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
1184 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
1185 >
1186 >        txt += 'rm -r lib/ module/ \n'
1187 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
1188 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
1189 >        if self.dataExist == True:
1190 >            txt += 'rm -r src/ \n'
1191 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
1192 >        if len(self.additional_inbox_files)>0:
1193 >            for file in self.additional_inbox_files:
1194 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
1195 >        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1196 >        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
1197 >
1198 >        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1199 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1200 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1201 >        txt += 'else\n'
1202 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1203 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1204 >        txt += 'fi\n'
1205 >        txt += '\n'
1206 >
1207 >        return txt
1208 >
1209 >
1210      def executableName(self):
1211 <        return self.executable
1211 >        if self.scriptExe:
1212 >            return "sh "
1213 >        else:
1214 >            return self.executable
1215  
1216      def executableArgs(self):
1217 <        return " -p pset.cfg"
1217 >        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
1218 >        if self.scriptExe: # CarlosDaniele
1219 >            return   self.scriptExe + " $NJob"
1220 >        else:
1221 >            ex_args = ""
1222 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
1223 >            # Framework job report
1224 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1225 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1226 >            # Type of config file
1227 >            if self.CMSSW_major >= 2 :
1228 >                ex_args += " -p pset.py"
1229 >            else:
1230 >                ex_args += " -p pset.cfg"
1231 >            return ex_args
1232  
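
A hedged sketch of the version gating in executableArgs(): the -j (framework
job report) option is added from CMSSW 1_5_x on, and a python configuration is
passed from CMSSW 2_x on. The file names below are examples:

    def cmsrun_args(major, minor, njob=1):
        args = ""
        if (major >= 1 and minor >= 5) or (major >= 2):
            args += " -j crab_fjr_%d.xml" % njob
        args += " -p pset.py" if major >= 2 else " -p pset.cfg"
        return args

    print(cmsrun_args(2, 2))   # -j crab_fjr_1.xml -p pset.py
    print(cmsrun_args(1, 3))   # -p pset.cfg
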
1233      def inputSandbox(self, nj):
1234          """
1235          Returns a list of filenames to be put in JDL input sandbox.
1236          """
1237          inp_box = []
667        # dict added to delete duplicate from input sandbox file list
668        seen = {}
669        ## code
1238          if os.path.isfile(self.tgzNameWithPath):
1239              inp_box.append(self.tgzNameWithPath)
1240 <        ## config
673 <        inp_box.append(common.job_list[nj].configFilename())
674 <        ## additional input files
675 <        #for file in self.additional_inbox_files:
676 <        #    inp_box.append(common.work_space.cwdDir()+file)
1240 >        inp_box.append(common.work_space.jobDir() + self.scriptName)
1241          return inp_box
1242  
1243      def outputSandbox(self, nj):
# Line 682 | Line 1246 | class Cmssw(JobType):
1246          """
1247          out_box = []
1248  
685        stdout=common.job_list[nj].stdout()
686        stderr=common.job_list[nj].stderr()
687
1249          ## User Declared output files
1250 <        for out in self.output_file:
1251 <            n_out = nj + 1
1252 <            out_box.append(self.numberFile_(out,str(n_out)))
1250 >        for out in (self.output_file+self.output_file_sandbox):
1251 >            n_out = nj + 1
1252 >            out_box.append(numberFile(out,str(n_out)))
1253          return out_box
693        return []
1254  
695    def prepareSteeringCards(self):
696        """
697        Make initial modifications of the user's steering card file.
698        """
699        return
1255  
1256      def wsRenameOutput(self, nj):
1257          """
1258          Returns part of a job script which renames the produced files.
1259          """
1260  
1261 <        txt = '\n'
1262 <        txt += '# directory content\n'
1263 <        txt += 'ls \n'
1264 <        file_list = ''
1265 <        for fileWithSuffix in self.output_file:
1266 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1267 <            file_list=file_list+output_file_num+' '
1261 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1262 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1263 >        txt += 'echo ">>> current directory content:"\n'
1264 >        if self.debug_wrapper:
1265 >            txt += 'ls -Al\n'
1266 >        txt += '\n'
1267 >
1268 >        for fileWithSuffix in (self.output_file):
1269 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
1270              txt += '\n'
1271              txt += '# check output file\n'
1272 <            txt += 'ls '+fileWithSuffix+'\n'
1273 <            txt += 'exe_result=$?\n'
1274 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1275 <            txt += '   echo "ERROR: No output file to manage"\n'
1276 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1277 <            txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
1278 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
1279 <            ### OLI_DANIELE
1280 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1272 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1273 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1274 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1275 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1276 >            else:
1277 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1278 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1279 >            txt += 'else\n'
1280 >            txt += '    job_exit_code=60302\n'
1281 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1282 >            if common.scheduler.name().upper() == 'CONDOR_G':
1283                  txt += '    if [ $middleware == OSG ]; then \n'
1284                  txt += '        echo "prepare dummy output file"\n'
1285                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1286                  txt += '    fi \n'
728            txt += 'else\n'
729            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1287              txt += 'fi\n'
1288 <      
1289 <        txt += 'cd $RUNTIME_AREA\n'
1290 <        file_list=file_list[:-1]
1291 <        txt += 'file_list="'+file_list+'"\n'
1292 <        ### OLI_DANIELE
736 <        txt += 'if [ $middleware == OSG ]; then\n'  
737 <        txt += '    cd $RUNTIME_AREA\n'
738 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
739 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
740 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
741 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
742 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
743 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
744 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
745 <        txt += '    fi\n'
746 <        txt += 'fi\n'
1288 >        file_list = []
1289 >        for fileWithSuffix in (self.output_file):
1290 >             file_list.append(numberFile(fileWithSuffix, '$NJob'))
1291 >
1292 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1293          txt += '\n'
1294 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1295 +        txt += 'echo ">>> current directory content:"\n'
1296 +        if self.debug_wrapper:
1297 +            txt += 'ls -Al\n'
1298 +        txt += '\n'
1299 +        txt += 'cd $RUNTIME_AREA\n'
1300 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1301          return txt
1302  
1303 <    def numberFile_(self, file, txt):
751 <        """
752 <        append _'txt' before last extension of a file
753 <        """
754 <        p = string.split(file,".")
755 <        # take away last extension
756 <        name = p[0]
757 <        for x in p[1:-1]:
758 <           name=name+"."+x
759 <        # add "_txt"
760 <        if len(p)>1:
761 <          ext = p[len(p)-1]
762 <          #result = name + '_' + str(txt) + "." + ext
763 <          result = name + '_' + txt + "." + ext
764 <        else:
765 <          #result = name + '_' + str(txt)
766 <          result = name + '_' + txt
767 <        
768 <        return result
769 <
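
The numberFile_() helper removed above is replaced by calls to a shared
numberFile() utility (presumably provided by the crab_util import); a minimal
re-implementation of the same idea, inserting '_<txt>' before the last
extension of a file name:

    def number_file(filename, txt):
        parts = filename.split('.')
        if len(parts) > 1:
            return '.'.join(parts[:-1]) + '_' + str(txt) + '.' + parts[-1]
        return filename + '_' + str(txt)

    print(number_file('output.root', '$NJob'))   # output_$NJob.root
    print(number_file('histo.ana.root', 3))      # histo.ana_3.root
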
770 <    def getRequirements(self):
1303 >    def getRequirements(self, nj=[]):
1304          """
1305 <        return job requirements to add to jdl files
1305 >        return job requirements to add to jdl files
1306          """
1307          req = ''
1308 <        if common.analisys_common_info['sites']:
1309 <            if common.analisys_common_info['sw_version']:
1310 <                req='Member("VO-cms-' + \
1311 <                     common.analisys_common_info['sw_version'] + \
1312 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1313 <            if len(common.analisys_common_info['sites'])>0:
1314 <                req = req + ' && ('
1315 <                for i in range(len(common.analisys_common_info['sites'])):
1316 <                    req = req + 'other.GlueCEInfoHostName == "' \
1317 <                         + common.analisys_common_info['sites'][i] + '"'
1318 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1319 <                        req = req + ' || '
1320 <            req = req + ')'
788 <        #print "req = ", req
1308 >        if self.version:
1309 >            req='Member("VO-cms-' + \
1310 >                 self.version + \
1311 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1312 >        if self.executable_arch:
1313 >            req+=' && Member("VO-cms-' + \
1314 >                 self.executable_arch + \
1315 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1316 >
1317 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1318 >        if ( common.scheduler.name() == "glitecoll" ) or ( common.scheduler.name() == "glite"):
1319 >            req += ' && other.GlueCEStateStatus == "Production" '
1320 >
1321          return req
1322  
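
A sketch of how getRequirements() composes the JDL requirements string from
the software version and architecture tags; the values passed below are
examples only:

    def glue_requirements(version, arch, outbound_ip=True):
        req = 'Member("VO-cms-%s", other.GlueHostApplicationSoftwareRunTimeEnvironment)' % version
        if arch:
            req += ' && Member("VO-cms-%s", other.GlueHostApplicationSoftwareRunTimeEnvironment)' % arch
        if outbound_ip:
            req += ' && (other.GlueHostNetworkAdapterOutboundIP)'
        return req

    print(glue_requirements('CMSSW_2_1_8', 'slc4_ia32_gcc345'))
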
1323      def configFilename(self):
1324          """ return the config filename """
1325 <        return self.name()+'.cfg'
1325 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1326 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1327 >          return self.name()+'.py'
1328 >        else:
1329 >          return self.name()+'.cfg'
1330  
795    ### OLI_DANIELE
1331      def wsSetupCMSOSGEnvironment_(self):
1332          """
1333          Returns part of a job script which prepares
1334          the execution environment and which is common for all CMS jobs.
1335          """
1336 <        txt = '\n'
1337 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1338 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1339 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1340 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1341 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1342 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1343 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1344 <        txt += '   else\n'
1345 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1346 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1347 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1348 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
814 <        txt += '       exit 1\n'
815 <        txt += '\n'
816 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
817 <        txt += '       cd $RUNTIME_AREA\n'
818 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
819 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
820 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
821 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
822 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
823 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
824 <        txt += '       fi\n'
825 <        txt += '\n'
826 <        txt += '       exit 1\n'
827 <        txt += '   fi\n'
1336 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1337 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1338 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1339 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1340 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1341 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1342 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1343 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1344 >        txt += '    else\n'
1345 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1346 >        txt += '        job_exit_code=10020\n'
1347 >        txt += '        func_exit\n'
1348 >        txt += '    fi\n'
1349          txt += '\n'
1350 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1351 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1350 >        txt += '    echo "==> setup cms environment ok"\n'
1351 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1352  
1353          return txt
1354 <
834 <    ### OLI_DANIELE
1354 >
1355      def wsSetupCMSLCGEnvironment_(self):
1356          """
1357          Returns part of a job script which prepares
1358          the execution environment and which is common for all CMS jobs.
1359          """
1360 <        txt  = '   \n'
1361 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1362 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1363 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1364 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1365 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1366 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1367 <        txt += '       exit 1\n'
1368 <        txt += '   else\n'
1369 <        txt += '       echo "Sourcing environment... "\n'
1370 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1371 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1372 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1373 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1374 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1375 <        txt += '           exit 1\n'
1376 <        txt += '       fi\n'
1377 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1378 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1379 <        txt += '       result=$?\n'
1380 <        txt += '       if [ $result -ne 0 ]; then\n'
1381 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1382 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1383 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1384 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1385 <        txt += '           exit 1\n'
1386 <        txt += '       fi\n'
1387 <        txt += '   fi\n'
1388 <        txt += '   \n'
1389 <        txt += '   string=`cat /etc/redhat-release`\n'
1390 <        txt += '   echo $string\n'
1391 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1392 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1393 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1394 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1395 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1396 <        txt += '   else\n'
1397 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1398 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1399 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1400 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1401 <        txt += '       exit 1\n'
1402 <        txt += '   fi\n'
1403 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1404 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1360 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1361 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1362 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1363 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1364 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1365 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1366 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1367 >        txt += '        job_exit_code=10031\n'
1368 >        txt += '        func_exit\n'
1369 >        txt += '    else\n'
1370 >        txt += '        echo "Sourcing environment... "\n'
1371 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1372 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1373 >        txt += '            job_exit_code=10020\n'
1374 >        txt += '            func_exit\n'
1375 >        txt += '        fi\n'
1376 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1377 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1378 >        txt += '        result=$?\n'
1379 >        txt += '        if [ $result -ne 0 ]; then\n'
1380 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1381 >        txt += '            job_exit_code=10032\n'
1382 >        txt += '            func_exit\n'
1383 >        txt += '        fi\n'
1384 >        txt += '    fi\n'
1385 >        txt += '    \n'
1386 >        txt += '    echo "==> setup cms environment ok"\n'
1387 >        return txt
1388 >
1389 >    def wsModifyReport(self, nj):
1390 >        """
1391 >        insert the part of the script that modifies the FrameworkJob Report
1392 >        """
1393 >        txt = '\n#Written by cms_cmssw::wsModifyReport\n'
1394 >        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1395 >        if (publish_data == 1):
1396 >
1397 >            txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n'
1398 >            txt += '    FOR_LFN=$LFNBaseName/${PSETHASH}/\n'
1399 >            txt += 'else\n'
1400 >            txt += '    FOR_LFN=/copy_problems/ \n'
1401 >            txt += '    SE=""\n'
1402 >            txt += '    SE_PATH=""\n'
1403 >            txt += 'fi\n'
1404 >
1405 >            txt += 'echo ">>> Modify Job Report:" \n'
1406 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1407 >            txt += 'ProcessedDataset= $procDataset \n'
1408 >            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1409 >            txt += 'echo "SE = $SE"\n'
1410 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1411 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1412 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1413 >            args = '$RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' \
1414 >                   '$User -$ProcessedDataset-$PSETHASH $ApplicationFamily '+ \
1415 >                    '  $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1416 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)
1417 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)
1418 >            txt += 'modifyReport_result=$?\n'
1419 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1420 >            txt += '    modifyReport_result=70500\n'
1421 >            txt += '    job_exit_code=$modifyReport_result\n'
1422 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1423 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1424 >            txt += 'else\n'
1425 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1426 >            txt += 'fi\n'
1427 >        return txt
1428 >
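
The publication branch above is driven by a single flag read from the user
configuration; a minimal sketch of that cfg_params.get pattern, using an
illustrative stand-in for the parsed crab.cfg:

    cfg_params = {'USER.publish_data': '1'}          # hypothetical parsed configuration

    publish_data = int(cfg_params.get('USER.publish_data', 0))
    if publish_data == 1:
        print('job report will be rewritten with the publication LFN')
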
1429 >    def wsParseFJR(self):
1430 >        """
1431 >        Parse the FrameworkJobReport to obtain useful infos
1432 >        """
1433 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1434 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1435 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1436 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1437 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1438 >        if self.debug_wrapper :
1439 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1440 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1441 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1442 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1443 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1444 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1445 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1446 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
1447 >        txt += '        else\n'
1448 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1449 >        txt += '        fi\n'
1450 >        txt += '    else\n'
1451 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1452 >        txt += '    fi\n'
1453 >          #### Patch to check input data reading for CMSSW16x; hopefully we'll remove it asap
1454 >        txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1455 >        txt += '      echo ">>> Executable succeeded $executable_exit_status"\n'
1456 >        if (self.datasetPath and not (self.dataset_pu or self.useParent)) :
1457 >          # VERIFY PROCESSED DATA
1458 >            txt += '      echo ">>> Verify list of processed files:"\n'
1459 >            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1460 >            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1461 >            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1462 >            txt += '      mv tmp.txt input-files.txt\n'
1463 >            txt += '      echo "cat input-files.txt"\n'
1464 >            txt += '      echo "----------------------"\n'
1465 >            txt += '      cat input-files.txt\n'
1466 >            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1467 >            txt += '      mv tmp.txt processed-files.txt\n'
1468 >            txt += '      echo "----------------------"\n'
1469 >            txt += '      echo "cat processed-files.txt"\n'
1470 >            txt += '      echo "----------------------"\n'
1471 >            txt += '      cat processed-files.txt\n'
1472 >            txt += '      echo "----------------------"\n'
1473 >            txt += '      diff -q input-files.txt processed-files.txt\n'
1474 >            txt += '      fileverify_status=$?\n'
1475 >            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1476 >            txt += '         executable_exit_status=30001\n'
1477 >            txt += '         echo "ERROR ==> not all input files processed"\n'
1478 >            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1479 >            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1480 >            txt += '      fi\n'
1481 >        txt += '    elif [ $executable_exit_status -ne 0 ] || [ $executable_exit_status -ne 50015 ] || [ $executable_exit_status -ne 50017 ];then\n'
1482 >        txt += '      echo ">>> Executable failed  $executable_exit_status"\n'
1483 >        txt += '      func_exit\n'
1484 >        txt += '    fi\n'
1485 >        txt += '\n'
1486 >        txt += 'else\n'
1487 >        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1488 >        txt += 'fi\n'
1489 >        txt += '\n'
1490 >        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1491 >        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1492 >        txt += 'job_exit_code=$executable_exit_status\n'
1493 >
1494          return txt
1495  
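
The file-verification step scripted above compares the unique list of input
LFNs against the LFNs reported in crab_fjr.xml (a diff of two sorted lists); a
small Python sketch of the same check, flagging inputs that were not processed
(as with executable_exit_status=30001 in the wrapper):

    def all_inputs_processed(input_files, processed_files):
        missing = set(input_files) - set(processed_files)
        return (len(missing) == 0, sorted(missing))

    ok, missing = all_inputs_processed(['/store/a.root', '/store/b.root'],
                                       ['/store/a.root'])
    print((ok, missing))   # (False, ['/store/b.root'])
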
1496      def setParam_(self, param, value):
# Line 889 | Line 1498 | class Cmssw(JobType):
1498  
1499      def getParams(self):
1500          return self._params
1501 +
1502 +    def uniquelist(self, old):
1503 +        """
1504 +        remove duplicates from a list
1505 +        """
1506 +        nd={}
1507 +        for e in old:
1508 +            nd[e]=0
1509 +        return nd.keys()
1510 +
1511 +    def outList(self):
1512 +        """
1513 +        build and export the list of output files to be checked
1514 +        """
1515 +        txt = ''
1516 +        txt += 'echo ">>> list of expected files on output sandbox"\n'
1517 +        listOutFiles = []
1518 +        stdout = 'CMSSW_$NJob.stdout'
1519 +        stderr = 'CMSSW_$NJob.stderr'
1520 +        if (self.return_data == 1):
1521 +            for file in (self.output_file+self.output_file_sandbox):
1522 +                listOutFiles.append(numberFile(file, '$NJob'))
1523 +            listOutFiles.append(stdout)
1524 +            listOutFiles.append(stderr)
1525 +        else:
1526 +            for file in (self.output_file_sandbox):
1527 +                listOutFiles.append(numberFile(file, '$NJob'))
1528 +            listOutFiles.append(stdout)
1529 +            listOutFiles.append(stderr)
1530 +        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1531 +        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1532 +        txt += 'export filesToCheck\n'
1533 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines