ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.33 by mkirn, Fri Jul 28 18:19:34 2006 UTC vs.
Revision 1.125 by mcinquil, Wed Oct 10 13:29:26 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 < import math
5 > from BlackWhiteListParser import BlackWhiteListParser
6   import common
7 import PsetManipulator  
8
9 import DBSInfo_EDM
10 import DataDiscovery_EDM
11 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
21        self.analisys_common_info = {}
22        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18 +
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27 +        # number of jobs requested to be created; limits job splitting
28 +        self.ncjobs = ncjobs
29 +
30          log = common.logger
31          
32          self.scram = Scram.Scram(cfg_params)
28        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      #scrip use case Da  
41 +        self.datasetPath = '' #scrip use case Da
42  
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +        
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 +            raise CrabException(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +        
61 +        common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
37        common.analisys_common_info['sw_version'] = self.version
38        ### FEDE
39        common.analisys_common_info['copy_input_data'] = 0
40        common.analisys_common_info['events_management'] = 1
63  
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67 +        try:
68 +            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 +        except KeyError:
70 +            self.use_dbs_1 = 0
71 +            
72          try:
73              tmp =  cfg_params['CMSSW.datasetpath']
74              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 59 | Line 88 | class Cmssw(JobType):
88              self.setParam_('dataset', 'None')
89              self.setParam_('owner', 'None')
90          else:
91 <            datasetpath_split = self.datasetPath.split("/")
92 <            self.setParam_('dataset', datasetpath_split[1])
93 <            self.setParam_('owner', datasetpath_split[-1])
94 <
91 >            try:
92 >                datasetpath_split = self.datasetPath.split("/")
93 >                # standard style
94 >                self.setParam_('datasetFull', self.datasetPath)
95 >                if self.use_dbs_1 == 1 :
96 >                    self.setParam_('dataset', datasetpath_split[1])
97 >                    self.setParam_('owner', datasetpath_split[-1])
98 >                else:
99 >                    self.setParam_('dataset', datasetpath_split[1])
100 >                    self.setParam_('owner', datasetpath_split[2])
101 >            except:
102 >                self.setParam_('dataset', self.datasetPath)
103 >                self.setParam_('owner', self.datasetPath)
104 >                
105          self.setTaskid_()
106          self.setParam_('taskId', self.cfg_params['taskId'])
107  
# Line 85 | Line 124 | class Cmssw(JobType):
124          try:
125              self.pset = cfg_params['CMSSW.pset']
126              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if (not os.path.exists(self.pset)):
128 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
127 >            if self.pset.lower() != 'none' :
128 >                if (not os.path.exists(self.pset)):
129 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 >            else:
131 >                self.pset = None
132          except KeyError:
133              raise CrabException("PSet file missing. Cannot run cmsRun ")
134  
135          # output files
136 +        ## stuff which must be returned always via sandbox
137 +        self.output_file_sandbox = []
138 +
139 +        # add fjr report by default via sandbox
140 +        self.output_file_sandbox.append(self.fjrFileName)
141 +
142 +        # other output files to be returned via sandbox or copied to SE
143          try:
144              self.output_file = []
96
145              tmp = cfg_params['CMSSW.output_file']
146              if tmp != '':
147                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 103 | Line 151 | class Cmssw(JobType):
151                      self.output_file.append(tmp)
152                      pass
153              else:
154 <                log.message("No output file defined: only stdout/err will be available")
154 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155                  pass
156              pass
157          except KeyError:
158 <            log.message("No output file defined: only stdout/err will be available")
158 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159              pass
160  
161          # script_exe file as additional file in inputSandbox
162          try:
163              self.scriptExe = cfg_params['USER.script_exe']
116            self.additional_inbox_files.append(self.scriptExe)
164              if self.scriptExe != '':
165                 if not os.path.isfile(self.scriptExe):
166 <                  msg ="WARNING. file "+self.scriptExe+" not found"
166 >                  msg ="ERROR. file "+self.scriptExe+" not found"
167                    raise CrabException(msg)
168 +               self.additional_inbox_files.append(string.strip(self.scriptExe))
169          except KeyError:
170 <           pass
171 <                  
170 >            self.scriptExe = ''
171 >
172 >        #CarlosDaniele
173 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 >           msg ="Error. script_exe  not defined"
175 >           raise CrabException(msg)
176 >
177          ## additional input files
178          try:
179              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180              for tmp in tmpAddFiles:
181 <                if not os.path.exists(tmp):
182 <                    raise CrabException("Additional input file not found: "+tmp)
183 <                self.additional_inbox_files.append(string.strip(tmp))
181 >                tmp = string.strip(tmp)
182 >                dirname = ''
183 >                if not tmp[0]=="/": dirname = "."
184 >                files = []
185 >                if string.find(tmp,"*")>-1:
186 >                    files = glob.glob(os.path.join(dirname, tmp))
187 >                    if len(files)==0:
188 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
189 >                else:
190 >                    files.append(tmp)
191 >                for file in files:
192 >                    if not os.path.exists(file):
193 >                        raise CrabException("Additional input file not found: "+file)
194 >                    pass
195 >                    # fname = string.split(file, '/')[-1]
196 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 >                    # shutil.copyfile(file, storedFile)
198 >                    self.additional_inbox_files.append(string.strip(file))
199                  pass
200              pass
201 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202          except KeyError:
203              pass
204  
205          # files per job
206          try:
207 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
208 <            self.selectFilesPerJob = 1
207 >            if (cfg_params['CMSSW.files_per_jobs']):
208 >                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209          except KeyError:
210 <            self.filesPerJob = 0
142 <            self.selectFilesPerJob = 0
210 >            pass
211  
212          ## Events per job
213          try:
# Line 157 | Line 225 | class Cmssw(JobType):
225              self.theNumberOfJobs = 0
226              self.selectNumberOfJobs = 0
227  
228 +        try:
229 +            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230 +            self.selectTotalNumberEvents = 1
231 +        except KeyError:
232 +            self.total_number_of_events = 0
233 +            self.selectTotalNumberEvents = 0
234 +
235 +        if self.pset != None: #CarlosDaniele
236 +             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237 +                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238 +                 raise CrabException(msg)
239 +        else:
240 +             if (self.selectNumberOfJobs == 0):
241 +                 msg = 'Must specify  number_of_jobs.'
242 +                 raise CrabException(msg)
243 +
244          ## source seed for pythia
245          try:
246              self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
# Line 170 | Line 254 | class Cmssw(JobType):
254              self.sourceSeedVtx = None
255              common.logger.debug(5,"No vertex seed given")
256  
173        if not (self.selectFilesPerJob + self.selectEventsPerJob + self.selectNumberOfJobs == 1 ):
174            msg = 'Must define either files_per_jobs or events_per_job or number_of_jobs'
175            raise CrabException(msg)
176
257          try:
258 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
258 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259          except KeyError:
260 <            msg = 'Must define total_number_of_events'
261 <            raise CrabException(msg)
262 <        
183 <        CEBlackList = []
260 >            self.sourceSeedG4 = None
261 >            common.logger.debug(5,"No g4 sim hits seed given")
262 >
263          try:
264 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
186 <            for tmp in tmpBad:
187 <                tmp=string.strip(tmp)
188 <                CEBlackList.append(tmp)
264 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265          except KeyError:
266 <            pass
266 >            self.sourceSeedMix = None
267 >            common.logger.debug(5,"No mix seed given")
268  
269 <        self.reCEBlackList=[]
270 <        for bad in CEBlackList:
194 <            self.reCEBlackList.append(re.compile( bad ))
195 <
196 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
197 <
198 <        CEWhiteList = []
199 <        try:
200 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
201 <            for tmp in tmpGood:
202 <                tmp=string.strip(tmp)
203 <                CEWhiteList.append(tmp)
269 >        try:
270 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
271          except KeyError:
272 <            pass
273 <
274 <        #print 'CEWhiteList: ',CEWhiteList
275 <        self.reCEWhiteList=[]
276 <        for Good in CEWhiteList:
277 <            self.reCEWhiteList.append(re.compile( Good ))
278 <
279 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
280 <
214 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
272 >            self.firstRun = None
273 >            common.logger.debug(5,"No first run given")
274 >        if self.pset != None: #CarlosDaniele
275 >            ver = string.split(self.version,"_")
276 >            if (int(ver[1])>=1 and int(ver[2])>=5):
277 >                import PsetManipulator150 as pp
278 >            else:
279 >                import PsetManipulator as pp
280 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
281  
282          #DBSDLS-start
283          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
284          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
285          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
286 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
287          ## Perform the data location and discovery (based on DBS/DLS)
288          ## SL: Don't if NONE is specified as input (pythia use case)
289 <        common.analisys_common_info['sites']=None
289 >        blockSites = {}
290          if self.datasetPath:
291 <            self.DataDiscoveryAndLocation(cfg_params)
291 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
292          #DBSDLS-end          
293  
294          self.tgzNameWithPath = self.getTarBall(self.executable)
295      
296          ## Select Splitting
297 <        if self.selectNoInput: self.jobSplittingNoInput()
298 <        elif self.selectFilesPerJob or self.selectEventsPerJob or self.selectNumberOfJobs: self.jobSplittingPerFiles()
297 >        if self.selectNoInput:
298 >            if self.pset == None: #CarlosDaniele
299 >                self.jobSplittingForScript()
300 >            else:
301 >                self.jobSplittingNoInput()
302          else:
303 <            msg = 'Don\'t know how to split...'
234 <            raise CrabException(msg)
303 >            self.jobSplittingByBlocks(blockSites)
304  
305          # modify Pset
306 <        try:
307 <            if (self.datasetPath): # standard job
308 <                #self.PsetEdit.maxEvent(self.eventsPerJob)
309 <                # always process all events in a file
310 <                self.PsetEdit.maxEvent("-1")
311 <                self.PsetEdit.inputModule("INPUT")
312 <
313 <            else:  # pythia like job
314 <                self.PsetEdit.maxEvent(self.eventsPerJob)
315 <                if (self.sourceSeed) :
316 <                    self.PsetEdit.pythiaSeed("INPUT")
317 <                    if (self.sourceSeedVtx) :
318 <                        self.PsetEdit.pythiaSeedVtx("INPUTVTX")
319 <            self.PsetEdit.psetWriter(self.configFilename())
320 <        except:
321 <            msg='Error while manipuliating ParameterSet: exiting...'
322 <            raise CrabException(msg)
306 >        if self.pset != None: #CarlosDaniele
307 >            try:
308 >                if (self.datasetPath): # standard job
309 >                    # allow processing a fraction of the events in a file
310 >                    PsetEdit.inputModule("INPUT")
311 >                    PsetEdit.maxEvent("INPUTMAXEVENTS")
312 >                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 >                else:  # pythia like job
314 >                    PsetEdit.maxEvent(self.eventsPerJob)
315 >                    if (self.firstRun):
316 >                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
317 >                    if (self.sourceSeed) :
318 >                        PsetEdit.pythiaSeed("INPUT")
319 >                        if (self.sourceSeedVtx) :
320 >                            PsetEdit.vtxSeed("INPUTVTX")
321 >                        if (self.sourceSeedG4) :
322 >                            PsetEdit.g4Seed("INPUTG4")
323 >                        if (self.sourceSeedMix) :
324 >                            PsetEdit.mixSeed("INPUTMIX")
325 >                # add FrameworkJobReport to parameter-set
326 >                PsetEdit.addCrabFJR(self.fjrFileName)
327 >                PsetEdit.psetWriter(self.configFilename())
328 >            except:
329 >                msg='Error while manipuliating ParameterSet: exiting...'
330 >                raise CrabException(msg)
331  
332      def DataDiscoveryAndLocation(self, cfg_params):
333  
334 +        import DataDiscovery
335 +        import DataDiscovery_DBS2
336 +        import DataLocation
337          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
338  
339          datasetPath=self.datasetPath
340  
261        ## TODO
262        dataTiersList = ""
263        dataTiers = dataTiersList.split(',')
264
341          ## Contact the DBS
342 +        common.logger.message("Contacting Data Discovery Services ...")
343          try:
344 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
344 >
345 >            if self.use_dbs_1 == 1 :
346 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 >            else :
348 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
349              self.pubdata.fetchDBSInfo()
350  
351 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
351 >        except DataDiscovery.NotExistingDatasetError, ex :
352              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
353              raise CrabException(msg)
354 <
274 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
354 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
355              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
356              raise CrabException(msg)
357 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
358 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
357 >        except DataDiscovery.DataDiscoveryError, ex:
358 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
359 >            raise CrabException(msg)
360 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362 >            raise CrabException(msg)
363 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365 >            raise CrabException(msg)
366 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368              raise CrabException(msg)
369  
370 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
371 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
372 <        common.logger.message("Required data are :"+self.datasetPath)
284 <
285 <        filesbyblock=self.pubdata.getFiles()
286 < #        print filesbyblock
287 <        self.AllInputFiles=filesbyblock.values()
288 <        self.files = self.AllInputFiles        
370 >        self.filesbyblock=self.pubdata.getFiles()
371 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
372 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
373  
374          ## get max number of events
291        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
375          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
293        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
376  
377          ## Contact the DLS and build a list of sites hosting the fileblocks
378          try:
379 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
379 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
380              dataloc.fetchDLSInfo()
381 <        except DataLocation_EDM.DataLocationError , ex:
381 >        except DataLocation.DataLocationError , ex:
382              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
383              raise CrabException(msg)
384          
303        allsites=dataloc.getSites()
304        common.logger.debug(5,"sites are %s"%allsites)
305        sites=self.checkBlackList(allsites)
306        common.logger.debug(5,"sites are (after black list) %s"%sites)
307        sites=self.checkWhiteList(sites)
308        common.logger.debug(5,"sites are (after white list) %s"%sites)
385  
386 <        if len(sites)==0:
387 <            msg = 'No sites hosting all the needed data! Exiting... '
388 <            raise CrabException(msg)
386 >        sites = dataloc.getSites()
387 >        allSites = []
388 >        listSites = sites.values()
389 >        for listSite in listSites:
390 >            for oneSite in listSite:
391 >                allSites.append(oneSite)
392 >        allSites = self.uniquelist(allSites)
393  
394 <        common.logger.message("List of Sites ("+str(len(sites))+") hosting the data : "+str(sites))
395 <        common.logger.debug(6, "List of Sites: "+str(sites))
396 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
397 <        self.setParam_('TargetCE', ','.join(sites))
318 <        return
394 >        # screen output
395 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
396 >
397 >        return sites
398      
399 <    def jobSplittingPerFiles(self):
399 >    def jobSplittingByBlocks(self, blockSites):
400          """
401 <        Perform job splitting based on number of files to be accessed per job
402 <        """
403 <        common.logger.debug(5,'Splitting per input files')
404 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
405 <        common.logger.message('Available '+str(self.maxEvents)+' events in total ')
406 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
407 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
408 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job')
409 <
410 <        ## if asked to process all events, do it
411 <        if self.total_number_of_events == -1:
412 <            self.total_number_of_events=self.maxEvents
401 >        Perform job splitting. Jobs run over an integer number of files
402 >        and no more than one block.
403 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
404 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
405 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
406 >                  self.maxEvents, self.filesbyblock
407 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
408 >              self.total_number_of_jobs - Total # of jobs
409 >              self.list_of_args - File(s) job will run on (a list of lists)
410 >        """
411 >
412 >        # ---- Handle the possible job splitting configurations ---- #
413 >        if (self.selectTotalNumberEvents):
414 >            totalEventsRequested = self.total_number_of_events
415 >        if (self.selectEventsPerJob):
416 >            eventsPerJobRequested = self.eventsPerJob
417 >            if (self.selectNumberOfJobs):
418 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
419 >
420 >        # If user requested all the events in the dataset
421 >        if (totalEventsRequested == -1):
422 >            eventsRemaining=self.maxEvents
423 >        # If user requested more events than are in the dataset
424 >        elif (totalEventsRequested > self.maxEvents):
425 >            eventsRemaining = self.maxEvents
426 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
427 >        # If user requested no more events than are in the dataset
428          else:
429 <            if self.total_number_of_events>self.maxEvents:
336 <                common.logger.message("Asked "+str(self.total_number_of_events)+" but only "+str(self.maxEvents)+" available.")
337 <                self.total_number_of_events=self.maxEvents
338 <            pass
429 >            eventsRemaining = totalEventsRequested
430  
431 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
432 <        n_tot_files = (len(self.files[0]))
433 <        ## SL: this is wrong if the files have different number of events
343 <        evPerFile = int(self.maxEvents)/n_tot_files
344 <
345 <        common.logger.debug(5,'Events per File '+str(evPerFile))
346 <
347 <        ## compute job splitting parameters: filesPerJob, eventsPerJob and theNumberOfJobs
348 <        if self.selectFilesPerJob:
349 <            ## user define files per event.
350 <            filesPerJob = self.filesPerJob
351 <            eventsPerJob = filesPerJob*evPerFile
352 <            theNumberOfJobs = int(self.total_number_of_events*1./eventsPerJob)
353 <            check = int(self.total_number_of_events) - (theNumberOfJobs*eventsPerJob)
354 <            if check > 0:
355 <                theNumberOfJobs +=1
356 <                filesLastJob = int(check*1./evPerFile+0.5)
357 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
358 <            else:
359 <                filesLastJob = filesPerJob
431 >        # If user requested more events per job than are in the dataset
432 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
433 >            eventsPerJobRequested = self.maxEvents
434  
435 <        elif self.selectNumberOfJobs:
436 <            ## User select the number of jobs: last might be bigger to match request of events
363 <            theNumberOfJobs =  self.theNumberOfJobs
364 <
365 <            eventsPerJob = self.total_number_of_events/theNumberOfJobs
366 <            filesPerJob = int(eventsPerJob/evPerFile)
367 <            if (filesPerJob==0) : filesPerJob=1
368 <            check = int(self.total_number_of_events) - (int(theNumberOfJobs)*filesPerJob*evPerFile)
369 <            if not check == 0:
370 <                if check<0:
371 <                    missingFiles = int(check/evPerFile)
372 <                    additionalJobs = int(missingFiles/filesPerJob)
373 <                    #print missingFiles, additionalJobs
374 <                    theNumberOfJobs+=additionalJobs
375 <                    common.logger.message('Warning: will create only '+str(theNumberOfJobs)+' jobs')
376 <                    check = int(self.total_number_of_events) - (int(theNumberOfJobs)*filesPerJob*evPerFile)
377 <                    
378 <                if check >0 :
379 <                    filesLastJob = filesPerJob+int(check*1./evPerFile+0.5)
380 <                    common.logger.message('Warning: last job will be created with '+str(filesLastJob*evPerFile)+' events')
381 <                else:
382 <                    filesLastJob = filesPerJob
383 <            else:
384 <                filesLastJob = filesPerJob
385 <        elif self.selectEventsPerJob:
386 <            # SL case if asked events per job
387 <            ## estimate the number of files per job to match the user requirement
388 <            filesPerJob = int(float(self.eventsPerJob)/float(evPerFile))
389 <            if filesPerJob==0: filesPerJob=1
390 <            common.logger.debug(5,"filesPerJob "+str(filesPerJob))
391 <            if (filesPerJob==0): filesPerJob=1
392 <            eventsPerJob=filesPerJob*evPerFile
393 <            theNumberOfJobs = int(self.total_number_of_events)/int(eventsPerJob)
394 <            check = int(self.total_number_of_events) - (int(theNumberOfJobs)*eventsPerJob)
395 <            if not check == 0:
396 <                missingFiles = int(check/evPerFile)
397 <                additionalJobs = int(missingFiles/filesPerJob)
398 <                if ( additionalJobs>0) : theNumberOfJobs+=additionalJobs
399 <                check = int(self.total_number_of_events) - (int(theNumberOfJobs)*eventsPerJob)
400 <                if not check == 0:
401 <                    if (check <0 ):
402 <                        filesLastJob = filesPerJob+int(check*1./evPerFile-0.5)
403 <                    else:
404 <                        theNumberOfJobs+=1
405 <                        filesLastJob = int(check*1./evPerFile+0.5)
435 >        # For user info at end
436 >        totalEventCount = 0
437  
438 <                    common.logger.message('Warning: last job will be created with '+str(filesLastJob*evPerFile)+' events')
439 <                else:
409 <                    filesLastJob = filesPerJob
410 <            else:
411 <                filesLastJob = filesPerJob
412 <        
413 <        self.total_number_of_jobs = theNumberOfJobs
438 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
439 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
440  
441 <        totalEventsToBeUsed=theNumberOfJobs*filesPerJob*evPerFile
442 <        if not check == 0:
417 <        #    print (theNumberOfJobs-1)*filesPerJob*evPerFile,filesLastJob*evPerFile
418 <            totalEventsToBeUsed=(theNumberOfJobs-1)*filesPerJob*evPerFile+filesLastJob*evPerFile
441 >        if (self.selectNumberOfJobs):
442 >            common.logger.message("May not create the exact number_of_jobs requested.")
443  
444 <        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created, each for '+str(filesPerJob*evPerFile)+' events, for a total of '+str(totalEventsToBeUsed)+' events')
444 >        if ( self.ncjobs == 'all' ) :
445 >            totalNumberOfJobs = 999999999
446 >        else :
447 >            totalNumberOfJobs = self.ncjobs
448 >            
449  
450 <        totalFilesToBeUsed=filesPerJob*(theNumberOfJobs-1)+filesLastJob
450 >        blocks = blockSites.keys()
451 >        blockCount = 0
452 >        # Backup variable in case self.maxEvents counted events in a non-included block
453 >        numBlocksInDataset = len(blocks)
454  
455 <        ## set job arguments (files)
455 >        jobCount = 0
456          list_of_lists = []
457 <        lastFile=0
458 <        for i in range(0, int(totalFilesToBeUsed), filesPerJob)[:-1]:
459 <            parString = "\\{"
457 >
458 >        # dictionary tracking which jobs belong to which block
459 >        jobsOfBlock = {}
460 >
461 >        # ---- Iterate over the blocks in the dataset until ---- #
462 >        # ---- we've met the requested total # of events    ---- #
463 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
464 >            block = blocks[blockCount]
465 >            blockCount += 1
466 >            if block not in jobsOfBlock.keys() :
467 >                jobsOfBlock[block] = []
468              
469 <            lastFile=i+filesPerJob
470 <            params = self.files[0][i: lastFile]
471 <            for i in range(len(params) - 1):
433 <                parString += '\\\"' + params[i] + '\\\"\,'
469 >            if self.eventsbyblock.has_key(block) :
470 >                numEventsInBlock = self.eventsbyblock[block]
471 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
472              
473 <            parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
474 <            list_of_lists.append([parString])
475 <            pass
476 <
477 <        ## last job
478 <        parString = "\\{"
473 >                files = self.filesbyblock[block]
474 >                numFilesInBlock = len(files)
475 >                if (numFilesInBlock <= 0):
476 >                    continue
477 >                fileCount = 0
478 >
479 >                # ---- New block => New job ---- #
480 >                parString = "\\{"
481 >                # counter for number of events in files currently worked on
482 >                filesEventCount = 0
483 >                # flag if next while loop should touch new file
484 >                newFile = 1
485 >                # job event counter
486 >                jobSkipEventCount = 0
487 >            
488 >                # ---- Iterate over the files in the block until we've met the requested ---- #
489 >                # ---- total # of events or we've gone over all the files in this block  ---- #
490 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
491 >                    file = files[fileCount]
492 >                    if newFile :
493 >                        try:
494 >                            numEventsInFile = self.eventsbyfile[file]
495 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
496 >                            # increase filesEventCount
497 >                            filesEventCount += numEventsInFile
498 >                            # Add file to current job
499 >                            parString += '\\\"' + file + '\\\"\,'
500 >                            newFile = 0
501 >                        except KeyError:
502 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503 >                        
504 >
505 >                    # if less events in file remain than eventsPerJobRequested
506 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
507 >                        # if last file in block
508 >                        if ( fileCount == numFilesInBlock-1 ) :
509 >                            # end job using last file, use remaining events in block
510 >                            # close job and touch new file
511 >                            fullString = parString[:-2]
512 >                            fullString += '\\}'
513 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
514 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
515 >                            self.jobDestination.append(blockSites[block])
516 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
517 >                            # fill jobs of block dictionary
518 >                            jobsOfBlock[block].append(jobCount+1)
519 >                            # reset counter
520 >                            jobCount = jobCount + 1
521 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
522 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
523 >                            jobSkipEventCount = 0
524 >                            # reset file
525 >                            parString = "\\{"
526 >                            filesEventCount = 0
527 >                            newFile = 1
528 >                            fileCount += 1
529 >                        else :
530 >                            # go to next file
531 >                            newFile = 1
532 >                            fileCount += 1
533 >                    # if events in file equal to eventsPerJobRequested
534 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
535 >                        # close job and touch new file
536 >                        fullString = parString[:-2]
537 >                        fullString += '\\}'
538 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
539 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
540 >                        self.jobDestination.append(blockSites[block])
541 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
542 >                        jobsOfBlock[block].append(jobCount+1)
543 >                        # reset counter
544 >                        jobCount = jobCount + 1
545 >                        totalEventCount = totalEventCount + eventsPerJobRequested
546 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
547 >                        jobSkipEventCount = 0
548 >                        # reset file
549 >                        parString = "\\{"
550 >                        filesEventCount = 0
551 >                        newFile = 1
552 >                        fileCount += 1
553 >                        
554 >                    # if more events in file remain than eventsPerJobRequested
555 >                    else :
556 >                        # close job but don't touch new file
557 >                        fullString = parString[:-2]
558 >                        fullString += '\\}'
559 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
560 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
561 >                        self.jobDestination.append(blockSites[block])
562 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
563 >                        jobsOfBlock[block].append(jobCount+1)
564 >                        # increase counter
565 >                        jobCount = jobCount + 1
566 >                        totalEventCount = totalEventCount + eventsPerJobRequested
567 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
568 >                        # calculate skip events for last file
569 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
570 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
571 >                        # remove all but the last file
572 >                        filesEventCount = self.eventsbyfile[file]
573 >                        parString = "\\{"
574 >                        parString += '\\\"' + file + '\\\"\,'
575 >                    pass # END if
576 >                pass # END while (iterate over files in the block)
577 >        pass # END while (iterate over blocks in the dataset)
578 >        self.ncjobs = self.total_number_of_jobs = jobCount
579 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
580 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
581 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
582          
583 <        params = self.files[0][lastFile: lastFile+filesLastJob]
584 <        for i in range(len(params) - 1):
585 <            parString += '\\\"' + params[i] + '\\\"\,'
583 >        # screen output
584 >        screenOutput = "List of jobs and available destination sites:\n\n"
585 >
586 >        # keep trace of block with no sites to print a warning at the end
587 >        noSiteBlock = []
588 >        bloskNoSite = []
589 >
590 >        blockCounter = 0
591 >        for block in blocks:
592 >            if block in jobsOfBlock.keys() :
593 >                blockCounter += 1
594 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
595 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
596 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
597 >                    bloskNoSite.append( blockCounter )
598          
599 <        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
600 <        list_of_lists.append([parString])
601 <        pass
599 >        common.logger.message(screenOutput)
600 >
601 >        msg = 'WARNING: No sites are hosting any part of data for block:\n                '
602 >        virgola = ""
603 >        if len(bloskNoSite) > 1:
604 >            virgola = ","
605 >        for block in bloskNoSite:
606 >            msg += ' ' + str(block) + virgola
607 >
608 >        msg += '\n               Related jobs:\n                 '
609 >        virgola = ""
610 >        if len(noSiteBlock) > 1:
611 >            virgola = ","
612 >        for range_jobs in noSiteBlock:
613 >            msg += str(range_jobs) + virgola
614 >        msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
615 >
616 >        common.logger.message(msg)
617  
618          self.list_of_args = list_of_lists
451        # print self.list_of_args[0]
619          return
620  
621      def jobSplittingNoInput(self):
# Line 465 | Line 632 | class Cmssw(JobType):
632              raise CrabException(msg)
633  
634          if (self.selectEventsPerJob):
635 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
635 >            if (self.selectTotalNumberEvents):
636 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
637 >            elif(self.selectNumberOfJobs) :  
638 >                self.total_number_of_jobs =self.theNumberOfJobs
639 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
640 >
641          elif (self.selectNumberOfJobs) :
642              self.total_number_of_jobs = self.theNumberOfJobs
643              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
644 <
644 >
645          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
646  
647          # is there any remainder?
# Line 477 | Line 649 | class Cmssw(JobType):
649  
650          common.logger.debug(5,'Check  '+str(check))
651  
652 <        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
652 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
653          if check > 0:
654 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but will do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
483 <
654 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
655  
656          # argument is seed number.$i
657          self.list_of_args = []
658          for i in range(self.total_number_of_jobs):
659 +            ## Since there is no input, any site is good
660 +           # self.jobDestination.append(["Any"])
661 +            self.jobDestination.append([""]) #must be empty to write correctly the xml
662 +            args=[]
663 +            if (self.firstRun):
664 +                    ## pythia first run
665 +                #self.list_of_args.append([(str(self.firstRun)+str(i))])
666 +                args.append(str(self.firstRun)+str(i))
667 +            else:
668 +                ## no first run
669 +                #self.list_of_args.append([str(i)])
670 +                args.append(str(i))
671              if (self.sourceSeed):
672 +                args.append(str(self.sourceSeed)+str(i))
673                  if (self.sourceSeedVtx):
674 <                    ## pythia + vtx random seed
675 <                    self.list_of_args.append([
676 <                                              str(self.sourceSeed)+str(i),
677 <                                              str(self.sourceSeedVtx)+str(i)
678 <                                              ])
679 <                else:
680 <                    ## only pythia random seed
681 <                    self.list_of_args.append([(str(self.sourceSeed)+str(i))])
682 <            else:
683 <                ## no random seed
684 <                self.list_of_args.append([str(i)])
685 <        #print self.list_of_args
674 >                    ## + vtx random seed
675 >                    args.append(str(self.sourceSeedVtx)+str(i))
676 >                if (self.sourceSeedG4):
677 >                    ## + G4 random seed
678 >                    args.append(str(self.sourceSeedG4)+str(i))
679 >                if (self.sourceSeedMix):    
680 >                    ## + Mix random seed
681 >                    args.append(str(self.sourceSeedMix)+str(i))
682 >                pass
683 >            pass
684 >            self.list_of_args.append(args)
685 >        pass
686 >            
687 >        # print self.list_of_args
688 >
689 >        return
690 >
691 >
692 >    def jobSplittingForScript(self):#CarlosDaniele
693 >        """
694 >        Perform job splitting based on number of job
695 >        """
696 >        common.logger.debug(5,'Splitting per job')
697 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
698 >
699 >        self.total_number_of_jobs = self.theNumberOfJobs
700 >
701 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
702 >
703 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
704  
705 +        # argument is seed number.$i
706 +        self.list_of_args = []
707 +        for i in range(self.total_number_of_jobs):
708 +            ## Since there is no input, any site is good
709 +           # self.jobDestination.append(["Any"])
710 +            self.jobDestination.append([""])
711 +            ## no random seed
712 +            self.list_of_args.append([str(i)])
713          return
714  
715      def split(self, jobParams):
# Line 517 | Line 727 | class Cmssw(JobType):
727              # print str(arglist[job])
728              # print jobParams[job]
729              common.jobDB.setArguments(job, jobParams[job])
730 +            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
731 +            common.jobDB.setDestination(job, self.jobDestination[job])
732  
733          common.jobDB.save()
734          return
# Line 531 | Line 743 | class Cmssw(JobType):
743          # Fabio
744          return self.total_number_of_jobs
745  
534    def checkBlackList(self, allSites):
535        if len(self.reCEBlackList)==0: return allSites
536        sites = []
537        for site in allSites:
538            common.logger.debug(10,'Site '+site)
539            good=1
540            for re in self.reCEBlackList:
541                if re.search(site):
542                    common.logger.message('CE in black list, skipping site '+site)
543                    good=0
544                pass
545            if good: sites.append(site)
546        if len(sites) == 0:
547            common.logger.debug(3,"No sites found after BlackList")
548        return sites
549
550    def checkWhiteList(self, allSites):
551
552        if len(self.reCEWhiteList)==0: return allSites
553        sites = []
554        for site in allSites:
555            good=0
556            for re in self.reCEWhiteList:
557                if re.search(site):
558                    common.logger.debug(5,'CE in white list, adding site '+site)
559                    good=1
560                if not good: continue
561                sites.append(site)
562        if len(sites) == 0:
563            common.logger.message("No sites found after WhiteList\n")
564        else:
565            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
566        return sites
567
746      def getTarBall(self, exe):
747          """
748          Return the TarBall with lib and exe
749          """
750          
751          # if it exist, just return it
752 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
752 >        #
753 >        # Marco. Let's start to use relative path for Boss XML files
754 >        #
755 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
756          if os.path.exists(self.tgzNameWithPath):
757              return self.tgzNameWithPath
758  
# Line 585 | Line 766 | class Cmssw(JobType):
766          # First of all declare the user Scram area
767          swArea = self.scram.getSWArea_()
768          #print "swArea = ", swArea
769 <        swVersion = self.scram.getSWVersion()
770 <        #print "swVersion = ", swVersion
769 >        # swVersion = self.scram.getSWVersion()
770 >        # print "swVersion = ", swVersion
771          swReleaseTop = self.scram.getReleaseTop_()
772          #print "swReleaseTop = ", swReleaseTop
773          
# Line 594 | Line 775 | class Cmssw(JobType):
775          if swReleaseTop == '' or swArea == swReleaseTop:
776              return
777  
778 <        filesToBeTarred = []
779 <        ## First find the executable
780 <        if (self.executable != ''):
781 <            exeWithPath = self.scram.findFile_(executable)
782 < #           print exeWithPath
783 <            if ( not exeWithPath ):
784 <                raise CrabException('User executable '+executable+' not found')
785 <
786 <            ## then check if it's private or not
787 <            if exeWithPath.find(swReleaseTop) == -1:
788 <                # the exe is private, so we must ship
789 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
790 <                path = swArea+'/'
791 <                exe = string.replace(exeWithPath, path,'')
792 <                filesToBeTarred.append(exe)
793 <                pass
794 <            else:
795 <                # the exe is from release, we'll find it on WN
796 <                pass
797 <
798 <        ## Now get the libraries: only those in local working area
799 <        libDir = 'lib'
800 <        lib = swArea+'/' +libDir
801 <        common.logger.debug(5,"lib "+lib+" to be tarred")
802 <        if os.path.exists(lib):
803 <            filesToBeTarred.append(libDir)
804 <
805 <        ## Now check if module dir is present
806 <        moduleDir = 'module'
807 <        if os.path.isdir(swArea+'/'+moduleDir):
808 <            filesToBeTarred.append(moduleDir)
809 <
810 <        ## Now check if the Data dir is present
811 <        dataDir = 'src/Data/'
812 <        if os.path.isdir(swArea+'/'+dataDir):
813 <            filesToBeTarred.append(dataDir)
814 <
815 <        ## Create the tar-ball
816 <        if len(filesToBeTarred)>0:
817 <            cwd = os.getcwd()
818 <            os.chdir(swArea)
819 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
820 <            for line in filesToBeTarred:
821 <                tarcmd = tarcmd + line + ' '
822 <            cout = runCommand(tarcmd)
823 <            if not cout:
824 <                raise CrabException('Could not create tar-ball')
825 <            os.chdir(cwd)
826 <        else:
827 <            common.logger.debug(5,"No files to be to be tarred")
778 >        import tarfile
779 >        try: # create tar ball
780 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
781 >            ## First find the executable
782 >            if (self.executable != ''):
783 >                exeWithPath = self.scram.findFile_(executable)
784 >                if ( not exeWithPath ):
785 >                    raise CrabException('User executable '+executable+' not found')
786 >    
787 >                ## then check if it's private or not
788 >                if exeWithPath.find(swReleaseTop) == -1:
789 >                    # the exe is private, so we must ship
790 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
791 >                    path = swArea+'/'
792 >                    # distinguish case when script is in user project area or given by full path somewhere else
793 >                    if exeWithPath.find(path) >= 0 :
794 >                        exe = string.replace(exeWithPath, path,'')
795 >                        tar.add(path+exe,os.path.basename(executable))
796 >                    else :
797 >                        tar.add(exeWithPath,os.path.basename(executable))
798 >                    pass
799 >                else:
800 >                    # the exe is from release, we'll find it on WN
801 >                    pass
802 >    
803 >            ## Now get the libraries: only those in local working area
804 >            libDir = 'lib'
805 >            lib = swArea+'/' +libDir
806 >            common.logger.debug(5,"lib "+lib+" to be tarred")
807 >            if os.path.exists(lib):
808 >                tar.add(lib,libDir)
809 >    
810 >            ## Now check if module dir is present
811 >            moduleDir = 'module'
812 >            module = swArea + '/' + moduleDir
813 >            if os.path.isdir(module):
814 >                tar.add(module,moduleDir)
815 >
816 >            ## Now check if any data dir(s) is present
817 >            swAreaLen=len(swArea)
818 >            for root, dirs, files in os.walk(swArea):
819 >                if "data" in dirs:
820 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
821 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
822 >
823 >            ## Add ProdAgent dir to tar
824 >            paDir = 'ProdAgentApi'
825 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
826 >            if os.path.isdir(pa):
827 >                tar.add(pa,paDir)
828 >
829 >            ### FEDE FOR DBS PUBLICATION
830 >            ## Add PRODCOMMON dir to tar
831 >            prodcommonDir = 'ProdCommon'
832 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
833 >            if os.path.isdir(prodcommonPath):
834 >                tar.add(prodcommonPath,prodcommonDir)
835 >            #############################    
836 >        
837 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
838 >            tar.close()
839 >        except :
840 >            raise CrabException('Could not create tar-ball')
841 >
842 >        ## check for tarball size
843 >        tarballinfo = os.stat(self.tgzNameWithPath)
844 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
845 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
846 >
847 >        ## create tar-ball with ML stuff
848 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
849 >        try:
850 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
851 >            path=os.environ['CRABDIR'] + '/python/'
852 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
853 >                tar.add(path+file,file)
854 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
855 >            tar.close()
856 >        except :
857 >            raise CrabException('Could not create ML files tar-ball')
858          
859          return
860          
861 +    def additionalInputFileTgz(self):
862 +        """
863 +        Put all additional files into a tar ball and return its name
864 +        """
865 +        import tarfile
866 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
867 +        tar = tarfile.open(tarName, "w:gz")
868 +        for file in self.additional_inbox_files:
869 +            tar.add(file,string.split(file,'/')[-1])
870 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
871 +        tar.close()
872 +        return tarName
873 +
874      def wsSetupEnvironment(self, nj):
875          """
876          Returns part of a job script which prepares
# Line 658 | Line 882 | class Cmssw(JobType):
882          ## OLI_Daniele at this level  middleware already known
883  
884          txt += 'if [ $middleware == LCG ]; then \n'
885 +        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
886 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
887 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
888          txt += self.wsSetupCMSLCGEnvironment_()
889          txt += 'elif [ $middleware == OSG ]; then\n'
890 <        txt += '    time=`date -u +"%s"`\n'
891 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
665 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
666 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
890 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
891 >        txt += '    echo "Created working directory: $WORKING_DIR"\n'
892          txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
893          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
894 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
895 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
896 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
894 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
895 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
896 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
897          txt += '        rm -f $RUNTIME_AREA/$repo \n'
898          txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
899          txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 678 | Line 903 | class Cmssw(JobType):
903          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
904          txt += '    cd $WORKING_DIR\n'
905          txt += self.wsSetupCMSOSGEnvironment_()
906 +        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
907 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
908          txt += 'fi\n'
909  
910          # Prepare JobType-specific part
# Line 700 | Line 927 | class Cmssw(JobType):
927          txt += '        cd $RUNTIME_AREA\n'
928          txt += '        /bin/rm -rf $WORKING_DIR\n'
929          txt += '        if [ -d $WORKING_DIR ] ;then\n'
930 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
931 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
932 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
933 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
930 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
931 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
932 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
933 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
934          txt += '            rm -f $RUNTIME_AREA/$repo \n'
935          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
936          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 713 | Line 940 | class Cmssw(JobType):
940          txt += 'fi \n'
941          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
942          txt += 'cd '+self.version+'\n'
943 +        ########## FEDE FOR DBS2 ######################
944 +        txt += 'SOFTWARE_DIR=`pwd`\n'
945 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
946 +        ###############################################
947          ### needed grep for bug in scramv1 ###
948 +        txt += scram+' runtime -sh\n'
949          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
950 +        txt += 'echo $PATH\n'
951  
952          # Handle the arguments:
953          txt += "\n"
# Line 736 | Line 969 | class Cmssw(JobType):
969          txt += '        cd $RUNTIME_AREA\n'
970          txt += '        /bin/rm -rf $WORKING_DIR\n'
971          txt += '        if [ -d $WORKING_DIR ] ;then\n'
972 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
973 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
974 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
975 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
972 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
973 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
974 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
975 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
976          txt += '            rm -f $RUNTIME_AREA/$repo \n'
977          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
978          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 751 | Line 984 | class Cmssw(JobType):
984  
985          # Prepare job-specific part
986          job = common.job_list[nj]
987 <        pset = os.path.basename(job.configFilename())
988 <        txt += '\n'
989 <        if (self.datasetPath): # standard job
990 <            #txt += 'InputFiles=$2\n'
758 <            txt += 'InputFiles=${args[1]}\n'
759 <            txt += 'echo "Inputfiles:<$InputFiles>"\n'
760 <            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
761 <        else:  # pythia like job
762 <            if (self.sourceSeed):
763 <                txt += 'Seed=$2\n'
764 <                txt += 'echo "Seed: <$Seed>"\n'
765 <                txt += 'sed "s#\<INPUT\>#$Seed#" $RUNTIME_AREA/'+pset+' > tmp.cfg\n'
766 <                if (self.sourceSeedVtx):
767 <                    txt += 'VtxSeed=$3\n'
768 <                    txt += 'echo "VtxSeed: <$VtxSeed>"\n'
769 <                    txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp.cfg > pset.cfg\n'
770 <                else:
771 <                    txt += 'mv tmp.cfg pset.cfg\n'
772 <            else:
773 <                txt += '# Copy untouched pset\n'
774 <                txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
987 >        ### FEDE FOR DBS OUTPUT PUBLICATION
988 >        if (self.datasetPath):
989 >            txt += '\n'
990 >            txt += 'DatasetPath='+self.datasetPath+'\n'
991  
992 +            datasetpath_split = self.datasetPath.split("/")
993 +            
994 +            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
995 +            txt += 'DataTier='+datasetpath_split[2]+'\n'
996 +            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
997 +            txt += 'ApplicationFamily=cmsRun\n'
998 +
999 +        else:
1000 +            txt += 'DatasetPath=MCDataTier\n'
1001 +            txt += 'PrimaryDataset=null\n'
1002 +            txt += 'DataTier=null\n'
1003 +            #txt += 'ProcessedDataset=null\n'
1004 +            txt += 'ApplicationFamily=MCDataTier\n'
1005 +        if self.pset != None: #CarlosDaniele
1006 +            pset = os.path.basename(job.configFilename())
1007 +            txt += '\n'
1008 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1009 +            if (self.datasetPath): # standard job
1010 +                #txt += 'InputFiles=$2\n'
1011 +                txt += 'InputFiles=${args[1]}\n'
1012 +                txt += 'MaxEvents=${args[2]}\n'
1013 +                txt += 'SkipEvents=${args[3]}\n'
1014 +                txt += 'echo "Inputfiles:<$InputFiles>"\n'
1015 +                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1016 +                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1017 +                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1018 +                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1019 +                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1020 +            else:  # pythia like job
1021 +                seedIndex=1
1022 +                if (self.firstRun):
1023 +                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1024 +                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1025 +                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1026 +                    seedIndex=seedIndex+1
1027 +
1028 +                if (self.sourceSeed):
1029 +                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1030 +                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1031 +                    seedIndex=seedIndex+1
1032 +                    ## the following seeds are not always present
1033 +                    if (self.sourceSeedVtx):
1034 +                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1035 +                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1036 +                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1037 +                        seedIndex += 1
1038 +                    if (self.sourceSeedG4):
1039 +                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1040 +                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1041 +                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1042 +                        seedIndex += 1
1043 +                    if (self.sourceSeedMix):
1044 +                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1045 +                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1046 +                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1047 +                        seedIndex += 1
1048 +                    pass
1049 +                pass
1050 +            txt += 'mv -f '+pset+' pset.cfg\n'
1051  
1052          if len(self.additional_inbox_files) > 0:
1053 <            for file in self.additional_inbox_files:
1054 <                relFile = file.split("/")[-1]
1055 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
781 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
782 <                txt += '   chmod +x '+relFile+'\n'
783 <                txt += 'fi\n'
1053 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1054 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1055 >            txt += 'fi\n'
1056              pass
1057  
1058 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1059 <
1060 <        txt += '\n'
1061 <        txt += 'echo "***** cat pset.cfg *********"\n'
1062 <        txt += 'cat pset.cfg\n'
1063 <        txt += 'echo "****** end pset.cfg ********"\n'
1064 <        txt += '\n'
1065 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
1066 <        # txt += 'cat pset1.cfg\n'
1067 <        # txt += 'echo "****** end pset1.cfg ********"\n'
1058 >        if self.pset != None: #CarlosDaniele
1059 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1060 >        
1061 >            txt += '\n'
1062 >            txt += 'echo "***** cat pset.cfg *********"\n'
1063 >            txt += 'cat pset.cfg\n'
1064 >            txt += 'echo "****** end pset.cfg ********"\n'
1065 >            txt += '\n'
1066 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1067 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1068 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1069 >            ##############
1070 >            txt += '\n'
1071 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1072 >            # txt += 'cat pset1.cfg\n'
1073 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1074          return txt
1075  
1076 <    def wsBuildExe(self, nj):
1076 >    def wsBuildExe(self, nj=0):
1077          """
1078          Put in the script the commands to build an executable
1079          or a library.
# Line 830 | Line 1108 | class Cmssw(JobType):
1108              txt += 'else \n'
1109              txt += '   echo "Successful untar" \n'
1110              txt += 'fi \n'
1111 +            txt += '\n'
1112 +            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1113 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1114 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1115 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1116 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1117 +            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1118 +            txt += 'else\n'
1119 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1120 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1121 +            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1122 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1123 +            ###################  
1124 +            txt += 'fi\n'
1125 +            txt += '\n'
1126 +
1127              pass
1128          
1129          return txt
# Line 841 | Line 1135 | class Cmssw(JobType):
1135          """
1136          
def executableName(self):
    """
    Return the command used to launch the job payload.

    When a user script is configured (``self.scriptExe`` is non-empty) the
    launcher is the literal ``"sh "``; otherwise ``self.executable`` is
    returned unchanged.
    """
    if not self.scriptExe:
        return self.executable
    # CarlosDaniele: user script use case, run it through the shell
    return "sh "
1142  
def executableArgs(self):
    """
    Return the argument string passed to the job launcher.

    * With a user script (``self.scriptExe``) the arguments are the script
      name followed by ``" $NJob"``.
    * Otherwise the arguments select ``pset.cfg``; ``-e`` is added when the
      CMSSW release reported by ``self.scram.getSWVersion()`` is 1_5 or
      newer.

    Raises CrabException when the major/minor numbers cannot be read from
    the version string (expected shape: ``<name>_<major>_<minor>_...``).
    """
    if self.scriptExe:  # CarlosDaniele
        return self.scriptExe + " $NJob"

    # if >= CMSSW_1_5_X, add -e
    version_array = self.scram.getSWVersion().split('_')
    try:
        release = (int(version_array[1]), int(version_array[2]))
    except (IndexError, ValueError):
        # Too few '_'-separated fields, or non-numeric major/minor.
        msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
        raise CrabException(msg)

    # Compare (major, minor) as a pair: testing the two numbers
    # independently would classify e.g. 2_0 as older than 1_5.
    if release >= (1, 5):
        return " -e -p pset.cfg"
    return " -p pset.cfg"
1161  
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list is assembled in this order:
      * ``self.tgzNameWithPath`` and ``self.MLtgzfile``, each only if it
        exists as a regular file;
      * the job configuration file (work-space tgz path + ``job/`` +
        ``self.configFilename()``) when ``self.pset`` is not None;
      * the value of ``self.additionalInputFileTgz()``, always.

    ``nj`` is accepted but not used here.
    """
    ## code
    tarballs = (self.tgzNameWithPath, self.MLtgzfile)
    inp_box = [path for path in tarballs if os.path.isfile(path)]

    ## config
    if self.pset is not None:
        job_dir = common.work_space.pathForTgz() + 'job/'
        inp_box.append(job_dir + self.configFilename())

    ## additional input files
    inp_box.append(self.additionalInputFileTgz())
    return inp_box
1181  
1182      def outputSandbox(self, nj):
# Line 869 | Line 1185 | class Cmssw(JobType):
1185          """
1186          out_box = []
1187  
872        stdout=common.job_list[nj].stdout()
873        stderr=common.job_list[nj].stderr()
874
1188          ## User Declared output files
1189 <        for out in self.output_file:
1189 >        for out in (self.output_file+self.output_file_sandbox):
1190              n_out = nj + 1
1191              out_box.append(self.numberFile_(out,str(n_out)))
1192          return out_box
880        return []
1193  
1194      def prepareSteeringCards(self):
1195          """
# Line 893 | Line 1205 | class Cmssw(JobType):
1205          txt = '\n'
1206          txt += '# directory content\n'
1207          txt += 'ls \n'
1208 <        file_list = ''
1209 <        for fileWithSuffix in self.output_file:
1208 >
1209 >        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1210              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
899            file_list=file_list+output_file_num+' '
1211              txt += '\n'
1212              txt += '# check output file\n'
1213 <            txt += 'ls '+fileWithSuffix+'\n'
1214 <            txt += 'ls_result=$?\n'
1215 <            #txt += 'exe_result=$?\n'
1216 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1217 <            txt += '   echo "ERROR: Problem with output file"\n'
1218 <            #txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1219 <            #txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
1220 <            #txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
1221 <            ### OLI_DANIELE
1213 >            # txt += 'ls '+fileWithSuffix+'\n'
1214 >            # txt += 'ls_result=$?\n'
1215 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1216 >            ###### FEDE FOR OUTPUT DATA PUBLICATION ########
1217 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1218 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1219 >            ################################################
1220 >            txt += 'else\n'
1221 >            txt += '    exit_status=60302\n'
1222 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1223 >            ############# FEDE ADDED CHECK FOR OUTPUT #############
1224 >            ## MATTY's FIX: the exit option was interrupting the execution
1225 >            if fileWithSuffix in self.output_file:
1226 >                txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1227 >                txt += '    output_exit_status=$exit_status\n'
1228 >                txt += '    # exit $exit_status\n'
1229 >            #######################################################    
1230              if common.scheduler.boss_scheduler_name == 'condor_g':
1231                  txt += '    if [ $middleware == OSG ]; then \n'
1232                  txt += '        echo "prepare dummy output file"\n'
1233                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1234                  txt += '    fi \n'
916            txt += 'else\n'
917            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1235              txt += 'fi\n'
1236 <      
1236 >        file_list = []
1237 >        for fileWithSuffix in (self.output_file):
1238 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1239 >            
1240 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1241          txt += 'cd $RUNTIME_AREA\n'
921        file_list=file_list[:-1]
922        txt += 'file_list="'+file_list+'"\n'
923        txt += 'cd $RUNTIME_AREA\n'
924        ### OLI_DANIELE
925        txt += 'if [ $middleware == OSG ]; then\n'  
926        txt += '    cd $RUNTIME_AREA\n'
927        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
928        txt += '    /bin/rm -rf $WORKING_DIR\n'
929        txt += '    if [ -d $WORKING_DIR ] ;then\n'
930        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
931        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
932        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
933        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
934        txt += '        rm -f $RUNTIME_AREA/$repo \n'
935        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
936        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
937        txt += '    fi\n'
938        txt += 'fi\n'
939        txt += '\n'
1242          return txt
1243  
1244      def numberFile_(self, file, txt):
# Line 947 | Line 1249 | class Cmssw(JobType):
1249          # take away last extension
1250          name = p[0]
1251          for x in p[1:-1]:
1252 <           name=name+"."+x
1252 >            name=name+"."+x
1253          # add "_txt"
1254          if len(p)>1:
1255 <          ext = p[len(p)-1]
1256 <          #result = name + '_' + str(txt) + "." + ext
955 <          result = name + '_' + txt + "." + ext
1255 >            ext = p[len(p)-1]
1256 >            result = name + '_' + txt + "." + ext
1257          else:
1258 <          #result = name + '_' + str(txt)
958 <          result = name + '_' + txt
1258 >            result = name + '_' + txt
1259          
1260          return result
1261  
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result is a conjunction (`` && ``) of:
      * a ``Member("VO-cms-<version>", ...)`` clause when ``self.version``
        is set;
      * a ``Member("VO-cms-<arch>", ...)`` clause when
        ``self.executable_arch`` is set;
      * ``(other.GlueHostNetworkAdapterOutboundIP)``, always.

    ``nj`` is accepted for interface compatibility and is not used.
    """
    runtime_env = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + runtime_env)
    ## SL add requirement for OS version
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + runtime_env)
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    # Joining the clauses (instead of appending ' && ...' to a possibly
    # empty string) avoids a dangling leading ' && ' when no software
    # version is configured.
    return ' && '.join(clauses)
1281  
1282      def configFilename(self):
# Line 994 | Line 1293 | class Cmssw(JobType):
1293          txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1294          txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1295          txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1296 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1297          txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1298 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1299 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1300 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1298 >        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1299 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1300 >        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1301 >        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1302          txt += '   else\n'
1303 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1303 >        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1304          txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1305          txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1306          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
# Line 1012 | Line 1313 | class Cmssw(JobType):
1313          txt += '       cd $RUNTIME_AREA\n'
1314          txt += '       /bin/rm -rf $WORKING_DIR\n'
1315          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1316 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1317 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1318 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1319 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1320 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1321 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1322 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1316 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1317 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1318 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1319 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1320 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1321 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1322 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1323          txt += '       fi\n'
1324          txt += '\n'
1325          txt += '       exit 1\n'
# Line 1073 | Line 1374 | class Cmssw(JobType):
1374          txt += '       fi\n'
1375          txt += '   fi\n'
1376          txt += '   \n'
1076        txt += '   string=`cat /etc/redhat-release`\n'
1077        txt += '   echo $string\n'
1078        txt += '   if [[ $string = *alhalla* ]]; then\n'
1079        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1080        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1081        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1082        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1083        txt += '   else\n'
1084        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1085        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1086        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1087        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1088        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1089        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1090        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1091        txt += '       exit 1\n'
1092        txt += '   fi\n'
1377          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1378          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1379          return txt
1380  
1381 +    ### FEDE FOR DBS OUTPUT PUBLICATION
def modifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    Publication is enabled only when the ``USER.publish_data`` entry of
    ``self.cfg_params`` converts to the integer 1 (a missing entry counts
    as 0).  When disabled, the returned fragment is a single ``echo``.
    When enabled, the fragment sets ``ProcessedDataset`` from
    ``USER.publish_data_name``, derives ``FOR_LFN`` from ``$SE_PATH``, runs
    ``ModifyJobReport.py`` and, on success, moves
    ``NewFrameworkJobReport.xml`` over ``crab_fjr_$NJob.xml``.

    ``nj`` is accepted but not used here.
    """
    try:
        publish_data = int(self.cfg_params['USER.publish_data'])
    except KeyError:
        publish_data = 0

    if publish_data != 1:
        return 'echo "no data publication required"\n'

    processedDataset = self.cfg_params['USER.publish_data_name']

    ################ FEDE FOR DBS2 #############################################
    modifier = '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py'
    # The same command line is first echoed to the log and then executed.
    command = modifier + ' crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'

    fragments = [
        'echo "Modify Job Report" \n',
        'chmod a+x ' + modifier + '\n',
        'if [ -z "$SE" ]; then\n',
        '    SE="" \n',
        'fi \n',
        'if [ -z "$SE_PATH" ]; then\n',
        '    SE_PATH="" \n',
        'fi \n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'ProcessedDataset=' + processedDataset + '\n',
        #### LFN=/store/user/<user>/processedDataset_PSETHASH
        'if [ "$SE_PATH" == "" ]; then\n',
        #### FEDE: added slash in LFN ##############
        '    FOR_LFN=/copy_problems/ \n',
        'else \n',
        "    tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'` \n",
        #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
        '    FOR_LFN=/store$tmp \n',
        'fi \n',
        'echo "ProcessedDataset = $ProcessedDataset"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'echo modifyReport_result = $modifyReport_result\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        '    exit_status=1\n',
        '    echo "ERROR: Problem with ModifyJobReport"\n',
        'else\n',
        '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(fragments)
1449 +
def cleanEnv(self):
    """
    Return the shell fragment that cleans up the worker node.

    The fragment only acts when ``$middleware`` is ``OSG``: it changes to
    ``$RUNTIME_AREA``, removes ``$WORKING_DIR`` and, if the directory is
    still present afterwards, reports exit code 60999 through the
    ``$RUNTIME_AREA/$repo`` file.
    """
    ### OLI_DANIELE
    script_lines = (
        'if [ $middleware == OSG ]; then\n',
        '    cd $RUNTIME_AREA\n',
        '    echo "Remove working directory: $WORKING_DIR"\n',
        '    /bin/rm -rf $WORKING_DIR\n',
        '    if [ -d $WORKING_DIR ] ;then\n',
        '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n',
        '              echo "JOB_EXIT_STATUS = 60999"\n',
        '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n',
        '              dumpStatus $RUNTIME_AREA/$repo\n',
        '        rm -f $RUNTIME_AREA/$repo \n',
        '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n',
        '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n',
        '    fi\n',
        'fi\n',
        '\n',
    )
    return ''.join(script_lines)
1469 +
def setParam_(self, param, value):
    """
    Store ``value`` under the key ``param`` in ``self._params``,
    replacing any value previously stored under that key.
    """
    self._params[param] = value
1472  
# Line 1105 | Line 1478 | class Cmssw(JobType):
1478          
def getTaskid(self):
    """
    Return the task identifier held in ``self._taskId``.
    """
    return self._taskId
1481 +
def uniquelist(self, old):
    """
    remove duplicates from a list

    Returns a new list holding each distinct element of ``old`` once, in
    order of first appearance (``old`` itself is left untouched).  Elements
    must be hashable.
    """
    seen = {}
    unique = []
    for element in old:
        if element not in seen:
            seen[element] = 0
            unique.append(element)
    # A real list in a stable order, rather than the keys of a dictionary,
    # so the result is reproducible from run to run.
    return unique
1490 +
1491 +
def checkOut(self, limit):
    """
    check the dimension of the output files

    Returns the shell fragment that sums the sizes of the numbered output
    files (``self.output_file`` + ``self.output_file_sandbox``, skipping
    any name containing ``crab_fjr``) and compares the sum with ``limit``.
    When the limit is exceeded, the fragment finds the first file that
    makes the running total overflow, removes that file and every file
    listed after it, and sets ``exit_status=70000``.

    ``limit`` is inserted into the script via ``str(limit)``.
    """
    checked = [self.numberFile_(fileOut, '$NJob')
               for fileOut in (self.output_file + self.output_file_sandbox)
               if fileOut.find('crab_fjr') == -1]
    # Every name is preceded by a blank (including the first one), which
    # keeps the generated text identical to what earlier revisions emitted.
    allOutFiles = ''.join([' ' + name for name in checked])

    txt = 'echo "*****************************************"\n'
    txt += 'echo "** Starting output sandbox limit check **"\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "OUTPUT files: ' + allOutFiles + '";\n'
    txt += 'ls -gGhrta;\n'
    txt += 'sum=0;\n'
    txt += 'for file in ' + allOutFiles + ' ; do\n'
    txt += '    if [ -e $file ]; then\n'
    txt += "        tt=`ls -gGrta $file | awk '{ print $3 }'`\n"
    txt += '        sum=`expr $sum + $tt`\n'
    txt += '    else\n'
    txt += '        echo "WARNING: output file $file not found!"\n'
    txt += '    fi\n'
    txt += 'done\n'
    txt += 'echo "Total Output dimension: $sum";\n'
    txt += 'limit=' + str(limit) + ';\n'
    txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
    txt += 'if [ $limit -lt $sum ]; then\n'
    txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
    txt += '    echo "         checking the output file sizes..."\n'
    txt += '    tot=0;\n'
    txt += '    for file2 in ' + allOutFiles + ' ; do\n'
    txt += "        tt=`ls -gGrta $file2 | awk '{ print $3 }';`\n"
    txt += '        tot=`expr $tot + $tt`;\n'
    txt += '        if [ $limit -lt $tot ]; then\n'
    txt += '            tot=`expr $tot - $tt`;\n'
    # The overflowing file is the current loop variable ($file2); $file is
    # only a leftover from the size-summing loop above and would always
    # name the last file of the list.
    txt += '            fileLast=$file2;\n'
    txt += '            break;\n'
    txt += '        fi\n'
    txt += '    done\n'
    txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n'
    txt += '    flag=0;\n'
    txt += '    for filess in ' + allOutFiles + ' ; do\n'
    txt += '        if [ $fileLast = $filess ]; then\n'
    txt += '            flag=1;\n'
    txt += '        fi\n'
    txt += '        if [ $flag -eq 1 ]; then\n'
    txt += '            rm -f $filess;\n'
    txt += '        fi\n'
    txt += '    done\n'
    txt += '    ls -agGhrt;\n'
    txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
    txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
    txt += '    exit_status=70000;\n'
    # 'else' gets its own line so the generated script reads like the
    # other branches.
    txt += 'else\n'
    txt += '    echo "Total Output dimension $sum is fine.";\n'
    txt += 'fi\n'
    txt += 'echo "*****************************************"\n'
    txt += 'echo "*** Ending output sandbox limit check ***"\n'
    txt += 'echo "*****************************************"\n'
    return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines