ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.22 by slacapra, Tue Jul 4 16:30:39 2006 UTC vs.
Revision 1.137 by slacapra, Fri Nov 16 11:09:31 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 < import math
5 > from BlackWhiteListParser import BlackWhiteListParser
6   import common
7 import PsetManipulator  
8
9 import DBSInfo_EDM
10 import DataDiscovery_EDM
11 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
21        self.analisys_common_info = {}
22        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18 +
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27 +        # number of jobs requested to be created, limit obj splitting
28 +        self.ncjobs = ncjobs
29 +
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
28        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      #scrip use case Da
41 +        self.datasetPath = '' #scrip use case Da
42  
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
56 +            common.logger.message(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +
61 +        common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
37        common.analisys_common_info['sw_version'] = self.version
38        ### FEDE
39        common.analisys_common_info['copy_input_data'] = 0
40        common.analisys_common_info['events_management'] = 1
63  
64          ### collect Data cards
65 +
66          try:
67              tmp =  cfg_params['CMSSW.datasetpath']
68              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 50 | Line 73 | class Cmssw(JobType):
73                  self.datasetPath = tmp
74                  self.selectNoInput = 0
75          except KeyError:
76 <            msg = "Error: datasetpath not defined "  
76 >            msg = "Error: datasetpath not defined "
77              raise CrabException(msg)
78  
79          # ML monitoring
# Line 59 | Line 82 | class Cmssw(JobType):
82              self.setParam_('dataset', 'None')
83              self.setParam_('owner', 'None')
84          else:
85 <            datasetpath_split = self.datasetPath.split("/")
86 <            self.setParam_('dataset', datasetpath_split[1])
87 <            self.setParam_('owner', datasetpath_split[-1])
85 >            try:
86 >                datasetpath_split = self.datasetPath.split("/")
87 >                # standard style
88 >                self.setParam_('datasetFull', self.datasetPath)
89 >                self.setParam_('dataset', datasetpath_split[1])
90 >                self.setParam_('owner', datasetpath_split[2])
91 >            except:
92 >                self.setParam_('dataset', self.datasetPath)
93 >                self.setParam_('owner', self.datasetPath)
94  
95          self.setTaskid_()
96          self.setParam_('taskId', self.cfg_params['taskId'])
# Line 85 | Line 114 | class Cmssw(JobType):
114          try:
115              self.pset = cfg_params['CMSSW.pset']
116              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
117 <            if (not os.path.exists(self.pset)):
118 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
117 >            if self.pset.lower() != 'none' :
118 >                if (not os.path.exists(self.pset)):
119 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
120 >            else:
121 >                self.pset = None
122          except KeyError:
123              raise CrabException("PSet file missing. Cannot run cmsRun ")
124  
125          # output files
126 +        ## stuff which must be returned always via sandbox
127 +        self.output_file_sandbox = []
128 +
129 +        # add fjr report by default via sandbox
130 +        self.output_file_sandbox.append(self.fjrFileName)
131 +
132 +        # other output files to be returned via sandbox or copied to SE
133          try:
134              self.output_file = []
96
135              tmp = cfg_params['CMSSW.output_file']
136              if tmp != '':
137                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 103 | Line 141 | class Cmssw(JobType):
141                      self.output_file.append(tmp)
142                      pass
143              else:
144 <                log.message("No output file defined: only stdout/err will be available")
144 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
145                  pass
146              pass
147          except KeyError:
148 <            log.message("No output file defined: only stdout/err will be available")
148 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
149              pass
150  
151          # script_exe file as additional file in inputSandbox
152          try:
153              self.scriptExe = cfg_params['USER.script_exe']
116            self.additional_inbox_files.append(self.scriptExe)
154              if self.scriptExe != '':
155                 if not os.path.isfile(self.scriptExe):
156 <                  msg ="WARNING. file "+self.scriptExe+" not found"
156 >                  msg ="ERROR. file "+self.scriptExe+" not found"
157                    raise CrabException(msg)
158 +               self.additional_inbox_files.append(string.strip(self.scriptExe))
159          except KeyError:
160 <           pass
161 <                  
160 >            self.scriptExe = ''
161 >
162 >        #CarlosDaniele
163 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
164 >           msg ="Error. script_exe  not defined"
165 >           raise CrabException(msg)
166 >
167          ## additional input files
168          try:
169 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
169 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
170              for tmp in tmpAddFiles:
171 <                if not os.path.exists(tmp):
172 <                    raise CrabException("Additional input file not found: "+tmp)
173 <                tmp=string.strip(tmp)
174 <                self.additional_inbox_files.append(tmp)
171 >                tmp = string.strip(tmp)
172 >                dirname = ''
173 >                if not tmp[0]=="/": dirname = "."
174 >                files = []
175 >                if string.find(tmp,"*")>-1:
176 >                    files = glob.glob(os.path.join(dirname, tmp))
177 >                    if len(files)==0:
178 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
179 >                else:
180 >                    files.append(tmp)
181 >                for file in files:
182 >                    if not os.path.exists(file):
183 >                        raise CrabException("Additional input file not found: "+file)
184 >                    pass
185 >                    # fname = string.split(file, '/')[-1]
186 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
187 >                    # shutil.copyfile(file, storedFile)
188 >                    self.additional_inbox_files.append(string.strip(file))
189                  pass
190              pass
191 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
192          except KeyError:
193              pass
194  
195          # files per job
196          try:
197 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
198 <            self.selectFilesPerJob = 1
197 >            if (cfg_params['CMSSW.files_per_jobs']):
198 >                raise CrabException("files_per_jobs no longer supported.  Quitting.")
199          except KeyError:
200 <            self.filesPerJob = 0
143 <            self.selectFilesPerJob = 0
200 >            pass
201  
202          ## Events per job
203          try:
# Line 149 | Line 206 | class Cmssw(JobType):
206          except KeyError:
207              self.eventsPerJob = -1
208              self.selectEventsPerJob = 0
209 <    
209 >
210          ## number of jobs
211          try:
212              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
# Line 158 | Line 215 | class Cmssw(JobType):
215              self.theNumberOfJobs = 0
216              self.selectNumberOfJobs = 0
217  
161        ## source seed for pythia
218          try:
219 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
219 >            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
220 >            self.selectTotalNumberEvents = 1
221          except KeyError:
222 <            self.sourceSeed = 123456
223 <            common.logger.debug(5,"No seed given, will use "+str(self.sourceSeed))
222 >            self.total_number_of_events = 0
223 >            self.selectTotalNumberEvents = 0
224  
225 <        if not (self.selectFilesPerJob + self.selectEventsPerJob + self.selectNumberOfJobs == 1 ):
226 <            msg = 'Must define either files_per_jobs or events_per_job or number_of_jobs'
227 <            raise CrabException(msg)
225 >        if self.pset != None: #CarlosDaniele
226 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
227 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
228 >                 raise CrabException(msg)
229 >        else:
230 >             if (self.selectNumberOfJobs == 0):
231 >                 msg = 'Must specify  number_of_jobs.'
232 >                 raise CrabException(msg)
233  
234 +        ## source seed for pythia
235          try:
236 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
236 >            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
237          except KeyError:
238 <            msg = 'Must define total_number_of_events'
239 <            raise CrabException(msg)
240 <        
178 <        CEBlackList = []
238 >            self.sourceSeed = None
239 >            common.logger.debug(5,"No seed given")
240 >
241          try:
242 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
181 <            for tmp in tmpBad:
182 <                tmp=string.strip(tmp)
183 <                CEBlackList.append(tmp)
242 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
243          except KeyError:
244 <            pass
244 >            self.sourceSeedVtx = None
245 >            common.logger.debug(5,"No vertex seed given")
246  
187        self.reCEBlackList=[]
188        for bad in CEBlackList:
189            self.reCEBlackList.append(re.compile( bad ))
190
191        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
192
193        CEWhiteList = []
247          try:
248 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
196 <            for tmp in tmpGood:
197 <                tmp=string.strip(tmp)
198 <                CEWhiteList.append(tmp)
248 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
249          except KeyError:
250 <            pass
251 <
202 <        #print 'CEWhiteList: ',CEWhiteList
203 <        self.reCEWhiteList=[]
204 <        for Good in CEWhiteList:
205 <            self.reCEWhiteList.append(re.compile( Good ))
250 >            self.sourceSeedG4 = None
251 >            common.logger.debug(5,"No g4 sim hits seed given")
252  
253 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
253 >        try:
254 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
255 >        except KeyError:
256 >            self.sourceSeedMix = None
257 >            common.logger.debug(5,"No mix seed given")
258  
259 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
259 >        try:
260 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
261 >        except KeyError:
262 >            self.firstRun = None
263 >            common.logger.debug(5,"No first run given")
264 >        if self.pset != None: #CarlosDaniele
265 >            import PsetManipulator as pp
266 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
267  
268          #DBSDLS-start
269 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
269 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
270          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
271          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
272 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
273          ## Perform the data location and discovery (based on DBS/DLS)
274          ## SL: Don't if NONE is specified as input (pythia use case)
275 <        common.analisys_common_info['sites']=None
275 >        blockSites = {}
276          if self.datasetPath:
277 <            self.DataDiscoveryAndLocation(cfg_params)
278 <        #DBSDLS-end          
277 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
278 >        #DBSDLS-end
279  
280          self.tgzNameWithPath = self.getTarBall(self.executable)
281 <    
281 >
282          ## Select Splitting
283 <        if self.selectNoInput: self.jobSplittingNoInput()
284 <        elif self.selectFilesPerJob or self.selectEventsPerJob or self.selectNumberOfJobs: self.jobSplittingPerFiles()
283 >        if self.selectNoInput:
284 >            if self.pset == None: #CarlosDaniele
285 >                self.jobSplittingForScript()
286 >            else:
287 >                self.jobSplittingNoInput()
288          else:
289 <            msg = 'Don\'t know how to split...'
229 <            raise CrabException(msg)
289 >            self.jobSplittingByBlocks(blockSites)
290  
291          # modify Pset
292 <        try:
293 <            if (self.datasetPath): # standard job
294 <                self.PsetEdit.maxEvent(self.eventsPerJob) #Daniele  
295 <                self.PsetEdit.inputModule("INPUT") #Daniele
296 <
297 <            else:  # pythia like job
298 <                self.PsetEdit.maxEvent(self.eventsPerJob) #Daniele  
299 <                self.PsetEdit.pythiaSeed("INPUT") #Daniele
300 <        
301 <            self.PsetEdit.psetWriter(self.configFilename())
302 <        except:
303 <            msg='Error while manipuliating ParameterSet: exiting...'
304 <            raise CrabException(msg)
292 >        if self.pset != None: #CarlosDaniele
293 >            try:
294 >                if (self.datasetPath): # standard job
295 >                    # allow to processa a fraction of events in a file
296 >                    PsetEdit.inputModule("INPUTFILE")
297 >                    PsetEdit.maxEvent(0)
298 >                    PsetEdit.skipEvent(0)
299 >                else:  # pythia like job
300 >                    PsetEdit.maxEvent(self.eventsPerJob)
301 >                    if (self.firstRun):
302 >                        PsetEdit.pythiaFirstRun(0)  #First Run
303 >                    if (self.sourceSeed) :
304 >                        PsetEdit.pythiaSeed(0)
305 >                        if (self.sourceSeedVtx) :
306 >                            PsetEdit.vtxSeed(0)
307 >                        if (self.sourceSeedG4) :
308 >                            PsetEdit.g4Seed(0)
309 >                        if (self.sourceSeedMix) :
310 >                            PsetEdit.mixSeed(0)
311 >                # add FrameworkJobReport to parameter-set
312 >                PsetEdit.addCrabFJR(self.fjrFileName)
313 >                PsetEdit.psetWriter(self.configFilename())
314 >            except:
315 >                msg='Error while manipuliating ParameterSet: exiting...'
316 >                raise CrabException(msg)
317  
318      def DataDiscoveryAndLocation(self, cfg_params):
319  
320 +        import DataDiscovery
321 +        import DataLocation
322          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
323  
324          datasetPath=self.datasetPath
325  
252        ## TODO
253        dataTiersList = ""
254        dataTiers = dataTiersList.split(',')
255
326          ## Contact the DBS
327 +        common.logger.message("Contacting Data Discovery Services ...")
328          try:
329 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
329 >
330 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
331              self.pubdata.fetchDBSInfo()
332  
333 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
333 >        except DataDiscovery.NotExistingDatasetError, ex :
334              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
335              raise CrabException(msg)
336 <
265 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
336 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
337              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338              raise CrabException(msg)
339 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
340 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
339 >        except DataDiscovery.DataDiscoveryError, ex:
340 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
341              raise CrabException(msg)
342  
343 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
344 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
345 <        common.logger.message("Required data are :"+self.datasetPath)
275 <
276 <        filesbyblock=self.pubdata.getFiles()
277 < #        print filesbyblock
278 <        self.AllInputFiles=filesbyblock.values()
279 <        self.files = self.AllInputFiles        
343 >        self.filesbyblock=self.pubdata.getFiles()
344 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
345 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
346  
347          ## get max number of events
348 <        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
283 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
284 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
348 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
349  
350          ## Contact the DLS and build a list of sites hosting the fileblocks
351          try:
352 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
352 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
353              dataloc.fetchDLSInfo()
354 <        except DataLocation_EDM.DataLocationError , ex:
354 >        except DataLocation.DataLocationError , ex:
355              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
356              raise CrabException(msg)
293        
294        allsites=dataloc.getSites()
295        common.logger.debug(5,"sites are %s"%allsites)
296        sites=self.checkBlackList(allsites)
297        common.logger.debug(5,"sites are (after black list) %s"%sites)
298        sites=self.checkWhiteList(sites)
299        common.logger.debug(5,"sites are (after white list) %s"%sites)
357  
301        if len(sites)==0:
302            msg = 'No sites hosting all the needed data! Exiting... '
303            raise CrabException(msg)
358  
359 <        common.logger.message("List of Sites hosting the data : "+str(sites))
360 <        common.logger.debug(6, "List of Sites: "+str(sites))
361 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
362 <        self.setParam_('TargetCE', ','.join(sites))
363 <        return
364 <    
365 <    def jobSplittingPerFiles(self):
366 <        """
367 <        Perform job splitting based on number of files to be accessed per job
368 <        """
315 <        common.logger.debug(5,'Splitting per input files')
316 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
317 <        common.logger.message('Available '+str(self.maxEvents)+' events in total ')
318 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
319 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
320 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job')
359 >        sites = dataloc.getSites()
360 >        allSites = []
361 >        listSites = sites.values()
362 >        for listSite in listSites:
363 >            for oneSite in listSite:
364 >                allSites.append(oneSite)
365 >        allSites = self.uniquelist(allSites)
366 >
367 >        # screen output
368 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
369  
370 <        ## if asked to process all events, do it
371 <        if self.total_number_of_events == -1:
372 <            self.total_number_of_events=self.maxEvents
370 >        return sites
371 >
372 >    def jobSplittingByBlocks(self, blockSites):
373 >        """
374 >        Perform job splitting. Jobs run over an integer number of files
375 >        and no more than one block.
376 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
377 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
378 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
379 >                  self.maxEvents, self.filesbyblock
380 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
381 >              self.total_number_of_jobs - Total # of jobs
382 >              self.list_of_args - File(s) job will run on (a list of lists)
383 >        """
384 >
385 >        # ---- Handle the possible job splitting configurations ---- #
386 >        if (self.selectTotalNumberEvents):
387 >            totalEventsRequested = self.total_number_of_events
388 >        if (self.selectEventsPerJob):
389 >            eventsPerJobRequested = self.eventsPerJob
390 >            if (self.selectNumberOfJobs):
391 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
392 >
393 >        # If user requested all the events in the dataset
394 >        if (totalEventsRequested == -1):
395 >            eventsRemaining=self.maxEvents
396 >        # If user requested more events than are in the dataset
397 >        elif (totalEventsRequested > self.maxEvents):
398 >            eventsRemaining = self.maxEvents
399 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
400 >        # If user requested less events than are in the dataset
401          else:
402 <            if self.total_number_of_events>self.maxEvents:
327 <                common.logger.message("Asked "+str(self.total_number_of_events)+" but only "+str(self.maxEvents)+" available.")
328 <                self.total_number_of_events=self.maxEvents
329 <            pass
402 >            eventsRemaining = totalEventsRequested
403  
404 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
405 <        n_tot_files = (len(self.files[0]))
406 <        ## SL: this is wrong if the files have different number of events
334 <        evPerFile = int(self.maxEvents)/n_tot_files
335 <
336 <        common.logger.debug(5,'Events per File '+str(evPerFile))
337 <
338 <        ## compute job splitting parameters: filesPerJob, eventsPerJob and theNumberOfJobs
339 <        if self.selectFilesPerJob:
340 <            ## user define files per event.
341 <            filesPerJob = self.filesPerJob
342 <            eventsPerJob = filesPerJob*evPerFile
343 <            theNumberOfJobs = int(self.total_number_of_events*1./eventsPerJob)
344 <            check = int(self.total_number_of_events) - (theNumberOfJobs*eventsPerJob)
345 <            if check > 0:
346 <                theNumberOfJobs +=1
347 <                filesLastJob = int(check*1./evPerFile+0.5)
348 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
349 <            else:
350 <                filesLastJob = filesPerJob
404 >        # If user requested more events per job than are in the dataset
405 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
406 >            eventsPerJobRequested = self.maxEvents
407  
408 <        elif self.selectNumberOfJobs:
409 <            ## User select the number of jobs: last might be bigger to match request of events
410 <            theNumberOfJobs =  self.theNumberOfJobs
411 <
412 <            eventsPerJob = self.total_number_of_events/theNumberOfJobs
413 <            filesPerJob = int(eventsPerJob/evPerFile)
414 <            if (filesPerJob==0) : filesPerJob=1
415 <            check = int(self.total_number_of_events) - (int(theNumberOfJobs)*filesPerJob*evPerFile)
360 <            if not check == 0:
361 <                if check<0:
362 <                    missingFiles = int(check/evPerFile)
363 <                    additionalJobs = int(missingFiles/filesPerJob)
364 <                    #print missingFiles, additionalJobs
365 <                    theNumberOfJobs+=additionalJobs
366 <                    common.logger.message('Warning: will create only '+str(theNumberOfJobs)+' jobs')
367 <                    check = int(self.total_number_of_events) - (int(theNumberOfJobs)*filesPerJob*evPerFile)
368 <                    
369 <                if check >0 :
370 <                    filesLastJob = filesPerJob+int(check*1./evPerFile+0.5)
371 <                    common.logger.message('Warning: last job will be created with '+str(filesLastJob*evPerFile)+' events')
372 <                else:
373 <                    filesLastJob = filesPerJob
374 <            else:
375 <                filesLastJob = filesPerJob
376 <        elif self.selectEventsPerJob:
377 <            # SL case if asked events per job
378 <            ## estimate the number of files per job to match the user requirement
379 <            filesPerJob = int(float(self.eventsPerJob)/float(evPerFile))
380 <            common.logger.debug(5,"filesPerJob "+str(filesPerJob))
381 <            if (filesPerJob==0): filesPerJob=1
382 <            eventsPerJob=filesPerJob*evPerFile
383 <            theNumberOfJobs = int(self.total_number_of_events)/int(self.eventsPerJob)
384 <            check = int(self.total_number_of_events) - (int(theNumberOfJobs)*eventsPerJob)
385 <            if not check == 0:
386 <                missingFiles = int(check/evPerFile)
387 <                additionalJobs = int(missingFiles/filesPerJob)
388 <                #print missingFiles, additionalJobs
389 <                if ( additionalJobs>0) : theNumberOfJobs+=additionalJobs
390 <                check = int(self.total_number_of_events) - (int(theNumberOfJobs)*eventsPerJob)
391 <                if not check == 0:
392 <                    if (check <0 ):
393 <                        filesLastJob = filesPerJob+int(check*1./evPerFile-0.5)
394 <                    else:
395 <                        filesLastJob = int(check*1./evPerFile+0.5)
396 <                    common.logger.message('Warning: last job will be created with '+str(filesLastJob*evPerFile)+' events')
397 <                else:
398 <                    filesLastJob = filesPerJob
399 <            else:
400 <                filesLastJob = filesPerJob
401 <        
402 <        self.total_number_of_jobs = theNumberOfJobs
408 >        # For user info at end
409 >        totalEventCount = 0
410 >
411 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
412 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
413 >
414 >        if (self.selectNumberOfJobs):
415 >            common.logger.message("May not create the exact number_of_jobs requested.")
416  
417 <        totalEventsToBeUsed=theNumberOfJobs*filesPerJob*evPerFile
418 <        if not check == 0:
419 <            totalEventsToBeUsed=(theNumberOfJobs-1)*filesPerJob*evPerFile+filesLastJob*evPerFile
417 >        if ( self.ncjobs == 'all' ) :
418 >            totalNumberOfJobs = 999999999
419 >        else :
420 >            totalNumberOfJobs = self.ncjobs
421  
408        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created, each for '+str(filesPerJob*evPerFile)+' events, for a total of '+str(totalEventsToBeUsed)+' events')
422  
423 <        totalFilesToBeUsed=filesPerJob*(theNumberOfJobs-1)+filesLastJob
423 >        blocks = blockSites.keys()
424 >        blockCount = 0
425 >        # Backup variable in case self.maxEvents counted events in a non-included block
426 >        numBlocksInDataset = len(blocks)
427  
428 <        ## set job arguments (files)
428 >        jobCount = 0
429          list_of_lists = []
414        lastFile=0
415        for i in range(0, int(totalFilesToBeUsed), filesPerJob)[:-1]:
416            parString = "\\{"
417            
418            lastFile=i+filesPerJob
419            params = self.files[0][i: lastFile]
420            for i in range(len(params) - 1):
421                parString += '\\\"' + params[i] + '\\\"\,'
422            
423            parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
424            list_of_lists.append([parString])
425            pass
430  
431 <        ## last job
432 <        parString = "\\{"
433 <        
434 <        params = self.files[0][lastFile: lastFile+filesLastJob]
435 <        for i in range(len(params) - 1):
436 <            parString += '\\\"' + params[i] + '\\\"\,'
437 <        
438 <        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
439 <        list_of_lists.append([parString])
440 <        pass
431 >        # list tracking which jobs are in which jobs belong to which block
432 >        jobsOfBlock = {}
433 >
434 >        # ---- Iterate over the blocks in the dataset until ---- #
435 >        # ---- we've met the requested total # of events    ---- #
436 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
437 >            block = blocks[blockCount]
438 >            blockCount += 1
439 >            if block not in jobsOfBlock.keys() :
440 >                jobsOfBlock[block] = []
441 >
442 >            if self.eventsbyblock.has_key(block) :
443 >                numEventsInBlock = self.eventsbyblock[block]
444 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
445 >
446 >                files = self.filesbyblock[block]
447 >                numFilesInBlock = len(files)
448 >                if (numFilesInBlock <= 0):
449 >                    continue
450 >                fileCount = 0
451 >
452 >                # ---- New block => New job ---- #
453 >                parString = ""
454 >                # counter for number of events in files currently worked on
455 >                filesEventCount = 0
456 >                # flag if next while loop should touch new file
457 >                newFile = 1
458 >                # job event counter
459 >                jobSkipEventCount = 0
460 >
461 >                # ---- Iterate over the files in the block until we've met the requested ---- #
462 >                # ---- total # of events or we've gone over all the files in this block  ---- #
463 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
464 >                    file = files[fileCount]
465 >                    if newFile :
466 >                        try:
467 >                            numEventsInFile = self.eventsbyfile[file]
468 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
469 >                            # increase filesEventCount
470 >                            filesEventCount += numEventsInFile
471 >                            # Add file to current job
472 >                            parString += '\\\"' + file + '\\\"\,'
473 >                            newFile = 0
474 >                        except KeyError:
475 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
476 >
477 >
478 >                    # if less events in file remain than eventsPerJobRequested
479 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
480 >                        # if last file in block
481 >                        if ( fileCount == numFilesInBlock-1 ) :
482 >                            # end job using last file, use remaining events in block
483 >                            # close job and touch new file
484 >                            fullString = parString[:-2]
485 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
486 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
487 >                            self.jobDestination.append(blockSites[block])
488 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
489 >                            # fill jobs of block dictionary
490 >                            jobsOfBlock[block].append(jobCount+1)
491 >                            # reset counter
492 >                            jobCount = jobCount + 1
493 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
494 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
495 >                            jobSkipEventCount = 0
496 >                            # reset file
497 >                            parString = ""
498 >                            filesEventCount = 0
499 >                            newFile = 1
500 >                            fileCount += 1
501 >                        else :
502 >                            # go to next file
503 >                            newFile = 1
504 >                            fileCount += 1
505 >                    # if events in file equal to eventsPerJobRequested
506 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
507 >                        # close job and touch new file
508 >                        fullString = parString[:-2]
509 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
510 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
511 >                        self.jobDestination.append(blockSites[block])
512 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
513 >                        jobsOfBlock[block].append(jobCount+1)
514 >                        # reset counter
515 >                        jobCount = jobCount + 1
516 >                        totalEventCount = totalEventCount + eventsPerJobRequested
517 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
518 >                        jobSkipEventCount = 0
519 >                        # reset file
520 >                        parString = ""
521 >                        filesEventCount = 0
522 >                        newFile = 1
523 >                        fileCount += 1
524 >
525 >                    # if more events in file remain than eventsPerJobRequested
526 >                    else :
527 >                        # close job but don't touch new file
528 >                        fullString = parString[:-2]
529 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
530 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
531 >                        self.jobDestination.append(blockSites[block])
532 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
533 >                        jobsOfBlock[block].append(jobCount+1)
534 >                        # increase counter
535 >                        jobCount = jobCount + 1
536 >                        totalEventCount = totalEventCount + eventsPerJobRequested
537 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
538 >                        # calculate skip events for last file
539 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
540 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
541 >                        # remove all but the last file
542 >                        filesEventCount = self.eventsbyfile[file]
543 >                        parString = ""
544 >                        parString += '\\\"' + file + '\\\"\,'
545 >                    pass # END if
546 >                pass # END while (iterate over files in the block)
547 >        pass # END while (iterate over blocks in the dataset)
548 >        self.ncjobs = self.total_number_of_jobs = jobCount
549 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
550 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
551 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
552 >
553 >        # screen output
554 >        screenOutput = "List of jobs and available destination sites:\n\n"
555 >
556 >        # keep trace of block with no sites to print a warning at the end
557 >        noSiteBlock = []
558 >        bloskNoSite = []
559 >
560 >        blockCounter = 0
561 >        for block in blocks:
562 >            if block in jobsOfBlock.keys() :
563 >                blockCounter += 1
564 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
565 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
566 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
567 >                    bloskNoSite.append( blockCounter )
568 >
569 >        common.logger.message(screenOutput)
570 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
571 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
572 >            virgola = ""
573 >            if len(bloskNoSite) > 1:
574 >                virgola = ","
575 >            for block in bloskNoSite:
576 >                msg += ' ' + str(block) + virgola
577 >            msg += '\n               Related jobs:\n                 '
578 >            virgola = ""
579 >            if len(noSiteBlock) > 1:
580 >                virgola = ","
581 >            for range_jobs in noSiteBlock:
582 >                msg += str(range_jobs) + virgola
583 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
584 >            common.logger.message(msg)
585  
586          self.list_of_args = list_of_lists
439        # print self.list_of_args[0]
587          return
588  
589      def jobSplittingNoInput(self):
# Line 444 | Line 591 | class Cmssw(JobType):
591          Perform job splitting based on number of event per job
592          """
593          common.logger.debug(5,'Splitting per events')
594 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
595 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
596 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
594 >
595 >        if (self.selectEventsPerJob):
596 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
597 >        if (self.selectNumberOfJobs):
598 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
599 >        if (self.selectTotalNumberEvents):
600 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
601  
602          if (self.total_number_of_events < 0):
603              msg='Cannot split jobs per Events with "-1" as total number of events'
604              raise CrabException(msg)
605  
606          if (self.selectEventsPerJob):
607 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
607 >            if (self.selectTotalNumberEvents):
608 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
609 >            elif(self.selectNumberOfJobs) :
610 >                self.total_number_of_jobs =self.theNumberOfJobs
611 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
612 >
613          elif (self.selectNumberOfJobs) :
614              self.total_number_of_jobs = self.theNumberOfJobs
615              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
# Line 465 | Line 621 | class Cmssw(JobType):
621  
622          common.logger.debug(5,'Check  '+str(check))
623  
624 <        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
624 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
625          if check > 0:
626 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but will do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
471 <
626 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
627  
628          # argument is seed number.$i
629          self.list_of_args = []
630          for i in range(self.total_number_of_jobs):
631 <            self.list_of_args.append([(str(self.sourceSeed)+str(i))])
632 <        #print self.list_of_args
631 >            ## Since there is no input, any site is good
632 >           # self.jobDestination.append(["Any"])
633 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
634 >            args=[]
635 >            if (self.firstRun):
636 >                    ## pythia first run
637 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
638 >                args.append(str(self.firstRun)+str(i))
639 >            else:
640 >                ## no first run
641 >                #self.list_of_args.append([str(i)])
642 >                args.append(str(i))
643 >            if (self.sourceSeed):
644 >                args.append(str(self.sourceSeed)+str(i))
645 >                if (self.sourceSeedVtx):
646 >                    ## + vtx random seed
647 >                    args.append(str(self.sourceSeedVtx)+str(i))
648 >                if (self.sourceSeedG4):
649 >                    ## + G4 random seed
650 >                    args.append(str(self.sourceSeedG4)+str(i))
651 >                if (self.sourceSeedMix):
652 >                    ## + Mix random seed
653 >                    args.append(str(self.sourceSeedMix)+str(i))
654 >                pass
655 >            pass
656 >            self.list_of_args.append(args)
657 >        pass
658 >
659 >        # print self.list_of_args
660  
661          return
662  
663 +
664 +    def jobSplittingForScript(self):#CarlosDaniele
665 +        """
666 +        Perform job splitting based on number of job
667 +        """
668 +        common.logger.debug(5,'Splitting per job')
669 +        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
670 +
671 +        self.total_number_of_jobs = self.theNumberOfJobs
672 +
673 +        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
674 +
675 +        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
676 +
677 +        # argument is seed number.$i
678 +        self.list_of_args = []
679 +        for i in range(self.total_number_of_jobs):
680 +            ## Since there is no input, any site is good
681 +           # self.jobDestination.append(["Any"])
682 +            self.jobDestination.append([""])
683 +            ## no random seed
684 +            self.list_of_args.append([str(i)])
685 +        return
686 +
687      def split(self, jobParams):
688 <
688 >
689          common.jobDB.load()
690          #### Fabio
691          njobs = self.total_number_of_jobs
# Line 487 | Line 693 | class Cmssw(JobType):
693          # create the empty structure
694          for i in range(njobs):
695              jobParams.append("")
696 <        
696 >
697          for job in range(njobs):
698              jobParams[job] = arglist[job]
699              # print str(arglist[job])
700              # print jobParams[job]
701              common.jobDB.setArguments(job, jobParams[job])
702 +            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
703 +            common.jobDB.setDestination(job, self.jobDestination[job])
704  
705          common.jobDB.save()
706          return
707 <    
707 >
708      def getJobTypeArguments(self, nj, sched):
709          result = ''
710          for i in common.jobDB.arguments(nj):
711              result=result+str(i)+" "
712          return result
713 <  
713 >
714      def numberOfJobs(self):
715          # Fabio
716          return self.total_number_of_jobs
717  
510    def checkBlackList(self, allSites):
511        if len(self.reCEBlackList)==0: return allSites
512        sites = []
513        for site in allSites:
514            common.logger.debug(10,'Site '+site)
515            good=1
516            for re in self.reCEBlackList:
517                if re.search(site):
518                    common.logger.message('CE in black list, skipping site '+site)
519                    good=0
520                pass
521            if good: sites.append(site)
522        if len(sites) == 0:
523            common.logger.debug(3,"No sites found after BlackList")
524        return sites
525
526    def checkWhiteList(self, allSites):
527
528        if len(self.reCEWhiteList)==0: return allSites
529        sites = []
530        for site in allSites:
531            good=0
532            for re in self.reCEWhiteList:
533                if re.search(site):
534                    common.logger.debug(5,'CE in white list, adding site '+site)
535                    good=1
536                if not good: continue
537                sites.append(site)
538        if len(sites) == 0:
539            common.logger.message("No sites found after WhiteList\n")
540        else:
541            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
542        return sites
543
718      def getTarBall(self, exe):
719          """
720          Return the TarBall with lib and exe
721          """
722 <        
722 >
723          # if it exist, just return it
724 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
724 >        #
725 >        # Marco. Let's start to use relative path for Boss XML files
726 >        #
727 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
728          if os.path.exists(self.tgzNameWithPath):
729              return self.tgzNameWithPath
730  
# Line 561 | Line 738 | class Cmssw(JobType):
738          # First of all declare the user Scram area
739          swArea = self.scram.getSWArea_()
740          #print "swArea = ", swArea
741 <        swVersion = self.scram.getSWVersion()
742 <        #print "swVersion = ", swVersion
741 >        # swVersion = self.scram.getSWVersion()
742 >        # print "swVersion = ", swVersion
743          swReleaseTop = self.scram.getReleaseTop_()
744          #print "swReleaseTop = ", swReleaseTop
745 <        
745 >
746          ## check if working area is release top
747          if swReleaseTop == '' or swArea == swReleaseTop:
748              return
749  
750 <        filesToBeTarred = []
751 <        ## First find the executable
752 <        if (self.executable != ''):
753 <            exeWithPath = self.scram.findFile_(executable)
754 < #           print exeWithPath
755 <            if ( not exeWithPath ):
756 <                raise CrabException('User executable '+executable+' not found')
757 <
758 <            ## then check if it's private or not
759 <            if exeWithPath.find(swReleaseTop) == -1:
760 <                # the exe is private, so we must ship
761 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
762 <                path = swArea+'/'
763 <                exe = string.replace(exeWithPath, path,'')
764 <                filesToBeTarred.append(exe)
765 <                pass
766 <            else:
767 <                # the exe is from release, we'll find it on WN
768 <                pass
769 <
770 <        ## Now get the libraries: only those in local working area
771 <        libDir = 'lib'
772 <        lib = swArea+'/' +libDir
773 <        common.logger.debug(5,"lib "+lib+" to be tarred")
774 <        if os.path.exists(lib):
775 <            filesToBeTarred.append(libDir)
776 <
777 <        ## Now check if module dir is present
778 <        moduleDir = 'module'
779 <        if os.path.isdir(swArea+'/'+moduleDir):
780 <            filesToBeTarred.append(moduleDir)
781 <
782 <        ## Now check if the Data dir is present
783 <        dataDir = 'src/Data/'
784 <        if os.path.isdir(swArea+'/'+dataDir):
785 <            filesToBeTarred.append(dataDir)
786 <
787 <        ## Create the tar-ball
788 <        if len(filesToBeTarred)>0:
789 <            cwd = os.getcwd()
790 <            os.chdir(swArea)
791 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
792 <            for line in filesToBeTarred:
793 <                tarcmd = tarcmd + line + ' '
794 <            cout = runCommand(tarcmd)
795 <            if not cout:
796 <                raise CrabException('Could not create tar-ball')
797 <            os.chdir(cwd)
798 <        else:
799 <            common.logger.debug(5,"No files to be to be tarred")
800 <        
750 >        import tarfile
751 >        try: # create tar ball
752 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
753 >            ## First find the executable
754 >            if (self.executable != ''):
755 >                exeWithPath = self.scram.findFile_(executable)
756 >                if ( not exeWithPath ):
757 >                    raise CrabException('User executable '+executable+' not found')
758 >
759 >                ## then check if it's private or not
760 >                if exeWithPath.find(swReleaseTop) == -1:
761 >                    # the exe is private, so we must ship
762 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
763 >                    path = swArea+'/'
764 >                    # distinguish case when script is in user project area or given by full path somewhere else
765 >                    if exeWithPath.find(path) >= 0 :
766 >                        exe = string.replace(exeWithPath, path,'')
767 >                        tar.add(path+exe,exe)
768 >                    else :
769 >                        tar.add(exeWithPath,os.path.basename(executable))
770 >                    pass
771 >                else:
772 >                    # the exe is from release, we'll find it on WN
773 >                    pass
774 >
775 >            ## Now get the libraries: only those in local working area
776 >            libDir = 'lib'
777 >            lib = swArea+'/' +libDir
778 >            common.logger.debug(5,"lib "+lib+" to be tarred")
779 >            if os.path.exists(lib):
780 >                tar.add(lib,libDir)
781 >
782 >            ## Now check if module dir is present
783 >            moduleDir = 'module'
784 >            module = swArea + '/' + moduleDir
785 >            if os.path.isdir(module):
786 >                tar.add(module,moduleDir)
787 >
788 >            ## Now check if any data dir(s) is present
789 >            swAreaLen=len(swArea)
790 >            for root, dirs, files in os.walk(swArea):
791 >                if "data" in dirs:
792 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
793 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
794 >
795 >            ## Add ProdAgent dir to tar
796 >            paDir = 'ProdAgentApi'
797 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
798 >            if os.path.isdir(pa):
799 >                tar.add(pa,paDir)
800 >
801 >            ### FEDE FOR DBS PUBLICATION
802 >            ## Add PRODCOMMON dir to tar
803 >            prodcommonDir = 'ProdCommon'
804 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
805 >            if os.path.isdir(prodcommonPath):
806 >                tar.add(prodcommonPath,prodcommonDir)
807 >            #############################
808 >
809 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
810 >            tar.close()
811 >        except :
812 >            raise CrabException('Could not create tar-ball')
813 >
814 >        ## check for tarball size
815 >        tarballinfo = os.stat(self.tgzNameWithPath)
816 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
817 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
818 >
819 >        ## create tar-ball with ML stuff
820 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
821 >        try:
822 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
823 >            path=os.environ['CRABDIR'] + '/python/'
824 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
825 >                tar.add(path+file,file)
826 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
827 >            tar.close()
828 >        except :
829 >            raise CrabException('Could not create ML files tar-ball')
830 >
831          return
832 <        
832 >
833 >    def additionalInputFileTgz(self):
834 >        """
835 >        Put all additional files into a tar ball and return its name
836 >        """
837 >        import tarfile
838 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
839 >        tar = tarfile.open(tarName, "w:gz")
840 >        for file in self.additional_inbox_files:
841 >            tar.add(file,string.split(file,'/')[-1])
842 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
843 >        tar.close()
844 >        return tarName
845 >
846      def wsSetupEnvironment(self, nj):
847          """
848          Returns part of a job script which prepares
849          the execution environment for the job 'nj'.
850          """
851          # Prepare JobType-independent part
852 <        txt = ''
853 <  
854 <        ## OLI_Daniele at this level  middleware already known
635 <
636 <        txt += 'if [ $middleware == LCG ]; then \n'
852 >        txt = ''
853 >        txt += 'echo ">>> setup environment"\n'
854 >        txt += 'if [ $middleware == LCG ]; then \n'
855          txt += self.wsSetupCMSLCGEnvironment_()
856          txt += 'elif [ $middleware == OSG ]; then\n'
857 <        txt += '    time=`date -u +"%s"`\n'
858 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
641 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
642 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
643 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
857 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
858 >        txt += '    if [ ! $? == 0 ] ;then\n'
859          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
860 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
861 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
862 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
648 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
649 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
650 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
860 >        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
861 >        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
862 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
863          txt += '        exit 1\n'
864          txt += '    fi\n'
865 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
866          txt += '\n'
867          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
868          txt += '    cd $WORKING_DIR\n'
869 <        txt += self.wsSetupCMSOSGEnvironment_()
869 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
870 >        txt += self.wsSetupCMSOSGEnvironment_()
871 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
872 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
873          txt += 'fi\n'
874  
875          # Prepare JobType-specific part
876          scram = self.scram.commandName()
877          txt += '\n\n'
878 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
878 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
879 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
880          txt += scram+' project CMSSW '+self.version+'\n'
881          txt += 'status=$?\n'
882          txt += 'if [ $status != 0 ] ; then\n'
883 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
884 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
885 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
886 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
670 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
671 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
672 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
673 <        ## OLI_Daniele
883 >        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
884 >        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
885 >        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
886 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
887          txt += '    if [ $middleware == OSG ]; then \n'
675        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
888          txt += '        cd $RUNTIME_AREA\n'
889 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
890 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
891          txt += '        /bin/rm -rf $WORKING_DIR\n'
892          txt += '        if [ -d $WORKING_DIR ] ;then\n'
893 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
894 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
895 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
896 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
683 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
684 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
685 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
893 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
894 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
895 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
896 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
897          txt += '        fi\n'
898          txt += '    fi \n'
899 <        txt += '   exit 1 \n'
899 >        txt += '    exit 1 \n'
900          txt += 'fi \n'
690        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
901          txt += 'cd '+self.version+'\n'
902 +        ########## FEDE FOR DBS2 ######################
903 +        txt += 'SOFTWARE_DIR=`pwd`\n'
904 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
905 +        ###############################################
906          ### needed grep for bug in scramv1 ###
907 +        txt += scram+' runtime -sh\n'
908          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
909 +        txt += 'echo $PATH\n'
910  
911          # Handle the arguments:
912          txt += "\n"
913          txt += "## number of arguments (first argument always jobnumber)\n"
914          txt += "\n"
915 <        txt += "narg=$#\n"
700 <        txt += "if [ $narg -lt 2 ]\n"
915 >        txt += "if [ $nargs -lt 2 ]\n"
916          txt += "then\n"
917 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
917 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
918          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
919          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
920          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
706        txt += '    rm -f $RUNTIME_AREA/$repo \n'
707        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
708        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
709        ## OLI_Daniele
921          txt += '    if [ $middleware == OSG ]; then \n'
711        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
922          txt += '        cd $RUNTIME_AREA\n'
923 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
924 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
925          txt += '        /bin/rm -rf $WORKING_DIR\n'
926          txt += '        if [ -d $WORKING_DIR ] ;then\n'
927 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
928 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
929 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
930 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
719 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
720 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
721 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
927 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
928 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
929 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
930 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
931          txt += '        fi\n'
932          txt += '    fi \n'
933          txt += "    exit 1\n"
# Line 727 | Line 936 | class Cmssw(JobType):
936  
937          # Prepare job-specific part
938          job = common.job_list[nj]
939 <        pset = os.path.basename(job.configFilename())
940 <        txt += '\n'
941 <        if (self.datasetPath): # standard job
942 <            txt += 'InputFiles=$2\n'
734 <            txt += 'echo "Inputfiles:<$InputFiles>"\n'
735 <            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
736 <        else:  # pythia like job
737 <            txt += 'Seed=$2\n'
738 <            txt += 'echo "Seed: <$Seed>"\n'
739 <            txt += 'sed "s#INPUT#$Seed#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
939 >        ### FEDE FOR DBS OUTPUT PUBLICATION
940 >        if (self.datasetPath):
941 >            txt += '\n'
942 >            txt += 'DatasetPath='+self.datasetPath+'\n'
943  
944 <        if len(self.additional_inbox_files) > 0:
742 <            for file in self.additional_inbox_files:
743 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
744 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
745 <                txt += '   chmod +x '+file+'\n'
746 <                txt += 'fi\n'
747 <            pass
944 >            datasetpath_split = self.datasetPath.split("/")
945  
946 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
946 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
947 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
948 >            txt += 'ApplicationFamily=cmsRun\n'
949  
950 <        txt += '\n'
951 <        txt += 'echo "***** cat pset.cfg *********"\n'
952 <        txt += 'cat pset.cfg\n'
953 <        txt += 'echo "****** end pset.cfg ********"\n'
954 <        txt += '\n'
955 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
956 <        # txt += 'cat pset1.cfg\n'
957 <        # txt += 'echo "****** end pset1.cfg ********"\n'
950 >        else:
951 >            txt += 'DatasetPath=MCDataTier\n'
952 >            txt += 'PrimaryDataset=null\n'
953 >            txt += 'DataTier=null\n'
954 >            txt += 'ApplicationFamily=MCDataTier\n'
955 >        if self.pset != None: #CarlosDaniele
956 >            pset = os.path.basename(job.configFilename())
957 >            txt += '\n'
958 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
959 >            if (self.datasetPath): # standard job
960 >                txt += 'InputFiles=${args[1]}\n'
961 >                txt += 'MaxEvents=${args[2]}\n'
962 >                txt += 'SkipEvents=${args[3]}\n'
963 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
964 >                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
965 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
966 >                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
967 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
968 >                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
969 >            else:  # pythia like job
970 >                seedIndex=1
971 >                if (self.firstRun):
972 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
973 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
974 >                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
975 >                    seedIndex=seedIndex+1
976 >
977 >                if (self.sourceSeed):
978 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
979 >                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
980 >                    seedIndex=seedIndex+1
981 >                    ## the following seeds are not always present
982 >                    if (self.sourceSeedVtx):
983 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
984 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
985 >                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
986 >                        seedIndex += 1
987 >                    if (self.sourceSeedG4):
988 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
989 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
990 >                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
991 >                        seedIndex += 1
992 >                    if (self.sourceSeedMix):
993 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
994 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
995 >                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
996 >                        seedIndex += 1
997 >                    pass
998 >                pass
999 >            txt += 'mv -f '+pset+' pset.cfg\n'
1000 >
1001 >        if len(self.additional_inbox_files) > 0:
1002 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1003 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1004 >            txt += 'fi\n'
1005 >            pass
1006 >
1007 >        if self.pset != None: #CarlosDaniele
1008 >            txt += '\n'
1009 >            txt += 'echo "***** cat pset.cfg *********"\n'
1010 >            txt += 'cat pset.cfg\n'
1011 >            txt += 'echo "****** end pset.cfg ********"\n'
1012 >            txt += '\n'
1013 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1014 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1015 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1016 >            ##############
1017 >            txt += '\n'
1018          return txt
1019  
1020 <    def wsBuildExe(self, nj):
1020 >    def wsBuildExe(self, nj=0):
1021          """
1022          Put in the script the commands to build an executable
1023          or a library.
# Line 767 | Line 1026 | class Cmssw(JobType):
1026          txt = ""
1027  
1028          if os.path.isfile(self.tgzNameWithPath):
1029 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1029 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1030              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1031              txt += 'untar_status=$? \n'
1032              txt += 'if [ $untar_status -ne 0 ]; then \n'
# Line 775 | Line 1034 | class Cmssw(JobType):
1034              txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1035              txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1036              txt += '   if [ $middleware == OSG ]; then \n'
778            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1037              txt += '       cd $RUNTIME_AREA\n'
1038 +            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1039 +            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1040              txt += '       /bin/rm -rf $WORKING_DIR\n'
1041              txt += '       if [ -d $WORKING_DIR ] ;then\n'
1042              txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1043              txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1044              txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1045              txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
786            txt += '           rm -f $RUNTIME_AREA/$repo \n'
787            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
788            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1046              txt += '       fi\n'
1047              txt += '   fi \n'
1048              txt += '   \n'
# Line 793 | Line 1050 | class Cmssw(JobType):
1050              txt += 'else \n'
1051              txt += '   echo "Successful untar" \n'
1052              txt += 'fi \n'
1053 +            txt += '\n'
1054 +            txt += 'echo ">>> Include ProdAgentApi and PRODCOMMON in PYTHONPATH:"\n'
1055 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1056 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1057 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1058 +            txt += 'else\n'
1059 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1060 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1061 +            ###################
1062 +            txt += 'fi\n'
1063 +            txt += '\n'
1064 +
1065              pass
1066 <        
1066 >
1067          return txt
1068  
1069      def modifySteeringCards(self, nj):
1070          """
1071 <        modify the card provided by the user,
1071 >        modify the card provided by the user,
1072          writing a new card into share dir
1073          """
1074 <        
1074 >
1075      def executableName(self):
1076 <        return self.executable
1076 >        if self.scriptExe: #CarlosDaniele
1077 >            return "sh "
1078 >        else:
1079 >            return self.executable
1080  
1081      def executableArgs(self):
1082 <        return " -p pset.cfg"
1082 >        if self.scriptExe:#CarlosDaniele
1083 >            return   self.scriptExe + " $NJob"
1084 >        else:
1085 >            # if >= CMSSW_1_5_X, add -e
1086 >            version_array = self.scram.getSWVersion().split('_')
1087 >            major = 0
1088 >            minor = 0
1089 >            try:
1090 >                major = int(version_array[1])
1091 >                minor = int(version_array[2])
1092 >            except:
1093 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1094 >                raise CrabException(msg)
1095 >            if major >= 1 and minor >= 5 :
1096 >                return " -e -p pset.cfg"
1097 >            else:
1098 >                return " -p pset.cfg"
1099  
1100      def inputSandbox(self, nj):
1101          """
1102          Returns a list of filenames to be put in JDL input sandbox.
1103          """
1104          inp_box = []
1105 <        # dict added to delete duplicate from input sandbox file list
1106 <        seen = {}
1105 >        # # dict added to delete duplicate from input sandbox file list
1106 >        # seen = {}
1107          ## code
1108          if os.path.isfile(self.tgzNameWithPath):
1109              inp_box.append(self.tgzNameWithPath)
1110 +        if os.path.isfile(self.MLtgzfile):
1111 +            inp_box.append(self.MLtgzfile)
1112          ## config
1113 <        inp_box.append(common.job_list[nj].configFilename())
1113 >        if not self.pset is None:
1114 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1115          ## additional input files
1116 <        #for file in self.additional_inbox_files:
1117 <        #    inp_box.append(common.work_space.cwdDir()+file)
1116 >        tgz = self.additionalInputFileTgz()
1117 >        inp_box.append(tgz)
1118          return inp_box
1119  
1120      def outputSandbox(self, nj):
# Line 832 | Line 1123 | class Cmssw(JobType):
1123          """
1124          out_box = []
1125  
835        stdout=common.job_list[nj].stdout()
836        stderr=common.job_list[nj].stderr()
837
1126          ## User Declared output files
1127 <        for out in self.output_file:
1128 <            n_out = nj + 1
1127 >        for out in (self.output_file+self.output_file_sandbox):
1128 >            n_out = nj + 1
1129              out_box.append(self.numberFile_(out,str(n_out)))
1130          return out_box
843        return []
1131  
1132      def prepareSteeringCards(self):
1133          """
# Line 854 | Line 1141 | class Cmssw(JobType):
1141          """
1142  
1143          txt = '\n'
1144 <        txt += '# directory content\n'
1144 >        txt += 'echo" >>> directory content:"\n'
1145          txt += 'ls \n'
1146 <        file_list = ''
1147 <        for fileWithSuffix in self.output_file:
1146 >        txt = '\n'
1147 >
1148 >        txt += 'output_exit_status=0\n'
1149 >
1150 >        for fileWithSuffix in (self.output_file_sandbox):
1151              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
862            file_list=file_list+output_file_num+' '
1152              txt += '\n'
1153              txt += '# check output file\n'
1154 <            txt += 'ls '+fileWithSuffix+'\n'
1155 <            txt += 'ls_result=$?\n'
1156 <            #txt += 'exe_result=$?\n'
1157 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1158 <            txt += '   echo "ERROR: Problem with output file"\n'
1159 <            #txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
871 <            #txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
872 <            #txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
873 <            ### OLI_DANIELE
1154 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1155 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1156 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1157 >            txt += 'else\n'
1158 >            txt += '    exit_status=60302\n'
1159 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1160              if common.scheduler.boss_scheduler_name == 'condor_g':
1161                  txt += '    if [ $middleware == OSG ]; then \n'
1162                  txt += '        echo "prepare dummy output file"\n'
1163                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1164                  txt += '    fi \n'
1165 +            txt += 'fi\n'
1166 +
1167 +        for fileWithSuffix in (self.output_file):
1168 +            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1169 +            txt += '\n'
1170 +            txt += '# check output file\n'
1171 +            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1172 +            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1173 +            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1174              txt += 'else\n'
1175 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1175 >            txt += '    exit_status=60302\n'
1176 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1177 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1178 >            txt += '    output_exit_status=$exit_status\n'
1179 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1180 >                txt += '    if [ $middleware == OSG ]; then \n'
1181 >                txt += '        echo "prepare dummy output file"\n'
1182 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1183 >                txt += '    fi \n'
1184              txt += 'fi\n'
1185 <      
1186 <        txt += 'cd $RUNTIME_AREA\n'
1187 <        file_list=file_list[:-1]
1188 <        txt += 'file_list="'+file_list+'"\n'
1185 >        file_list = []
1186 >        for fileWithSuffix in (self.output_file):
1187 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1188 >
1189 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1190          txt += 'cd $RUNTIME_AREA\n'
1191 <        ### OLI_DANIELE
888 <        txt += 'if [ $middleware == OSG ]; then\n'  
889 <        txt += '    cd $RUNTIME_AREA\n'
890 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
891 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
892 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
893 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
894 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
895 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
896 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
897 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
898 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
899 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
900 <        txt += '    fi\n'
901 <        txt += 'fi\n'
902 <        txt += '\n'
1191 >        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1192          return txt
1193  
1194      def numberFile_(self, file, txt):
# Line 910 | Line 1199 | class Cmssw(JobType):
1199          # take away last extension
1200          name = p[0]
1201          for x in p[1:-1]:
1202 <           name=name+"."+x
1202 >            name=name+"."+x
1203          # add "_txt"
1204          if len(p)>1:
1205 <          ext = p[len(p)-1]
1206 <          #result = name + '_' + str(txt) + "." + ext
918 <          result = name + '_' + txt + "." + ext
1205 >            ext = p[len(p)-1]
1206 >            result = name + '_' + txt + "." + ext
1207          else:
1208 <          #result = name + '_' + str(txt)
1209 <          result = name + '_' + txt
922 <        
1208 >            result = name + '_' + txt
1209 >
1210          return result
1211  
1212 <    def getRequirements(self):
1212 >    def getRequirements(self, nj=[]):
1213          """
1214 <        return job requirements to add to jdl files
1214 >        return job requirements to add to jdl files
1215          """
1216          req = ''
1217 <        if common.analisys_common_info['sw_version']:
1217 >        if self.version:
1218              req='Member("VO-cms-' + \
1219 <                 common.analisys_common_info['sw_version'] + \
1219 >                 self.version + \
1220 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1221 >        ## SL add requirement for OS version only if SL4
1222 >        #reSL4 = re.compile( r'slc4' )
1223 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1224 >            req+=' && Member("VO-cms-' + \
1225 >                 self.executable_arch + \
1226                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1227 <        if common.analisys_common_info['sites']:
1228 <            if len(common.analisys_common_info['sites'])>0:
1229 <                req = req + ' && ('
937 <                for i in range(len(common.analisys_common_info['sites'])):
938 <                    req = req + 'other.GlueCEInfoHostName == "' \
939 <                         + common.analisys_common_info['sites'][i] + '"'
940 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
941 <                        req = req + ' || '
942 <            req = req + ')'
943 <        #print "req = ", req
1227 >
1228 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1229 >
1230          return req
1231  
1232      def configFilename(self):
1233          """ return the config filename """
1234          return self.name()+'.cfg'
1235  
950    ### OLI_DANIELE
1236      def wsSetupCMSOSGEnvironment_(self):
1237          """
1238          Returns part of a job script which is prepares
1239          the execution environment and which is common for all CMS jobs.
1240          """
1241 <        txt = '\n'
1242 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1243 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1244 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1245 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1246 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1247 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1248 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1249 <        txt += '   else\n'
1250 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1251 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1252 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
968 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
969 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
970 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
971 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
972 <        txt += '       exit 1\n'
1241 >        txt = '    echo ">>> setup CMS OSG environment:"\n'
1242 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1243 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1244 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1245 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1246 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1247 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1248 >        txt += '    else\n'
1249 >        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1250 >        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1251 >        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1252 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1253          txt += '\n'
1254 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1255 <        txt += '       cd $RUNTIME_AREA\n'
1256 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1257 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1258 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1259 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1260 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1261 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1262 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1263 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
984 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
985 <        txt += '       fi\n'
1254 >        txt += '        cd $RUNTIME_AREA\n'
1255 >        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1256 >        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1257 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
1258 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1259 >        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1260 >        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1261 >        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1262 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1263 >        txt += '        fi\n'
1264          txt += '\n'
1265 <        txt += '       exit 1\n'
1266 <        txt += '   fi\n'
1265 >        txt += '        exit 1\n'
1266 >        txt += '    fi\n'
1267          txt += '\n'
1268 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1269 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1268 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1269 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1270  
1271          return txt
1272 <
1272 >
1273      ### OLI_DANIELE
1274      def wsSetupCMSLCGEnvironment_(self):
1275          """
1276          Returns part of a job script which is prepares
1277          the execution environment and which is common for all CMS jobs.
1278          """
1279 <        txt  = '   \n'
1280 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1281 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1282 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1283 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1284 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1285 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1286 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1287 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1288 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1289 <        txt += '       exit 1\n'
1290 <        txt += '   else\n'
1291 <        txt += '       echo "Sourcing environment... "\n'
1292 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1293 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1294 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1295 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1296 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1297 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1298 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1299 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1300 <        txt += '           exit 1\n'
1301 <        txt += '       fi\n'
1302 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1303 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1304 <        txt += '       result=$?\n'
1305 <        txt += '       if [ $result -ne 0 ]; then\n'
1306 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1307 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1308 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1309 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1310 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1311 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1312 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1313 <        txt += '           exit 1\n'
1314 <        txt += '       fi\n'
1315 <        txt += '   fi\n'
1316 <        txt += '   \n'
1317 <        txt += '   string=`cat /etc/redhat-release`\n'
1318 <        txt += '   echo $string\n'
1319 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1320 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1321 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1322 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1323 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1324 <        txt += '   else\n'
1325 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1326 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1327 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1328 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1329 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1330 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1331 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1332 <        txt += '       exit 1\n'
1333 <        txt += '   fi\n'
1334 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1335 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1279 >        txt = '    echo ">>> setup CMS LCG environment:"\n'
1280 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1281 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1282 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1283 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1284 >        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1285 >        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1286 >        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1287 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1288 >        txt += '        exit 1\n'
1289 >        txt += '    else\n'
1290 >        txt += '        echo "Sourcing environment... "\n'
1291 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1292 >        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1293 >        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1294 >        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1295 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1296 >        txt += '            exit 1\n'
1297 >        txt += '        fi\n'
1298 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1299 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1300 >        txt += '        result=$?\n'
1301 >        txt += '        if [ $result -ne 0 ]; then\n'
1302 >        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1303 >        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1304 >        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1305 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1306 >        txt += '            exit 1\n'
1307 >        txt += '        fi\n'
1308 >        txt += '    fi\n'
1309 >        txt += '    \n'
1310 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1311 >        return txt
1312 >
    ### FEDE FOR DBS OUTPUT PUBLICATION
    def modifyReport(self, nj):
        """
        insert the part of the script that modifies the FrameworkJob Report

        Returns a shell fragment that runs ModifyJobReport.py so the job
        report carries the publication information (LFN, processed dataset
        name, PSet hash, SE name/path) needed for DBS output publication.
        When USER.publish_data is unset or 0, the fragment only prints a
        "no data publication required" message.

        NOTE(review): the `nj` argument is not used in this method.
        """

        txt = ''
        try:
            # USER.publish_data is optional; absence means "do not publish".
            publish_data = int(self.cfg_params['USER.publish_data'])
        except KeyError:
            publish_data = 0
        if (publish_data == 1):
            txt += 'echo ">>> Modify Job Report:" \n'
            ################ FEDE FOR DBS2 #############################################
            # The script shipped in the sandbox may have lost its exec bit.
            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
            #############################################################################

            # Guard against SE / SE_PATH being unset on the worker node so the
            # positional arguments below do not shift.
            txt += 'if [ -z "$SE" ]; then\n'
            txt += '    SE="" \n'
            txt += 'fi \n'
            txt += 'if [ -z "$SE_PATH" ]; then\n'
            txt += '    SE_PATH="" \n'
            txt += 'fi \n'
            txt += 'echo "SE = $SE"\n'
            txt += 'echo "SE_PATH = $SE_PATH"\n'

            processedDataset = self.cfg_params['USER.publish_data_name']
            txt += 'ProcessedDataset='+processedDataset+'\n'
            #### LFN=/store/user/<user>/processedDataset_PSETHASH
            # Derive the LFN from the SE path; an empty SE_PATH means the stage
            # out failed, so the file is filed under /copy_problems/.
            txt += 'if [ "$SE_PATH" == "" ]; then\n'
            #### FEDE: added slash in LFN ##############
            txt += '    FOR_LFN=/copy_problems/ \n'
            txt += 'else \n'
            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
            txt += '    FOR_LFN=/store$tmp \n'
            txt += 'fi \n'
            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'

            # On success ModifyJobReport.py writes NewFrameworkJobReport.xml,
            # which replaces the original report; on failure flag the job.
            txt += 'modifyReport_result=$?\n'
            txt += 'echo modifyReport_result = $modifyReport_result\n'
            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
            txt += '    exit_status=1\n'
            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
            txt += 'else\n'
            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
            txt += 'fi\n'
        else:
            txt += 'echo "no data publication required"\n'
        return txt
1369 >
1370 >    def cleanEnv(self):
1371 >        txt = ''
1372 >        txt += 'if [ $middleware == OSG ]; then\n'
1373 >        txt += '    cd $RUNTIME_AREA\n'
1374 >        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1375 >        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1376 >        txt += '    /bin/rm -rf $WORKING_DIR\n'
1377 >        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1378 >        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1379 >        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1380 >        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1381 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1382 >        txt += '    fi\n'
1383 >        txt += 'fi\n'
1384 >        txt += '\n'
1385          return txt
1386  
1387      def setParam_(self, param, value):
# Line 1065 | Line 1392 | class Cmssw(JobType):
1392  
1393      def setTaskid_(self):
1394          self._taskId = self.cfg_params['taskId']
1395 <        
1395 >
    def getTaskid(self):
        """Return the task identifier previously stored by setTaskid_()."""
        return self._taskId
1398 +
1399 +    def uniquelist(self, old):
1400 +        """
1401 +        remove duplicates from a list
1402 +        """
1403 +        nd={}
1404 +        for e in old:
1405 +            nd[e]=0
1406 +        return nd.keys()
1407 +
1408 +
1409 +    def checkOut(self, limit):
1410 +        """
1411 +        check the dimension of the output files
1412 +        """
1413 +        txt += 'echo ">>> Starting output sandbox limit check :"\n'
1414 +        allOutFiles = ""
1415 +        listOutFiles = []
1416 +        for fileOut in (self.output_file+self.output_file_sandbox):
1417 +             if fileOut.find('crab_fjr') == -1:
1418 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1419 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1420 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1421 +        txt += 'ls -gGhrta;\n'
1422 +        txt += 'sum=0;\n'
1423 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1424 +        txt += '    if [ -e $file ]; then\n'
1425 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1426 +        txt += '        sum=`expr $sum + $tt`\n'
1427 +        txt += '    else\n'
1428 +        txt += '        echo "WARNING: output file $file not found!"\n'
1429 +        txt += '    fi\n'
1430 +        txt += 'done\n'
1431 +        txt += 'echo "Total Output dimension: $sum";\n'
1432 +        txt += 'limit='+str(limit)+';\n'
1433 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1434 +        txt += 'if [ $limit -lt $sum ]; then\n'
1435 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1436 +        txt += '    echo "         checking the output file sizes..."\n'
1437 +        """
1438 +        txt += '    dim=0;\n'
1439 +        txt += '    exclude=0;\n'
1440 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1441 +        txt += '        sumTemp=0;\n'
1442 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1443 +        txt += '            if [ $file != $file2 ]; then\n'
1444 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1445 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1446 +        txt += '            fi\n'
1447 +        txt += '        done\n'
1448 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1449 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1450 +        txt += '                dim=$sumTemp;\n'
1451 +        txt += '                exclude=$file;\n'
1452 +        txt += '            fi\n'
1453 +        txt += '        fi\n'
1454 +        txt += '    done\n'
1455 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1456 +        """
1457 +        txt += '    tot=0;\n'
1458 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1459 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1460 +        txt += '        tot=`expr $tot + $tt`;\n'
1461 +        txt += '        if [ $limit -lt $tot ]; then\n'
1462 +        txt += '            tot=`expr $tot - $tt`;\n'
1463 +        txt += '            fileLast=$file;\n'
1464 +        txt += '            break;\n'
1465 +        txt += '        fi\n'
1466 +        txt += '    done\n'
1467 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1468 +        txt += '    flag=0;\n'
1469 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1470 +        txt += '        if [ $fileLast = $filess ]; then\n'
1471 +        txt += '            flag=1;\n'
1472 +        txt += '        fi\n'
1473 +        txt += '        if [ $flag -eq 1 ]; then\n'
1474 +        txt += '            rm -f $filess;\n'
1475 +        txt += '        fi\n'
1476 +        txt += '    done\n'
1477 +        txt += '    ls -agGhrt;\n'
1478 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1479 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1480 +        txt += '    exit_status=70000;\n'
1481 +        txt += 'else'
1482 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1483 +        txt += 'fi\n'
1484 +        txt += 'echo "Ending output sandbox limit check"\n'
1485 +        return txt

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines