ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.26 by slacapra, Thu Jul 6 11:08:46 2006 UTC vs.
Revision 1.132 by ewv, Mon Oct 29 21:03:11 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 < import math
5 > from BlackWhiteListParser import BlackWhiteListParser
6   import common
7 import PsetManipulator  
8
9 import DBSInfo_EDM
10 import DataDiscovery_EDM
11 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
21        self.analisys_common_info = {}
22        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18 +
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27 +        # number of jobs requested to be created; limits job splitting
28 +        self.ncjobs = ncjobs
29 +
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
28        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      #script use case Da
41 +        self.datasetPath = '' #script use case Da
42  
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 +            raise CrabException(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +
61 +        common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
37        common.analisys_common_info['sw_version'] = self.version
38        ### FEDE
39        common.analisys_common_info['copy_input_data'] = 0
40        common.analisys_common_info['events_management'] = 1
63  
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67 +        try:
68 +            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 +        except KeyError:
70 +            self.use_dbs_1 = 0
71 +
72          try:
73              tmp =  cfg_params['CMSSW.datasetpath']
74              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 50 | Line 79 | class Cmssw(JobType):
79                  self.datasetPath = tmp
80                  self.selectNoInput = 0
81          except KeyError:
82 <            msg = "Error: datasetpath not defined "  
82 >            msg = "Error: datasetpath not defined "
83              raise CrabException(msg)
84  
85          # ML monitoring
# Line 59 | Line 88 | class Cmssw(JobType):
88              self.setParam_('dataset', 'None')
89              self.setParam_('owner', 'None')
90          else:
91 <            datasetpath_split = self.datasetPath.split("/")
92 <            self.setParam_('dataset', datasetpath_split[1])
93 <            self.setParam_('owner', datasetpath_split[-1])
91 >            try:
92 >                datasetpath_split = self.datasetPath.split("/")
93 >                # standard style
94 >                self.setParam_('datasetFull', self.datasetPath)
95 >                if self.use_dbs_1 == 1 :
96 >                    self.setParam_('dataset', datasetpath_split[1])
97 >                    self.setParam_('owner', datasetpath_split[-1])
98 >                else:
99 >                    self.setParam_('dataset', datasetpath_split[1])
100 >                    self.setParam_('owner', datasetpath_split[2])
101 >            except:
102 >                self.setParam_('dataset', self.datasetPath)
103 >                self.setParam_('owner', self.datasetPath)
104  
105          self.setTaskid_()
106          self.setParam_('taskId', self.cfg_params['taskId'])
# Line 85 | Line 124 | class Cmssw(JobType):
124          try:
125              self.pset = cfg_params['CMSSW.pset']
126              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if (not os.path.exists(self.pset)):
128 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
127 >            if self.pset.lower() != 'none' :
128 >                if (not os.path.exists(self.pset)):
129 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 >            else:
131 >                self.pset = None
132          except KeyError:
133              raise CrabException("PSet file missing. Cannot run cmsRun ")
134  
135          # output files
136 +        ## stuff which must be returned always via sandbox
137 +        self.output_file_sandbox = []
138 +
139 +        # add fjr report by default via sandbox
140 +        self.output_file_sandbox.append(self.fjrFileName)
141 +
142 +        # other output files to be returned via sandbox or copied to SE
143          try:
144              self.output_file = []
96
145              tmp = cfg_params['CMSSW.output_file']
146              if tmp != '':
147                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 103 | Line 151 | class Cmssw(JobType):
151                      self.output_file.append(tmp)
152                      pass
153              else:
154 <                log.message("No output file defined: only stdout/err will be available")
154 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155                  pass
156              pass
157          except KeyError:
158 <            log.message("No output file defined: only stdout/err will be available")
158 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159              pass
160  
161          # script_exe file as additional file in inputSandbox
162          try:
163              self.scriptExe = cfg_params['USER.script_exe']
116            self.additional_inbox_files.append(self.scriptExe)
164              if self.scriptExe != '':
165                 if not os.path.isfile(self.scriptExe):
166 <                  msg ="WARNING. file "+self.scriptExe+" not found"
166 >                  msg ="ERROR. file "+self.scriptExe+" not found"
167                    raise CrabException(msg)
168 +               self.additional_inbox_files.append(string.strip(self.scriptExe))
169          except KeyError:
170 <           pass
171 <                  
170 >            self.scriptExe = ''
171 >
172 >        #CarlosDaniele
173 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 >           msg ="Error. script_exe  not defined"
175 >           raise CrabException(msg)
176 >
177          ## additional input files
178          try:
179 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
179 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180              for tmp in tmpAddFiles:
181 <                if not os.path.exists(tmp):
182 <                    raise CrabException("Additional input file not found: "+tmp)
183 <                tmp=string.strip(tmp)
184 <                self.additional_inbox_files.append(tmp)
181 >                tmp = string.strip(tmp)
182 >                dirname = ''
183 >                if not tmp[0]=="/": dirname = "."
184 >                files = []
185 >                if string.find(tmp,"*")>-1:
186 >                    files = glob.glob(os.path.join(dirname, tmp))
187 >                    if len(files)==0:
188 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
189 >                else:
190 >                    files.append(tmp)
191 >                for file in files:
192 >                    if not os.path.exists(file):
193 >                        raise CrabException("Additional input file not found: "+file)
194 >                    pass
195 >                    # fname = string.split(file, '/')[-1]
196 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 >                    # shutil.copyfile(file, storedFile)
198 >                    self.additional_inbox_files.append(string.strip(file))
199                  pass
200              pass
201 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202          except KeyError:
203              pass
204  
205          # files per job
206          try:
207 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
208 <            self.selectFilesPerJob = 1
207 >            if (cfg_params['CMSSW.files_per_jobs']):
208 >                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209          except KeyError:
210 <            self.filesPerJob = 0
143 <            self.selectFilesPerJob = 0
210 >            pass
211  
212          ## Events per job
213          try:
# Line 149 | Line 216 | class Cmssw(JobType):
216          except KeyError:
217              self.eventsPerJob = -1
218              self.selectEventsPerJob = 0
219 <    
219 >
220          ## number of jobs
221          try:
222              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
# Line 158 | Line 225 | class Cmssw(JobType):
225              self.theNumberOfJobs = 0
226              self.selectNumberOfJobs = 0
227  
228 +        try:
229 +            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230 +            self.selectTotalNumberEvents = 1
231 +        except KeyError:
232 +            self.total_number_of_events = 0
233 +            self.selectTotalNumberEvents = 0
234 +
235 +        if self.pset != None: #CarlosDaniele
236 +             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237 +                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238 +                 raise CrabException(msg)
239 +        else:
240 +             if (self.selectNumberOfJobs == 0):
241 +                 msg = 'Must specify  number_of_jobs.'
242 +                 raise CrabException(msg)
243 +
244          ## source seed for pythia
245          try:
246              self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
# Line 165 | Line 248 | class Cmssw(JobType):
248              self.sourceSeed = None
249              common.logger.debug(5,"No seed given")
250  
168        if not (self.selectFilesPerJob + self.selectEventsPerJob + self.selectNumberOfJobs == 1 ):
169            msg = 'Must define either files_per_jobs or events_per_job or number_of_jobs'
170            raise CrabException(msg)
171
251          try:
252 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
252 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253          except KeyError:
254 <            msg = 'Must define total_number_of_events'
255 <            raise CrabException(msg)
256 <        
178 <        CEBlackList = []
254 >            self.sourceSeedVtx = None
255 >            common.logger.debug(5,"No vertex seed given")
256 >
257          try:
258 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
181 <            for tmp in tmpBad:
182 <                tmp=string.strip(tmp)
183 <                CEBlackList.append(tmp)
258 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259          except KeyError:
260 <            pass
260 >            self.sourceSeedG4 = None
261 >            common.logger.debug(5,"No g4 sim hits seed given")
262  
263 <        self.reCEBlackList=[]
264 <        for bad in CEBlackList:
189 <            self.reCEBlackList.append(re.compile( bad ))
190 <
191 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
192 <
193 <        CEWhiteList = []
194 <        try:
195 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
196 <            for tmp in tmpGood:
197 <                tmp=string.strip(tmp)
198 <                CEWhiteList.append(tmp)
263 >        try:
264 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265          except KeyError:
266 <            pass
266 >            self.sourceSeedMix = None
267 >            common.logger.debug(5,"No mix seed given")
268  
269 <        #print 'CEWhiteList: ',CEWhiteList
270 <        self.reCEWhiteList=[]
271 <        for Good in CEWhiteList:
272 <            self.reCEWhiteList.append(re.compile( Good ))
273 <
274 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
275 <
276 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
269 >        try:
270 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
271 >        except KeyError:
272 >            self.firstRun = None
273 >            common.logger.debug(5,"No first run given")
274 >        if self.pset != None: #CarlosDaniele
275 >            import PsetManipulator as pp
276 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
277  
278          #DBSDLS-start
279 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
279 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
280          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
281          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
282 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
283          ## Perform the data location and discovery (based on DBS/DLS)
284          ## SL: Don't if NONE is specified as input (pythia use case)
285 <        common.analisys_common_info['sites']=None
285 >        blockSites = {}
286          if self.datasetPath:
287 <            self.DataDiscoveryAndLocation(cfg_params)
288 <        #DBSDLS-end          
287 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
288 >        #DBSDLS-end
289  
290          self.tgzNameWithPath = self.getTarBall(self.executable)
291 <    
291 >
292          ## Select Splitting
293 <        if self.selectNoInput: self.jobSplittingNoInput()
294 <        elif self.selectFilesPerJob or self.selectEventsPerJob or self.selectNumberOfJobs: self.jobSplittingPerFiles()
293 >        if self.selectNoInput:
294 >            if self.pset == None: #CarlosDaniele
295 >                self.jobSplittingForScript()
296 >            else:
297 >                self.jobSplittingNoInput()
298          else:
299 <            msg = 'Don\'t know how to split...'
229 <            raise CrabException(msg)
299 >            self.jobSplittingByBlocks(blockSites)
300  
301          # modify Pset
302 <        try:
303 <            if (self.datasetPath): # standard job
304 <                #self.PsetEdit.maxEvent(self.eventsPerJob)
305 <                # always process all events in a file
306 <                self.PsetEdit.maxEvent("-1")
307 <                self.PsetEdit.inputModule("INPUT")
308 <
309 <            else:  # pythia like job
310 <                self.PsetEdit.maxEvent(self.eventsPerJob)
311 <                if (self.sourceSeed) :
312 <                    self.PsetEdit.pythiaSeed("INPUT")
313 <        
314 <            self.PsetEdit.psetWriter(self.configFilename())
315 <        except:
316 <            msg='Error while manipuliating ParameterSet: exiting...'
317 <            raise CrabException(msg)
302 >        if self.pset != None: #CarlosDaniele
303 >            try:
304 >                if (self.datasetPath): # standard job
305 >                    # allow processing a fraction of the events in a file
306 >                    PsetEdit.inputModule("INPUTFILE")
307 >                    PsetEdit.maxEvent(0)
308 >                    PsetEdit.skipEvent(0)
309 >                else:  # pythia like job
310 >                    PsetEdit.maxEvent(self.eventsPerJob)
311 >                    if (self.firstRun):
312 >                        PsetEdit.pythiaFirstRun(0)  #First Run
313 >                    if (self.sourceSeed) :
314 >                        PsetEdit.pythiaSeed(0)
315 >                        if (self.sourceSeedVtx) :
316 >                            PsetEdit.vtxSeed(0)
317 >                        if (self.sourceSeedG4) :
318 >                            PsetEdit.g4Seed(0)
319 >                        if (self.sourceSeedMix) :
320 >                            PsetEdit.mixSeed(0)
321 >                # add FrameworkJobReport to parameter-set
322 >                PsetEdit.addCrabFJR(self.fjrFileName)
323 >                PsetEdit.psetWriter(self.configFilename())
324 >            except:
325 >                msg='Error while manipuliating ParameterSet: exiting...'
326 >                raise CrabException(msg)
327  
328      def DataDiscoveryAndLocation(self, cfg_params):
329  
330 +        import DataDiscovery
331 +        import DataDiscovery_DBS2
332 +        import DataLocation
333          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
334  
335          datasetPath=self.datasetPath
336  
255        ## TODO
256        dataTiersList = ""
257        dataTiers = dataTiersList.split(',')
258
337          ## Contact the DBS
338 +        common.logger.message("Contacting Data Discovery Services ...")
339          try:
340 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
340 >
341 >            if self.use_dbs_1 == 1 :
342 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
343 >            else :
344 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
345              self.pubdata.fetchDBSInfo()
346  
347 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
347 >        except DataDiscovery.NotExistingDatasetError, ex :
348              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
349              raise CrabException(msg)
350 <
351 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
350 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
351 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
352 >            raise CrabException(msg)
353 >        except DataDiscovery.DataDiscoveryError, ex:
354 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
355 >            raise CrabException(msg)
356 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
357 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
358 >            raise CrabException(msg)
359 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
360              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
361              raise CrabException(msg)
362 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
363 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
362 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
363 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
364              raise CrabException(msg)
365  
366 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
367 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
368 <        common.logger.message("Required data are :"+self.datasetPath)
278 <
279 <        filesbyblock=self.pubdata.getFiles()
280 < #        print filesbyblock
281 <        self.AllInputFiles=filesbyblock.values()
282 <        self.files = self.AllInputFiles        
366 >        self.filesbyblock=self.pubdata.getFiles()
367 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
368 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
369  
370          ## get max number of events
371 <        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
286 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
287 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
371 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
372  
373          ## Contact the DLS and build a list of sites hosting the fileblocks
374          try:
375 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
375 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
376              dataloc.fetchDLSInfo()
377 <        except DataLocation_EDM.DataLocationError , ex:
377 >        except DataLocation.DataLocationError , ex:
378              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
379              raise CrabException(msg)
296        
297        allsites=dataloc.getSites()
298        common.logger.debug(5,"sites are %s"%allsites)
299        sites=self.checkBlackList(allsites)
300        common.logger.debug(5,"sites are (after black list) %s"%sites)
301        sites=self.checkWhiteList(sites)
302        common.logger.debug(5,"sites are (after white list) %s"%sites)
380  
304        if len(sites)==0:
305            msg = 'No sites hosting all the needed data! Exiting... '
306            raise CrabException(msg)
381  
382 <        common.logger.message("List of Sites ("+str(len(sites))+") hosting the data : "+str(sites))
383 <        common.logger.debug(6, "List of Sites: "+str(sites))
384 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
385 <        self.setParam_('TargetCE', ','.join(sites))
386 <        return
387 <    
388 <    def jobSplittingPerFiles(self):
389 <        """
390 <        Perform job splitting based on number of files to be accessed per job
391 <        """
392 <        common.logger.debug(5,'Splitting per input files')
393 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
320 <        common.logger.message('Available '+str(self.maxEvents)+' events in total ')
321 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
322 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
323 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job')
382 >        sites = dataloc.getSites()
383 >        allSites = []
384 >        listSites = sites.values()
385 >        for listSite in listSites:
386 >            for oneSite in listSite:
387 >                allSites.append(oneSite)
388 >        allSites = self.uniquelist(allSites)
389 >
390 >        # screen output
391 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
392 >
393 >        return sites
394  
395 <        ## if asked to process all events, do it
396 <        if self.total_number_of_events == -1:
397 <            self.total_number_of_events=self.maxEvents
395 >    def jobSplittingByBlocks(self, blockSites):
396 >        """
397 >        Perform job splitting. Jobs run over an integer number of files
398 >        and no more than one block.
399 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
400 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
401 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
402 >                  self.maxEvents, self.filesbyblock
403 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
404 >              self.total_number_of_jobs - Total # of jobs
405 >              self.list_of_args - File(s) job will run on (a list of lists)
406 >        """
407 >
408 >        # ---- Handle the possible job splitting configurations ---- #
409 >        if (self.selectTotalNumberEvents):
410 >            totalEventsRequested = self.total_number_of_events
411 >        if (self.selectEventsPerJob):
412 >            eventsPerJobRequested = self.eventsPerJob
413 >            if (self.selectNumberOfJobs):
414 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
415 >
416 >        # If user requested all the events in the dataset
417 >        if (totalEventsRequested == -1):
418 >            eventsRemaining=self.maxEvents
419 >        # If user requested more events than are in the dataset
420 >        elif (totalEventsRequested > self.maxEvents):
421 >            eventsRemaining = self.maxEvents
422 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
423 >        # If user requested less events than are in the dataset
424          else:
425 <            if self.total_number_of_events>self.maxEvents:
330 <                common.logger.message("Asked "+str(self.total_number_of_events)+" but only "+str(self.maxEvents)+" available.")
331 <                self.total_number_of_events=self.maxEvents
332 <            pass
425 >            eventsRemaining = totalEventsRequested
426  
427 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
428 <        n_tot_files = (len(self.files[0]))
429 <        ## SL: this is wrong if the files have different number of events
337 <        evPerFile = int(self.maxEvents)/n_tot_files
338 <
339 <        common.logger.debug(5,'Events per File '+str(evPerFile))
340 <
341 <        ## compute job splitting parameters: filesPerJob, eventsPerJob and theNumberOfJobs
342 <        if self.selectFilesPerJob:
343 <            ## user define files per event.
344 <            filesPerJob = self.filesPerJob
345 <            eventsPerJob = filesPerJob*evPerFile
346 <            theNumberOfJobs = int(self.total_number_of_events*1./eventsPerJob)
347 <            check = int(self.total_number_of_events) - (theNumberOfJobs*eventsPerJob)
348 <            if check > 0:
349 <                theNumberOfJobs +=1
350 <                filesLastJob = int(check*1./evPerFile+0.5)
351 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
352 <            else:
353 <                filesLastJob = filesPerJob
427 >        # If user requested more events per job than are in the dataset
428 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
429 >            eventsPerJobRequested = self.maxEvents
430  
431 <        elif self.selectNumberOfJobs:
432 <            ## User select the number of jobs: last might be bigger to match request of events
357 <            theNumberOfJobs =  self.theNumberOfJobs
358 <
359 <            eventsPerJob = self.total_number_of_events/theNumberOfJobs
360 <            filesPerJob = int(eventsPerJob/evPerFile)
361 <            if (filesPerJob==0) : filesPerJob=1
362 <            check = int(self.total_number_of_events) - (int(theNumberOfJobs)*filesPerJob*evPerFile)
363 <            if not check == 0:
364 <                if check<0:
365 <                    missingFiles = int(check/evPerFile)
366 <                    additionalJobs = int(missingFiles/filesPerJob)
367 <                    #print missingFiles, additionalJobs
368 <                    theNumberOfJobs+=additionalJobs
369 <                    common.logger.message('Warning: will create only '+str(theNumberOfJobs)+' jobs')
370 <                    check = int(self.total_number_of_events) - (int(theNumberOfJobs)*filesPerJob*evPerFile)
371 <                    
372 <                if check >0 :
373 <                    filesLastJob = filesPerJob+int(check*1./evPerFile+0.5)
374 <                    common.logger.message('Warning: last job will be created with '+str(filesLastJob*evPerFile)+' events')
375 <                else:
376 <                    filesLastJob = filesPerJob
377 <            else:
378 <                filesLastJob = filesPerJob
379 <        elif self.selectEventsPerJob:
380 <            # SL case if asked events per job
381 <            ## estimate the number of files per job to match the user requirement
382 <            filesPerJob = int(float(self.eventsPerJob)/float(evPerFile))
383 <            if filesPerJob==0: filesPerJob=1
384 <            common.logger.debug(5,"filesPerJob "+str(filesPerJob))
385 <            if (filesPerJob==0): filesPerJob=1
386 <            eventsPerJob=filesPerJob*evPerFile
387 <            theNumberOfJobs = int(self.total_number_of_events)/int(eventsPerJob)
388 <            check = int(self.total_number_of_events) - (int(theNumberOfJobs)*eventsPerJob)
389 <            if not check == 0:
390 <                missingFiles = int(check/evPerFile)
391 <                additionalJobs = int(missingFiles/filesPerJob)
392 <                if ( additionalJobs>0) : theNumberOfJobs+=additionalJobs
393 <                check = int(self.total_number_of_events) - (int(theNumberOfJobs)*eventsPerJob)
394 <                if not check == 0:
395 <                    if (check <0 ):
396 <                        filesLastJob = filesPerJob+int(check*1./evPerFile-0.5)
397 <                    else:
398 <                        theNumberOfJobs+=1
399 <                        filesLastJob = int(check*1./evPerFile+0.5)
431 >        # For user info at end
432 >        totalEventCount = 0
433  
434 <                    common.logger.message('Warning: last job will be created with '+str(filesLastJob*evPerFile)+' events')
435 <                else:
403 <                    filesLastJob = filesPerJob
404 <            else:
405 <                filesLastJob = filesPerJob
406 <        
407 <        self.total_number_of_jobs = theNumberOfJobs
434 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
435 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
436  
437 <        totalEventsToBeUsed=theNumberOfJobs*filesPerJob*evPerFile
438 <        if not check == 0:
411 <        #    print (theNumberOfJobs-1)*filesPerJob*evPerFile,filesLastJob*evPerFile
412 <            totalEventsToBeUsed=(theNumberOfJobs-1)*filesPerJob*evPerFile+filesLastJob*evPerFile
437 >        if (self.selectNumberOfJobs):
438 >            common.logger.message("May not create the exact number_of_jobs requested.")
439  
440 <        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created, each for '+str(filesPerJob*evPerFile)+' events, for a total of '+str(totalEventsToBeUsed)+' events')
440 >        if ( self.ncjobs == 'all' ) :
441 >            totalNumberOfJobs = 999999999
442 >        else :
443 >            totalNumberOfJobs = self.ncjobs
444  
416        totalFilesToBeUsed=filesPerJob*(theNumberOfJobs-1)+filesLastJob
445  
446 <        ## set job arguments (files)
446 >        blocks = blockSites.keys()
447 >        blockCount = 0
448 >        # Backup variable in case self.maxEvents counted events in a non-included block
449 >        numBlocksInDataset = len(blocks)
450 >
451 >        jobCount = 0
452          list_of_lists = []
420        lastFile=0
421        for i in range(0, int(totalFilesToBeUsed), filesPerJob)[:-1]:
422            parString = "\\{"
423            
424            lastFile=i+filesPerJob
425            params = self.files[0][i: lastFile]
426            for i in range(len(params) - 1):
427                parString += '\\\"' + params[i] + '\\\"\,'
428            
429            parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
430            list_of_lists.append([parString])
431            pass
453  
454 <        ## last job
455 <        parString = "\\{"
456 <        
457 <        params = self.files[0][lastFile: lastFile+filesLastJob]
458 <        for i in range(len(params) - 1):
459 <            parString += '\\\"' + params[i] + '\\\"\,'
460 <        
461 <        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
462 <        list_of_lists.append([parString])
463 <        pass
454 >        # dictionary mapping each block to the list of job numbers created from it
455 >        jobsOfBlock = {}
456 >
457 >        # ---- Iterate over the blocks in the dataset until ---- #
458 >        # ---- we've met the requested total # of events    ---- #
459 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
460 >            block = blocks[blockCount]
461 >            blockCount += 1
462 >            if block not in jobsOfBlock.keys() :
463 >                jobsOfBlock[block] = []
464 >
465 >            if self.eventsbyblock.has_key(block) :
466 >                numEventsInBlock = self.eventsbyblock[block]
467 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
468 >
469 >                files = self.filesbyblock[block]
470 >                numFilesInBlock = len(files)
471 >                if (numFilesInBlock <= 0):
472 >                    continue
473 >                fileCount = 0
474 >
475 >                # ---- New block => New job ---- #
476 >                parString = ""
477 >                # counter for number of events in files currently worked on
478 >                filesEventCount = 0
479 >                # flag if next while loop should touch new file
480 >                newFile = 1
481 >                # job event counter
482 >                jobSkipEventCount = 0
483 >
484 >                # ---- Iterate over the files in the block until we've met the requested ---- #
485 >                # ---- total # of events or we've gone over all the files in this block  ---- #
486 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
487 >                    file = files[fileCount]
488 >                    if newFile :
489 >                        try:
490 >                            numEventsInFile = self.eventsbyfile[file]
491 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
492 >                            # increase filesEventCount
493 >                            filesEventCount += numEventsInFile
494 >                            # Add file to current job
495 >                            parString += '\\\"' + file + '\\\"\,'
496 >                            newFile = 0
497 >                        except KeyError:
498 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
499 >
500 >
501 >                    # if fewer events remain in the file(s) than eventsPerJobRequested
502 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
503 >                        # if last file in block
504 >                        if ( fileCount == numFilesInBlock-1 ) :
505 >                            # end job using last file, use remaining events in block
506 >                            # close job and touch new file
507 >                            fullString = parString[:-2]
508 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510 >                            self.jobDestination.append(blockSites[block])
511 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512 >                            # fill jobs of block dictionary
513 >                            jobsOfBlock[block].append(jobCount+1)
514 >                            # increase job counter, update event totals and reset the skip count
515 >                            jobCount = jobCount + 1
516 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518 >                            jobSkipEventCount = 0
519 >                            # reset file
520 >                            parString = ""
521 >                            filesEventCount = 0
522 >                            newFile = 1
523 >                            fileCount += 1
524 >                        else :
525 >                            # go to next file
526 >                            newFile = 1
527 >                            fileCount += 1
528 >                    # if events in file equal to eventsPerJobRequested
529 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530 >                        # close job and touch new file
531 >                        fullString = parString[:-2]
532 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
533 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
534 >                        self.jobDestination.append(blockSites[block])
535 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
536 >                        jobsOfBlock[block].append(jobCount+1)
537 >                        # increase job counter, update event totals and reset the skip count
538 >                        jobCount = jobCount + 1
539 >                        totalEventCount = totalEventCount + eventsPerJobRequested
540 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
541 >                        jobSkipEventCount = 0
542 >                        # reset file
543 >                        parString = ""
544 >                        filesEventCount = 0
545 >                        newFile = 1
546 >                        fileCount += 1
547 >
548 >                    # if more events in file remain than eventsPerJobRequested
549 >                    else :
550 >                        # close job but don't touch new file
551 >                        fullString = parString[:-2]
552 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
553 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
554 >                        self.jobDestination.append(blockSites[block])
555 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
556 >                        jobsOfBlock[block].append(jobCount+1)
557 >                        # increase counter
558 >                        jobCount = jobCount + 1
559 >                        totalEventCount = totalEventCount + eventsPerJobRequested
560 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
561 >                        # calculate skip events for last file
562 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
563 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
564 >                        # remove all but the last file
565 >                        filesEventCount = self.eventsbyfile[file]
566 >                        parString = ""
567 >                        parString += '\\\"' + file + '\\\"\,'
568 >                    pass # END if
569 >                pass # END while (iterate over files in the block)
570 >        pass # END while (iterate over blocks in the dataset)
571 >        self.ncjobs = self.total_number_of_jobs = jobCount
572 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
573 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
574 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
575 >
576 >        # screen output
577 >        screenOutput = "List of jobs and available destination sites:\n\n"
578 >
579 >        # keep track of blocks with no sites so that a warning can be printed at the end
580 >        noSiteBlock = []
581 >        bloskNoSite = []
582 >
583 >        blockCounter = 0
584 >        for block in blocks:
585 >            if block in jobsOfBlock.keys() :
586 >                blockCounter += 1
587 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
588 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
589 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
590 >                    bloskNoSite.append( blockCounter )
591 >
592 >        common.logger.message(screenOutput)
593 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
594 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
595 >            virgola = ""
596 >            if len(bloskNoSite) > 1:
597 >                virgola = ","
598 >            for block in bloskNoSite:
599 >                msg += ' ' + str(block) + virgola
600 >            msg += '\n               Related jobs:\n                 '
601 >            virgola = ""
602 >            if len(noSiteBlock) > 1:
603 >                virgola = ","
604 >            for range_jobs in noSiteBlock:
605 >                msg += str(range_jobs) + virgola
606 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
607 >            common.logger.message(msg)
608  
609          self.list_of_args = list_of_lists
445        # print self.list_of_args[0]
610          return
611  
612      def jobSplittingNoInput(self):
# Line 450 | Line 614 | class Cmssw(JobType):
614          Perform job splitting based on the number of events per job
615          """
616          common.logger.debug(5,'Splitting per events')
617 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
618 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
619 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
617 >
618 >        if (self.selectEventsPerJob):
619 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
620 >        if (self.selectNumberOfJobs):
621 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
622 >        if (self.selectTotalNumberEvents):
623 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
624  
625          if (self.total_number_of_events < 0):
626              msg='Cannot split jobs per Events with "-1" as total number of events'
627              raise CrabException(msg)
628  
629          if (self.selectEventsPerJob):
630 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
630 >            if (self.selectTotalNumberEvents):
631 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
632 >            elif(self.selectNumberOfJobs) :
633 >                self.total_number_of_jobs =self.theNumberOfJobs
634 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
635 >
636          elif (self.selectNumberOfJobs) :
637              self.total_number_of_jobs = self.theNumberOfJobs
638              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
# Line 471 | Line 644 | class Cmssw(JobType):
644  
645          common.logger.debug(5,'Check  '+str(check))
646  
647 <        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
647 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
648          if check > 0:
649 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but will do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
477 <
649 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
650  
651          # argument is seed number.$i
652          self.list_of_args = []
653          for i in range(self.total_number_of_jobs):
654 <            if (self.sourceSeed):
655 <                self.list_of_args.append([(str(self.sourceSeed)+str(i))])
654 >            ## Since there is no input, any site is good
655 >           # self.jobDestination.append(["Any"])
656 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
657 >            args=[]
658 >            if (self.firstRun):
659 >                    ## pythia first run
660 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
661 >                args.append(str(self.firstRun)+str(i))
662              else:
663 <                self.list_of_args.append([str(i)])
664 <        #print self.list_of_args
663 >                ## no first run
664 >                #self.list_of_args.append([str(i)])
665 >                args.append(str(i))
666 >            if (self.sourceSeed):
667 >                args.append(str(self.sourceSeed)+str(i))
668 >                if (self.sourceSeedVtx):
669 >                    ## + vtx random seed
670 >                    args.append(str(self.sourceSeedVtx)+str(i))
671 >                if (self.sourceSeedG4):
672 >                    ## + G4 random seed
673 >                    args.append(str(self.sourceSeedG4)+str(i))
674 >                if (self.sourceSeedMix):
675 >                    ## + Mix random seed
676 >                    args.append(str(self.sourceSeedMix)+str(i))
677 >                pass
678 >            pass
679 >            self.list_of_args.append(args)
680 >        pass
681 >
682 >        # print self.list_of_args
683 >
684 >        return
685 >
686 >
687 >    def jobSplittingForScript(self):#CarlosDaniele
688 >        """
689 >        Perform job splitting based on the requested number of jobs
690 >        """
691 >        common.logger.debug(5,'Splitting per job')
692 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
693 >
694 >        self.total_number_of_jobs = self.theNumberOfJobs
695 >
696 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
697 >
698 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
699  
700 +        # argument is the job index $i (no random seed is used here)
701 +        self.list_of_args = []
702 +        for i in range(self.total_number_of_jobs):
703 +            ## Since there is no input, any site is good
704 +           # self.jobDestination.append(["Any"])
705 +            self.jobDestination.append([""])
706 +            ## no random seed
707 +            self.list_of_args.append([str(i)])
708          return
709  
710      def split(self, jobParams):
711 <
711 >
712          common.jobDB.load()
713          #### Fabio
714          njobs = self.total_number_of_jobs
# Line 496 | Line 716 | class Cmssw(JobType):
716          # create the empty structure
717          for i in range(njobs):
718              jobParams.append("")
719 <        
719 >
720          for job in range(njobs):
721              jobParams[job] = arglist[job]
722              # print str(arglist[job])
723              # print jobParams[job]
724              common.jobDB.setArguments(job, jobParams[job])
725 +            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
726 +            common.jobDB.setDestination(job, self.jobDestination[job])
727  
728          common.jobDB.save()
729          return
730 <    
730 >
731      def getJobTypeArguments(self, nj, sched):
732          result = ''
733          for i in common.jobDB.arguments(nj):
734              result=result+str(i)+" "
735          return result
736 <  
736 >
737      def numberOfJobs(self):
738          # Fabio
739          return self.total_number_of_jobs
740  
519    def checkBlackList(self, allSites):
520        if len(self.reCEBlackList)==0: return allSites
521        sites = []
522        for site in allSites:
523            common.logger.debug(10,'Site '+site)
524            good=1
525            for re in self.reCEBlackList:
526                if re.search(site):
527                    common.logger.message('CE in black list, skipping site '+site)
528                    good=0
529                pass
530            if good: sites.append(site)
531        if len(sites) == 0:
532            common.logger.debug(3,"No sites found after BlackList")
533        return sites
534
535    def checkWhiteList(self, allSites):
536
537        if len(self.reCEWhiteList)==0: return allSites
538        sites = []
539        for site in allSites:
540            good=0
541            for re in self.reCEWhiteList:
542                if re.search(site):
543                    common.logger.debug(5,'CE in white list, adding site '+site)
544                    good=1
545                if not good: continue
546                sites.append(site)
547        if len(sites) == 0:
548            common.logger.message("No sites found after WhiteList\n")
549        else:
550            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
551        return sites
552
741      def getTarBall(self, exe):
742          """
743          Return the TarBall with lib and exe
744          """
745 <        
745 >
746          # if it already exists, just return it
747 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
747 >        #
748 >        # Marco. Let's start to use relative path for Boss XML files
749 >        #
750 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
751          if os.path.exists(self.tgzNameWithPath):
752              return self.tgzNameWithPath
753  
# Line 570 | Line 761 | class Cmssw(JobType):
761          # First of all declare the user Scram area
762          swArea = self.scram.getSWArea_()
763          #print "swArea = ", swArea
764 <        swVersion = self.scram.getSWVersion()
765 <        #print "swVersion = ", swVersion
764 >        # swVersion = self.scram.getSWVersion()
765 >        # print "swVersion = ", swVersion
766          swReleaseTop = self.scram.getReleaseTop_()
767          #print "swReleaseTop = ", swReleaseTop
768 <        
768 >
769          ## check if working area is release top
770          if swReleaseTop == '' or swArea == swReleaseTop:
771              return
772  
773 <        filesToBeTarred = []
774 <        ## First find the executable
775 <        if (self.executable != ''):
776 <            exeWithPath = self.scram.findFile_(executable)
777 < #           print exeWithPath
778 <            if ( not exeWithPath ):
779 <                raise CrabException('User executable '+executable+' not found')
780 <
781 <            ## then check if it's private or not
782 <            if exeWithPath.find(swReleaseTop) == -1:
783 <                # the exe is private, so we must ship
784 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
785 <                path = swArea+'/'
786 <                exe = string.replace(exeWithPath, path,'')
787 <                filesToBeTarred.append(exe)
788 <                pass
789 <            else:
790 <                # the exe is from release, we'll find it on WN
791 <                pass
792 <
793 <        ## Now get the libraries: only those in local working area
794 <        libDir = 'lib'
795 <        lib = swArea+'/' +libDir
796 <        common.logger.debug(5,"lib "+lib+" to be tarred")
797 <        if os.path.exists(lib):
798 <            filesToBeTarred.append(libDir)
799 <
800 <        ## Now check if module dir is present
801 <        moduleDir = 'module'
802 <        if os.path.isdir(swArea+'/'+moduleDir):
803 <            filesToBeTarred.append(moduleDir)
804 <
805 <        ## Now check if the Data dir is present
806 <        dataDir = 'src/Data/'
807 <        if os.path.isdir(swArea+'/'+dataDir):
808 <            filesToBeTarred.append(dataDir)
809 <
810 <        ## Create the tar-ball
811 <        if len(filesToBeTarred)>0:
812 <            cwd = os.getcwd()
813 <            os.chdir(swArea)
814 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
815 <            for line in filesToBeTarred:
816 <                tarcmd = tarcmd + line + ' '
817 <            cout = runCommand(tarcmd)
818 <            if not cout:
819 <                raise CrabException('Could not create tar-ball')
820 <            os.chdir(cwd)
821 <        else:
822 <            common.logger.debug(5,"No files to be to be tarred")
823 <        
773 >        import tarfile
774 >        try: # create tar ball
775 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
776 >            ## First find the executable
777 >            if (self.executable != ''):
778 >                exeWithPath = self.scram.findFile_(executable)
779 >                if ( not exeWithPath ):
780 >                    raise CrabException('User executable '+executable+' not found')
781 >
782 >                ## then check if it's private or not
783 >                if exeWithPath.find(swReleaseTop) == -1:
784 >                    # the exe is private, so we must ship
785 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
786 >                    path = swArea+'/'
787 >                    # distinguish case when script is in user project area or given by full path somewhere else
788 >                    if exeWithPath.find(path) >= 0 :
789 >                        exe = string.replace(exeWithPath, path,'')
790 >                        tar.add(path+exe,exe)
791 >                    else :
792 >                        tar.add(exeWithPath,os.path.basename(executable))
793 >                    pass
794 >                else:
795 >                    # the exe is from release, we'll find it on WN
796 >                    pass
797 >
798 >            ## Now get the libraries: only those in local working area
799 >            libDir = 'lib'
800 >            lib = swArea+'/' +libDir
801 >            common.logger.debug(5,"lib "+lib+" to be tarred")
802 >            if os.path.exists(lib):
803 >                tar.add(lib,libDir)
804 >
805 >            ## Now check if module dir is present
806 >            moduleDir = 'module'
807 >            module = swArea + '/' + moduleDir
808 >            if os.path.isdir(module):
809 >                tar.add(module,moduleDir)
810 >
811 >            ## Now check if any data dir(s) is present
812 >            swAreaLen=len(swArea)
813 >            for root, dirs, files in os.walk(swArea):
814 >                if "data" in dirs:
815 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
816 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
817 >
818 >            ## Add ProdAgent dir to tar
819 >            paDir = 'ProdAgentApi'
820 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
821 >            if os.path.isdir(pa):
822 >                tar.add(pa,paDir)
823 >
824 >            ### FEDE FOR DBS PUBLICATION
825 >            ## Add PRODCOMMON dir to tar
826 >            prodcommonDir = 'ProdCommon'
827 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
828 >            if os.path.isdir(prodcommonPath):
829 >                tar.add(prodcommonPath,prodcommonDir)
830 >            #############################
831 >
832 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
833 >            tar.close()
834 >        except :
835 >            raise CrabException('Could not create tar-ball')
836 >
837 >        ## check for tarball size
838 >        tarballinfo = os.stat(self.tgzNameWithPath)
839 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
840 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
841 >
842 >        ## create tar-ball with ML stuff
843 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
844 >        try:
845 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
846 >            path=os.environ['CRABDIR'] + '/python/'
847 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
848 >                tar.add(path+file,file)
849 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
850 >            tar.close()
851 >        except :
852 >            raise CrabException('Could not create ML files tar-ball')
853 >
854          return
855 <        
855 >
856 >    def additionalInputFileTgz(self):
857 >        """
858 >        Put all additional files into a tar ball and return its name
859 >        """
860 >        import tarfile
861 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
862 >        tar = tarfile.open(tarName, "w:gz")
863 >        for file in self.additional_inbox_files:
864 >            tar.add(file,string.split(file,'/')[-1])
865 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
866 >        tar.close()
867 >        return tarName
868 >
869      def wsSetupEnvironment(self, nj):
870          """
871          Returns part of a job script which prepares
872          the execution environment for the job 'nj'.
873          """
874          # Prepare JobType-independent part
875 <        txt = ''
876 <  
875 >        txt = ''
876 >
877          ## OLI_Daniele at this level  middleware already known
878  
879 <        txt += 'if [ $middleware == LCG ]; then \n'
879 >        txt += 'if [ $middleware == LCG ]; then \n'
880 >        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
881 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
882 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
883          txt += self.wsSetupCMSLCGEnvironment_()
884          txt += 'elif [ $middleware == OSG ]; then\n'
885 <        txt += '    time=`date -u +"%s"`\n'
886 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
650 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
651 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
652 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
885 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
886 >        txt += '    if [ ! $? == 0 ] ;then\n'
887          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
888 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
889 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
890 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
888 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
889 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
890 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
891          txt += '        rm -f $RUNTIME_AREA/$repo \n'
892          txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
893          txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
894          txt += '        exit 1\n'
895          txt += '    fi\n'
896 +        txt += '    echo "Created working directory: $WORKING_DIR"\n'
897          txt += '\n'
898          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
899          txt += '    cd $WORKING_DIR\n'
900 <        txt += self.wsSetupCMSOSGEnvironment_()
900 >        txt += self.wsSetupCMSOSGEnvironment_()
901 >        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
902 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
903          txt += 'fi\n'
904  
905          # Prepare JobType-specific part
# Line 685 | Line 922 | class Cmssw(JobType):
922          txt += '        cd $RUNTIME_AREA\n'
923          txt += '        /bin/rm -rf $WORKING_DIR\n'
924          txt += '        if [ -d $WORKING_DIR ] ;then\n'
925 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
926 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
927 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
928 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
925 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
926 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
927 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
928 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
929          txt += '            rm -f $RUNTIME_AREA/$repo \n'
930          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
931          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 698 | Line 935 | class Cmssw(JobType):
935          txt += 'fi \n'
936          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
937          txt += 'cd '+self.version+'\n'
938 +        ########## FEDE FOR DBS2 ######################
939 +        txt += 'SOFTWARE_DIR=`pwd`\n'
940 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
941 +        ###############################################
942          ### needed grep for bug in scramv1 ###
943 +        txt += scram+' runtime -sh\n'
944          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
945 +        txt += 'echo $PATH\n'
946  
947          # Handle the arguments:
948          txt += "\n"
949          txt += "## number of arguments (first argument always jobnumber)\n"
950          txt += "\n"
951 <        txt += "narg=$#\n"
952 <        txt += "if [ $narg -lt 2 ]\n"
951 > #        txt += "narg=$#\n"
952 >        txt += "if [ $nargs -lt 2 ]\n"
953          txt += "then\n"
954 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
954 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
955          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
956          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
957          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
# Line 721 | Line 964 | class Cmssw(JobType):
964          txt += '        cd $RUNTIME_AREA\n'
965          txt += '        /bin/rm -rf $WORKING_DIR\n'
966          txt += '        if [ -d $WORKING_DIR ] ;then\n'
967 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
968 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
969 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
970 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
967 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
968 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
969 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
970 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
971          txt += '            rm -f $RUNTIME_AREA/$repo \n'
972          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
973          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 736 | Line 979 | class Cmssw(JobType):
979  
980          # Prepare job-specific part
981          job = common.job_list[nj]
982 <        pset = os.path.basename(job.configFilename())
983 <        txt += '\n'
984 <        if (self.datasetPath): # standard job
985 <            txt += 'InputFiles=$2\n'
986 <            txt += 'echo "Inputfiles:<$InputFiles>"\n'
987 <            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
988 <        else:  # pythia like job
989 <            if (self.sourceSeed):
990 <                txt += 'Seed=$2\n'
991 <                txt += 'echo "Seed: <$Seed>"\n'
992 <                txt += 'sed "s#INPUT#$Seed#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
750 <            else:
751 <                txt += '# Copy untouched pset\n'
752 <                txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
982 >        ### FEDE FOR DBS OUTPUT PUBLICATION
983 >        if (self.datasetPath):
984 >            txt += '\n'
985 >            txt += 'DatasetPath='+self.datasetPath+'\n'
986 >
987 >            datasetpath_split = self.datasetPath.split("/")
988 >
989 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
990 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
991 >            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
992 >            txt += 'ApplicationFamily=cmsRun\n'
993  
994 +        else:
995 +            txt += 'DatasetPath=MCDataTier\n'
996 +            txt += 'PrimaryDataset=null\n'
997 +            txt += 'DataTier=null\n'
998 +            #txt += 'ProcessedDataset=null\n'
999 +            txt += 'ApplicationFamily=MCDataTier\n'
1000 +        if self.pset != None: #CarlosDaniele
1001 +            pset = os.path.basename(job.configFilename())
1002 +            txt += '\n'
1003 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1004 +            if (self.datasetPath): # standard job
1005 +                #txt += 'InputFiles=$2\n'
1006 +                txt += 'InputFiles=${args[1]}\n'
1007 +                txt += 'MaxEvents=${args[2]}\n'
1008 +                txt += 'SkipEvents=${args[3]}\n'
1009 +                txt += 'echo "Inputfiles:<$InputFiles>"\n'
1010 +                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1011 +                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1012 +                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1013 +                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1014 +                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1015 +            else:  # pythia like job
1016 +                seedIndex=1
1017 +                if (self.firstRun):
1018 +                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1019 +                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1020 +                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1021 +                    seedIndex=seedIndex+1
1022 +
1023 +                if (self.sourceSeed):
1024 +                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1025 +                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1026 +                    seedIndex=seedIndex+1
1027 +                    ## the following seeds are not always present
1028 +                    if (self.sourceSeedVtx):
1029 +                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1030 +                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1031 +                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1032 +                        seedIndex += 1
1033 +                    if (self.sourceSeedG4):
1034 +                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1035 +                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1036 +                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1037 +                        seedIndex += 1
1038 +                    if (self.sourceSeedMix):
1039 +                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1040 +                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1041 +                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1042 +                        seedIndex += 1
1043 +                    pass
1044 +                pass
1045 +            txt += 'mv -f '+pset+' pset.cfg\n'
1046  
1047          if len(self.additional_inbox_files) > 0:
1048 <            for file in self.additional_inbox_files:
1049 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
1050 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
1051 <                txt += '   chmod +x '+file+'\n'
760 <                txt += 'fi\n'
761 <            pass
1048 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1049 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1050 >            txt += 'fi\n'
1051 >            pass
1052  
1053 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1053 >        if self.pset != None: #CarlosDaniele
1054 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1055  
1056 <        txt += '\n'
1057 <        txt += 'echo "***** cat pset.cfg *********"\n'
1058 <        txt += 'cat pset.cfg\n'
1059 <        txt += 'echo "****** end pset.cfg ********"\n'
1060 <        txt += '\n'
1061 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
1062 <        # txt += 'cat pset1.cfg\n'
1063 <        # txt += 'echo "****** end pset1.cfg ********"\n'
1056 >            txt += '\n'
1057 >            txt += 'echo "***** cat pset.cfg *********"\n'
1058 >            txt += 'cat pset.cfg\n'
1059 >            txt += 'echo "****** end pset.cfg ********"\n'
1060 >            txt += '\n'
1061 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1062 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1063 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1064 >            ##############
1065 >            txt += '\n'
1066 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1067 >            # txt += 'cat pset1.cfg\n'
1068 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1069          return txt
1070  
1071 <    def wsBuildExe(self, nj):
1071 >    def wsBuildExe(self, nj=0):
1072          """
1073          Put in the script the commands to build an executable
1074          or a library.
# Line 807 | Line 1103 | class Cmssw(JobType):
1103              txt += 'else \n'
1104              txt += '   echo "Successful untar" \n'
1105              txt += 'fi \n'
1106 +            txt += '\n'
1107 +            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1108 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1109 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1110 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1111 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1112 +            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1113 +            txt += 'else\n'
1114 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1115 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1116 +            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1117 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1118 +            ###################
1119 +            txt += 'fi\n'
1120 +            txt += '\n'
1121 +
1122              pass
1123 <        
1123 >
1124          return txt
1125  
1126      def modifySteeringCards(self, nj):
1127          """
1128 <        modify the card provided by the user,
1128 >        modify the card provided by the user,
1129          writing a new card into share dir
1130          """
1131 <        
1131 >
1132      def executableName(self):
1133 <        return self.executable
1133 >        if self.scriptExe: #CarlosDaniele
1134 >            return "sh "
1135 >        else:
1136 >            return self.executable
1137  
1138      def executableArgs(self):
1139 <        return " -p pset.cfg"
1139 >        if self.scriptExe:#CarlosDaniele
1140 >            return   self.scriptExe + " $NJob"
1141 >        else:
1142 >            # if >= CMSSW_1_5_X, add -e
1143 >            version_array = self.scram.getSWVersion().split('_')
1144 >            major = 0
1145 >            minor = 0
1146 >            try:
1147 >                major = int(version_array[1])
1148 >                minor = int(version_array[2])
1149 >            except:
1150 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1151 >                raise CrabException(msg)
1152 >            if major >= 1 and minor >= 5 :
1153 >                return " -e -p pset.cfg"
1154 >            else:
1155 >                return " -p pset.cfg"
1156  
1157      def inputSandbox(self, nj):
1158          """
1159          Returns a list of filenames to be put in JDL input sandbox.
1160          """
1161          inp_box = []
1162 <        # dict added to delete duplicate from input sandbox file list
1163 <        seen = {}
1162 >        # # dict added to delete duplicate from input sandbox file list
1163 >        # seen = {}
1164          ## code
1165          if os.path.isfile(self.tgzNameWithPath):
1166              inp_box.append(self.tgzNameWithPath)
1167 +        if os.path.isfile(self.MLtgzfile):
1168 +            inp_box.append(self.MLtgzfile)
1169          ## config
1170 <        inp_box.append(common.job_list[nj].configFilename())
1170 >        if not self.pset is None:
1171 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1172          ## additional input files
1173 <        #for file in self.additional_inbox_files:
1174 <        #    inp_box.append(common.work_space.cwdDir()+file)
1173 >        tgz = self.additionalInputFileTgz()
1174 >        inp_box.append(tgz)
1175          return inp_box
1176  
1177      def outputSandbox(self, nj):
# Line 846 | Line 1180 | class Cmssw(JobType):
1180          """
1181          out_box = []
1182  
849        stdout=common.job_list[nj].stdout()
850        stderr=common.job_list[nj].stderr()
851
1183          ## User Declared output files
1184 <        for out in self.output_file:
1185 <            n_out = nj + 1
1184 >        for out in (self.output_file+self.output_file_sandbox):
1185 >            n_out = nj + 1
1186              out_box.append(self.numberFile_(out,str(n_out)))
1187          return out_box
857        return []
1188  
1189      def prepareSteeringCards(self):
1190          """
# Line 870 | Line 1200 | class Cmssw(JobType):
1200          txt = '\n'
1201          txt += '# directory content\n'
1202          txt += 'ls \n'
1203 <        file_list = ''
1204 <        for fileWithSuffix in self.output_file:
1203 >
1204 >        txt += 'output_exit_status=0\n'
1205 >
1206 >        for fileWithSuffix in (self.output_file_sandbox):
1207              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
876            file_list=file_list+output_file_num+' '
1208              txt += '\n'
1209              txt += '# check output file\n'
1210 <            txt += 'ls '+fileWithSuffix+'\n'
1211 <            txt += 'ls_result=$?\n'
1212 <            #txt += 'exe_result=$?\n'
1213 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1214 <            txt += '   echo "ERROR: Problem with output file"\n'
1215 <            #txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
885 <            #txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
886 <            #txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
887 <            ### OLI_DANIELE
1210 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1211 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1212 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1213 >            txt += 'else\n'
1214 >            txt += '    exit_status=60302\n'
1215 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1216              if common.scheduler.boss_scheduler_name == 'condor_g':
1217                  txt += '    if [ $middleware == OSG ]; then \n'
1218                  txt += '        echo "prepare dummy output file"\n'
1219                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1220                  txt += '    fi \n'
1221 +            txt += 'fi\n'
1222 +
1223 +        for fileWithSuffix in (self.output_file):
1224 +            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1225 +            txt += '\n'
1226 +            txt += '# check output file\n'
1227 +            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1228 +            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1229 +            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1230              txt += 'else\n'
1231 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1231 >            txt += '    exit_status=60302\n'
1232 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1233 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1234 >            txt += '    output_exit_status=$exit_status\n'
1235 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1236 >                txt += '    if [ $middleware == OSG ]; then \n'
1237 >                txt += '        echo "prepare dummy output file"\n'
1238 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1239 >                txt += '    fi \n'
1240              txt += 'fi\n'
1241 <      
1242 <        txt += 'cd $RUNTIME_AREA\n'
1243 <        file_list=file_list[:-1]
1244 <        txt += 'file_list="'+file_list+'"\n'
1241 >        file_list = []
1242 >        for fileWithSuffix in (self.output_file):
1243 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1244 >
1245 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1246          txt += 'cd $RUNTIME_AREA\n'
901        ### OLI_DANIELE
902        txt += 'if [ $middleware == OSG ]; then\n'  
903        txt += '    cd $RUNTIME_AREA\n'
904        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
905        txt += '    /bin/rm -rf $WORKING_DIR\n'
906        txt += '    if [ -d $WORKING_DIR ] ;then\n'
907        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
908        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
909        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
910        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
911        txt += '        rm -f $RUNTIME_AREA/$repo \n'
912        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
913        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
914        txt += '    fi\n'
915        txt += 'fi\n'
916        txt += '\n'
1247          return txt
1248  
1249      def numberFile_(self, file, txt):
# Line 924 | Line 1254 | class Cmssw(JobType):
1254          # take away last extension
1255          name = p[0]
1256          for x in p[1:-1]:
1257 <           name=name+"."+x
1257 >            name=name+"."+x
1258          # add "_txt"
1259          if len(p)>1:
1260 <          ext = p[len(p)-1]
1261 <          #result = name + '_' + str(txt) + "." + ext
932 <          result = name + '_' + txt + "." + ext
1260 >            ext = p[len(p)-1]
1261 >            result = name + '_' + txt + "." + ext
1262          else:
1263 <          #result = name + '_' + str(txt)
1264 <          result = name + '_' + txt
936 <        
1263 >            result = name + '_' + txt
1264 >
1265          return result
1266  
1267 <    def getRequirements(self):
1267 >    def getRequirements(self, nj=[]):
1268          """
1269 <        return job requirements to add to jdl files
1269 >        return job requirements to add to jdl files
1270          """
1271          req = ''
1272 <        if common.analisys_common_info['sw_version']:
1272 >        if self.version:
1273              req='Member("VO-cms-' + \
1274 <                 common.analisys_common_info['sw_version'] + \
1274 >                 self.version + \
1275 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1276 >        ## SL add requirement for OS version only if SL4
1277 >        #reSL4 = re.compile( r'slc4' )
1278 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1279 >            req+=' && Member("VO-cms-' + \
1280 >                 self.executable_arch + \
1281                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1282 <        if common.analisys_common_info['sites']:
1283 <            if len(common.analisys_common_info['sites'])>0:
1284 <                req = req + ' && ('
951 <                for i in range(len(common.analisys_common_info['sites'])):
952 <                    req = req + 'other.GlueCEInfoHostName == "' \
953 <                         + common.analisys_common_info['sites'][i] + '"'
954 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
955 <                        req = req + ' || '
956 <            req = req + ')'
957 <        #print "req = ", req
1282 >
1283 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1284 >
1285          return req
1286  
1287      def configFilename(self):
# Line 971 | Line 1298 | class Cmssw(JobType):
1298          txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1299          txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1300          txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1301 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1302          txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1303 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1304 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1305 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1303 >        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1304 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1305 >        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1306 >        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1307          txt += '   else\n'
1308 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1308 >        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1309          txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1310          txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1311          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
# Line 989 | Line 1318 | class Cmssw(JobType):
1318          txt += '       cd $RUNTIME_AREA\n'
1319          txt += '       /bin/rm -rf $WORKING_DIR\n'
1320          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1321 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1322 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1323 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1324 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1325 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1326 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1327 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1321 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1322 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1323 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1324 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1325 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1326 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1327 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1328          txt += '       fi\n'
1329          txt += '\n'
1330          txt += '       exit 1\n'
# Line 1005 | Line 1334 | class Cmssw(JobType):
1334          txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1335  
1336          return txt
1337 <
1337 >
1338      ### OLI_DANIELE
1339      def wsSetupCMSLCGEnvironment_(self):
1340          """
# Line 1050 | Line 1379 | class Cmssw(JobType):
1379          txt += '       fi\n'
1380          txt += '   fi\n'
1381          txt += '   \n'
1053        txt += '   string=`cat /etc/redhat-release`\n'
1054        txt += '   echo $string\n'
1055        txt += '   if [[ $string = *alhalla* ]]; then\n'
1056        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1057        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1058        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1059        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1060        txt += '   else\n'
1061        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1062        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1063        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1064        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1065        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1066        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1067        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1068        txt += '       exit 1\n'
1069        txt += '   fi\n'
1382          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1383          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1384          return txt
1385  
1386 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1387 +    def modifyReport(self, nj):
1388 +        """
1389 +        insert the part of the script that modifies the FrameworkJob Report
1390 +        """
1391 +
1392 +        txt = ''
1393 +        try:
1394 +            publish_data = int(self.cfg_params['USER.publish_data'])
1395 +        except KeyError:
1396 +            publish_data = 0
1397 +        if (publish_data == 1):
1398 +            txt += 'echo "Modify Job Report" \n'
1399 +            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1400 +            ################ FEDE FOR DBS2 #############################################
1401 +            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1402 +            #############################################################################
1403 +            #try:
1404 +            #    publish_data = int(self.cfg_params['USER.publish_data'])
1405 +            #except KeyError:
1406 +            #    publish_data = 0
1407 +
1408 +            txt += 'if [ -z "$SE" ]; then\n'
1409 +            txt += '    SE="" \n'
1410 +            txt += 'fi \n'
1411 +            txt += 'if [ -z "$SE_PATH" ]; then\n'
1412 +            txt += '    SE_PATH="" \n'
1413 +            txt += 'fi \n'
1414 +            txt += 'echo "SE = $SE"\n'
1415 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1416 +
1417 +        #if (publish_data == 1):
1418 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1419 +            processedDataset = self.cfg_params['USER.publish_data_name']
1420 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1421 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1422 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1423 +            #### FEDE: added slash in LFN ##############
1424 +            txt += '    FOR_LFN=/copy_problems/ \n'
1425 +            txt += 'else \n'
1426 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1427 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1428 +            txt += '    FOR_LFN=/store$tmp \n'
1429 +            txt += 'fi \n'
1430 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1431 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1432 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1433 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1434 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1435 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1436 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1437 +
1438 +            txt += 'modifyReport_result=$?\n'
1439 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1440 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1441 +            txt += '    exit_status=1\n'
1442 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1443 +            txt += 'else\n'
1444 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1445 +            txt += 'fi\n'
1446 +        else:
1447 +            txt += 'echo "no data publication required"\n'
1448 +            #txt += 'ProcessedDataset=no_data_to_publish \n'
1449 +            #### FEDE: added slash in LFN ##############
1450 +            #txt += 'FOR_LFN=/local/ \n'
1451 +            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1452 +            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1453 +        return txt
1454 +
1455 +    def cleanEnv(self):
1456 +        ### OLI_DANIELE
1457 +        txt = ''
1458 +        txt += 'if [ $middleware == OSG ]; then\n'
1459 +        txt += '    cd $RUNTIME_AREA\n'
1460 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1461 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1462 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1463 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1464 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1465 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1466 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1467 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1468 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1469 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1470 +        txt += '    fi\n'
1471 +        txt += 'fi\n'
1472 +        txt += '\n'
1473 +        return txt
1474 +
1475      def setParam_(self, param, value):
1476          self._params[param] = value
1477  
# Line 1079 | Line 1480 | class Cmssw(JobType):
1480  
1481      def setTaskid_(self):
1482          self._taskId = self.cfg_params['taskId']
1483 <        
1483 >
1484      def getTaskid(self):
1485          return self._taskId
1486 +
1487 +    def uniquelist(self, old):
1488 +        """
1489 +        remove duplicates from a list
1490 +        """
1491 +        nd={}
1492 +        for e in old:
1493 +            nd[e]=0
1494 +        return nd.keys()
1495 +
1496 +
1497 +    def checkOut(self, limit):
1498 +        """
1499 +        check the dimension of the output files
1500 +        """
1501 +        txt = 'echo "*****************************************"\n'
1502 +        txt += 'echo "** Starting output sandbox limit check **"\n'
1503 +        txt += 'echo "*****************************************"\n'
1504 +        allOutFiles = ""
1505 +        listOutFiles = []
1506 +        for fileOut in (self.output_file+self.output_file_sandbox):
1507 +             if fileOut.find('crab_fjr') == -1:
1508 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1509 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1510 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1511 +        txt += 'ls -gGhrta;\n'
1512 +        txt += 'sum=0;\n'
1513 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1514 +        txt += '    if [ -e $file ]; then\n'
1515 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1516 +        txt += '        sum=`expr $sum + $tt`\n'
1517 +        txt += '    else\n'
1518 +        txt += '        echo "WARNING: output file $file not found!"\n'
1519 +        txt += '    fi\n'
1520 +        txt += 'done\n'
1521 +        txt += 'echo "Total Output dimension: $sum";\n'
1522 +        txt += 'limit='+str(limit)+';\n'
1523 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1524 +        txt += 'if [ $limit -lt $sum ]; then\n'
1525 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1526 +        txt += '    echo "         checking the output file sizes..."\n'
1527 +        """
1528 +        txt += '    dim=0;\n'
1529 +        txt += '    exclude=0;\n'
1530 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1531 +        txt += '        sumTemp=0;\n'
1532 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1533 +        txt += '            if [ $file != $file2 ]; then\n'
1534 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1535 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1536 +        txt += '            fi\n'
1537 +        txt += '        done\n'
1538 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1539 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1540 +        txt += '                dim=$sumTemp;\n'
1541 +        txt += '                exclude=$file;\n'
1542 +        txt += '            fi\n'
1543 +        txt += '        fi\n'
1544 +        txt += '    done\n'
1545 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1546 +        """
1547 +        txt += '    tot=0;\n'
1548 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1549 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1550 +        txt += '        tot=`expr $tot + $tt`;\n'
1551 +        txt += '        if [ $limit -lt $tot ]; then\n'
1552 +        txt += '            tot=`expr $tot - $tt`;\n'
1553 +        txt += '            fileLast=$file;\n'
1554 +        txt += '            break;\n'
1555 +        txt += '        fi\n'
1556 +        txt += '    done\n'
1557 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1558 +        txt += '    flag=0;\n'
1559 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1560 +        txt += '        if [ $fileLast = $filess ]; then\n'
1561 +        txt += '            flag=1;\n'
1562 +        txt += '        fi\n'
1563 +        txt += '        if [ $flag -eq 1 ]; then\n'
1564 +        txt += '            rm -f $filess;\n'
1565 +        txt += '        fi\n'
1566 +        txt += '    done\n'
1567 +        txt += '    ls -agGhrt;\n'
1568 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1569 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1570 +        txt += '    exit_status=70000;\n'
1571 +        txt += 'else'
1572 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1573 +        txt += 'fi\n'
1574 +        txt += 'echo "*****************************************"\n'
1575 +        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1576 +        txt += 'echo "*****************************************"\n'
1577 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines