ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.49 by slacapra, Thu Oct 5 15:32:20 2006 UTC vs.
Revision 1.157 by spiga, Sun Feb 17 20:13:00 2008 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 < import math
5 > from BlackWhiteListParser import BlackWhiteListParser
6   import common
7 import PsetManipulator  
8
9 import DBSInfo
10 import DataDiscovery
11 import DataLocation
7   import Scram
8  
9 < import glob, os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12      def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
16 <        # Marco.
16 >        self.argsList = []
17 >
18          self._params = {}
19          self.cfg_params = cfg_params
20 +        # init BlackWhiteListParser
21 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
22 +
23 +        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
24  
25          # number of jobs requested to be created, limit obj splitting
26          self.ncjobs = ncjobs
27  
28          log = common.logger
29 <        
29 >
30          self.scram = Scram.Scram(cfg_params)
31        scramArea = ''
31          self.additional_inbox_files = []
32          self.scriptExe = ''
33          self.executable = ''
34 +        self.executable_arch = self.scram.getArch()
35          self.tgz_name = 'default.tgz'
36 <        self.pset = ''      #scrip use case Da  
36 >        self.additional_tgz_name = 'additional.tgz'
37 >        self.scriptName = 'CMSSW.sh'
38 >        self.pset = ''      #scrip use case Da
39          self.datasetPath = '' #scrip use case Da
40  
41 +        # set FJR file name
42 +        self.fjrFileName = 'crab_fjr.xml'
43 +
44          self.version = self.scram.getSWVersion()
45 +
46 +        #
47 +        # Try to block creation in case of arch/version mismatch
48 +        #
49 +
50 +        a = string.split(self.version, "_")
51 +
52 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 +            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
54 +            common.logger.message(msg)
55 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
56 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
57 +            raise CrabException(msg)
58 +
59 +        common.taskDB.setDict('codeVersion',self.version)
60          self.setParam_('application', self.version)
61  
62          ### collect Data cards
63 <        try:
64 <            tmp =  cfg_params['CMSSW.datasetpath']
65 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
46 <            if string.lower(tmp)=='none':
47 <                self.datasetPath = None
48 <                self.selectNoInput = 1
49 <            else:
50 <                self.datasetPath = tmp
51 <                self.selectNoInput = 0
52 <        except KeyError:
53 <            msg = "Error: datasetpath not defined "  
63 >
64 >        if not cfg_params.has_key('CMSSW.datasetpath'):
65 >            msg = "Error: datasetpath not defined "
66              raise CrabException(msg)
67 +        tmp =  cfg_params['CMSSW.datasetpath']
68 +        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 +        if string.lower(tmp)=='none':
70 +            self.datasetPath = None
71 +            self.selectNoInput = 1
72 +        else:
73 +            self.datasetPath = tmp
74 +            self.selectNoInput = 0
75  
76          # ML monitoring
77          # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
# Line 59 | Line 79 | class Cmssw(JobType):
79              self.setParam_('dataset', 'None')
80              self.setParam_('owner', 'None')
81          else:
82 <            datasetpath_split = self.datasetPath.split("/")
83 <            self.setParam_('dataset', datasetpath_split[1])
84 <            self.setParam_('owner', datasetpath_split[-1])
82 >            ## SL what is supposed to fail here?
83 >            try:
84 >                datasetpath_split = self.datasetPath.split("/")
85 >                # standard style
86 >                self.setParam_('datasetFull', self.datasetPath)
87 >                self.setParam_('dataset', datasetpath_split[1])
88 >                self.setParam_('owner', datasetpath_split[2])
89 >            except:
90 >                self.setParam_('dataset', self.datasetPath)
91 >                self.setParam_('owner', self.datasetPath)
92  
93 <        self.setTaskid_()
67 <        self.setParam_('taskId', self.cfg_params['taskId'])
93 >        self.setParam_('taskId', common.taskDB.dict('taskId'))
94  
95          self.dataTiers = []
96  
97          ## now the application
98 <        try:
99 <            self.executable = cfg_params['CMSSW.executable']
100 <            self.setParam_('exe', self.executable)
75 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
76 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
77 <            log.debug(3,msg)
78 <        except KeyError:
79 <            self.executable = 'cmsRun'
80 <            self.setParam_('exe', self.executable)
81 <            msg = "User executable not defined. Use cmsRun"
82 <            log.debug(3,msg)
83 <            pass
98 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
99 >        self.setParam_('exe', self.executable)
100 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102 <        try:
86 <            self.pset = cfg_params['CMSSW.pset']
87 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
88 <            if self.pset.lower() != 'none' :
89 <                if (not os.path.exists(self.pset)):
90 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
91 <            else:
92 <                self.pset = None
93 <        except KeyError:
102 >        if not cfg_params.has_key('CMSSW.pset'):
103              raise CrabException("PSet file missing. Cannot run cmsRun ")
104 +        self.pset = cfg_params['CMSSW.pset']
105 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
106 +        if self.pset.lower() != 'none' :
107 +            if (not os.path.exists(self.pset)):
108 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
109 +        else:
110 +            self.pset = None
111  
112          # output files
113 <        try:
114 <            self.output_file = []
113 >        ## stuff which must be returned always via sandbox
114 >        self.output_file_sandbox = []
115  
116 <            tmp = cfg_params['CMSSW.output_file']
117 <            if tmp != '':
118 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
119 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
120 <                for tmp in tmpOutFiles:
121 <                    tmp=string.strip(tmp)
122 <                    self.output_file.append(tmp)
123 <                    pass
124 <            else:
125 <                log.message("No output file defined: only stdout/err will be available")
116 >        # add fjr report by default via sandbox
117 >        self.output_file_sandbox.append(self.fjrFileName)
118 >
119 >        # other output files to be returned via sandbox or copied to SE
120 >        self.output_file = []
121 >        tmp = cfg_params.get('CMSSW.output_file',None)
122 >        if tmp :
123 >            tmpOutFiles = string.split(tmp,',')
124 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
125 >            for tmp in tmpOutFiles:
126 >                tmp=string.strip(tmp)
127 >                self.output_file.append(tmp)
128                  pass
129 <            pass
130 <        except KeyError:
131 <            log.message("No output file defined: only stdout/err will be available")
114 <            pass
129 >        else:
130 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
131 >        pass
132  
133          # script_exe file as additional file in inputSandbox
134 <        try:
135 <            self.scriptExe = cfg_params['USER.script_exe']
136 <            if self.scriptExe != '':
137 <               if not os.path.isfile(self.scriptExe):
138 <                  msg ="WARNING. file "+self.scriptExe+" not found"
139 <                  raise CrabException(msg)
140 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
124 <        except KeyError:
125 <            self.scriptExe = ''
134 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
135 >        if self.scriptExe :
136 >           if not os.path.isfile(self.scriptExe):
137 >              msg ="ERROR. file "+self.scriptExe+" not found"
138 >              raise CrabException(msg)
139 >           self.additional_inbox_files.append(string.strip(self.scriptExe))
140 >
141          #CarlosDaniele
142          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
143 <           msg ="WARNING. script_exe  not defined"
143 >           msg ="Error. script_exe  not defined"
144             raise CrabException(msg)
145  
146          ## additional input files
147 <        try:
147 >        if cfg_params.has_key('USER.additional_input_files'):
148              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
149              for tmp in tmpAddFiles:
150 +                tmp = string.strip(tmp)
151                  dirname = ''
152                  if not tmp[0]=="/": dirname = "."
153 <                files = glob.glob(os.path.join(dirname, tmp))
153 >                files = []
154 >                if string.find(tmp,"*")>-1:
155 >                    files = glob.glob(os.path.join(dirname, tmp))
156 >                    if len(files)==0:
157 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
158 >                else:
159 >                    files.append(tmp)
160                  for file in files:
161                      if not os.path.exists(file):
162                          raise CrabException("Additional input file not found: "+file)
163                      pass
164 +                    # fname = string.split(file, '/')[-1]
165 +                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
166 +                    # shutil.copyfile(file, storedFile)
167                      self.additional_inbox_files.append(string.strip(file))
168                  pass
169              pass
170              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
171 <        except KeyError:
147 <            pass
148 <
149 <        # files per job
150 <        try:
151 <            if (cfg_params['CMSSW.files_per_jobs']):
152 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
153 <        except KeyError:
154 <            pass
171 >        pass
172  
173          ## Events per job
174 <        try:
174 >        if cfg_params.has_key('CMSSW.events_per_job'):
175              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
176              self.selectEventsPerJob = 1
177 <        except KeyError:
177 >        else:
178              self.eventsPerJob = -1
179              self.selectEventsPerJob = 0
180 <    
180 >
181          ## number of jobs
182 <        try:
182 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
183              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
184              self.selectNumberOfJobs = 1
185 <        except KeyError:
185 >        else:
186              self.theNumberOfJobs = 0
187              self.selectNumberOfJobs = 0
188  
189 <        try:
189 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
190              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
191              self.selectTotalNumberEvents = 1
192 <        except KeyError:
192 >        else:
193              self.total_number_of_events = 0
194              self.selectTotalNumberEvents = 0
195  
196 <        if self.pset != None: #CarlosDaniele
196 >        if self.pset != None: #CarlosDaniele
197               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
198                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
199                   raise CrabException(msg)
# Line 186 | Line 203 | class Cmssw(JobType):
203                   raise CrabException(msg)
204  
205          ## source seed for pythia
206 <        try:
207 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
208 <        except KeyError:
209 <            self.sourceSeed = None
210 <            common.logger.debug(5,"No seed given")
206 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
207 >
208 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
209 >
210 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
211 >
212 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
213 >
214 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
215  
195        try:
196            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
197        except KeyError:
198            self.sourceSeedVtx = None
199            common.logger.debug(5,"No vertex seed given")
216          if self.pset != None: #CarlosDaniele
217 <            self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
217 >            import PsetManipulator as pp
218 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
219 >
220 >        # Copy/return
221 >
222 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
223 >        self.return_data = int(cfg_params.get('USER.return_data',0))
224  
225          #DBSDLS-start
226 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
226 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
227          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
228          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
229          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 210 | Line 232 | class Cmssw(JobType):
232          blockSites = {}
233          if self.datasetPath:
234              blockSites = self.DataDiscoveryAndLocation(cfg_params)
235 <        #DBSDLS-end          
235 >        #DBSDLS-end
236  
237          self.tgzNameWithPath = self.getTarBall(self.executable)
238 <    
238 >
239          ## Select Splitting
240 <        if self.selectNoInput:
240 >        if self.selectNoInput:
241              if self.pset == None: #CarlosDaniele
242                  self.jobSplittingForScript()
243              else:
244                  self.jobSplittingNoInput()
245 <        else: self.jobSplittingByBlocks(blockSites)
245 >        else:
246 >            self.jobSplittingByBlocks(blockSites)
247  
248          # modify Pset
249          if self.pset != None: #CarlosDaniele
250              try:
251                  if (self.datasetPath): # standard job
252                      # allow processing only a fraction of the events in a file
253 <                    self.PsetEdit.inputModule("INPUT")
254 <                    self.PsetEdit.maxEvent("INPUTMAXEVENTS")
255 <                    self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
253 >                    PsetEdit.inputModule("INPUTFILE")
254 >                    PsetEdit.maxEvent(0)
255 >                    PsetEdit.skipEvent(0)
256                  else:  # pythia like job
257 <                    self.PsetEdit.maxEvent(self.eventsPerJob)
257 >                    PsetEdit.maxEvent(self.eventsPerJob)
258 >                    if (self.firstRun):
259 >                        PsetEdit.pythiaFirstRun(0)  #First Run
260                      if (self.sourceSeed) :
261 <                        self.PsetEdit.pythiaSeed("INPUT")
261 >                        PsetEdit.pythiaSeed(0)
262                          if (self.sourceSeedVtx) :
263 <                            self.PsetEdit.pythiaSeedVtx("INPUTVTX")
264 <                self.PsetEdit.psetWriter(self.configFilename())
263 >                            PsetEdit.vtxSeed(0)
264 >                        if (self.sourceSeedG4) :
265 >                            PsetEdit.g4Seed(0)
266 >                        if (self.sourceSeedMix) :
267 >                            PsetEdit.mixSeed(0)
268 >                # add FrameworkJobReport to parameter-set
269 >                PsetEdit.addCrabFJR(self.fjrFileName)
270 >                PsetEdit.psetWriter(self.configFilename())
271              except:
272                  msg='Error while manipuliating ParameterSet: exiting...'
273                  raise CrabException(msg)
274  
275      def DataDiscoveryAndLocation(self, cfg_params):
276  
277 +        import DataDiscovery
278 +        import DataLocation
279          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
280  
281          datasetPath=self.datasetPath
282  
250        ## TODO
251        dataTiersList = ""
252        dataTiers = dataTiersList.split(',')
253
283          ## Contact the DBS
284 <        common.logger.message("Contacting DBS...")
284 >        common.logger.message("Contacting Data Discovery Services ...")
285          try:
286 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, dataTiers, cfg_params)
286 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
287              self.pubdata.fetchDBSInfo()
288  
289          except DataDiscovery.NotExistingDatasetError, ex :
290              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
291              raise CrabException(msg)
263
292          except DataDiscovery.NoDataTierinProvenanceError, ex :
293              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
294              raise CrabException(msg)
295          except DataDiscovery.DataDiscoveryError, ex:
296 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
296 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
297              raise CrabException(msg)
298  
271        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
272        ## self.DBSPaths=self.pubdata.getDBSPaths()
273        common.logger.message("Required data are :"+self.datasetPath)
274
299          self.filesbyblock=self.pubdata.getFiles()
300          self.eventsbyblock=self.pubdata.getEventsPerBlock()
301          self.eventsbyfile=self.pubdata.getEventsPerFile()
278        # print str(self.filesbyblock)
279        # print 'self.eventsbyfile',len(self.eventsbyfile)
280        # print str(self.eventsbyfile)
302  
303          ## get max number of events
304 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
284 <        common.logger.message("The number of available events is %s\n"%self.maxEvents)
304 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
305  
286        common.logger.message("Contacting DLS...")
306          ## Contact the DLS and build a list of sites hosting the fileblocks
307          try:
308              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
# Line 291 | Line 310 | class Cmssw(JobType):
310          except DataLocation.DataLocationError , ex:
311              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
312              raise CrabException(msg)
313 <        
313 >
314  
315          sites = dataloc.getSites()
316          allSites = []
317          listSites = sites.values()
318 <        for list in listSites:
319 <            for oneSite in list:
318 >        for listSite in listSites:
319 >            for oneSite in listSite:
320                  allSites.append(oneSite)
321          allSites = self.uniquelist(allSites)
322  
323 <        common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
324 <        common.logger.debug(6, "List of Sites: "+str(allSites))
323 >        # screen output
324 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
325 >
326          return sites
327 <    
327 >
328 >    def setArgsList(self, argsList):
329 >        self.argsList = argsList
330 >
331      def jobSplittingByBlocks(self, blockSites):
332          """
333          Perform job splitting. Jobs run over an integer number of files
# Line 354 | Line 377 | class Cmssw(JobType):
377              totalNumberOfJobs = 999999999
378          else :
379              totalNumberOfJobs = self.ncjobs
380 <            
380 >
381  
382          blocks = blockSites.keys()
383          blockCount = 0
# Line 364 | Line 387 | class Cmssw(JobType):
387          jobCount = 0
388          list_of_lists = []
389  
390 +        # dictionary tracking which jobs belong to which block
391 +        jobsOfBlock = {}
392 +
393          # ---- Iterate over the blocks in the dataset until ---- #
394          # ---- we've met the requested total # of events    ---- #
395          while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
396              block = blocks[blockCount]
397              blockCount += 1
398 <            
398 >            if block not in jobsOfBlock.keys() :
399 >                jobsOfBlock[block] = []
400  
401 <            numEventsInBlock = self.eventsbyblock[block]
402 <            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
403 <            
404 <            files = self.filesbyblock[block]
405 <            numFilesInBlock = len(files)
406 <            if (numFilesInBlock <= 0):
407 <                continue
408 <            fileCount = 0
409 <
410 <            # ---- New block => New job ---- #
411 <            parString = "\\{"
412 <            # counter for number of events in files currently worked on
413 <            filesEventCount = 0
414 <            # flag if next while loop should touch new file
415 <            newFile = 1
416 <            # job event counter
417 <            jobSkipEventCount = 0
418 <            
419 <            # ---- Iterate over the files in the block until we've met the requested ---- #
420 <            # ---- total # of events or we've gone over all the files in this block  ---- #
421 <            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
422 <                file = files[fileCount]
423 <                if newFile :
424 <                    try:
425 <                        numEventsInFile = self.eventsbyfile[file]
426 <                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
427 <                        # increase filesEventCount
428 <                        filesEventCount += numEventsInFile
429 <                        # Add file to current job
430 <                        parString += '\\\"' + file + '\\\"\,'
431 <                        newFile = 0
432 <                    except KeyError:
433 <                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")
434 <                        
435 <
436 <                # if less events in file remain than eventsPerJobRequested
437 <                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
438 <                    # if last file in block
439 <                    if ( fileCount == numFilesInBlock-1 ) :
440 <                        # end job using last file, use remaining events in block
401 >            if self.eventsbyblock.has_key(block) :
402 >                numEventsInBlock = self.eventsbyblock[block]
403 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
404 >
405 >                files = self.filesbyblock[block]
406 >                numFilesInBlock = len(files)
407 >                if (numFilesInBlock <= 0):
408 >                    continue
409 >                fileCount = 0
410 >
411 >                # ---- New block => New job ---- #
412 >                parString = ""
413 >                # counter for number of events in files currently worked on
414 >                filesEventCount = 0
415 >                # flag if next while loop should touch new file
416 >                newFile = 1
417 >                # job event counter
418 >                jobSkipEventCount = 0
419 >
420 >                # ---- Iterate over the files in the block until we've met the requested ---- #
421 >                # ---- total # of events or we've gone over all the files in this block  ---- #
422 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
423 >                    file = files[fileCount]
424 >                    if newFile :
425 >                        try:
426 >                            numEventsInFile = self.eventsbyfile[file]
427 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
428 >                            # increase filesEventCount
429 >                            filesEventCount += numEventsInFile
430 >                            # Add file to current job
431 >                            parString += '\\\"' + file + '\\\"\,'
432 >                            newFile = 0
433 >                        except KeyError:
434 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
435 >
436 >
437 >                    # if less events in file remain than eventsPerJobRequested
438 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
439 >                        # if last file in block
440 >                        if ( fileCount == numFilesInBlock-1 ) :
441 >                            # end job using last file, use remaining events in block
442 >                            # close job and touch new file
443 >                            fullString = parString[:-2]
444 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
445 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
446 >                            self.jobDestination.append(blockSites[block])
447 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
448 >                            # fill jobs of block dictionary
449 >                            jobsOfBlock[block].append(jobCount+1)
450 >                            # reset counter
451 >                            jobCount = jobCount + 1
452 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
453 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
454 >                            jobSkipEventCount = 0
455 >                            # reset file
456 >                            parString = ""
457 >                            filesEventCount = 0
458 >                            newFile = 1
459 >                            fileCount += 1
460 >                        else :
461 >                            # go to next file
462 >                            newFile = 1
463 >                            fileCount += 1
464 >                    # if events in file equal to eventsPerJobRequested
465 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
466                          # close job and touch new file
467                          fullString = parString[:-2]
468 <                        fullString += '\\}'
469 <                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
418 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
468 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
469 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
470                          self.jobDestination.append(blockSites[block])
471                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
472 +                        jobsOfBlock[block].append(jobCount+1)
473                          # reset counter
474                          jobCount = jobCount + 1
475 <                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
476 <                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
475 >                        totalEventCount = totalEventCount + eventsPerJobRequested
476 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
477                          jobSkipEventCount = 0
478                          # reset file
479 <                        parString = "\\{"
479 >                        parString = ""
480                          filesEventCount = 0
481                          newFile = 1
482                          fileCount += 1
483 +
484 +                    # if more events in file remain than eventsPerJobRequested
485                      else :
486 <                        # go to next file
487 <                        newFile = 1
488 <                        fileCount += 1
489 <                # if events in file equal to eventsPerJobRequested
490 <                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
491 <                    # close job and touch new file
492 <                    fullString = parString[:-2]
493 <                    fullString += '\\}'
494 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
495 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
496 <                    self.jobDestination.append(blockSites[block])
497 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
498 <                    # reset counter
499 <                    jobCount = jobCount + 1
500 <                    totalEventCount = totalEventCount + eventsPerJobRequested
501 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
502 <                    jobSkipEventCount = 0
503 <                    # reset file
504 <                    parString = "\\{"
505 <                    filesEventCount = 0
452 <                    newFile = 1
453 <                    fileCount += 1
454 <                    
455 <                # if more events in file remain than eventsPerJobRequested
456 <                else :
457 <                    # close job but don't touch new file
458 <                    fullString = parString[:-2]
459 <                    fullString += '\\}'
460 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
461 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
462 <                    self.jobDestination.append(blockSites[block])
463 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
464 <                    # increase counter
465 <                    jobCount = jobCount + 1
466 <                    totalEventCount = totalEventCount + eventsPerJobRequested
467 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
468 <                    # calculate skip events for last file
469 <                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
470 <                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
471 <                    # remove all but the last file
472 <                    filesEventCount = self.eventsbyfile[file]
473 <                    parString = "\\{"
474 <                    parString += '\\\"' + file + '\\\"\,'
475 <                pass # END if
476 <            pass # END while (iterate over files in the block)
486 >                        # close job but don't touch new file
487 >                        fullString = parString[:-2]
488 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
489 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
490 >                        self.jobDestination.append(blockSites[block])
491 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
492 >                        jobsOfBlock[block].append(jobCount+1)
493 >                        # increase counter
494 >                        jobCount = jobCount + 1
495 >                        totalEventCount = totalEventCount + eventsPerJobRequested
496 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
497 >                        # calculate skip events for last file
498 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
499 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
500 >                        # remove all but the last file
501 >                        filesEventCount = self.eventsbyfile[file]
502 >                        parString = ""
503 >                        parString += '\\\"' + file + '\\\"\,'
504 >                    pass # END if
505 >                pass # END while (iterate over files in the block)
506          pass # END while (iterate over blocks in the dataset)
507          self.ncjobs = self.total_number_of_jobs = jobCount
508          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
509              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
510 <        common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
511 <        
510 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
511 >
512 >        # screen output
513 >        screenOutput = "List of jobs and available destination sites:\n\n"
514 >
515 >        # keep track of blocks with no sites to print a warning at the end
516 >        noSiteBlock = []
517 >        bloskNoSite = []
518 >
519 >        blockCounter = 0
520 >        for block in blocks:
521 >            if block in jobsOfBlock.keys() :
522 >                blockCounter += 1
523 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
524 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
525 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
526 >                    bloskNoSite.append( blockCounter )
527 >
528 >        common.logger.message(screenOutput)
529 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
530 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
531 >            virgola = ""
532 >            if len(bloskNoSite) > 1:
533 >                virgola = ","
534 >            for block in bloskNoSite:
535 >                msg += ' ' + str(block) + virgola
536 >            msg += '\n               Related jobs:\n                 '
537 >            virgola = ""
538 >            if len(noSiteBlock) > 1:
539 >                virgola = ","
540 >            for range_jobs in noSiteBlock:
541 >                msg += str(range_jobs) + virgola
542 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
543 >            if self.cfg_params.has_key('EDG.se_white_list'):
544 >                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
545 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
546 >                msg += 'Please check if the dataset is available at this site!)\n'
547 >            if self.cfg_params.has_key('EDG.ce_white_list'):
548 >                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
549 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
550 >                msg += 'Please check if the dataset is available at this site!)\n'
551 >
552 >            common.logger.message(msg)
553 >
554          self.list_of_args = list_of_lists
555          return
556  
# Line 488 | Line 559 | class Cmssw(JobType):
559          Perform job splitting based on number of event per job
560          """
561          common.logger.debug(5,'Splitting per events')
562 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
563 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
564 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
562 >
563 >        if (self.selectEventsPerJob):
564 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
565 >        if (self.selectNumberOfJobs):
566 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
567 >        if (self.selectTotalNumberEvents):
568 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
569  
570          if (self.total_number_of_events < 0):
571              msg='Cannot split jobs per Events with "-1" as total number of events'
572              raise CrabException(msg)
573  
574          if (self.selectEventsPerJob):
575 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
575 >            if (self.selectTotalNumberEvents):
576 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
577 >            elif(self.selectNumberOfJobs) :
578 >                self.total_number_of_jobs =self.theNumberOfJobs
579 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
580 >
581          elif (self.selectNumberOfJobs) :
582              self.total_number_of_jobs = self.theNumberOfJobs
583              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
# Line 517 | Line 597 | class Cmssw(JobType):
597          self.list_of_args = []
598          for i in range(self.total_number_of_jobs):
599              ## Since there is no input, any site is good
600 <           # self.jobDestination.append(["Any"])
601 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
600 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
601 >            args=[]
602 >            if (self.firstRun):
603 >                ## pythia first run
604 >                args.append(str(self.firstRun)+str(i))
605              if (self.sourceSeed):
606 +                args.append(str(self.sourceSeed)+str(i))
607                  if (self.sourceSeedVtx):
608 <                    ## pythia + vtx random seed
609 <                    self.list_of_args.append([
610 <                                              str(self.sourceSeed)+str(i),
611 <                                              str(self.sourceSeedVtx)+str(i)
612 <                                              ])
613 <                else:
614 <                    ## only pythia random seed
615 <                    self.list_of_args.append([(str(self.sourceSeed)+str(i))])
616 <            else:
617 <                ## no random seed
618 <                self.list_of_args.append([str(i)])
619 <        #print self.list_of_args
608 >                    ## + vtx random seed
609 >                    args.append(str(self.sourceSeedVtx)+str(i))
610 >                if (self.sourceSeedG4):
611 >                    ## + G4 random seed
612 >                    args.append(str(self.sourceSeedG4)+str(i))
613 >                if (self.sourceSeedMix):
614 >                    ## + Mix random seed
615 >                    args.append(str(self.sourceSeedMix)+str(i))
616 >                pass
617 >            pass
618 >            self.list_of_args.append(args)
619 >        pass
620  
621          return
622  
# Line 561 | Line 645 | class Cmssw(JobType):
645          return
646  
647      def split(self, jobParams):
648 <
648 >
649          common.jobDB.load()
650          #### Fabio
651          njobs = self.total_number_of_jobs
# Line 569 | Line 653 | class Cmssw(JobType):
653          # create the empty structure
654          for i in range(njobs):
655              jobParams.append("")
656 <        
656 >
657          for job in range(njobs):
658              jobParams[job] = arglist[job]
659              # print str(arglist[job])
# Line 580 | Line 664 | class Cmssw(JobType):
664  
665          common.jobDB.save()
666          return
667 <    
667 >
668      def getJobTypeArguments(self, nj, sched):
669          result = ''
670          for i in common.jobDB.arguments(nj):
671              result=result+str(i)+" "
672          return result
673 <  
673 >
674      def numberOfJobs(self):
675          # Fabio
676          return self.total_number_of_jobs
# Line 595 | Line 679 | class Cmssw(JobType):
679          """
680          Return the TarBall with lib and exe
681          """
682 <        
682 >
683          # if it exist, just return it
684 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
684 >        #
685 >        # Marco. Let's start to use relative path for Boss XML files
686 >        #
687 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
688          if os.path.exists(self.tgzNameWithPath):
689              return self.tgzNameWithPath
690  
# Line 611 | Line 698 | class Cmssw(JobType):
698          # First of all declare the user Scram area
699          swArea = self.scram.getSWArea_()
700          #print "swArea = ", swArea
701 <        swVersion = self.scram.getSWVersion()
702 <        #print "swVersion = ", swVersion
701 >        # swVersion = self.scram.getSWVersion()
702 >        # print "swVersion = ", swVersion
703          swReleaseTop = self.scram.getReleaseTop_()
704          #print "swReleaseTop = ", swReleaseTop
705 <        
705 >
706          ## check if working area is release top
707          if swReleaseTop == '' or swArea == swReleaseTop:
708              return
709  
710 <        filesToBeTarred = []
711 <        ## First find the executable
712 <        if (self.executable != ''):
713 <            exeWithPath = self.scram.findFile_(executable)
714 < #           print exeWithPath
715 <            if ( not exeWithPath ):
716 <                raise CrabException('User executable '+executable+' not found')
717 <
718 <            ## then check if it's private or not
719 <            if exeWithPath.find(swReleaseTop) == -1:
720 <                # the exe is private, so we must ship
721 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
722 <                path = swArea+'/'
723 <                exe = string.replace(exeWithPath, path,'')
724 <                filesToBeTarred.append(exe)
725 <                pass
726 <            else:
727 <                # the exe is from release, we'll find it on WN
728 <                pass
729 <
730 <        ## Now get the libraries: only those in local working area
731 <        libDir = 'lib'
732 <        lib = swArea+'/' +libDir
733 <        common.logger.debug(5,"lib "+lib+" to be tarred")
734 <        if os.path.exists(lib):
735 <            filesToBeTarred.append(libDir)
736 <
737 <        ## Now check if module dir is present
738 <        moduleDir = 'module'
739 <        if os.path.isdir(swArea+'/'+moduleDir):
740 <            filesToBeTarred.append(moduleDir)
741 <
742 <        ## Now check if the Data dir is present
743 <        dataDir = 'src/Data/'
744 <        if os.path.isdir(swArea+'/'+dataDir):
745 <            filesToBeTarred.append(dataDir)
746 <
747 <        ## Create the tar-ball
748 <        if len(filesToBeTarred)>0:
749 <            cwd = os.getcwd()
750 <            os.chdir(swArea)
751 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
752 <            for line in filesToBeTarred:
753 <                tarcmd = tarcmd + line + ' '
754 <            cout = runCommand(tarcmd)
755 <            if not cout:
756 <                raise CrabException('Could not create tar-ball')
757 <            os.chdir(cwd)
758 <        else:
759 <            common.logger.debug(5,"No files to be to be tarred")
760 <        
710 >        import tarfile
711 >        try: # create tar ball
712 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
713 >            ## First find the executable
714 >            if (self.executable != ''):
715 >                exeWithPath = self.scram.findFile_(executable)
716 >                if ( not exeWithPath ):
717 >                    raise CrabException('User executable '+executable+' not found')
718 >
719 >                ## then check if it's private or not
720 >                if exeWithPath.find(swReleaseTop) == -1:
721 >                    # the exe is private, so we must ship
722 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
723 >                    path = swArea+'/'
724 >                    # distinguish case when script is in user project area or given by full path somewhere else
725 >                    if exeWithPath.find(path) >= 0 :
726 >                        exe = string.replace(exeWithPath, path,'')
727 >                        tar.add(path+exe,exe)
728 >                    else :
729 >                        tar.add(exeWithPath,os.path.basename(executable))
730 >                    pass
731 >                else:
732 >                    # the exe is from release, we'll find it on WN
733 >                    pass
734 >
735 >            ## Now get the libraries: only those in local working area
736 >            libDir = 'lib'
737 >            lib = swArea+'/' +libDir
738 >            common.logger.debug(5,"lib "+lib+" to be tarred")
739 >            if os.path.exists(lib):
740 >                tar.add(lib,libDir)
741 >
742 >            ## Now check if module dir is present
743 >            moduleDir = 'module'
744 >            module = swArea + '/' + moduleDir
745 >            if os.path.isdir(module):
746 >                tar.add(module,moduleDir)
747 >
748 >            ## Now check if any data dir(s) is present
749 >            swAreaLen=len(swArea)
750 >            for root, dirs, files in os.walk(swArea):
751 >                if "data" in dirs:
752 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
753 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
754 >
755 >            ### Removed ProdAgent Api dependencies ###
756 >            ### Add ProdAgent dir to tar
757 >            #paDir = 'ProdAgentApi'
758 >            #pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
759 >            #if os.path.isdir(pa):
760 >            #    tar.add(pa,paDir)
761 >
762 >            ## Add ProdCommon dir to tar
763 >            prodcommonDir = 'ProdCommon'
764 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
765 >            if os.path.isdir(prodcommonPath):
766 >                tar.add(prodcommonPath,prodcommonDir)
767 >
768 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
769 >            tar.close()
770 >        except :
771 >            raise CrabException('Could not create tar-ball')
772 >
773 >        ## check for tarball size
774 >        tarballinfo = os.stat(self.tgzNameWithPath)
775 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
776 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
777 >
778 >        ## create tar-ball with ML stuff
779 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
780 >        try:
781 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
782 >            path=os.environ['CRABDIR'] + '/python/'
783 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
784 >                tar.add(path+file,file)
785 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
786 >            tar.close()
787 >        except :
788 >            raise CrabException('Could not create ML files tar-ball')
789 >
790          return
791 <        
791 >
792 >    def additionalInputFileTgz(self):
793 >        """
794 >        Put all additional files into a tar ball and return its name
795 >        """
796 >        import tarfile
797 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
798 >        tar = tarfile.open(tarName, "w:gz")
799 >        for file in self.additional_inbox_files:
800 >            tar.add(file,string.split(file,'/')[-1])
801 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
802 >        tar.close()
803 >        return tarName
804 >
805      def wsSetupEnvironment(self, nj):
806          """
807          Returns part of a job script which prepares
808          the execution environment for the job 'nj'.
809          """
810          # Prepare JobType-independent part
811 <        txt = ''
812 <  
813 <        ## OLI_Daniele at this level  middleware already known
685 <
686 <        txt += 'if [ $middleware == LCG ]; then \n'
811 >        txt = ''
812 >        txt += 'echo ">>> setup environment"\n'
813 >        txt += 'if [ $middleware == LCG ]; then \n'
814          txt += self.wsSetupCMSLCGEnvironment_()
815          txt += 'elif [ $middleware == OSG ]; then\n'
816          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
817 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
691 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
817 >        txt += '    if [ ! $? == 0 ] ;then\n'
818          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
819 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
820 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
821 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
696 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
697 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
698 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
819 >        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
820 >        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
821 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
822          txt += '        exit 1\n'
823          txt += '    fi\n'
824 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
825          txt += '\n'
826          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
827          txt += '    cd $WORKING_DIR\n'
828 <        txt += self.wsSetupCMSOSGEnvironment_()
828 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
829 >        txt += self.wsSetupCMSOSGEnvironment_()
830 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
831 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
832          txt += 'fi\n'
833  
834          # Prepare JobType-specific part
835          scram = self.scram.commandName()
836          txt += '\n\n'
837 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
837 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
838 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
839          txt += scram+' project CMSSW '+self.version+'\n'
840          txt += 'status=$?\n'
841          txt += 'if [ $status != 0 ] ; then\n'
842 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
843 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
844 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
845 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
718 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
719 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
720 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
721 <        ## OLI_Daniele
842 >        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
843 >        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
844 >        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
845 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
846          txt += '    if [ $middleware == OSG ]; then \n'
723        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
847          txt += '        cd $RUNTIME_AREA\n'
848 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
849 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
850          txt += '        /bin/rm -rf $WORKING_DIR\n'
851          txt += '        if [ -d $WORKING_DIR ] ;then\n'
852 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
853 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
854 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
855 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
731 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
732 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
733 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
852 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
853 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
854 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
855 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
856          txt += '        fi\n'
857          txt += '    fi \n'
858 <        txt += '   exit 1 \n'
858 >        txt += '    exit 1 \n'
859          txt += 'fi \n'
738        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
860          txt += 'cd '+self.version+'\n'
861 +        ########## FEDE FOR DBS2 ######################
862 +        txt += 'SOFTWARE_DIR=`pwd`\n'
863 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
864 +        ###############################################
865          ### needed grep for bug in scramv1 ###
866          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
742
867          # Handle the arguments:
868          txt += "\n"
869          txt += "## number of arguments (first argument always jobnumber)\n"
870          txt += "\n"
871 < #        txt += "narg=$#\n"
748 <        txt += "if [ $nargs -lt 2 ]\n"
871 >        txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
872          txt += "then\n"
873          txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
874          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
875          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
876          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
754        txt += '    rm -f $RUNTIME_AREA/$repo \n'
755        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
756        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
757        ## OLI_Daniele
877          txt += '    if [ $middleware == OSG ]; then \n'
759        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
878          txt += '        cd $RUNTIME_AREA\n'
879 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
880 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
881          txt += '        /bin/rm -rf $WORKING_DIR\n'
882          txt += '        if [ -d $WORKING_DIR ] ;then\n'
883 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
884 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
885 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
886 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
767 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
768 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
769 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
883 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
884 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
885 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
886 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
887          txt += '        fi\n'
888          txt += '    fi \n'
889          txt += "    exit 1\n"
# Line 775 | Line 892 | class Cmssw(JobType):
892  
893          # Prepare job-specific part
894          job = common.job_list[nj]
895 +        ### FEDE FOR DBS OUTPUT PUBLICATION
896 +        if (self.datasetPath):
897 +            txt += '\n'
898 +            txt += 'DatasetPath='+self.datasetPath+'\n'
899 +
900 +            datasetpath_split = self.datasetPath.split("/")
901 +
902 +            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
903 +            txt += 'DataTier='+datasetpath_split[2]+'\n'
904 +            txt += 'ApplicationFamily=cmsRun\n'
905 +
906 +        else:
907 +            txt += 'DatasetPath=MCDataTier\n'
908 +            txt += 'PrimaryDataset=null\n'
909 +            txt += 'DataTier=null\n'
910 +            txt += 'ApplicationFamily=MCDataTier\n'
911          if self.pset != None: #CarlosDaniele
912              pset = os.path.basename(job.configFilename())
913              txt += '\n'
914 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
915              if (self.datasetPath): # standard job
782                #txt += 'InputFiles=$2\n'
916                  txt += 'InputFiles=${args[1]}\n'
917                  txt += 'MaxEvents=${args[2]}\n'
918                  txt += 'SkipEvents=${args[3]}\n'
919                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
920 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
920 >                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
921                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
922 <                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
922 >                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
923                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
924 <                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
924 >                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
925              else:  # pythia like job
926 +                seedIndex=1
927 +                if (self.firstRun):
928 +                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
929 +                    txt += 'echo "FirstRun: <$FirstRun>"\n'
930 +                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
931 +                    seedIndex=seedIndex+1
932 +
933                  if (self.sourceSeed):
934 < #                    txt += 'Seed=$2\n'
935 <                    txt += 'Seed=${args[1]}\n'
936 <                    txt += 'echo "Seed: <$Seed>"\n'
937 <                    txt += 'sed "s#\<INPUT\>#$Seed#" $RUNTIME_AREA/'+pset+' > tmp.cfg\n'
934 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
935 >                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
936 >                    seedIndex=seedIndex+1
937 >                    ## the following seeds are not always present
938                      if (self.sourceSeedVtx):
939 < #                        txt += 'VtxSeed=$3\n'
800 <                        txt += 'VtxSeed=${args[2]}\n'
939 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
940                          txt += 'echo "VtxSeed: <$VtxSeed>"\n'
941 <                        txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp.cfg > pset.cfg\n'
942 <                    else:
943 <                        txt += 'mv tmp.cfg pset.cfg\n'
944 <                else:
945 <                    txt += '# Copy untouched pset\n'
946 <                    txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
947 <
941 >                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
942 >                        seedIndex += 1
943 >                    if (self.sourceSeedG4):
944 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
945 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
946 >                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
947 >                        seedIndex += 1
948 >                    if (self.sourceSeedMix):
949 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
950 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
951 >                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
952 >                        seedIndex += 1
953 >                    pass
954 >                pass
955 >            txt += 'mv -f '+pset+' pset.cfg\n'
956  
957          if len(self.additional_inbox_files) > 0:
958 <            for file in self.additional_inbox_files:
959 <                relFile = file.split("/")[-1]
960 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
961 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
815 <                txt += '   chmod +x '+relFile+'\n'
816 <                txt += 'fi\n'
817 <            pass
958 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
959 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
960 >            txt += 'fi\n'
961 >            pass
962  
963          if self.pset != None: #CarlosDaniele
820            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
821        
964              txt += '\n'
965              txt += 'echo "***** cat pset.cfg *********"\n'
966              txt += 'cat pset.cfg\n'
967              txt += 'echo "****** end pset.cfg ********"\n'
968              txt += '\n'
969 <            # txt += 'echo "***** cat pset1.cfg *********"\n'
970 <            # txt += 'cat pset1.cfg\n'
971 <            # txt += 'echo "****** end pset1.cfg ********"\n'
969 >            ### FEDE FOR DBS OUTPUT PUBLICATION
970 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
971 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
972 >            ##############
973 >            txt += '\n'
974          return txt
975  
976 <    def wsBuildExe(self, nj):
976 >    def wsBuildExe(self, nj=0):
977          """
978          Put in the script the commands to build an executable
979          or a library.
# Line 838 | Line 982 | class Cmssw(JobType):
982          txt = ""
983  
984          if os.path.isfile(self.tgzNameWithPath):
985 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
985 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
986              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
987              txt += 'untar_status=$? \n'
988              txt += 'if [ $untar_status -ne 0 ]; then \n'
# Line 846 | Line 990 | class Cmssw(JobType):
990              txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
991              txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
992              txt += '   if [ $middleware == OSG ]; then \n'
849            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
993              txt += '       cd $RUNTIME_AREA\n'
994 +            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
995 +            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
996              txt += '       /bin/rm -rf $WORKING_DIR\n'
997              txt += '       if [ -d $WORKING_DIR ] ;then\n'
998              txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
999              txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1000              txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1001              txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
857            txt += '           rm -f $RUNTIME_AREA/$repo \n'
858            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
859            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1002              txt += '       fi\n'
1003              txt += '   fi \n'
1004              txt += '   \n'
# Line 864 | Line 1006 | class Cmssw(JobType):
1006              txt += 'else \n'
1007              txt += '   echo "Successful untar" \n'
1008              txt += 'fi \n'
1009 +            txt += '\n'
1010 +            #### Removed ProdAgent API dependencies
1011 +            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
1012 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1013 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1014 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
1015 +            txt += 'else\n'
1016 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1017 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1018 +            ###################
1019 +            txt += 'fi\n'
1020 +            txt += '\n'
1021 +
1022              pass
1023 <        
1023 >
1024          return txt
1025  
1026      def modifySteeringCards(self, nj):
1027          """
1028 <        modify the card provided by the user,
1028 >        modify the card provided by the user,
1029          writing a new card into share dir
1030          """
1031 <        
1031 >
1032      def executableName(self):
1033 <        if self.pset == None: #CarlosDaniele
1033 >        if self.scriptExe: #CarlosDaniele
1034              return "sh "
1035          else:
1036              return self.executable
1037  
1038      def executableArgs(self):
1039 <        if self.pset == None:#CarlosDaniele
1039 >        if self.scriptExe:#CarlosDaniele
1040              return   self.scriptExe + " $NJob"
1041 <        else:
1042 <            return " -p pset.cfg"
1041 >        else:
1042 >            # if >= CMSSW_1_5_X, add -j crab_fjr.xml
1043 >            version_array = self.scram.getSWVersion().split('_')
1044 >            major = 0
1045 >            minor = 0
1046 >            try:
1047 >                major = int(version_array[1])
1048 >                minor = int(version_array[2])
1049 >            except:
1050 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1051 >                raise CrabException(msg)
1052 >            if major >= 1 and minor >= 5 :
1053 >                return " -j " + self.fjrFileName + " -p pset.cfg"
1054 >            else:
1055 >                return " -p pset.cfg"
1056  
1057      def inputSandbox(self, nj):
1058          """
1059          Returns a list of filenames to be put in JDL input sandbox.
1060          """
1061          inp_box = []
1062 <        # dict added to delete duplicate from input sandbox file list
1063 <        seen = {}
1062 >        # # dict added to delete duplicate from input sandbox file list
1063 >        # seen = {}
1064          ## code
1065          if os.path.isfile(self.tgzNameWithPath):
1066              inp_box.append(self.tgzNameWithPath)
1067 +        if os.path.isfile(self.MLtgzfile):
1068 +            inp_box.append(self.MLtgzfile)
1069          ## config
1070 <        if not self.pset is None: #CarlosDaniele
1071 <            inp_box.append(common.job_list[nj].configFilename())
1070 >        if not self.pset is None:
1071 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1072          ## additional input files
1073 <        #for file in self.additional_inbox_files:
1074 <        #    inp_box.append(common.work_space.cwdDir()+file)
1073 >        tgz = self.additionalInputFileTgz()
1074 >        inp_box.append(tgz)
1075          return inp_box
1076  
1077      def outputSandbox(self, nj):
# Line 910 | Line 1080 | class Cmssw(JobType):
1080          """
1081          out_box = []
1082  
913        stdout=common.job_list[nj].stdout()
914        stderr=common.job_list[nj].stderr()
915
1083          ## User Declared output files
1084 <        for out in self.output_file:
1085 <            n_out = nj + 1
1084 >        for out in (self.output_file+self.output_file_sandbox):
1085 >            n_out = nj + 1
1086              out_box.append(self.numberFile_(out,str(n_out)))
1087          return out_box
921        return []
1088  
1089      def prepareSteeringCards(self):
1090          """
# Line 932 | Line 1098 | class Cmssw(JobType):
1098          """
1099  
1100          txt = '\n'
1101 <        txt += '# directory content\n'
1101 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1102 >        txt += 'echo ">>> current directory content:"\n'
1103          txt += 'ls \n'
1104 <        file_list = ''
1105 <        for fileWithSuffix in self.output_file:
1104 >        txt += '\n'
1105 >
1106 >        txt += 'output_exit_status=0\n'
1107 >
1108 >        for fileWithSuffix in (self.output_file_sandbox):
1109              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
940            file_list=file_list+output_file_num+' '
1110              txt += '\n'
1111              txt += '# check output file\n'
1112 <            txt += 'ls '+fileWithSuffix+'\n'
1113 <            txt += 'ls_result=$?\n'
1114 <            #txt += 'exe_result=$?\n'
1115 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1116 <            txt += '   echo "ERROR: Problem with output file"\n'
1117 <            #txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1118 <            #txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
950 <            #txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
951 <            ### OLI_DANIELE
952 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1112 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1113 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1114 >            txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1115 >            txt += 'else\n'
1116 >            txt += '    exit_status=60302\n'
1117 >            txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1118 >            if common.scheduler.name().upper() == 'CONDOR_G':
1119                  txt += '    if [ $middleware == OSG ]; then \n'
1120                  txt += '        echo "prepare dummy output file"\n'
1121                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1122                  txt += '    fi \n'
1123 +            txt += 'fi\n'
1124 +
1125 +        for fileWithSuffix in (self.output_file):
1126 +            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1127 +            txt += '\n'
1128 +            txt += '# check output file\n'
1129 +            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1130 +            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1131 +                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1132 +                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1133 +            else:
1134 +                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1135 +                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1136              txt += 'else\n'
1137 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1137 >            txt += '    exit_status=60302\n'
1138 >            txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1139 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1140 >            txt += '    output_exit_status=$exit_status\n'
1141 >            if common.scheduler.name().upper() == 'CONDOR_G':
1142 >                txt += '    if [ $middleware == OSG ]; then \n'
1143 >                txt += '        echo "prepare dummy output file"\n'
1144 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1145 >                txt += '    fi \n'
1146              txt += 'fi\n'
1147 <      
1148 <        txt += 'cd $RUNTIME_AREA\n'
1149 <        file_list=file_list[:-1]
1150 <        txt += 'file_list="'+file_list+'"\n'
1151 <        txt += 'cd $RUNTIME_AREA\n'
1152 <        ### OLI_DANIELE
1153 <        txt += 'if [ $middleware == OSG ]; then\n'  
1154 <        txt += '    cd $RUNTIME_AREA\n'
1155 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
969 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
970 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
971 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
972 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
973 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
974 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
975 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
976 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
977 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
978 <        txt += '    fi\n'
979 <        txt += 'fi\n'
1147 >        file_list = []
1148 >        for fileWithSuffix in (self.output_file):
1149 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1150 >
1151 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1152 >        txt += '\n'
1153 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1154 >        txt += 'echo ">>> current directory content:"\n'
1155 >        txt += 'ls \n'
1156          txt += '\n'
1157 +        txt += 'cd $RUNTIME_AREA\n'
1158 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1159          return txt
1160  
1161      def numberFile_(self, file, txt):
# Line 988 | Line 1166 | class Cmssw(JobType):
1166          # take away last extension
1167          name = p[0]
1168          for x in p[1:-1]:
1169 <           name=name+"."+x
1169 >            name=name+"."+x
1170          # add "_txt"
1171          if len(p)>1:
1172 <          ext = p[len(p)-1]
1173 <          result = name + '_' + txt + "." + ext
1172 >            ext = p[len(p)-1]
1173 >            result = name + '_' + txt + "." + ext
1174          else:
1175 <          result = name + '_' + txt
1176 <        
1175 >            result = name + '_' + txt
1176 >
1177          return result
1178  
1179 <    def getRequirements(self):
1179 >    def getRequirements(self, nj=[]):
1180          """
1181 <        return job requirements to add to jdl files
1181 >        return job requirements to add to jdl files
1182          """
1183          req = ''
1184          if self.version:
1185              req='Member("VO-cms-' + \
1186                   self.version + \
1187                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1188 +        ## SL add requirement for OS version only if SL4
1189 +        #reSL4 = re.compile( r'slc4' )
1190 +        if self.executable_arch: # and reSL4.search(self.executable_arch):
1191 +            req+=' && Member("VO-cms-' + \
1192 +                 self.executable_arch + \
1193 +                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1194  
1195          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1196  
# Line 1016 | Line 1200 | class Cmssw(JobType):
1200          """ return the config filename """
1201          return self.name()+'.cfg'
1202  
1019    ### OLI_DANIELE
1203      def wsSetupCMSOSGEnvironment_(self):
1204          """
1205          Returns part of a job script which is prepares
1206          the execution environment and which is common for all CMS jobs.
1207          """
1208 <        txt = '\n'
1209 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1210 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1211 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1212 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1030 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1208 >        txt = '    echo ">>> setup CMS OSG environment:"\n'
1209 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1210 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1211 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1212 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1213          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1214 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1215 <        txt += '   else\n'
1216 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1217 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1218 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1219 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1038 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1039 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1040 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1041 <        txt += '       exit 1\n'
1214 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1215 >        txt += '    else\n'
1216 >        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1217 >        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1218 >        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1219 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1220          txt += '\n'
1221 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1222 <        txt += '       cd $RUNTIME_AREA\n'
1223 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1224 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1225 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1226 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1227 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1228 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1229 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1230 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1053 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1054 <        txt += '       fi\n'
1221 >        txt += '        cd $RUNTIME_AREA\n'
1222 >        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1223 >        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1224 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
1225 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1226 >        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1227 >        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1228 >        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1229 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1230 >        txt += '        fi\n'
1231          txt += '\n'
1232 <        txt += '       exit 1\n'
1233 <        txt += '   fi\n'
1232 >        txt += '        exit 1\n'
1233 >        txt += '    fi\n'
1234          txt += '\n'
1235 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1236 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1235 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1236 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1237  
1238          return txt
1239 <
1239 >
1240      ### OLI_DANIELE
1241      def wsSetupCMSLCGEnvironment_(self):
1242          """
1243          Returns part of a job script which is prepares
1244          the execution environment and which is common for all CMS jobs.
1245          """
1246 <        txt  = '   \n'
1247 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1248 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1249 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1250 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1251 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1252 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1253 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1254 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1255 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1256 <        txt += '       exit 1\n'
1257 <        txt += '   else\n'
1258 <        txt += '       echo "Sourcing environment... "\n'
1259 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1260 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1261 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1262 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1263 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1264 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1265 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1266 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1267 <        txt += '           exit 1\n'
1268 <        txt += '       fi\n'
1269 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1270 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1271 <        txt += '       result=$?\n'
1272 <        txt += '       if [ $result -ne 0 ]; then\n'
1273 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1274 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1275 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1276 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1277 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1278 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1279 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1280 <        txt += '           exit 1\n'
1281 <        txt += '       fi\n'
1282 <        txt += '   fi\n'
1283 <        txt += '   \n'
1284 <        txt += '   string=`cat /etc/redhat-release`\n'
1285 <        txt += '   echo $string\n'
1286 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1287 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1288 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1289 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1290 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1291 <        txt += '   else\n'
1292 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1293 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1294 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1295 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1296 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1297 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1298 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1299 <        txt += '       exit 1\n'
1300 <        txt += '   fi\n'
1301 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1302 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1246 >        txt = '    echo ">>> setup CMS LCG environment:"\n'
1247 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1248 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1249 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1250 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1251 >        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1252 >        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1253 >        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1254 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1255 >        txt += '        exit 1\n'
1256 >        txt += '    else\n'
1257 >        txt += '        echo "Sourcing environment... "\n'
1258 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1259 >        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1260 >        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1261 >        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1262 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1263 >        txt += '            exit 1\n'
1264 >        txt += '        fi\n'
1265 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1266 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1267 >        txt += '        result=$?\n'
1268 >        txt += '        if [ $result -ne 0 ]; then\n'
1269 >        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1270 >        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1271 >        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1272 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1273 >        txt += '            exit 1\n'
1274 >        txt += '        fi\n'
1275 >        txt += '    fi\n'
1276 >        txt += '    \n'
1277 >        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1278 >        return txt
1279 >
1280 >    ### FEDE FOR DBS OUTPUT PUBLICATION
1281 >    def modifyReport(self, nj):
1282 >        """
1283 >        insert the part of the script that modifies the FrameworkJob Report
1284 >        """
1285 >
1286 >        txt = ''
1287 >        try:
1288 >            publish_data = int(self.cfg_params['USER.publish_data'])
1289 >        except KeyError:
1290 >            publish_data = 0
1291 >        if (publish_data == 1):
1292 >            txt += 'echo ">>> Modify Job Report:" \n'
1293 >            ################ FEDE FOR DBS2 #############################################
1294 >            #txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1295 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1296 >            #############################################################################
1297 >
1298 >            txt += 'if [ -z "$SE" ]; then\n'
1299 >            txt += '    SE="" \n'
1300 >            txt += 'fi \n'
1301 >            txt += 'if [ -z "$SE_PATH" ]; then\n'
1302 >            txt += '    SE_PATH="" \n'
1303 >            txt += 'fi \n'
1304 >            txt += 'echo "SE = $SE"\n'
1305 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1306 >
1307 >            processedDataset = self.cfg_params['USER.publish_data_name']
1308 >            txt += 'ProcessedDataset='+processedDataset+'\n'
1309 >            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1310 >            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1311 >            #### FEDE: added slash in LFN ##############
1312 >            txt += '    FOR_LFN=/copy_problems/ \n'
1313 >            txt += 'else \n'
1314 >            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1315 >            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1316 >            txt += '    FOR_LFN=/store$tmp \n'
1317 >            txt += 'fi \n'
1318 >            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1319 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1320 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1321 >            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1322 >            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1323 >
1324 >            txt += 'modifyReport_result=$?\n'
1325 >            txt += 'echo modifyReport_result = $modifyReport_result\n'
1326 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1327 >            txt += '    exit_status=1\n'
1328 >            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1329 >            txt += 'else\n'
1330 >            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1331 >            txt += 'fi\n'
1332 >        else:
1333 >            txt += 'echo "no data publication required"\n'
1334 >        return txt
1335 >
1336 >    def cleanEnv(self):
1337 >        txt = ''
1338 >        txt += 'if [ $middleware == OSG ]; then\n'
1339 >        txt += '    cd $RUNTIME_AREA\n'
1340 >        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1341 >        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1342 >        txt += '    /bin/rm -rf $WORKING_DIR\n'
1343 >        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1344 >        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1345 >        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1346 >        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1347 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1348 >        txt += '    fi\n'
1349 >        txt += 'fi\n'
1350 >        txt += '\n'
1351          return txt
1352  
1353      def setParam_(self, param, value):
# Line 1132 | Line 1356 | class Cmssw(JobType):
1356      def getParams(self):
1357          return self._params
1358  
1135    def setTaskid_(self):
1136        self._taskId = self.cfg_params['taskId']
1137        
1138    def getTaskid(self):
1139        return self._taskId
1140
1141 #######################################################################
1359      def uniquelist(self, old):
1360          """
1361          remove duplicates from a list
# Line 1147 | Line 1364 | class Cmssw(JobType):
1364          for e in old:
1365              nd[e]=0
1366          return nd.keys()
1367 +
1368 +
def checkOut(self, limit):
    """
    Build the shell fragment that checks the size of the output files.

    The files considered are the sandbox output files (plus the regular
    output files when ``self.return_data == 1``), each renamed through
    ``self.numberFile_(name, '$NJob')``, followed by the job stdout and
    stderr files.

    The generated script sums the sizes of those files and compares the
    sum with ``limit``.  When the limit is exceeded it walks the files in
    order, deleting every file that is larger than the limit by itself or
    that pushes the running total over the limit, and finally sets
    exit_status=70000.

    NOTE: the size of a deleted file stays in the running total, so files
    listed after an oversized one may be deleted as well.

    limit -- maximum total size; it is written verbatim (``str(limit)``)
             into the script and compared with ``[ ... -lt ... ]``.

    Returns the script text as a string.
    """
    # Regular output files travel in the sandbox only when data is returned
    if self.return_data == 1:
        outFiles = self.output_file + self.output_file_sandbox
    else:
        outFiles = self.output_file_sandbox
    listOutFiles = [self.numberFile_(name, '$NJob') for name in outFiles]
    listOutFiles += ['$stdoutFile', '$stderrFile']
    fileNames = ' '.join(listOutFiles)

    pieces = [
        'echo ">>> Starting output sandbox limit check :"\n',
        'stdoutFile=`ls *stdout` \n',
        'stderrFile=`ls *stderr` \n',
        'echo "OUTPUT files: ' + fileNames + '"\n',
        'filesToCheck="' + fileNames + '"\n',
        'ls -gGhrta;\n',
        # first pass: total size of the files that actually exist
        'sum=0;\n',
        'for file in $filesToCheck ; do\n',
        '    if [ -e $file ]; then\n',
        '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n',
        '        sum=`expr $sum + $tt`\n',
        '    else\n',
        '        echo "WARNING: output file $file not found!"\n',
        '    fi\n',
        'done\n',
        'echo "Total Output dimension: $sum";\n',
        'limit=' + str(limit) + ';\n',
        'echo "OUTPUT FILES LIMIT SET TO: $limit";\n',
        'if [ $limit -lt $sum ]; then\n',
        '    echo "WARNING: output files have to big size - something will be lost;"\n',
        '    echo "         checking the output file sizes..."\n',
        '    tot=0;\n',
        '    for filefile in $filesToCheck ; do\n',
        # a missing file would leave dimFile empty and break expr and [
        '        if [ ! -e $filefile ]; then\n',
        '            continue\n',
        '        fi\n',
        '        dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n',
        # accumulate the size of THIS file (was the stale $tt of the first loop)
        '        tot=`expr $tot + $dimFile`;\n',
        '        if [ $limit -lt $dimFile ]; then\n',
        '            echo "deleting file: $filefile";\n',
        '            rm -f $filefile\n',
        '        elif [ $limit -lt $tot ]; then\n',
        '            echo "deleting file: $filefile";\n',
        '            rm -f $filefile\n',
        '        else\n',
        '            echo "saving file: $filefile"\n',
        '        fi\n',
        '    done\n',
        '    ls -agGhrt;\n',
        '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n',
        '    echo "JOB_EXIT_STATUS = 70000";\n',
        '    exit_status=70000;\n',
        # newline added so that "else" sits on its own line in the script
        'else\n',
        '    echo "Total Output dimension $sum is fine.";\n',
        'fi\n',
        'echo "Ending output sandbox limit check"\n',
    ]
    return ''.join(pieces)

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines