ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.164 by fanzago, Tue Mar 4 18:15:38 2008 UTC vs.
Revision 1.314 by spiga, Thu Jun 18 17:23:02 2009 UTC

# Line 1 | Line 1
1   from JobType import JobType
2 from crab_logger import Logger
2   from crab_exceptions import *
3   from crab_util import *
5 from BlackWhiteListParser import BlackWhiteListParser
4   import common
5   import Scram
6 + from Splitter import JobSplitter
7  
8 + from IMProv.IMProvNode import IMProvNode
9   import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params, ncjobs):
12 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
13          JobType.__init__(self, 'CMSSW')
14 <        common.logger.debug(3,'CMSSW::__init__')
15 <
16 <        self.argsList = []
14 >        common.logger.debug('CMSSW::__init__')
15 >        self.skip_blocks = skip_blocks
16 >        self.argsList = 1
17  
18          self._params = {}
19          self.cfg_params = cfg_params
20        # init BlackWhiteListParser
21        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
20  
21 <        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
21 >        ### Temporary patch to automatically skip the ISB size check:
22 >        server=self.cfg_params.get('CRAB.server_name',None)
23 >        size = 9.5
24 >        if server or common.scheduler.name().upper() in ['LSF','CAF']: size = 99999
25 >        ### D.S.
26 >        self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))
27  
28          # number of jobs requested to be created, limit obj splitting
29          self.ncjobs = ncjobs
30  
28        log = common.logger
29
31          self.scram = Scram.Scram(cfg_params)
32          self.additional_inbox_files = []
33          self.scriptExe = ''
34          self.executable = ''
35          self.executable_arch = self.scram.getArch()
36 <        self.tgz_name = 'default.tgz'
37 <        self.additional_tgz_name = 'additional.tgz'
36 >        self.tgz_name = 'default.tar.gz'
37 >        self.tar_name = 'default.tar'
38          self.scriptName = 'CMSSW.sh'
39 <        self.pset = ''      #scrip use case Da
40 <        self.datasetPath = '' #scrip use case Da
39 >        self.pset = ''
40 >        self.datasetPath = ''
41  
42 +        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
43          # set FJR file name
44          self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +        common.logger.log(10-1,"CMSSW version is: "+str(self.version))
48 +
49 +        version_array = self.version.split('_')
50 +        self.CMSSW_major = 0
51 +        self.CMSSW_minor = 0
52 +        self.CMSSW_patch = 0
53 +        try:
54 +            self.CMSSW_major = int(version_array[1])
55 +            self.CMSSW_minor = int(version_array[2])
56 +            self.CMSSW_patch = int(version_array[3])
57 +        except:
58 +            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
59 +            raise CrabException(msg)
60  
61 <        #
62 <        # Try to block creation in case of arch/version mismatch
63 <        #
64 <
65 < #        a = string.split(self.version, "_")
66 < #
67 < #        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
68 < #            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
69 < #            common.logger.message(msg)
55 < #        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
56 < #            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
57 < #            raise CrabException(msg)
58 < #
59 <
60 <        self.setParam_('application', self.version)
61 >        if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5):
62 >            msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version."
63 >            raise CrabException(msg)
64 >            """
65 >            As CMSSW versions are dropped we can drop more code:
66 >            1.X dropped: drop support for running .cfg on WN
67 >            2.0 dropped: drop all support for cfg here and in writeCfg
68 >            2.0 dropped: Recheck the random number seed support
69 >            """
70  
71          ### collect Data cards
72  
73 <        if not cfg_params.has_key('CMSSW.datasetpath'):
73 >
74 >        ### Temporary: added to remove input file control in the case of PU
75 >        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
76 >
77 >        tmp =  cfg_params['CMSSW.datasetpath']
78 >        common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)
79 >
80 >        if tmp =='':
81              msg = "Error: datasetpath not defined "
82              raise CrabException(msg)
83 <        tmp =  cfg_params['CMSSW.datasetpath']
68 <        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 <        if string.lower(tmp)=='none':
83 >        elif string.lower(tmp)=='none':
84              self.datasetPath = None
85              self.selectNoInput = 1
86          else:
87              self.datasetPath = tmp
88              self.selectNoInput = 0
89  
76        # ML monitoring
77        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
78        if not self.datasetPath:
79            self.setParam_('dataset', 'None')
80            self.setParam_('owner', 'None')
81        else:
82            ## SL what is supposed to fail here?
83            try:
84                datasetpath_split = self.datasetPath.split("/")
85                # standard style
86                self.setParam_('datasetFull', self.datasetPath)
87                self.setParam_('dataset', datasetpath_split[1])
88                self.setParam_('owner', datasetpath_split[2])
89            except:
90                self.setParam_('dataset', self.datasetPath)
91                self.setParam_('owner', self.datasetPath)
92
93        self.setParam_('taskId', common._db.queryTask('name')) ## new BL--DS
94
90          self.dataTiers = []
91  
92 +        self.debugWrap=''
93 +        self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
94 +        if self.debug_wrapper == 1: self.debugWrap='--debug'
95 +
96          ## now the application
97 +        self.managedGenerators = ['madgraph', 'comphep', 'lhe']
98 +        self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
99          self.executable = cfg_params.get('CMSSW.executable','cmsRun')
100 <        self.setParam_('exe', self.executable)
100 <        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
100 >        common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102          if not cfg_params.has_key('CMSSW.pset'):
103              raise CrabException("PSet file missing. Cannot run cmsRun ")
104          self.pset = cfg_params['CMSSW.pset']
105 <        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
105 >        common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
106          if self.pset.lower() != 'none' :
107              if (not os.path.exists(self.pset)):
108                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
# Line 117 | Line 117 | class Cmssw(JobType):
117          self.output_file_sandbox.append(self.fjrFileName)
118  
119          # other output files to be returned via sandbox or copied to SE
120 +        outfileflag = False
121          self.output_file = []
122          tmp = cfg_params.get('CMSSW.output_file',None)
123          if tmp :
124 <            tmpOutFiles = string.split(tmp,',')
125 <            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
126 <            for tmp in tmpOutFiles:
127 <                tmp=string.strip(tmp)
127 <                self.output_file.append(tmp)
128 <                pass
129 <        else:
130 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
131 <        pass
124 >            self.output_file = [x.strip() for x in tmp.split(',')]
125 >            outfileflag = True #output found
126 >        #else:
127 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
128  
129          # script_exe file as additional file in inputSandbox
130          self.scriptExe = cfg_params.get('USER.script_exe',None)
131          if self.scriptExe :
132 <           if not os.path.isfile(self.scriptExe):
133 <              msg ="ERROR. file "+self.scriptExe+" not found"
134 <              raise CrabException(msg)
135 <           self.additional_inbox_files.append(string.strip(self.scriptExe))
132 >            if not os.path.isfile(self.scriptExe):
133 >                msg ="ERROR. file "+self.scriptExe+" not found"
134 >                raise CrabException(msg)
135 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
136 >
137 >        self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
138 >        if self.AdditionalArgs : self.AdditionalArgs = string.replace(self.AdditionalArgs,',',' ')
139  
141        #CarlosDaniele
140          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
141 <           msg ="Error. script_exe  not defined"
142 <           raise CrabException(msg)
141 >            msg ="Error. script_exe  not defined"
142 >            raise CrabException(msg)
143 >
144 >        # use parent files...
145 >        self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
146  
147          ## additional input files
148          if cfg_params.has_key('USER.additional_input_files'):
# Line 161 | Line 162 | class Cmssw(JobType):
162                      if not os.path.exists(file):
163                          raise CrabException("Additional input file not found: "+file)
164                      pass
164                    # fname = string.split(file, '/')[-1]
165                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
166                    # shutil.copyfile(file, storedFile)
165                      self.additional_inbox_files.append(string.strip(file))
166                  pass
167              pass
168 <            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
168 >            common.logger.debug("Additional input files: "+str(self.additional_inbox_files))
169          pass
170  
173        ## Events per job
174        if cfg_params.has_key('CMSSW.events_per_job'):
175            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
176            self.selectEventsPerJob = 1
177        else:
178            self.eventsPerJob = -1
179            self.selectEventsPerJob = 0
180
181        ## number of jobs
182        if cfg_params.has_key('CMSSW.number_of_jobs'):
183            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
184            self.selectNumberOfJobs = 1
185        else:
186            self.theNumberOfJobs = 0
187            self.selectNumberOfJobs = 0
188
189        if cfg_params.has_key('CMSSW.total_number_of_events'):
190            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
191            self.selectTotalNumberEvents = 1
192        else:
193            self.total_number_of_events = 0
194            self.selectTotalNumberEvents = 0
195
196        if self.pset != None: #CarlosDaniele
197             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
198                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
199                 raise CrabException(msg)
200        else:
201             if (self.selectNumberOfJobs == 0):
202                 msg = 'Must specify  number_of_jobs.'
203                 raise CrabException(msg)
171  
172          ## New method of dealing with seeds
173          self.incrementSeeds = []
# Line 216 | Line 183 | class Cmssw(JobType):
183                  tmp.strip()
184                  self.incrementSeeds.append(tmp)
185  
219        ## Old method of dealing with seeds
220        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
221        ## remove
222        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
223        if self.sourceSeed:
224          print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
225          self.incrementSeeds.append('sourceSeed')
226
227        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
228        if self.sourceSeedVtx:
229          print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
230          self.incrementSeeds.append('VtxSmeared')
231
232        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
233        if self.sourceSeedG4:
234          print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
235          self.incrementSeeds.append('g4SimHits')
236
237        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
238        if self.sourceSeedMix:
239          print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
240          self.incrementSeeds.append('mix')
241
186          self.firstRun = cfg_params.get('CMSSW.first_run',None)
187  
244        if self.pset != None: #CarlosDaniele
245            import PsetManipulator as pp
246            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
247
188          # Copy/return
249
189          self.copy_data = int(cfg_params.get('USER.copy_data',0))
190          self.return_data = int(cfg_params.get('USER.return_data',0))
191  
192 +        self.conf = {}
193 +        self.conf['pubdata'] = None
194 +        # number of jobs requested to be created, limit obj splitting DD
195          #DBSDLS-start
196          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
197          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
# Line 261 | Line 203 | class Cmssw(JobType):
203          if self.datasetPath:
204              blockSites = self.DataDiscoveryAndLocation(cfg_params)
205          #DBSDLS-end
206 <
265 <        self.tgzNameWithPath = self.getTarBall(self.executable)
206 >        self.conf['blockSites']=blockSites
207  
208          ## Select Splitting
209 +        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))
210 +
211          if self.selectNoInput:
212 <            if self.pset == None: #CarlosDaniele
213 <                self.jobSplittingForScript()
212 >            if self.pset == None:
213 >                self.algo = 'ForScript'
214              else:
215 <                self.jobSplittingNoInput()
215 >                self.algo = 'NoInput'
216 >                self.conf['managedGenerators']=self.managedGenerators
217 >                self.conf['generator']=self.generator
218 >        elif splitByRun ==1:
219 >            self.algo = 'RunBased'
220          else:
221 <            self.jobSplittingByBlocks(blockSites)
221 >            self.algo = 'EventBased'
222 >
223 > #        self.algo = 'LumiBased'
224 >        splitter = JobSplitter(self.cfg_params,self.conf)
225 >        self.dict = splitter.Algos()[self.algo]()
226 >
227 >        self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
228 >        self.rootArgsFilename= 'arguments'
229 >        # modify Pset only the first time
230 >        if (isNew and self.pset != None): self.ModifyPset()
231 >
232 >        ## Prepare inputSandbox TarBall (only the first time)
233 >        self.tarNameWithPath = self.getTarBall(self.executable)
234 >
235 >
236 >    def ModifyPset(self):
237 >        import PsetManipulator as pp
238 >        PsetEdit = pp.PsetManipulator(self.pset)
239 >        try:
240 >            # Add FrameworkJobReport to parameter-set, set max events.
241 >            # Reset later for data jobs by writeCFG which does all modifications
242 >            PsetEdit.maxEvent(1)
243 >            PsetEdit.skipEvent(0)
244 >            PsetEdit.psetWriter(self.configFilename())
245 >            ## If present, add TFileService to output files
246 >            if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
247 >                tfsOutput = PsetEdit.getTFileService()
248 >                if tfsOutput:
249 >                    if tfsOutput in self.output_file:
250 >                        common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
251 >                    else:
252 >                        outfileflag = True #output found
253 >                        self.output_file.append(tfsOutput)
254 >                        common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
255 >                    pass
256 >                pass
257 >            ## If present and requested, add PoolOutputModule to output files
258 >            edmOutput = PsetEdit.getPoolOutputModule()
259 >            if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
260 >                if edmOutput:
261 >                    if edmOutput in self.output_file:
262 >                        common.logger.debug("Output from PoolOutputModule "+edmOutput+" already in output files")
263 >                    else:
264 >                        self.output_file.append(edmOutput)
265 >                        common.logger.info("Adding "+edmOutput+" (from PoolOutputModule) to list of output files")
266 >                    pass
267 >                pass
268 >            # not required: check anyhow if present, to avoid accidental T2 overload
269 >            else:
270 >                if edmOutput and (edmOutput not in self.output_file):
271 >                    msg = "ERROR: a PoolOutputModule is present in your ParameteSet %s \n"%self.pset
272 >                    msg +="         but the file produced ( %s ) is not in the list of output files\n"%edmOutput
273 >                    msg += "WARNING: please remove it. If you want to keep it, add the file to output_files or use CMSSW.get_edm_output\n"
274 >                    if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
275 >                        msg +=" CMSSW.ignore_edm_output==True : Hope you know what you are doing...\n"
276 >                        common.logger.info(msg)
277 >                    else:
278 >                        raise CrabException(msg)
279 >                pass
280 >            pass
281 >
282 >            if (PsetEdit.getBadFilesSetting()):
283 >                msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
284 >                common.logger.info(msg)
285 >
286 >        except CrabException, msg:
287 >            common.logger.info(str(msg))
288 >            msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
289 >            raise CrabException(msg)
290  
276        # modify Pset
277        if self.pset != None: #CarlosDaniele
278            try:
279                # Add FrameworkJobReport to parameter-set, set max events.
280                # Reset later for data jobs by writeCFG which does all modifications
281                PsetEdit.addCrabFJR(self.fjrFileName)
282                PsetEdit.maxEvent(self.eventsPerJob)
283                PsetEdit.psetWriter(self.configFilename())
284            except:
285                msg='Error while manipuliating ParameterSet: exiting...'
286                raise CrabException(msg)
291  
292      def DataDiscoveryAndLocation(self, cfg_params):
293  
294          import DataDiscovery
295          import DataLocation
296 <        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
296 >        common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
297  
298          datasetPath=self.datasetPath
299  
300          ## Contact the DBS
301 <        common.logger.message("Contacting Data Discovery Services ...")
301 >        common.logger.info("Contacting Data Discovery Services ...")
302          try:
303 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
303 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
304              self.pubdata.fetchDBSInfo()
305  
306          except DataDiscovery.NotExistingDatasetError, ex :
# Line 310 | Line 314 | class Cmssw(JobType):
314              raise CrabException(msg)
315  
316          self.filesbyblock=self.pubdata.getFiles()
317 <        self.eventsbyblock=self.pubdata.getEventsPerBlock()
318 <        self.eventsbyfile=self.pubdata.getEventsPerFile()
317 >        #print self.filesbyblock
318 >        self.conf['pubdata']=self.pubdata
319  
320          ## get max number of events
321 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
321 >        self.maxEvents=self.pubdata.getMaxEvents()
322  
323          ## Contact the DLS and build a list of sites hosting the fileblocks
324          try:
325              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
326              dataloc.fetchDLSInfo()
327 +
328          except DataLocation.DataLocationError , ex:
329              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
330              raise CrabException(msg)
331  
332  
333 <        sites = dataloc.getSites()
333 >        unsorted_sites = dataloc.getSites()
334 >        sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
335 >        for lfn in self.filesbyblock.keys():
336 >            if unsorted_sites.has_key(lfn):
337 >                sites[lfn]=unsorted_sites[lfn]
338 >            else:
339 >                sites[lfn]=[]
340 >
341 >        if len(sites)==0:
342 >            msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
343 >            msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
344 >            msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
345 >            raise CrabException(msg)
346 >
347          allSites = []
348          listSites = sites.values()
349          for listSite in listSites:
350              for oneSite in listSite:
351                  allSites.append(oneSite)
352 <        allSites = self.uniquelist(allSites)
352 >        [allSites.append(it) for it in allSites if not allSites.count(it)]
353 >
354  
355          # screen output
356 <        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
356 >        common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
357  
358          return sites
359  
341    def setArgsList(self, argsList):
342        self.argsList = argsList
360  
361 <    def jobSplittingByBlocks(self, blockSites):
345 <        """
346 <        Perform job splitting. Jobs run over an integer number of files
347 <        and no more than one block.
348 <        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
349 <        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
350 <                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
351 <                  self.maxEvents, self.filesbyblock
352 <        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
353 <              self.total_number_of_jobs - Total # of jobs
354 <              self.list_of_args - File(s) job will run on (a list of lists)
355 <        """
356 <
357 <        # ---- Handle the possible job splitting configurations ---- #
358 <        if (self.selectTotalNumberEvents):
359 <            totalEventsRequested = self.total_number_of_events
360 <        if (self.selectEventsPerJob):
361 <            eventsPerJobRequested = self.eventsPerJob
362 <            if (self.selectNumberOfJobs):
363 <                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
364 <
365 <        # If user requested all the events in the dataset
366 <        if (totalEventsRequested == -1):
367 <            eventsRemaining=self.maxEvents
368 <        # If user requested more events than are in the dataset
369 <        elif (totalEventsRequested > self.maxEvents):
370 <            eventsRemaining = self.maxEvents
371 <            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
372 <        # If user requested less events than are in the dataset
373 <        else:
374 <            eventsRemaining = totalEventsRequested
375 <
376 <        # If user requested more events per job than are in the dataset
377 <        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
378 <            eventsPerJobRequested = self.maxEvents
379 <
380 <        # For user info at end
381 <        totalEventCount = 0
382 <
383 <        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
384 <            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
385 <
386 <        if (self.selectNumberOfJobs):
387 <            common.logger.message("May not create the exact number_of_jobs requested.")
388 <
389 <        if ( self.ncjobs == 'all' ) :
390 <            totalNumberOfJobs = 999999999
391 <        else :
392 <            totalNumberOfJobs = self.ncjobs
393 <
394 <
395 <        blocks = blockSites.keys()
396 <        blockCount = 0
397 <        # Backup variable in case self.maxEvents counted events in a non-included block
398 <        numBlocksInDataset = len(blocks)
399 <
400 <        jobCount = 0
401 <        list_of_lists = []
402 <
403 <        # list tracking which jobs are in which jobs belong to which block
404 <        jobsOfBlock = {}
405 <
406 <        # ---- Iterate over the blocks in the dataset until ---- #
407 <        # ---- we've met the requested total # of events    ---- #
408 <        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
409 <            block = blocks[blockCount]
410 <            blockCount += 1
411 <            if block not in jobsOfBlock.keys() :
412 <                jobsOfBlock[block] = []
413 <
414 <            if self.eventsbyblock.has_key(block) :
415 <                numEventsInBlock = self.eventsbyblock[block]
416 <                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
417 <
418 <                files = self.filesbyblock[block]
419 <                numFilesInBlock = len(files)
420 <                if (numFilesInBlock <= 0):
421 <                    continue
422 <                fileCount = 0
361 >    def split(self, jobParams,firstJobID):
362  
363 <                # ---- New block => New job ---- #
364 <                parString = ""
365 <                # counter for number of events in files currently worked on
427 <                filesEventCount = 0
428 <                # flag if next while loop should touch new file
429 <                newFile = 1
430 <                # job event counter
431 <                jobSkipEventCount = 0
432 <
433 <                # ---- Iterate over the files in the block until we've met the requested ---- #
434 <                # ---- total # of events or we've gone over all the files in this block  ---- #
435 <                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
436 <                    file = files[fileCount]
437 <                    if newFile :
438 <                        try:
439 <                            numEventsInFile = self.eventsbyfile[file]
440 <                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
441 <                            # increase filesEventCount
442 <                            filesEventCount += numEventsInFile
443 <                            # Add file to current job
444 <                            parString += '\\\"' + file + '\\\"\,'
445 <                            newFile = 0
446 <                        except KeyError:
447 <                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
448 <
449 <
450 <                    # if less events in file remain than eventsPerJobRequested
451 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
452 <                        # if last file in block
453 <                        if ( fileCount == numFilesInBlock-1 ) :
454 <                            # end job using last file, use remaining events in block
455 <                            # close job and touch new file
456 <                            fullString = parString[:-2]
457 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
458 <                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
459 <                            self.jobDestination.append(blockSites[block])
460 <                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
461 <                            # fill jobs of block dictionary
462 <                            jobsOfBlock[block].append(jobCount+1)
463 <                            # reset counter
464 <                            jobCount = jobCount + 1
465 <                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
466 <                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
467 <                            jobSkipEventCount = 0
468 <                            # reset file
469 <                            parString = ""
470 <                            filesEventCount = 0
471 <                            newFile = 1
472 <                            fileCount += 1
473 <                        else :
474 <                            # go to next file
475 <                            newFile = 1
476 <                            fileCount += 1
477 <                    # if events in file equal to eventsPerJobRequested
478 <                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
479 <                        # close job and touch new file
480 <                        fullString = parString[:-2]
481 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
482 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
483 <                        self.jobDestination.append(blockSites[block])
484 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
485 <                        jobsOfBlock[block].append(jobCount+1)
486 <                        # reset counter
487 <                        jobCount = jobCount + 1
488 <                        totalEventCount = totalEventCount + eventsPerJobRequested
489 <                        eventsRemaining = eventsRemaining - eventsPerJobRequested
490 <                        jobSkipEventCount = 0
491 <                        # reset file
492 <                        parString = ""
493 <                        filesEventCount = 0
494 <                        newFile = 1
495 <                        fileCount += 1
363 >        jobParams = self.dict['args']
364 >        njobs = self.dict['njobs']
365 >        self.jobDestination = self.dict['jobDestination']
366  
367 <                    # if more events in file remain than eventsPerJobRequested
368 <                    else :
499 <                        # close job but don't touch new file
500 <                        fullString = parString[:-2]
501 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
502 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
503 <                        self.jobDestination.append(blockSites[block])
504 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
505 <                        jobsOfBlock[block].append(jobCount+1)
506 <                        # increase counter
507 <                        jobCount = jobCount + 1
508 <                        totalEventCount = totalEventCount + eventsPerJobRequested
509 <                        eventsRemaining = eventsRemaining - eventsPerJobRequested
510 <                        # calculate skip events for last file
511 <                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
512 <                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
513 <                        # remove all but the last file
514 <                        filesEventCount = self.eventsbyfile[file]
515 <                        parString = '\\\"' + file + '\\\"\,'
516 <                    pass # END if
517 <                pass # END while (iterate over files in the block)
518 <        pass # END while (iterate over blocks in the dataset)
519 <        self.ncjobs = self.total_number_of_jobs = jobCount
520 <        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
521 <            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
522 <        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
523 <
524 <        # screen output
525 <        screenOutput = "List of jobs and available destination sites:\n\n"
526 <
527 <        # keep trace of block with no sites to print a warning at the end
528 <        noSiteBlock = []
529 <        bloskNoSite = []
530 <
531 <        blockCounter = 0
532 <        for block in blocks:
533 <            if block in jobsOfBlock.keys() :
534 <                blockCounter += 1
535 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
536 <                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
537 <                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
538 <                    bloskNoSite.append( blockCounter )
539 <
540 <        common.logger.message(screenOutput)
541 <        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
542 <            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
543 <            virgola = ""
544 <            if len(bloskNoSite) > 1:
545 <                virgola = ","
546 <            for block in bloskNoSite:
547 <                msg += ' ' + str(block) + virgola
548 <            msg += '\n               Related jobs:\n                 '
549 <            virgola = ""
550 <            if len(noSiteBlock) > 1:
551 <                virgola = ","
552 <            for range_jobs in noSiteBlock:
553 <                msg += str(range_jobs) + virgola
554 <            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
555 <            if self.cfg_params.has_key('EDG.se_white_list'):
556 <                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
557 <                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
558 <                msg += 'Please check if the dataset is available at this site!)\n'
559 <            if self.cfg_params.has_key('EDG.ce_white_list'):
560 <                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
561 <                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
562 <                msg += 'Please check if the dataset is available at this site!)\n'
367 >        if njobs==0:
368 >            raise CrabException("Ask to split "+str(njobs)+" jobs: aborting")
369  
370 <            common.logger.message(msg)
370 >        # create the empty structure
371 >        for i in range(njobs):
372 >            jobParams.append("")
373  
374 <        self.list_of_args = list_of_lists
374 >        listID=[]
375 >        listField=[]
376 >        listDictions=[]
377 >        exist= os.path.exists(self.argsFile)
378 >        for id in range(njobs):
379 >            job = id + int(firstJobID)
380 >            listID.append(job+1)
381 >            job_ToSave ={}
382 >            concString = ' '
383 >            argu=''
384 >            str_argu = str(job+1)
385 >            if len(jobParams[id]):
386 >                argu = {'JobID': job+1}
387 >                for i in range(len(jobParams[id])):
388 >                    argu[self.dict['params'][i]]=jobParams[id][i]
389 >                # just for debug
390 >                str_argu += concString.join(jobParams[id])
391 >            if argu != '': listDictions.append(argu)
392 >            job_ToSave['arguments']= str(job+1)
393 >            job_ToSave['dlsDestination']= self.jobDestination[id]
394 >            listField.append(job_ToSave)
395 >            from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
396 >            cms_se = CmsSEMap()
397 >            msg="Job  %s  Arguments:  %s\n"%(str(job+1),str_argu)
398 >            msg+="\t  Destination: %s "%(str(self.jobDestination[id]))
399 >            SEDestination = [cms_se[dest] for dest in self.jobDestination[id]]
400 >            msg+="\t  CMSDestination: %s "%(str(SEDestination))
401 >            common.logger.log(10-1,msg)
402 >        # write xml
403 >        if len(listDictions):
404 >            if exist==False: self.CreateXML()
405 >            self.addEntry(listDictions)
406 >            self.addXMLfile()
407 >        common._db.updateJob_(listID,listField)
408 >        self.zipTarFile()
409          return
410  
411 <    def jobSplittingNoInput(self):
570 <        """
571 <        Perform job splitting based on number of event per job
572 <        """
573 <        common.logger.debug(5,'Splitting per events')
411 >    def addXMLfile(self):
412  
413 <        if (self.selectEventsPerJob):
414 <            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
415 <        if (self.selectNumberOfJobs):
416 <            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
417 <        if (self.selectTotalNumberEvents):
418 <            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
419 <
420 <        if (self.total_number_of_events < 0):
421 <            msg='Cannot split jobs per Events with "-1" as total number of events'
413 >        import tarfile
414 >        try:
415 >            tar = tarfile.open(self.tarNameWithPath, "a")
416 >            tar.add(self.argsFile, os.path.basename(self.argsFile))
417 >            tar.close()
418 >        except IOError, exc:
419 >            msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
420 >            msg += str(exc)
421 >            raise CrabException(msg)
422 >        except tarfile.TarError, exc:
423 >            msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
424 >            msg += str(exc)
425              raise CrabException(msg)
426  
427 <        if (self.selectEventsPerJob):
587 <            if (self.selectTotalNumberEvents):
588 <                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
589 <            elif(self.selectNumberOfJobs) :
590 <                self.total_number_of_jobs =self.theNumberOfJobs
591 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
592 <
593 <        elif (self.selectNumberOfJobs) :
594 <            self.total_number_of_jobs = self.theNumberOfJobs
595 <            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
596 <
597 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
598 <
599 <        # is there any remainder?
600 <        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
601 <
602 <        common.logger.debug(5,'Check  '+str(check))
603 <
604 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
605 <        if check > 0:
606 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
607 <
608 <        # argument is seed number.$i
609 <        self.list_of_args = []
610 <        for i in range(self.total_number_of_jobs):
611 <            ## Since there is no input, any site is good
612 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
613 <            args=[]
614 <            if (self.firstRun):
615 <                ## pythia first run
616 <                args.append(str(self.firstRun)+str(i))
617 <            self.list_of_args.append(args)
618 <
619 <        return
620 <
621 <
622 <    def jobSplittingForScript(self):#CarlosDaniele
427 >    def CreateXML(self):
428          """
624        Perform job splitting based on number of job
429          """
430 <        common.logger.debug(5,'Splitting per job')
431 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
628 <
629 <        self.total_number_of_jobs = self.theNumberOfJobs
630 <
631 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
632 <
633 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
634 <
635 <        # argument is seed number.$i
636 <        self.list_of_args = []
637 <        for i in range(self.total_number_of_jobs):
638 <            ## Since there is no input, any site is good
639 <           # self.jobDestination.append(["Any"])
640 <            self.jobDestination.append([""])
641 <            ## no random seed
642 <            self.list_of_args.append([str(i)])
430 >        result = IMProvNode( self.rootArgsFilename )
431 >        outfile = file( self.argsFile, 'w').write(str(result))
432          return
433  
434 <    def split(self, jobParams):
435 <
436 <        #### Fabio
648 <        njobs = self.total_number_of_jobs
649 <        arglist = self.list_of_args
650 <        # create the empty structure
651 <        for i in range(njobs):
652 <            jobParams.append("")
653 <
654 <        for job in range(njobs):
655 <            jobParams[job] = arglist[job]
656 <            job_ToSave ={}
657 <            job_ToSave['arguments']= str(jobParams[job])## new BL--DS
658 <            job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
659 <            common._db.updateJob_(job,job_ToSave)## new BL--DS
660 <            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
434 >    def addEntry(self, listDictions):
435 >        """
436 >        _addEntry_
437  
438 +        add an entry to the xml file
439 +        """
440 +        from IMProv.IMProvLoader import loadIMProvFile
441 +        ## load xml
442 +        improvDoc = loadIMProvFile(self.argsFile)
443 +        entrname= 'Job'
444 +        for dictions in listDictions:
445 +           report = IMProvNode(entrname , None, **dictions)
446 +           improvDoc.addNode(report)
447 +        outfile = file( self.argsFile, 'w').write(str(improvDoc))
448          return
449  
664    def getJobTypeArguments(self, nj, sched):
665        result = ''
666        jobs=[]
667        jobs.append(nj)
668        for i in common._db.queryJob('arguments',jobs):##  BL--DS
669            result=result+str(i)+" "
670        return result
671
450      def numberOfJobs(self):
451 <        # Fabio
674 <        return self.total_number_of_jobs
451 >        return self.dict['njobs']
452  
453      def getTarBall(self, exe):
454          """
455          Return the TarBall with lib and exe
456          """
457 <
458 <        # if it exist, just return it
459 <        #
683 <        # Marco. Let's start to use relative path for Boss XML files
684 <        #
685 <        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
686 <        if os.path.exists(self.tgzNameWithPath):
687 <            return self.tgzNameWithPath
457 >        self.tarNameWithPath = common.work_space.pathForTgz()+self.tar_name
458 >        if os.path.exists(self.tarNameWithPath):
459 >            return self.tarNameWithPath
460  
461          # Prepare a tar gzipped file with user binaries.
462          self.buildTar_(exe)
463  
464 <        return string.strip(self.tgzNameWithPath)
464 >        return string.strip(self.tarNameWithPath)
465  
466      def buildTar_(self, executable):
467  
468          # First of all declare the user Scram area
469          swArea = self.scram.getSWArea_()
698        #print "swArea = ", swArea
699        # swVersion = self.scram.getSWVersion()
700        # print "swVersion = ", swVersion
470          swReleaseTop = self.scram.getReleaseTop_()
702        #print "swReleaseTop = ", swReleaseTop
471  
472          ## check if working area is release top
473          if swReleaseTop == '' or swArea == swReleaseTop:
474 +            common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
475              return
476  
477          import tarfile
478          try: # create tar ball
479 <            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
479 >            #tar = tarfile.open(self.tgzNameWithPath, "w:gz")
480 >            tar = tarfile.open(self.tarNameWithPath, "w")
481              ## First find the executable
482              if (self.executable != ''):
483                  exeWithPath = self.scram.findFile_(executable)
# Line 717 | Line 487 | class Cmssw(JobType):
487                  ## then check if it's private or not
488                  if exeWithPath.find(swReleaseTop) == -1:
489                      # the exe is private, so we must ship
490 <                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
490 >                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
491                      path = swArea+'/'
492                      # distinguish case when script is in user project area or given by full path somewhere else
493                      if exeWithPath.find(path) >= 0 :
# Line 731 | Line 501 | class Cmssw(JobType):
501                      pass
502  
503              ## Now get the libraries: only those in local working area
504 +            tar.dereference=True
505              libDir = 'lib'
506              lib = swArea+'/' +libDir
507 <            common.logger.debug(5,"lib "+lib+" to be tarred")
507 >            common.logger.debug("lib "+lib+" to be tarred")
508              if os.path.exists(lib):
509                  tar.add(lib,libDir)
510  
# Line 742 | Line 513 | class Cmssw(JobType):
513              module = swArea + '/' + moduleDir
514              if os.path.isdir(module):
515                  tar.add(module,moduleDir)
516 +            tar.dereference=False
517  
518              ## Now check if any data dir(s) is present
519 <            swAreaLen=len(swArea)
520 <            for root, dirs, files in os.walk(swArea):
521 <                if "data" in dirs:
522 <                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
523 <                    tar.add(root+"/data",root[swAreaLen:]+"/data")
519 >            self.dataExist = False
520 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
521 >            while len(todo_list):
522 >                entry, name = todo_list.pop()
523 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
524 >                    continue
525 >                if os.path.isdir(swArea+"/src/"+entry):
526 >                    entryPath = entry + '/'
527 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
528 >                    if name == 'data':
529 >                        self.dataExist=True
530 >                        common.logger.debug("data "+entry+" to be tarred")
531 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
532 >                    pass
533 >                pass
534 >
535 >            ### CMSSW ParameterSet
536 >            if not self.pset is None:
537 >                cfg_file = common.work_space.jobDir()+self.configFilename()
538 >                tar.add(cfg_file,self.configFilename())
539  
540 +            try:
541 +                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
542 +                tar.add(crab_cfg_file,'crab.cfg')
543 +            except:
544 +                pass
545  
546              ## Add ProdCommon dir to tar
547 <            prodcommonDir = 'ProdCommon'
548 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
549 <            if os.path.isdir(prodcommonPath):
550 <                tar.add(prodcommonPath,prodcommonDir)
547 >            prodcommonDir = './'
548 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
549 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
550 >                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
551 >                           'WMCore/__init__.py','WMCore/Algorithms']
552 >            for file in neededStuff:
553 >                tar.add(prodcommonPath+file,prodcommonDir+file)
554 >
555 >            ##### ML stuff
556 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
557 >            path=os.environ['CRABDIR'] + '/python/'
558 >            for file in ML_file_list:
559 >                tar.add(path+file,file)
560 >
561 >            ##### Utils
562 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
563 >            for file in Utils_file_list:
564 >                tar.add(path+file,file)
565 >
566 >            ##### AdditionalFiles
567 >            tar.dereference=True
568 >            for file in self.additional_inbox_files:
569 >                tar.add(file,string.split(file,'/')[-1])
570 >            tar.dereference=False
571 >            common.logger.log(10-1,"Files in "+self.tarNameWithPath+" : "+str(tar.getnames()))
572  
760            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
573              tar.close()
574 <        except :
575 <            raise CrabException('Could not create tar-ball')
574 >        except IOError, exc:
575 >            msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
576 >            msg += str(exc)
577 >            raise CrabException(msg)
578 >        except tarfile.TarError, exc:
579 >            msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
580 >            msg += str(exc)
581 >            raise CrabException(msg)
582 >
583 >    def zipTarFile(self):
584 >
585 >        cmd = "gzip -c %s > %s "%(self.tarNameWithPath,self.tgzNameWithPath)
586 >        res=runCommand(cmd)
587  
765        ## check for tarball size
588          tarballinfo = os.stat(self.tgzNameWithPath)
589          if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
590 <            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
590 >            msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
591 >               +'MB input sandbox limit \n'
592 >            msg += '      and not supported by the direct GRID submission system.\n'
593 >            msg += '      Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
594 >            msg += '      For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
595 >            raise CrabException(msg)
596  
597          ## create tar-ball with ML stuff
771        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
772        try:
773            tar = tarfile.open(self.MLtgzfile, "w:gz")
774            path=os.environ['CRABDIR'] + '/python/'
775            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py']:
776                tar.add(path+file,file)
777            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
778            tar.close()
779        except :
780            raise CrabException('Could not create ML files tar-ball')
781
782        return
783
784    def additionalInputFileTgz(self):
785        """
786        Put all additional files into a tar ball and return its name
787        """
788        import tarfile
789        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
790        tar = tarfile.open(tarName, "w:gz")
791        for file in self.additional_inbox_files:
792            tar.add(file,string.split(file,'/')[-1])
793        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
794        tar.close()
795        return tarName
598  
599 <    def wsSetupEnvironment(self, nj):
599 >    def wsSetupEnvironment(self, nj=0):
600          """
601          Returns part of a job script which prepares
602          the execution environment for the job 'nj'.
603          """
604 +        # FUTURE: Drop support for .cfg when possible
605 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
606 +            psetName = 'pset.py'
607 +        else:
608 +            psetName = 'pset.cfg'
609          # Prepare JobType-independent part
610          txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
611          txt += 'echo ">>> setup environment"\n'
612 <        txt += 'if [ $middleware == LCG ]; then \n'
612 >        txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
613          txt += self.wsSetupCMSLCGEnvironment_()
614          txt += 'elif [ $middleware == OSG ]; then\n'
615          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
616          txt += '    if [ ! $? == 0 ] ;then\n'
810        #txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
811        #txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
812        #txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
813        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
814        #txt += '        exit 1\n'
617          txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
618          txt += '        job_exit_code=10016\n'
619          txt += '        func_exit\n'
# Line 822 | Line 624 | class Cmssw(JobType):
624          txt += '    cd $WORKING_DIR\n'
625          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
626          txt += self.wsSetupCMSOSGEnvironment_()
627 <        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
628 <        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
627 >        #Setup SGE Environment
628 >        txt += 'elif [ $middleware == SGE ]; then\n'
629 >        txt += self.wsSetupCMSLCGEnvironment_()
630 >
631 >        txt += 'elif [ $middleware == ARC ]; then\n'
632 >        txt += self.wsSetupCMSLCGEnvironment_()
633 >
634          txt += 'fi\n'
635  
636          # Prepare JobType-specific part
# Line 834 | Line 641 | class Cmssw(JobType):
641          txt += scram+' project CMSSW '+self.version+'\n'
642          txt += 'status=$?\n'
643          txt += 'if [ $status != 0 ] ; then\n'
837        #txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
838        #txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
839        #txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
840        #txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
644          txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
645          txt += '    job_exit_code=10034\n'
843        txt += '    if [ $middleware == OSG ]; then \n'
844        txt += '        cd $RUNTIME_AREA\n'
845        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
846        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
847        txt += '        /bin/rm -rf $WORKING_DIR\n'
848        txt += '        if [ -d $WORKING_DIR ] ;then\n'
849        #txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
850        #txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
851        #txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
852        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
853        txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
854        txt += '            job_exit_code=10017\n'
855        txt += '        fi\n'
856        txt += '    fi \n'
857        #txt += '    exit 1 \n'
646          txt += '    func_exit\n'
647          txt += 'fi \n'
648          txt += 'cd '+self.version+'\n'
649 <        ########## FEDE FOR DBS2 ######################
862 <        txt += 'SOFTWARE_DIR=`pwd`\n'
649 >        txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
650          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
864        ###############################################
865        ### needed grep for bug in scramv1 ###
651          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
652 +        txt += 'if [ $? != 0 ] ; then\n'
653 +        txt += '    echo "ERROR ==> Problem with the command: "\n'
654 +        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
655 +        txt += '    job_exit_code=10034\n'
656 +        txt += '    func_exit\n'
657 +        txt += 'fi \n'
658          # Handle the arguments:
659          txt += "\n"
660          txt += "## number of arguments (first argument always jobnumber)\n"
661          txt += "\n"
662 <        txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
662 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
663          txt += "then\n"
873        #txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
874        #txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
875        #txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
876        #txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
664          txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
665          txt += '    job_exit_code=50113\n'
879        txt += '    if [ $middleware == OSG ]; then \n'
880        txt += '        cd $RUNTIME_AREA\n'
881        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
882        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
883        txt += '        /bin/rm -rf $WORKING_DIR\n'
884        txt += '        if [ -d $WORKING_DIR ] ;then\n'
885        #txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
886        #txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
887        #txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
888        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
889        txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
890        txt += '            job_exit_code=10017\n'
891        txt += '        fi\n'
892        txt += '    fi\n'
893        #txt += "    exit 1\n"
666          txt += "    func_exit\n"
667          txt += "fi\n"
668          txt += "\n"
669  
670          # Prepare job-specific part
671          job = common.job_list[nj]
900        ### FEDE FOR DBS OUTPUT PUBLICATION
672          if (self.datasetPath):
673 +            self.primaryDataset = self.datasetPath.split("/")[1]
674 +            DataTier = self.datasetPath.split("/")[2]
675              txt += '\n'
676              txt += 'DatasetPath='+self.datasetPath+'\n'
677  
678 <            datasetpath_split = self.datasetPath.split("/")
679 <
907 <            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
908 <            txt += 'DataTier='+datasetpath_split[2]+'\n'
678 >            txt += 'PrimaryDataset='+self.primaryDataset +'\n'
679 >            txt += 'DataTier='+DataTier+'\n'
680              txt += 'ApplicationFamily=cmsRun\n'
681  
682          else:
683 +            self.primaryDataset = 'null'
684              txt += 'DatasetPath=MCDataTier\n'
685              txt += 'PrimaryDataset=null\n'
686              txt += 'DataTier=null\n'
687              txt += 'ApplicationFamily=MCDataTier\n'
688 <        if self.pset != None:
688 >        if self.pset != None:
689              pset = os.path.basename(job.configFilename())
690              txt += '\n'
691              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
920            if (self.datasetPath): # standard job
921                txt += 'InputFiles=${args[1]}; export InputFiles\n'
922                txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
923                txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
924                txt += 'echo "Inputfiles:<$InputFiles>"\n'
925                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
926                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
927            else:  # pythia like job
928                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
929                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
930                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
931                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
932                if (self.firstRun):
933                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
934                    txt += 'echo "FirstRun: <$FirstRun>"\n'
935
936            txt += 'mv -f '+pset+' pset.cfg\n'
937
938        if len(self.additional_inbox_files) > 0:
939            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
940            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
941            txt += 'fi\n'
942            pass
692  
693 <        if self.pset != None:
694 <            txt += '\n'
695 <            txt += 'echo "***** cat pset.cfg *********"\n'
696 <            txt += 'cat pset.cfg\n'
697 <            txt += 'echo "****** end pset.cfg ********"\n'
698 <            txt += '\n'
699 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
951 <            txt += 'echo "PSETHASH = $PSETHASH" \n'
693 >            txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
694 >            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
695 >            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
696 >            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
697 >
698 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
699 >        else:
700              txt += '\n'
701 +            txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
702 +
703          return txt
704  
705 <    def wsBuildExe(self, nj=0):
705 >    def wsUntarSoftware(self, nj=0):
706          """
707          Put in the script the commands to build an executable
708          or a library.
709          """
710  
711 <        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
711 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
712  
713          if os.path.isfile(self.tgzNameWithPath):
714              txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
715 <            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
715 >            txt += 'tar zxvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
716 >            if  self.debug_wrapper==1 :
717 >                txt += 'ls -Al \n'
718              txt += 'untar_status=$? \n'
719              txt += 'if [ $untar_status -ne 0 ]; then \n'
968            #txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
969            #txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
970            #txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
720              txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
721              txt += '   job_exit_code=$untar_status\n'
973            txt += '   echo "JobExitCode=$untar_status" >> $RUNTIME_AREA/$repo\n'
974            txt += '   if [ $middleware == OSG ]; then \n'
975            txt += '       cd $RUNTIME_AREA\n'
976            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
977            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
978            txt += '       /bin/rm -rf $WORKING_DIR\n'
979            txt += '       if [ -d $WORKING_DIR ] ;then\n'
980            #txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
981            #txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
982            #txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
983            #txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
984            txt += '           echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
985            txt += '           job_exit_code=10017\n'
986            txt += '       fi\n'
987            txt += '   fi \n'
988            txt += '   \n'
989            #txt += '   exit 1 \n'
722              txt += '   func_exit\n'
723              txt += 'else \n'
724              txt += '   echo "Successful untar" \n'
725              txt += 'fi \n'
726              txt += '\n'
727 <            #### Removed ProdAgent API dependencies
996 <            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
727 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
728              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
729 <            #### FEDE FOR DBS OUTPUT PUBLICATION
999 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
729 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
730              txt += 'else\n'
731 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
731 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
732              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1003            ###################
733              txt += 'fi\n'
734              txt += '\n'
735  
# Line 1008 | Line 737 | class Cmssw(JobType):
737  
738          return txt
739  
740 <    def modifySteeringCards(self, nj):
740 >    def wsBuildExe(self, nj=0):
741          """
742 <        modify the card provided by the user,
743 <        writing a new card into share dir
742 >        Put in the script the commands to build an executable
743 >        or a library.
744          """
745  
746 +        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
747 +        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
748 +
749 +        txt += 'rm -r lib/ module/ \n'
750 +        txt += 'mv $RUNTIME_AREA/lib/ . \n'
751 +        txt += 'mv $RUNTIME_AREA/module/ . \n'
752 +        if self.dataExist == True:
753 +            txt += 'rm -r src/ \n'
754 +            txt += 'mv $RUNTIME_AREA/src/ . \n'
755 +        if len(self.additional_inbox_files)>0:
756 +            for file in self.additional_inbox_files:
757 +                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
758 +        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
759 +        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
760 +
761 +        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
762 +        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
763 +        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
764 +        txt += 'else\n'
765 +        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
766 +        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
767 +        txt += 'fi\n'
768 +        txt += '\n'
769 +
770 +        if self.pset != None:
771 +            # FUTURE: Drop support for .cfg when possible
772 +            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
773 +                psetName = 'pset.py'
774 +            else:
775 +                psetName = 'pset.cfg'
776 +            # FUTURE: Can simplify for 2_1_x and higher
777 +            txt += '\n'
778 +            if self.debug_wrapper == 1:
779 +                txt += 'echo "***** cat ' + psetName + ' *********"\n'
780 +                txt += 'cat ' + psetName + '\n'
781 +                txt += 'echo "****** end ' + psetName + ' ********"\n'
782 +                txt += '\n'
783 +                txt += 'echo "***********************" \n'
784 +                txt += 'which edmConfigHash \n'
785 +                txt += 'echo "***********************" \n'
786 +            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
787 +                txt += 'edmConfigHash ' + psetName + ' \n'
788 +                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
789 +            else:
790 +                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
791 +            txt += 'echo "PSETHASH = $PSETHASH" \n'
792 +            #### FEDE temporary fix for noEdm files #####
793 +            txt += 'if [ -z "$PSETHASH" ]; then \n'
794 +            txt += '   export PSETHASH=null\n'
795 +            txt += 'fi \n'
796 +            #############################################
797 +            txt += '\n'
798 +        return txt
799 +
800 +
801      def executableName(self):
802 <        if self.scriptExe: #CarlosDaniele
802 >        if self.scriptExe:
803              return "sh "
804          else:
805              return self.executable
806  
807      def executableArgs(self):
808          # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
809 <        if self.scriptExe:#CarlosDaniele
810 <            return   self.scriptExe + " $NJob"
809 >        if self.scriptExe:
810 >            return self.scriptExe + " $NJob $AdditionalArgs"
811          else:
1028            version_array = self.scram.getSWVersion().split('_')
1029            major = 0
1030            minor = 0
1031            try:
1032                major = int(version_array[1])
1033                minor = int(version_array[2])
1034            except:
1035                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1036                raise CrabException(msg)
1037
812              ex_args = ""
813 <
814 <            # Framework job report
815 <            if major >= 1 and minor >= 5 :
816 <                ex_args += " -j " + self.fjrFileName
1043 <
1044 <            # Type of cfg file
1045 <            if major >= 2 :
1046 <                ex_args += " -p pset.pycfg"
813 >            ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
814 >            # Type of config file depends on CMSSW version
815 >            if self.CMSSW_major >= 2 :
816 >                ex_args += " -p pset.py"
817              else:
818                  ex_args += " -p pset.cfg"
819              return ex_args
# Line 1053 | Line 823 | class Cmssw(JobType):
823          Returns a list of filenames to be put in JDL input sandbox.
824          """
825          inp_box = []
1056        # # dict added to delete duplicate from input sandbox file list
1057        # seen = {}
1058        ## code
826          if os.path.isfile(self.tgzNameWithPath):
827              inp_box.append(self.tgzNameWithPath)
828 <        if os.path.isfile(self.MLtgzfile):
1062 <            inp_box.append(self.MLtgzfile)
1063 <        ## config
1064 <        if not self.pset is None:
1065 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1066 <        ## additional input files
1067 <        tgz = self.additionalInputFileTgz()
1068 <        inp_box.append(tgz)
828 >        inp_box.append(common.work_space.jobDir() + self.scriptName)
829          return inp_box
830  
831      def outputSandbox(self, nj):
# Line 1077 | Line 837 | class Cmssw(JobType):
837          ## User Declared output files
838          for out in (self.output_file+self.output_file_sandbox):
839              n_out = nj + 1
840 <            out_box.append(self.numberFile_(out,str(n_out)))
840 >            out_box.append(numberFile(out,str(n_out)))
841          return out_box
842  
1083    def prepareSteeringCards(self):
1084        """
1085        Make initial modifications of the user's steering card file.
1086        """
1087        return
843  
844      def wsRenameOutput(self, nj):
845          """
# Line 1094 | Line 849 | class Cmssw(JobType):
849          txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
850          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
851          txt += 'echo ">>> current directory content:"\n'
852 <        txt += 'ls \n'
852 >        if self.debug_wrapper==1:
853 >            txt += 'ls -Al\n'
854          txt += '\n'
855  
1100        #txt += 'output_exit_status=0\n'
1101
1102        for fileWithSuffix in (self.output_file_sandbox):
1103            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1104            txt += '\n'
1105            txt += '# check output file\n'
1106            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1107            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1108            txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1109            txt += 'else\n'
1110            #txt += '    exit_status=60302\n'
1111            #txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1112            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1113            txt += '    job_exit_code=60302\n'
1114            if common.scheduler.name().upper() == 'CONDOR_G':
1115                txt += '    if [ $middleware == OSG ]; then \n'
1116                txt += '        echo "prepare dummy output file"\n'
1117                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1118                txt += '    fi \n'
1119            txt += 'fi\n'
1120
856          for fileWithSuffix in (self.output_file):
857 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
857 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
858              txt += '\n'
859              txt += '# check output file\n'
860              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
# Line 1130 | Line 865 | class Cmssw(JobType):
865                  txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
866                  txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
867              txt += 'else\n'
1133            #txt += '    exit_status=60302\n'
1134            #txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1135            #txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1136            #txt += '    output_exit_status=$exit_status\n'
868              txt += '    job_exit_code=60302\n'
869              txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
870              if common.scheduler.name().upper() == 'CONDOR_G':
# Line 1144 | Line 875 | class Cmssw(JobType):
875              txt += 'fi\n'
876          file_list = []
877          for fileWithSuffix in (self.output_file):
878 <             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
878 >             file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$NJob'))
879  
880 <        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
880 >        txt += 'file_list="'+string.join(file_list,',')+'"\n'
881          txt += '\n'
882          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
883          txt += 'echo ">>> current directory content:"\n'
884 <        txt += 'ls \n'
884 >        if self.debug_wrapper==1:
885 >            txt += 'ls -Al\n'
886          txt += '\n'
887          txt += 'cd $RUNTIME_AREA\n'
888          txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
889          return txt
890  
1159    def numberFile_(self, file, txt):
1160        """
1161        append _'txt' before last extension of a file
1162        """
1163        p = string.split(file,".")
1164        # take away last extension
1165        name = p[0]
1166        for x in p[1:-1]:
1167            name=name+"."+x
1168        # add "_txt"
1169        if len(p)>1:
1170            ext = p[len(p)-1]
1171            result = name + '_' + txt + "." + ext
1172        else:
1173            result = name + '_' + txt
1174
1175        return result
1176
891      def getRequirements(self, nj=[]):
892          """
893          return job requirements to add to jdl files
# Line 1183 | Line 897 | class Cmssw(JobType):
897              req='Member("VO-cms-' + \
898                   self.version + \
899                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
900 <        ## SL add requirement for OS version only if SL4
1187 <        #reSL4 = re.compile( r'slc4' )
1188 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
900 >        if self.executable_arch:
901              req+=' && Member("VO-cms-' + \
902                   self.executable_arch + \
903                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
904  
905          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
906 <        if common.scheduler.name() == "glitecoll":
906 >        if ( common.scheduler.name() == "glitecoll" ) or ( common.scheduler.name() == "glite"):
907              req += ' && other.GlueCEStateStatus == "Production" '
908  
909          return req
910  
911      def configFilename(self):
912          """ return the config filename """
913 <        return self.name()+'.cfg'
913 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
914 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
915 >          return self.name()+'.py'
916 >        else:
917 >          return self.name()+'.cfg'
918  
919      def wsSetupCMSOSGEnvironment_(self):
920          """
# Line 1214 | Line 930 | class Cmssw(JobType):
930          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
931          txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
932          txt += '    else\n'
1217        #txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1218        #txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1219        #txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1220        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
933          txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
934          txt += '        job_exit_code=10020\n'
1223        txt += '        cd $RUNTIME_AREA\n'
1224        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1225        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1226        txt += '        /bin/rm -rf $WORKING_DIR\n'
1227        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1228        #txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1229        #txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1230        #txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1231        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1232        txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1233        txt += '            job_exit_code=10017\n'
1234        txt += '        fi\n'
1235        txt += '\n'
1236        #txt += '        exit 1\n'
935          txt += '        func_exit\n'
936          txt += '    fi\n'
937          txt += '\n'
# Line 1242 | Line 940 | class Cmssw(JobType):
940  
941          return txt
942  
1245    ### OLI_DANIELE
943      def wsSetupCMSLCGEnvironment_(self):
944          """
945          Returns part of a job script which is prepares
# Line 1254 | Line 951 | class Cmssw(JobType):
951          txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
952          txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
953          txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1257        #txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1258        #txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1259        #txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1260        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1261        #txt += '        exit 1\n'
954          txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
955          txt += '        job_exit_code=10031\n'
956          txt += '        func_exit\n'
957          txt += '    else\n'
958          txt += '        echo "Sourcing environment... "\n'
959          txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1268        #txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1269        #txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1270        #txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1271        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1272        #txt += '            exit 1\n'
960          txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
961          txt += '            job_exit_code=10020\n'
962          txt += '            func_exit\n'
# Line 1278 | Line 965 | class Cmssw(JobType):
965          txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
966          txt += '        result=$?\n'
967          txt += '        if [ $result -ne 0 ]; then\n'
1281        #txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1282        #txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1283        #txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1284        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1285        #txt += '            exit 1\n'
968          txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
969          txt += '            job_exit_code=10032\n'
970          txt += '            func_exit\n'
# Line 1292 | Line 974 | class Cmssw(JobType):
974          txt += '    echo "==> setup cms environment ok"\n'
975          return txt
976  
977 <    ### FEDE FOR DBS OUTPUT PUBLICATION
1296 <    def modifyReport(self, nj):
977 >    def wsModifyReport(self, nj):
978          """
979          insert the part of the script that modifies the FrameworkJob Report
980          """
981  
982 <        txt = '\n#Written by cms_cmssw::modifyReport\n'
983 <        try:
984 <            publish_data = int(self.cfg_params['USER.publish_data'])
985 <        except KeyError:
986 <            publish_data = 0
987 <        if (publish_data == 1):
988 <            
989 <            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
990 <            txt += '    echo ">>> Modify Job Report:" \n'
991 <            txt += '    chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
992 <            #txt += '    if [ -z "$SE" ]; then\n'
993 <            #txt += '        SE="" \n'
994 <            #txt += '    fi \n'
995 <            #txt += '    if [ -z "$SE_PATH" ]; then\n'
996 <            #txt += '        SE_PATH="" \n'
997 <            #txt += '    fi \n'
998 <            txt += '    echo "SE = $SE"\n'
999 <            txt += '    echo "SE_PATH = $SE_PATH"\n'
1000 <
1001 <            processedDataset = self.cfg_params['USER.publish_data_name']
1002 <            txt += '    ProcessedDataset='+processedDataset+'\n'
1003 <            #txt += '    if [ "$SE_PATH" == "" ]; then\n'
1004 <            #txt += '        FOR_LFN=/copy_problems/ \n'
1005 <            #txt += '    else \n'
1006 <            #txt += '        tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1007 <            #txt += '        FOR_LFN=/store$tmp \n'
1008 <            #txt += '    fi \n'
1009 <            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1010 <            txt += '    FOR_LFN=/store$tmp \n'
1011 <            txt += '    echo "ProcessedDataset = $ProcessedDataset"\n'
1012 <            txt += '    echo "FOR_LFN = $FOR_LFN" \n'
1013 <            txt += '    echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1014 <            txt += '    echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1015 <            txt += '    $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1016 <
1017 <            txt += '    modifyReport_result=$?\n'
1018 <            txt += '    if [ $modifyReport_result -ne 0 ]; then\n'
1019 <            txt += '        modifyReport_result=70500\n'
1020 <            txt += '        job_exit_code=$modifyReport_result\n'
1340 <            txt += '        echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1341 <            txt += '        echo "WARNING: Problem with ModifyJobReport"\n'
1342 <            txt += '    else\n'
1343 <            txt += '        mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1344 <            txt += '    fi\n'
982 >        txt = ''
983 >        publish_data = int(self.cfg_params.get('USER.publish_data',0))
984 >        #if (publish_data == 1):
985 >        if (self.copy_data == 1):
986 >            txt = '\n#Written by cms_cmssw::wsModifyReport\n'
987 >            publish_data = int(self.cfg_params.get('USER.publish_data',0))
988 >
989 >
990 >            txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n'
991 >            txt += '    FOR_LFN=$LFNBaseName\n'
992 >            txt += 'else\n'
993 >            txt += '    FOR_LFN=/copy_problems/ \n'
994 >            txt += 'fi\n'
995 >
996 >            txt += 'echo ">>> Modify Job Report:" \n'
997 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
998 >            txt += 'echo "SE = $SE"\n'
999 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1000 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1001 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1002 >
1003 >
1004 >            args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset  ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH'
1005 >            if (publish_data == 1):
1006 >                processedDataset = self.cfg_params['USER.publish_data_name']
1007 >                txt += 'ProcessedDataset='+processedDataset+'\n'
1008 >                txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1009 >                args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'
1010 >
1011 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n'
1012 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n'
1013 >            txt += 'modifyReport_result=$?\n'
1014 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1015 >            txt += '    modifyReport_result=70500\n'
1016 >            txt += '    job_exit_code=$modifyReport_result\n'
1017 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1018 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1019 >            txt += 'else\n'
1020 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1021              txt += 'fi\n'
1022          return txt
1023  
1024 <    def cleanEnv(self):
1025 <        txt = '\n#Written by cms_cmssw::cleanEnv\n'
1026 <        txt += 'if [ $middleware == OSG ]; then\n'
1027 <        txt += '    cd $RUNTIME_AREA\n'
1028 <        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1029 <        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1030 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1031 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1032 <        #txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1033 <        #txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1034 <        #txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1035 <        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1036 <        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1037 <        txt += '        job_exit_code=10017\n'
1038 <        txt += '        func_exit\n'
1024 >    def wsParseFJR(self):
1025 >        """
1026 >        Parse the FrameworkJobReport to obtain useful infos
1027 >        """
1028 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1029 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1030 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1031 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1032 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1033 >        if self.debug_wrapper==1 :
1034 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1035 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1036 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1037 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1038 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1039 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1040 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1041 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1042 >        txt += '        else\n'
1043 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1044 >        txt += '        fi\n'
1045 >        txt += '    else\n'
1046 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1047 >        txt += '    fi\n'
1048 >          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1049 >        txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1050 >        txt += '        echo ">>> Executable succeded  $executable_exit_status"\n'
1051 >        ## This cannot more work given the changes on the Job argumentsJob
1052 >        """
1053 >        if (self.datasetPath and not (self.dataset_pu or self.useParent==1)) :
1054 >          # VERIFY PROCESSED DATA
1055 >            txt += '        echo ">>> Verify list of processed files:"\n'
1056 >            txt += '        echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1057 >            txt += '        python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1058 >            txt += '        cat input-files.txt  | sort | uniq > tmp.txt\n'
1059 >            txt += '        mv tmp.txt input-files.txt\n'
1060 >            txt += '        echo "cat input-files.txt"\n'
1061 >            txt += '        echo "----------------------"\n'
1062 >            txt += '        cat input-files.txt\n'
1063 >            txt += '        cat processed-files.txt | sort | uniq > tmp.txt\n'
1064 >            txt += '        mv tmp.txt processed-files.txt\n'
1065 >            txt += '        echo "----------------------"\n'
1066 >            txt += '        echo "cat processed-files.txt"\n'
1067 >            txt += '        echo "----------------------"\n'
1068 >            txt += '        cat processed-files.txt\n'
1069 >            txt += '        echo "----------------------"\n'
1070 >            txt += '        diff -qbB input-files.txt processed-files.txt\n'
1071 >            txt += '        fileverify_status=$?\n'
1072 >            txt += '        if [ $fileverify_status -ne 0 ]; then\n'
1073 >            txt += '            executable_exit_status=30001\n'
1074 >            txt += '            echo "ERROR ==> not all input files processed"\n'
1075 >            txt += '            echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1076 >            txt += '            echo "      ==> diff input-files.txt processed-files.txt"\n'
1077 >            txt += '        fi\n'
1078 >        """
1079          txt += '    fi\n'
1080 +        txt += 'else\n'
1081 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1082          txt += 'fi\n'
1083          txt += '\n'
1084 +        txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
1085 +        txt += '    echo ">>> Executable failed  $executable_exit_status"\n'
1086 +        txt += '    echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1087 +        txt += '    echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1088 +        txt += '    job_exit_code=$executable_exit_status\n'
1089 +        txt += '    func_exit\n'
1090 +        txt += 'fi\n\n'
1091 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1092 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1093 +        txt += 'job_exit_code=$executable_exit_status\n'
1094 +
1095          return txt
1096  
1097      def setParam_(self, param, value):
# Line 1371 | Line 1100 | class Cmssw(JobType):
1100      def getParams(self):
1101          return self._params
1102  
1103 <    def uniquelist(self, old):
1375 <        """
1376 <        remove duplicates from a list
1377 <        """
1378 <        nd={}
1379 <        for e in old:
1380 <            nd[e]=0
1381 <        return nd.keys()
1382 <
1383 <
1384 <    def checkOut(self, limit):
1103 >    def outList(self,list=False):
1104          """
1105          check the dimension of the output files
1106          """
1107 <        txt = '\n#Written by cms_cmssw::checkOut\n'
1108 <        txt += 'echo ">>> Starting output sandbox limit check :"\n'
1107 >        txt = ''
1108 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1109          listOutFiles = []
1110 <        txt += 'stdoutFile=`ls *stdout` \n'
1111 <        txt += 'stderrFile=`ls *stderr` \n'
1110 >        stdout = 'CMSSW_$NJob.stdout'
1111 >        stderr = 'CMSSW_$NJob.stderr'
1112 >        if len(self.output_file) <= 0:
1113 >            msg ="WARNING: no output files name have been defined!!\n"
1114 >            msg+="\tno output files will be reported back/staged\n"
1115 >            common.logger.info(msg)
1116          if (self.return_data == 1):
1117              for file in (self.output_file+self.output_file_sandbox):
1118 <                listOutFiles.append(self.numberFile_(file, '$NJob'))
1119 <            listOutFiles.append('$stdoutFile')
1120 <            listOutFiles.append('$stderrFile')
1118 >                listOutFiles.append(numberFile(file, '$NJob'))
1119 >            listOutFiles.append(stdout)
1120 >            listOutFiles.append(stderr)
1121          else:
1122              for file in (self.output_file_sandbox):
1123 <                listOutFiles.append(self.numberFile_(file, '$NJob'))
1124 <            listOutFiles.append('$stdoutFile')
1125 <            listOutFiles.append('$stderrFile')
1403 <
1123 >                listOutFiles.append(numberFile(file, '$NJob'))
1124 >            listOutFiles.append(stdout)
1125 >            listOutFiles.append(stderr)
1126          txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1127          txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1128 <        txt += 'ls -gGhrta;\n'
1407 <        txt += 'sum=0;\n'
1408 <        txt += 'for file in $filesToCheck ; do\n'
1409 <        txt += '    if [ -e $file ]; then\n'
1410 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1411 <        txt += '        sum=`expr $sum + $tt`\n'
1412 <        txt += '    else\n'
1413 <        txt += '        echo "WARNING: output file $file not found!"\n'
1414 <        txt += '    fi\n'
1415 <        txt += 'done\n'
1416 <        txt += 'echo "Total Output dimension: $sum";\n'
1417 <        txt += 'limit='+str(limit)+';\n'
1418 <        txt += 'echo "WARNING: output files size limit is set to: $limit";\n'
1419 <        txt += 'if [ $limit -lt $sum ]; then\n'
1420 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1421 <        txt += '    echo "         checking the output file sizes..."\n'
1422 <        txt += '    tot=0;\n'
1423 <        txt += '    for filefile in $filesToCheck ; do\n'
1424 <        txt += '        dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
1425 <        txt += '        tot=`expr $tot + $tt`;\n'
1426 <        txt += '        if [ $limit -lt $dimFile ]; then\n'
1427 <        txt += '            echo "deleting file: $filefile";\n'
1428 <        txt += '            rm -f $filefile\n'
1429 <        txt += '        elif [ $limit -lt $tot ]; then\n'
1430 <        txt += '            echo "deleting file: $filefile";\n'
1431 <        txt += '            rm -f $filefile\n'
1432 <        txt += '        else\n'
1433 <        txt += '            echo "saving file: $filefile"\n'
1434 <        txt += '        fi\n'
1435 <        txt += '    done\n'
1128 >        txt += 'export filesToCheck\n'
1129  
1130 <        txt += '    ls -agGhrt\n'
1438 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox."\n'
1439 <        #txt += '    echo "JOB_EXIT_STATUS = 70000"\n'
1440 <        #txt += '    exit_status=70000\n'
1441 <        txt += '    job_exit_code=70000\n'
1442 <        txt += 'else\n'
1443 <        txt += '    echo "Total Output dimension $sum is fine."\n'
1444 <        txt += 'fi\n'
1445 <        txt += 'echo "Ending output sandbox limit check"\n'
1130 >        if list : return self.output_file
1131          return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines