ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.247 by fanzago, Mon Sep 29 17:19:16 2008 UTC vs.
Revision 1.314 by spiga, Thu Jun 18 17:23:02 2009 UTC

# Line 1 | Line 1
1   from JobType import JobType
2 from crab_logger import Logger
2   from crab_exceptions import *
3   from crab_util import *
5 from BlackWhiteListParser import SEBlackWhiteListParser
4   import common
5   import Scram
6 + from Splitter import JobSplitter
7  
8 + from IMProv.IMProvNode import IMProvNode
9   import os, string, glob
10  
11   class Cmssw(JobType):
12      def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
13          JobType.__init__(self, 'CMSSW')
14 <        common.logger.debug(3,'CMSSW::__init__')
14 >        common.logger.debug('CMSSW::__init__')
15          self.skip_blocks = skip_blocks
16 <
17 <        self.argsList = []
16 >        self.argsList = 1
17  
18          self._params = {}
19          self.cfg_params = cfg_params
21        # init BlackWhiteListParser
22        self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)
20  
21          ### Temporary patch to automatically skip the ISB size check:
22          server=self.cfg_params.get('CRAB.server_name',None)
23 <        size = 9.5
24 <        if server: size = 99999
23 >        size = 9.5
24 >        if server or common.scheduler.name().upper() in ['LSF','CAF']: size = 99999
25          ### D.S.
26 <        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',size))
26 >        self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))
27  
28          # number of jobs requested to be created, limit obj splitting
29          self.ncjobs = ncjobs
30  
34        log = common.logger
35
31          self.scram = Scram.Scram(cfg_params)
32          self.additional_inbox_files = []
33          self.scriptExe = ''
34          self.executable = ''
35          self.executable_arch = self.scram.getArch()
36 <        self.tgz_name = 'default.tgz'
36 >        self.tgz_name = 'default.tar.gz'
37 >        self.tar_name = 'default.tar'
38          self.scriptName = 'CMSSW.sh'
39          self.pset = ''
40          self.datasetPath = ''
41  
42 +        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
43          # set FJR file name
44          self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +        common.logger.log(10-1,"CMSSW version is: "+str(self.version))
48 +
49          version_array = self.version.split('_')
50          self.CMSSW_major = 0
51          self.CMSSW_minor = 0
# Line 59 | Line 58 | class Cmssw(JobType):
58              msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
59              raise CrabException(msg)
60  
61 +        if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5):
62 +            msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version."
63 +            raise CrabException(msg)
64 +            """
65 +            As CMSSW versions are dropped we can drop more code:
66 +            1.X dropped: drop support for running .cfg on WN
67 +            2.0 dropped: drop all support for cfg here and in writeCfg
68 +            2.0 dropped: Recheck the random number seed support
69 +            """
70 +
71          ### collect Data cards
72  
64        if not cfg_params.has_key('CMSSW.datasetpath'):
65            msg = "Error: datasetpath not defined "
66            raise CrabException(msg)
73  
74          ### Temporary: added to remove input file control in the case of PU
75          self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
76  
77          tmp =  cfg_params['CMSSW.datasetpath']
78 <        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
78 >        common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)
79  
80          if tmp =='':
81              msg = "Error: datasetpath not defined "
# Line 82 | Line 88 | class Cmssw(JobType):
88              self.selectNoInput = 0
89  
90          self.dataTiers = []
91 <        self.debugWrap = ''
92 <        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
93 <        if self.debug_wrapper: self.debugWrap='--debug'
91 >
92 >        self.debugWrap=''
93 >        self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
94 >        if self.debug_wrapper == 1: self.debugWrap='--debug'
95 >
96          ## now the application
97 +        self.managedGenerators = ['madgraph', 'comphep', 'lhe']
98 +        self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
99          self.executable = cfg_params.get('CMSSW.executable','cmsRun')
100 <        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
100 >        common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102          if not cfg_params.has_key('CMSSW.pset'):
103              raise CrabException("PSet file missing. Cannot run cmsRun ")
104          self.pset = cfg_params['CMSSW.pset']
105 <        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
105 >        common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
106          if self.pset.lower() != 'none' :
107              if (not os.path.exists(self.pset)):
108                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
# Line 124 | Line 134 | class Cmssw(JobType):
134                  raise CrabException(msg)
135              self.additional_inbox_files.append(string.strip(self.scriptExe))
136  
137 +        self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
138 +        if self.AdditionalArgs : self.AdditionalArgs = string.replace(self.AdditionalArgs,',',' ')
139 +
140          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
141              msg ="Error. script_exe  not defined"
142              raise CrabException(msg)
143  
144          # use parent files...
145 <        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
145 >        self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
146  
147          ## additional input files
148          if cfg_params.has_key('USER.additional_input_files'):
# Line 152 | Line 165 | class Cmssw(JobType):
165                      self.additional_inbox_files.append(string.strip(file))
166                  pass
167              pass
168 <            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
168 >            common.logger.debug("Additional input files: "+str(self.additional_inbox_files))
169          pass
170  
158        ## Events per job
159        if cfg_params.has_key('CMSSW.events_per_job'):
160            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
161            self.selectEventsPerJob = 1
162        else:
163            self.eventsPerJob = -1
164            self.selectEventsPerJob = 0
165
166        ## number of jobs
167        if cfg_params.has_key('CMSSW.number_of_jobs'):
168            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
169            self.selectNumberOfJobs = 1
170        else:
171            self.theNumberOfJobs = 0
172            self.selectNumberOfJobs = 0
173
174        if cfg_params.has_key('CMSSW.total_number_of_events'):
175            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
176            self.selectTotalNumberEvents = 1
177            if self.selectNumberOfJobs  == 1:
178                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
179                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
180                    raise CrabException(msg)
181        else:
182            self.total_number_of_events = 0
183            self.selectTotalNumberEvents = 0
184
185        if self.pset != None:
186             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
187                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
188                 raise CrabException(msg)
189        else:
190             if (self.selectNumberOfJobs == 0):
191                 msg = 'Must specify  number_of_jobs.'
192                 raise CrabException(msg)
171  
172          ## New method of dealing with seeds
173          self.incrementSeeds = []
# Line 205 | Line 183 | class Cmssw(JobType):
183                  tmp.strip()
184                  self.incrementSeeds.append(tmp)
185  
208        ## FUTURE: Can remove in CRAB 2.4.0
209        self.sourceSeed    = cfg_params.get('CMSSW.pythia_seed',None)
210        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
211        self.sourceSeedG4  = cfg_params.get('CMSSW.g4_seed',None)
212        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
213        if self.sourceSeed or self.sourceSeedVtx or self.sourceSeedG4 or self.sourceSeedMix:
214            msg = 'pythia_seed, vtx_seed, g4_seed, and mix_seed are no longer valid settings. You must use increment_seeds or preserve_seeds'
215            raise CrabException(msg)
216
186          self.firstRun = cfg_params.get('CMSSW.first_run',None)
187  
188          # Copy/return
189          self.copy_data = int(cfg_params.get('USER.copy_data',0))
190          self.return_data = int(cfg_params.get('USER.return_data',0))
191  
192 +        self.conf = {}
193 +        self.conf['pubdata'] = None
194 +        # number of jobs requested to be created, limit obj splitting DD
195          #DBSDLS-start
196          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
197          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
# Line 231 | Line 203 | class Cmssw(JobType):
203          if self.datasetPath:
204              blockSites = self.DataDiscoveryAndLocation(cfg_params)
205          #DBSDLS-end
206 +        self.conf['blockSites']=blockSites
207  
208          ## Select Splitting
209 +        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))
210 +
211          if self.selectNoInput:
212              if self.pset == None:
213 <                self.jobSplittingForScript()
213 >                self.algo = 'ForScript'
214              else:
215 <                self.jobSplittingNoInput()
216 <        elif (cfg_params.get('CMSSW.noblockboundary',0)):
217 <            self.jobSplittingNoBlockBoundary(blockSites)
215 >                self.algo = 'NoInput'
216 >                self.conf['managedGenerators']=self.managedGenerators
217 >                self.conf['generator']=self.generator
218 >        elif splitByRun ==1:
219 >            self.algo = 'RunBased'
220          else:
221 <            self.jobSplittingByBlocks(blockSites)
221 >            self.algo = 'EventBased'
222 >
223 > #        self.algo = 'LumiBased'
224 >        splitter = JobSplitter(self.cfg_params,self.conf)
225 >        self.dict = splitter.Algos()[self.algo]()
226  
227 +        self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
228 +        self.rootArgsFilename= 'arguments'
229          # modify Pset only the first time
230 <        if isNew:
231 <            if self.pset != None:
232 <                import PsetManipulator as pp
233 <                PsetEdit = pp.PsetManipulator(self.pset)
234 <                try:
235 <                    # Add FrameworkJobReport to parameter-set, set max events.
236 <                    # Reset later for data jobs by writeCFG which does all modifications
237 <                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
238 <                    PsetEdit.maxEvent(self.eventsPerJob)
239 <                    PsetEdit.psetWriter(self.configFilename())
240 <                    ## If present, add TFileService to output files
241 <                    if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
242 <                        tfsOutput = PsetEdit.getTFileService()
243 <                        if tfsOutput:
244 <                            if tfsOutput in self.output_file:
245 <                                common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
246 <                            else:
247 <                                outfileflag = True #output found
248 <                                self.output_file.append(tfsOutput)
249 <                                common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
250 <                            pass
251 <                        pass
252 <                    ## If present and requested, add PoolOutputModule to output files
253 <                    if int(cfg_params.get('CMSSW.get_edm_output',0)):
254 <                        edmOutput = PsetEdit.getPoolOutputModule()
255 <                        if edmOutput:
256 <                            if edmOutput in self.output_file:
257 <                                common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
258 <                            else:
259 <                                self.output_file.append(edmOutput)
260 <                                common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
261 <                            pass
262 <                        pass
263 <                except CrabException:
264 <                    msg='Error while manipulating ParameterSet: exiting...'
265 <                    raise CrabException(msg)
266 <            ## Prepare inputSandbox TarBall (only the first time)
267 <            self.tgzNameWithPath = self.getTarBall(self.executable)
230 >        if (isNew and self.pset != None): self.ModifyPset()
231 >
232 >        ## Prepare inputSandbox TarBall (only the first time)
233 >        self.tarNameWithPath = self.getTarBall(self.executable)
234 >
235 >
236 >    def ModifyPset(self):
237 >        import PsetManipulator as pp
238 >        PsetEdit = pp.PsetManipulator(self.pset)
239 >        try:
240 >            # Add FrameworkJobReport to parameter-set, set max events.
241 >            # Reset later for data jobs by writeCFG which does all modifications
242 >            PsetEdit.maxEvent(1)
243 >            PsetEdit.skipEvent(0)
244 >            PsetEdit.psetWriter(self.configFilename())
245 >            ## If present, add TFileService to output files
246 >            if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
247 >                tfsOutput = PsetEdit.getTFileService()
248 >                if tfsOutput:
249 >                    if tfsOutput in self.output_file:
250 >                        common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
251 >                    else:
252 >                        outfileflag = True #output found
253 >                        self.output_file.append(tfsOutput)
254 >                        common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
255 >                    pass
256 >                pass
257 >            ## If present and requested, add PoolOutputModule to output files
258 >            edmOutput = PsetEdit.getPoolOutputModule()
259 >            if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
260 >                if edmOutput:
261 >                    if edmOutput in self.output_file:
262 >                        common.logger.debug("Output from PoolOutputModule "+edmOutput+" already in output files")
263 >                    else:
264 >                        self.output_file.append(edmOutput)
265 >                        common.logger.info("Adding "+edmOutput+" (from PoolOutputModule) to list of output files")
266 >                    pass
267 >                pass
268 >            # not required: check anyhow if present, to avoid accidental T2 overload
269 >            else:
270 >                if edmOutput and (edmOutput not in self.output_file):
271 >                    msg = "ERROR: a PoolOutputModule is present in your ParameteSet %s \n"%self.pset
272 >                    msg +="         but the file produced ( %s ) is not in the list of output files\n"%edmOutput
273 >                    msg += "WARNING: please remove it. If you want to keep it, add the file to output_files or use CMSSW.get_edm_output\n"
274 >                    if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
275 >                        msg +=" CMSSW.ignore_edm_output==True : Hope you know what you are doing...\n"
276 >                        common.logger.info(msg)
277 >                    else:
278 >                        raise CrabException(msg)
279 >                pass
280 >            pass
281 >
282 >            if (PsetEdit.getBadFilesSetting()):
283 >                msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
284 >                common.logger.info(msg)
285 >
286 >        except CrabException, msg:
287 >            common.logger.info(str(msg))
288 >            msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
289 >            raise CrabException(msg)
290 >
291  
292      def DataDiscoveryAndLocation(self, cfg_params):
293  
294          import DataDiscovery
295          import DataLocation
296 <        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
296 >        common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
297  
298          datasetPath=self.datasetPath
299  
300          ## Contact the DBS
301 <        common.logger.message("Contacting Data Discovery Services ...")
301 >        common.logger.info("Contacting Data Discovery Services ...")
302          try:
303              self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
304              self.pubdata.fetchDBSInfo()
# Line 308 | Line 314 | class Cmssw(JobType):
314              raise CrabException(msg)
315  
316          self.filesbyblock=self.pubdata.getFiles()
317 <        self.eventsbyblock=self.pubdata.getEventsPerBlock()
318 <        self.eventsbyfile=self.pubdata.getEventsPerFile()
313 <        self.parentFiles=self.pubdata.getParent()
317 >        #print self.filesbyblock
318 >        self.conf['pubdata']=self.pubdata
319  
320          ## get max number of events
321          self.maxEvents=self.pubdata.getMaxEvents()
# Line 319 | Line 324 | class Cmssw(JobType):
324          try:
325              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
326              dataloc.fetchDLSInfo()
327 +
328          except DataLocation.DataLocationError , ex:
329              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
330              raise CrabException(msg)
331  
332  
333 <        sites = dataloc.getSites()
333 >        unsorted_sites = dataloc.getSites()
334 >        sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
335 >        for lfn in self.filesbyblock.keys():
336 >            if unsorted_sites.has_key(lfn):
337 >                sites[lfn]=unsorted_sites[lfn]
338 >            else:
339 >                sites[lfn]=[]
340 >
341 >        if len(sites)==0:
342 >            msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
343 >            msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
344 >            msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
345 >            raise CrabException(msg)
346 >
347          allSites = []
348          listSites = sites.values()
349          for listSite in listSites:
350              for oneSite in listSite:
351                  allSites.append(oneSite)
352 <        allSites = self.uniquelist(allSites)
334 <
335 <        # screen output
336 <        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
352 >        [allSites.append(it) for it in allSites if not allSites.count(it)]
353  
338        return sites
339
340    def jobSplittingByBlocks(self, blockSites):
341        """
342        Perform job splitting. Jobs run over an integer number of files
343        and no more than one block.
344        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
345        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
346                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
347                  self.maxEvents, self.filesbyblock
348        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
349              self.total_number_of_jobs - Total # of jobs
350              self.list_of_args - File(s) job will run on (a list of lists)
351        """
352
353        # ---- Handle the possible job splitting configurations ---- #
354        if (self.selectTotalNumberEvents):
355            totalEventsRequested = self.total_number_of_events
356        if (self.selectEventsPerJob):
357            eventsPerJobRequested = self.eventsPerJob
358            if (self.selectNumberOfJobs):
359                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
360
361        # If user requested all the events in the dataset
362        if (totalEventsRequested == -1):
363            eventsRemaining=self.maxEvents
364        # If user requested more events than are in the dataset
365        elif (totalEventsRequested > self.maxEvents):
366            eventsRemaining = self.maxEvents
367            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
368        # If user requested less events than are in the dataset
369        else:
370            eventsRemaining = totalEventsRequested
371
372        # If user requested more events per job than are in the dataset
373        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
374            eventsPerJobRequested = self.maxEvents
375
376        # For user info at end
377        totalEventCount = 0
378
379        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
380            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
381
382        if (self.selectNumberOfJobs):
383            common.logger.message("May not create the exact number_of_jobs requested.")
384
385        if ( self.ncjobs == 'all' ) :
386            totalNumberOfJobs = 999999999
387        else :
388            totalNumberOfJobs = self.ncjobs
389
390        blocks = blockSites.keys()
391        blockCount = 0
392        # Backup variable in case self.maxEvents counted events in a non-included block
393        numBlocksInDataset = len(blocks)
394
395        jobCount = 0
396        list_of_lists = []
397
398        # list tracking which jobs are in which jobs belong to which block
399        jobsOfBlock = {}
400
401        # ---- Iterate over the blocks in the dataset until ---- #
402        # ---- we've met the requested total # of events    ---- #
403        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
404            block = blocks[blockCount]
405            blockCount += 1
406            if block not in jobsOfBlock.keys() :
407                jobsOfBlock[block] = []
408
409            if self.eventsbyblock.has_key(block) :
410                numEventsInBlock = self.eventsbyblock[block]
411                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
412
413                files = self.filesbyblock[block]
414                numFilesInBlock = len(files)
415                if (numFilesInBlock <= 0):
416                    continue
417                fileCount = 0
418
419                # ---- New block => New job ---- #
420                parString = ""
421                # counter for number of events in files currently worked on
422                filesEventCount = 0
423                # flag if next while loop should touch new file
424                newFile = 1
425                # job event counter
426                jobSkipEventCount = 0
427
428                # ---- Iterate over the files in the block until we've met the requested ---- #
429                # ---- total # of events or we've gone over all the files in this block  ---- #
430                pString=''
431                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
432                    file = files[fileCount]
433                    if self.useParent:
434                        parent = self.parentFiles[file]
435                        for f in parent :
436                            pString += '\\\"' + f + '\\\"\,'
437                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
438                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
439                    if newFile :
440                        try:
441                            numEventsInFile = self.eventsbyfile[file]
442                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
443                            # increase filesEventCount
444                            filesEventCount += numEventsInFile
445                            # Add file to current job
446                            parString += '\\\"' + file + '\\\"\,'
447                            newFile = 0
448                        except KeyError:
449                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
450
451                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
452                    # if less events in file remain than eventsPerJobRequested
453                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
454                        # if last file in block
455                        if ( fileCount == numFilesInBlock-1 ) :
456                            # end job using last file, use remaining events in block
457                            # close job and touch new file
458                            fullString = parString[:-2]
459                            if self.useParent:
460                                fullParentString = pString[:-2]
461                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
462                            else:
463                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
464                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
465                            self.jobDestination.append(blockSites[block])
466                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
467                            # fill jobs of block dictionary
468                            jobsOfBlock[block].append(jobCount+1)
469                            # reset counter
470                            jobCount = jobCount + 1
471                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
472                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
473                            jobSkipEventCount = 0
474                            # reset file
475                            pString = ""
476                            parString = ""
477                            filesEventCount = 0
478                            newFile = 1
479                            fileCount += 1
480                        else :
481                            # go to next file
482                            newFile = 1
483                            fileCount += 1
484                    # if events in file equal to eventsPerJobRequested
485                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
486                        # close job and touch new file
487                        fullString = parString[:-2]
488                        if self.useParent:
489                            fullParentString = pString[:-2]
490                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
491                        else:
492                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
493                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
494                        self.jobDestination.append(blockSites[block])
495                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
496                        jobsOfBlock[block].append(jobCount+1)
497                        # reset counter
498                        jobCount = jobCount + 1
499                        totalEventCount = totalEventCount + eventsPerJobRequested
500                        eventsRemaining = eventsRemaining - eventsPerJobRequested
501                        jobSkipEventCount = 0
502                        # reset file
503                        pString = ""
504                        parString = ""
505                        filesEventCount = 0
506                        newFile = 1
507                        fileCount += 1
508
509                    # if more events in file remain than eventsPerJobRequested
510                    else :
511                        # close job but don't touch new file
512                        fullString = parString[:-2]
513                        if self.useParent:
514                            fullParentString = pString[:-2]
515                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
516                        else:
517                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
518                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
519                        self.jobDestination.append(blockSites[block])
520                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
521                        jobsOfBlock[block].append(jobCount+1)
522                        # increase counter
523                        jobCount = jobCount + 1
524                        totalEventCount = totalEventCount + eventsPerJobRequested
525                        eventsRemaining = eventsRemaining - eventsPerJobRequested
526                        # calculate skip events for last file
527                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
528                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
529                        # remove all but the last file
530                        filesEventCount = self.eventsbyfile[file]
531                        if self.useParent:
532                            for f in parent : pString += '\\\"' + f + '\\\"\,'
533                        parString = '\\\"' + file + '\\\"\,'
534                    pass # END if
535                pass # END while (iterate over files in the block)
536        pass # END while (iterate over blocks in the dataset)
537        self.ncjobs = self.total_number_of_jobs = jobCount
538        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
539            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
540        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
354  
355          # screen output
356 <        screenOutput = "List of jobs and available destination sites:\n\n"
544 <
545 <        # keep trace of block with no sites to print a warning at the end
546 <        noSiteBlock = []
547 <        bloskNoSite = []
548 <
549 <        blockCounter = 0
550 <        for block in blocks:
551 <            if block in jobsOfBlock.keys() :
552 <                blockCounter += 1
553 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
554 <                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
555 <                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
556 <                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
557 <                    bloskNoSite.append( blockCounter )
558 <
559 <        common.logger.message(screenOutput)
560 <        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
561 <            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
562 <            virgola = ""
563 <            if len(bloskNoSite) > 1:
564 <                virgola = ","
565 <            for block in bloskNoSite:
566 <                msg += ' ' + str(block) + virgola
567 <            msg += '\n               Related jobs:\n                 '
568 <            virgola = ""
569 <            if len(noSiteBlock) > 1:
570 <                virgola = ","
571 <            for range_jobs in noSiteBlock:
572 <                msg += str(range_jobs) + virgola
573 <            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
574 <            if self.cfg_params.has_key('EDG.se_white_list'):
575 <                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
576 <                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
577 <                msg += 'Please check if the dataset is available at this site!)\n'
578 <            if self.cfg_params.has_key('EDG.ce_white_list'):
579 <                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
580 <                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
581 <                msg += 'Please check if the dataset is available at this site!)\n'
582 <
583 <            common.logger.message(msg)
584 <
585 <        self.list_of_args = list_of_lists
586 <        return
587 <
588 <    def jobSplittingNoBlockBoundary(self,blockSites):
589 <        """
590 <        """
591 <        # ---- Handle the possible job splitting configurations ---- #
592 <        if (self.selectTotalNumberEvents):
593 <            totalEventsRequested = self.total_number_of_events
594 <        if (self.selectEventsPerJob):
595 <            eventsPerJobRequested = self.eventsPerJob
596 <            if (self.selectNumberOfJobs):
597 <                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
598 <                                                                                                          
599 <        # If user requested all the events in the dataset
600 <        if (totalEventsRequested == -1):
601 <            eventsRemaining=self.maxEvents
602 <        # If user requested more events than are in the dataset
603 <        elif (totalEventsRequested > self.maxEvents):
604 <            eventsRemaining = self.maxEvents
605 <            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
606 <        # If user requested less events than are in the dataset
607 <        else:
608 <            eventsRemaining = totalEventsRequested
609 <                                                                                                          
610 <        # If user requested more events per job than are in the dataset
611 <        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
612 <            eventsPerJobRequested = self.maxEvents
613 <                                                                                                          
614 <        # For user info at end
615 <        totalEventCount = 0
616 <
617 <        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
618 <            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
619 <                                                                                                          
620 <        if (self.selectNumberOfJobs):
621 <            common.logger.message("May not create the exact number_of_jobs requested.")
622 <                                                                                                          
623 <        if ( self.ncjobs == 'all' ) :
624 <            totalNumberOfJobs = 999999999
625 <        else :
626 <            totalNumberOfJobs = self.ncjobs
627 <                                                                                                          
628 <        blocks = blockSites.keys()
629 <        blockCount = 0
630 <        # Backup variable in case self.maxEvents counted events in a non-included block
631 <        numBlocksInDataset = len(blocks)
632 <                                                                                                          
633 <        jobCount = 0
634 <        list_of_lists = []
635 <
636 <        #AF
637 <        #AF do not reset input files and event count on block boundary
638 <        #AF
639 <        parString=""
640 <        filesEventCount = 0
641 <        #AF
642 <
643 <        # list tracking which jobs are in which jobs belong to which block
644 <        jobsOfBlock = {}
645 <        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
646 <            block = blocks[blockCount]
647 <            blockCount += 1
648 <            if block not in jobsOfBlock.keys() :
649 <                jobsOfBlock[block] = []
650 <
651 <            if self.eventsbyblock.has_key(block) :
652 <                numEventsInBlock = self.eventsbyblock[block]
653 <                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
654 <                files = self.filesbyblock[block]
655 <                numFilesInBlock = len(files)
656 <                if (numFilesInBlock <= 0):
657 <                    continue
658 <                fileCount = 0
659 <                #AF
660 <                #AF do not reset input files and event count of block boundary
661 <                #AF
662 <                ## ---- New block => New job ---- #
663 <                #parString = ""
664 <                # counter for number of events in files currently worked on
665 <                #filesEventCount = 0
666 <                #AF
667 <                # flag if next while loop should touch new file
668 <                newFile = 1
669 <                # job event counter
670 <                jobSkipEventCount = 0
671 <
672 <                # ---- Iterate over the files in the block until we've met the requested ---- #
673 <                # ---- total # of events or we've gone over all the files in this block  ---- #
674 <                pString=''
675 <                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
676 <                    file = files[fileCount]
677 <                    if self.useParent:
678 <                        parent = self.parentFiles[file]
679 <                        for f in parent :
680 <                            pString += '\\\"' + f + '\\\"\,'
681 <                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
682 <                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
683 <                    if newFile :
684 <                        try:
685 <                            numEventsInFile = self.eventsbyfile[file]
686 <                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
687 <                            # increase filesEventCount
688 <                            filesEventCount += numEventsInFile
689 <                            # Add file to current job
690 <                            parString += '\\\"' + file + '\\\"\,'
691 <                            newFile = 0
692 <                        except KeyError:
693 <                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
694 <                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
695 <                    #common.logger.message("AF filesEventCount %s - jobSkipEventCount %s "%(filesEventCount,jobSkipEventCount))  
696 <                    # if less events in file remain than eventsPerJobRequested
697 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
698 <                      #AF
699 <                      #AF skip fileboundary part
700 <                      #AF
701 <                            # go to next file
702 <                            newFile = 1
703 <                            fileCount += 1
704 <                    # if events in file equal to eventsPerJobRequested
705 <                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
706 <                        # close job and touch new file
707 <                        fullString = parString[:-2]
708 <                        if self.useParent:
709 <                            fullParentString = pString[:-2]
710 <                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
711 <                        else:
712 <                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
713 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
714 <                        self.jobDestination.append(blockSites[block])
715 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
716 <                        jobsOfBlock[block].append(jobCount+1)
717 <                        # reset counter
718 <                        jobCount = jobCount + 1
719 <                        totalEventCount = totalEventCount + eventsPerJobRequested
720 <                        eventsRemaining = eventsRemaining - eventsPerJobRequested
721 <                        jobSkipEventCount = 0
722 <                        # reset file
723 <                        pString = ""
724 <                        parString = ""
725 <                        filesEventCount = 0
726 <                        newFile = 1
727 <                        fileCount += 1
356 >        common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
357  
358 <                    # if more events in file remain than eventsPerJobRequested
730 <                    else :
731 <                        # close job but don't touch new file
732 <                        fullString = parString[:-2]
733 <                        if self.useParent:
734 <                            fullParentString = pString[:-2]
735 <                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
736 <                        else:
737 <                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
738 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
739 <                        self.jobDestination.append(blockSites[block])
740 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
741 <                        jobsOfBlock[block].append(jobCount+1)
742 <                        # increase counter
743 <                        jobCount = jobCount + 1
744 <                        totalEventCount = totalEventCount + eventsPerJobRequested
745 <                        eventsRemaining = eventsRemaining - eventsPerJobRequested
746 <                        # calculate skip events for last file
747 <                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
748 <                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
749 <                        # remove all but the last file
750 <                        filesEventCount = self.eventsbyfile[file]
751 <                        if self.useParent:
752 <                            for f in parent : pString += '\\\"' + f + '\\\"\,'
753 <                        parString = '\\\"' + file + '\\\"\,'
754 <                    pass # END if
755 <                pass # END while (iterate over files in the block)
756 <        pass # END while (iterate over blocks in the dataset)
757 <        self.ncjobs = self.total_number_of_jobs = jobCount
758 <        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
759 <            common.logger.message("eventsRemaining "+str(eventsRemaining))
760 <            common.logger.message("jobCount "+str(jobCount))
761 <            common.logger.message(" totalNumberOfJobs "+str(totalNumberOfJobs))
762 <            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
763 <        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
764 <
765 <        # screen output
766 <        screenOutput = "List of jobs and available destination sites:\n\n"
767 <
768 <        #AF
769 <        #AF   skip check on  block with no sites
770 <        #AF
771 <        self.list_of_args = list_of_lists
772 <
773 <        return
774 <
775 <
776 <
777 <    def jobSplittingNoInput(self):
778 <        """
779 <        Perform job splitting based on number of event per job
780 <        """
781 <        common.logger.debug(5,'Splitting per events')
782 <
783 <        if (self.selectEventsPerJob):
784 <            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
785 <        if (self.selectNumberOfJobs):
786 <            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
787 <        if (self.selectTotalNumberEvents):
788 <            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
789 <
790 <        if (self.total_number_of_events < 0):
791 <            msg='Cannot split jobs per Events with "-1" as total number of events'
792 <            raise CrabException(msg)
793 <
794 <        if (self.selectEventsPerJob):
795 <            if (self.selectTotalNumberEvents):
796 <                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
797 <            elif(self.selectNumberOfJobs) :
798 <                self.total_number_of_jobs =self.theNumberOfJobs
799 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
800 <
801 <        elif (self.selectNumberOfJobs) :
802 <            self.total_number_of_jobs = self.theNumberOfJobs
803 <            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
804 <
805 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
806 <
807 <        # is there any remainder?
808 <        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
809 <
810 <        common.logger.debug(5,'Check  '+str(check))
811 <
812 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
813 <        if check > 0:
814 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
815 <
816 <        # argument is seed number.$i
817 <        self.list_of_args = []
818 <        for i in range(self.total_number_of_jobs):
819 <            ## Since there is no input, any site is good
820 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
821 <            args=[]
822 <            if (self.firstRun):
823 <                ## pythia first run
824 <                args.append(str(self.firstRun)+str(i))
825 <            self.list_of_args.append(args)
826 <
827 <        return
828 <
829 <
830 <    def jobSplittingForScript(self):
831 <        """
832 <        Perform job splitting based on number of job
833 <        """
834 <        common.logger.debug(5,'Splitting per job')
835 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
836 <
837 <        self.total_number_of_jobs = self.theNumberOfJobs
358 >        return sites
359  
839        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
360  
361 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
361 >    def split(self, jobParams,firstJobID):
362  
363 <        # argument is seed number.$i
364 <        self.list_of_args = []
365 <        for i in range(self.total_number_of_jobs):
846 <            self.jobDestination.append([""])
847 <            self.list_of_args.append([str(i)])
848 <        return
363 >        jobParams = self.dict['args']
364 >        njobs = self.dict['njobs']
365 >        self.jobDestination = self.dict['jobDestination']
366  
367 <    def split(self, jobParams,firstJobID):
367 >        if njobs==0:
368 >            raise CrabException("Ask to split "+str(njobs)+" jobs: aborting")
369  
852        njobs = self.total_number_of_jobs
853        arglist = self.list_of_args
370          # create the empty structure
371          for i in range(njobs):
372              jobParams.append("")
373  
374          listID=[]
375          listField=[]
376 +        listDictions=[]
377 +        exist= os.path.exists(self.argsFile)
378          for id in range(njobs):
379              job = id + int(firstJobID)
862            jobParams[id] = arglist[id]
380              listID.append(job+1)
381              job_ToSave ={}
382              concString = ' '
383              argu=''
384 +            str_argu = str(job+1)
385              if len(jobParams[id]):
386 <                argu +=   concString.join(jobParams[id] )
387 <            job_ToSave['arguments']= str(job+1)+' '+argu
386 >                argu = {'JobID': job+1}
387 >                for i in range(len(jobParams[id])):
388 >                    argu[self.dict['params'][i]]=jobParams[id][i]
389 >                # just for debug
390 >                str_argu += concString.join(jobParams[id])
391 >            if argu != '': listDictions.append(argu)
392 >            job_ToSave['arguments']= str(job+1)
393              job_ToSave['dlsDestination']= self.jobDestination[id]
394              listField.append(job_ToSave)
395 <            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
396 <            +"                     Destination: "+str(self.jobDestination[id])
397 <            common.logger.debug(5,msg)
395 >            from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
396 >            cms_se = CmsSEMap()
397 >            msg="Job  %s  Arguments:  %s\n"%(str(job+1),str_argu)
398 >            msg+="\t  Destination: %s "%(str(self.jobDestination[id]))
399 >            SEDestination = [cms_se[dest] for dest in self.jobDestination[id]]
400 >            msg+="\t  CMSDestination: %s "%(str(SEDestination))
401 >            common.logger.log(10-1,msg)
402 >        # write xml
403 >        if len(listDictions):
404 >            if exist==False: self.CreateXML()
405 >            self.addEntry(listDictions)
406 >            self.addXMLfile()
407          common._db.updateJob_(listID,listField)
408 <        self.argsList = (len(jobParams[0])+1)
408 >        self.zipTarFile()
409 >        return
410 >
411 >    def addXMLfile(self):
412  
413 +        import tarfile
414 +        try:
415 +            tar = tarfile.open(self.tarNameWithPath, "a")
416 +            tar.add(self.argsFile, os.path.basename(self.argsFile))
417 +            tar.close()
418 +        except IOError, exc:
419 +            msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
420 +            msg += str(exc)
421 +            raise CrabException(msg)
422 +        except tarfile.TarError, exc:
423 +            msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
424 +            msg += str(exc)
425 +            raise CrabException(msg)
426 +
427 +    def CreateXML(self):
428 +        """
429 +        """
430 +        result = IMProvNode( self.rootArgsFilename )
431 +        outfile = file( self.argsFile, 'w').write(str(result))
432 +        return
433 +
434 +    def addEntry(self, listDictions):
435 +        """
436 +        _addEntry_
437 +
438 +        add an entry to the xml file
439 +        """
440 +        from IMProv.IMProvLoader import loadIMProvFile
441 +        ## load xml
442 +        improvDoc = loadIMProvFile(self.argsFile)
443 +        entrname= 'Job'
444 +        for dictions in listDictions:
445 +           report = IMProvNode(entrname , None, **dictions)
446 +           improvDoc.addNode(report)
447 +        outfile = file( self.argsFile, 'w').write(str(improvDoc))
448          return
449  
450      def numberOfJobs(self):
451 <        return self.total_number_of_jobs
451 >        return self.dict['njobs']
452  
453      def getTarBall(self, exe):
454          """
455          Return the TarBall with lib and exe
456          """
457 <        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
458 <        if os.path.exists(self.tgzNameWithPath):
459 <            return self.tgzNameWithPath
457 >        self.tarNameWithPath = common.work_space.pathForTgz()+self.tar_name
458 >        if os.path.exists(self.tarNameWithPath):
459 >            return self.tarNameWithPath
460  
461          # Prepare a tar gzipped file with user binaries.
462          self.buildTar_(exe)
463  
464 <        return string.strip(self.tgzNameWithPath)
464 >        return string.strip(self.tarNameWithPath)
465  
466      def buildTar_(self, executable):
467  
# Line 901 | Line 471 | class Cmssw(JobType):
471  
472          ## check if working area is release top
473          if swReleaseTop == '' or swArea == swReleaseTop:
474 <            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
474 >            common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
475              return
476  
477          import tarfile
478          try: # create tar ball
479 <            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
479 >            #tar = tarfile.open(self.tgzNameWithPath, "w:gz")
480 >            tar = tarfile.open(self.tarNameWithPath, "w")
481              ## First find the executable
482              if (self.executable != ''):
483                  exeWithPath = self.scram.findFile_(executable)
# Line 916 | Line 487 | class Cmssw(JobType):
487                  ## then check if it's private or not
488                  if exeWithPath.find(swReleaseTop) == -1:
489                      # the exe is private, so we must ship
490 <                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
490 >                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
491                      path = swArea+'/'
492                      # distinguish case when script is in user project area or given by full path somewhere else
493                      if exeWithPath.find(path) >= 0 :
# Line 930 | Line 501 | class Cmssw(JobType):
501                      pass
502  
503              ## Now get the libraries: only those in local working area
504 +            tar.dereference=True
505              libDir = 'lib'
506              lib = swArea+'/' +libDir
507 <            common.logger.debug(5,"lib "+lib+" to be tarred")
507 >            common.logger.debug("lib "+lib+" to be tarred")
508              if os.path.exists(lib):
509                  tar.add(lib,libDir)
510  
# Line 941 | Line 513 | class Cmssw(JobType):
513              module = swArea + '/' + moduleDir
514              if os.path.isdir(module):
515                  tar.add(module,moduleDir)
516 +            tar.dereference=False
517  
518              ## Now check if any data dir(s) is present
519              self.dataExist = False
# Line 954 | Line 527 | class Cmssw(JobType):
527                      todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
528                      if name == 'data':
529                          self.dataExist=True
530 <                        common.logger.debug(5,"data "+entry+" to be tarred")
530 >                        common.logger.debug("data "+entry+" to be tarred")
531                          tar.add(swArea+"/src/"+entry,"src/"+entry)
532                      pass
533                  pass
# Line 963 | Line 536 | class Cmssw(JobType):
536              if not self.pset is None:
537                  cfg_file = common.work_space.jobDir()+self.configFilename()
538                  tar.add(cfg_file,self.configFilename())
966                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
539  
540 +            try:
541 +                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
542 +                tar.add(crab_cfg_file,'crab.cfg')
543 +            except:
544 +                pass
545  
546              ## Add ProdCommon dir to tar
547              prodcommonDir = './'
548              prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
549              neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
550 <                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage']
550 >                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
551 >                           'WMCore/__init__.py','WMCore/Algorithms']
552              for file in neededStuff:
553                  tar.add(prodcommonPath+file,prodcommonDir+file)
976            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
554  
555              ##### ML stuff
556              ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
557              path=os.environ['CRABDIR'] + '/python/'
558              for file in ML_file_list:
559                  tar.add(path+file,file)
983            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
560  
561              ##### Utils
562              Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
563              for file in Utils_file_list:
564                  tar.add(path+file,file)
989            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
565  
566              ##### AdditionalFiles
567 +            tar.dereference=True
568              for file in self.additional_inbox_files:
569                  tar.add(file,string.split(file,'/')[-1])
570 <            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
570 >            tar.dereference=False
571 >            common.logger.log(10-1,"Files in "+self.tarNameWithPath+" : "+str(tar.getnames()))
572  
573              tar.close()
574          except IOError, exc:
575 <            common.logger.write(str(exc))
576 <            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
575 >            msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
576 >            msg += str(exc)
577 >            raise CrabException(msg)
578          except tarfile.TarError, exc:
579 <            common.logger.write(str(exc))
580 <            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
579 >            msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
580 >            msg += str(exc)
581 >            raise CrabException(msg)
582 >
583 >    def zipTarFile(self):
584 >
585 >        cmd = "gzip -c %s > %s "%(self.tarNameWithPath,self.tgzNameWithPath)
586 >        res=runCommand(cmd)
587  
1004        ## check for tarball size
588          tarballinfo = os.stat(self.tgzNameWithPath)
589          if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
590              msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
591 <               +'MB input sandbox limit \n'
591 >               +'MB input sandbox limit \n'
592              msg += '      and not supported by the direct GRID submission system.\n'
593              msg += '      Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
594              msg += '      For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
# Line 1018 | Line 601 | class Cmssw(JobType):
601          Returns part of a job script which prepares
602          the execution environment for the job 'nj'.
603          """
604 +        # FUTURE: Drop support for .cfg when possible
605          if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
606              psetName = 'pset.py'
607          else:
# Line 1025 | Line 609 | class Cmssw(JobType):
609          # Prepare JobType-independent part
610          txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
611          txt += 'echo ">>> setup environment"\n'
612 <        txt += 'if [ $middleware == LCG ]; then \n'
612 >        txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
613          txt += self.wsSetupCMSLCGEnvironment_()
614          txt += 'elif [ $middleware == OSG ]; then\n'
615          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
# Line 1040 | Line 624 | class Cmssw(JobType):
624          txt += '    cd $WORKING_DIR\n'
625          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
626          txt += self.wsSetupCMSOSGEnvironment_()
627 +        #Setup SGE Environment
628 +        txt += 'elif [ $middleware == SGE ]; then\n'
629 +        txt += self.wsSetupCMSLCGEnvironment_()
630 +
631 +        txt += 'elif [ $middleware == ARC ]; then\n'
632 +        txt += self.wsSetupCMSLCGEnvironment_()
633 +
634          txt += 'fi\n'
635  
636          # Prepare JobType-specific part
# Line 1055 | Line 646 | class Cmssw(JobType):
646          txt += '    func_exit\n'
647          txt += 'fi \n'
648          txt += 'cd '+self.version+'\n'
649 <        txt += 'SOFTWARE_DIR=`pwd`\n'
649 >        txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
650          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
651          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
652          txt += 'if [ $? != 0 ] ; then\n'
# Line 1089 | Line 680 | class Cmssw(JobType):
680              txt += 'ApplicationFamily=cmsRun\n'
681  
682          else:
683 <            self.primaryDataset = 'null'
683 >            self.primaryDataset = 'null'
684              txt += 'DatasetPath=MCDataTier\n'
685              txt += 'PrimaryDataset=null\n'
686              txt += 'DataTier=null\n'
# Line 1098 | Line 689 | class Cmssw(JobType):
689              pset = os.path.basename(job.configFilename())
690              txt += '\n'
691              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1101            if (self.datasetPath): # standard job
1102                txt += 'InputFiles=${args[1]}; export InputFiles\n'
1103                if (self.useParent):
1104                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
1105                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
1106                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
1107                else:
1108                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
1109                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
1110                txt += 'echo "Inputfiles:<$InputFiles>"\n'
1111                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
1112                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1113                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1114            else:  # pythia like job
1115                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
1116                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
1117                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
1118                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
1119                if (self.firstRun):
1120                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
1121                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1122
1123            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
692  
693 +            txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
694 +            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
695 +            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
696 +            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
697  
698 <        if self.pset != None:
699 <            # FUTURE: Can simply for 2_1_x and higher
1128 <            txt += '\n'
1129 <            if self.debug_wrapper==True:
1130 <                txt += 'echo "***** cat ' + psetName + ' *********"\n'
1131 <                txt += 'cat ' + psetName + '\n'
1132 <                txt += 'echo "****** end ' + psetName + ' ********"\n'
1133 <                txt += '\n'
1134 <            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1135 <                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
1136 <            else:
1137 <                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
1138 <            txt += 'echo "PSETHASH = $PSETHASH" \n'
698 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
699 >        else:
700              txt += '\n'
701 +            txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
702 +
703          return txt
704  
705      def wsUntarSoftware(self, nj=0):
# Line 1149 | Line 712 | class Cmssw(JobType):
712  
713          if os.path.isfile(self.tgzNameWithPath):
714              txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
715 <            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
716 <            if  self.debug_wrapper:
715 >            txt += 'tar zxvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
716 >            if  self.debug_wrapper==1 :
717                  txt += 'ls -Al \n'
718              txt += 'untar_status=$? \n'
719              txt += 'if [ $untar_status -ne 0 ]; then \n'
# Line 1204 | Line 767 | class Cmssw(JobType):
767          txt += 'fi\n'
768          txt += '\n'
769  
770 +        if self.pset != None:
771 +            # FUTURE: Drop support for .cfg when possible
772 +            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
773 +                psetName = 'pset.py'
774 +            else:
775 +                psetName = 'pset.cfg'
776 +            # FUTURE: Can simplify for 2_1_x and higher
777 +            txt += '\n'
778 +            if self.debug_wrapper == 1:
779 +                txt += 'echo "***** cat ' + psetName + ' *********"\n'
780 +                txt += 'cat ' + psetName + '\n'
781 +                txt += 'echo "****** end ' + psetName + ' ********"\n'
782 +                txt += '\n'
783 +                txt += 'echo "***********************" \n'
784 +                txt += 'which edmConfigHash \n'
785 +                txt += 'echo "***********************" \n'
786 +            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
787 +                txt += 'edmConfigHash ' + psetName + ' \n'
788 +                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
789 +            else:
790 +                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
791 +            txt += 'echo "PSETHASH = $PSETHASH" \n'
792 +            #### FEDE temporary fix for noEdm files #####
793 +            txt += 'if [ -z "$PSETHASH" ]; then \n'
794 +            txt += '   export PSETHASH=null\n'
795 +            txt += 'fi \n'
796 +            #############################################
797 +            txt += '\n'
798          return txt
799  
800  
# Line 1215 | Line 806 | class Cmssw(JobType):
806  
807      def executableArgs(self):
808          # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
809 <        if self.scriptExe:#CarlosDaniele
810 <            return   self.scriptExe + " $NJob"
809 >        if self.scriptExe:
810 >            return self.scriptExe + " $NJob $AdditionalArgs"
811          else:
812              ex_args = ""
813 <            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
814 <            # Framework job report
1224 <            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1225 <                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1226 <            # Type of config file
813 >            ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
814 >            # Type of config file depends on CMSSW version
815              if self.CMSSW_major >= 2 :
816                  ex_args += " -p pset.py"
817              else:
# Line 1261 | Line 849 | class Cmssw(JobType):
849          txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
850          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
851          txt += 'echo ">>> current directory content:"\n'
852 <        if self.debug_wrapper:
852 >        if self.debug_wrapper==1:
853              txt += 'ls -Al\n'
854          txt += '\n'
855  
# Line 1293 | Line 881 | class Cmssw(JobType):
881          txt += '\n'
882          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
883          txt += 'echo ">>> current directory content:"\n'
884 <        if self.debug_wrapper:
884 >        if self.debug_wrapper==1:
885              txt += 'ls -Al\n'
886          txt += '\n'
887          txt += 'cd $RUNTIME_AREA\n'
# Line 1390 | Line 978 | class Cmssw(JobType):
978          """
979          insert the part of the script that modifies the FrameworkJob Report
980          """
981 <        txt = '\n#Written by cms_cmssw::wsModifyReport\n'
981 >
982 >        txt = ''
983          publish_data = int(self.cfg_params.get('USER.publish_data',0))
984 <        if (publish_data == 1):
984 >        #if (publish_data == 1):
985 >        if (self.copy_data == 1):
986 >            txt = '\n#Written by cms_cmssw::wsModifyReport\n'
987 >            publish_data = int(self.cfg_params.get('USER.publish_data',0))
988 >
989  
990              txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n'
991 <            txt += '    FOR_LFN=$LFNBaseName/${PSETHASH}/\n'
991 >            txt += '    FOR_LFN=$LFNBaseName\n'
992              txt += 'else\n'
993              txt += '    FOR_LFN=/copy_problems/ \n'
1401            txt += '    SE=""\n'
1402            txt += '    SE_PATH=""\n'
994              txt += 'fi\n'
995  
996              txt += 'echo ">>> Modify Job Report:" \n'
997              txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1407            txt += 'ProcessedDataset= $procDataset \n'
1408            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
998              txt += 'echo "SE = $SE"\n'
999              txt += 'echo "SE_PATH = $SE_PATH"\n'
1000              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1001              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1002 <            args = '$RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' \
1003 <                   '$User -$ProcessedDataset-$PSETHASH $ApplicationFamily '+ \
1004 <                    '  $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'
1002 >
1003 >
1004 >            args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset  ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH'
1005 >            if (publish_data == 1):
1006 >                processedDataset = self.cfg_params['USER.publish_data_name']
1007 >                txt += 'ProcessedDataset='+processedDataset+'\n'
1008 >                txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1009 >                args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'
1010 >
1011              txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n'
1012              txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n'
1013              txt += 'modifyReport_result=$?\n'
# Line 1435 | Line 1030 | class Cmssw(JobType):
1030          txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1031          txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1032          txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1033 <        if self.debug_wrapper :
1033 >        if self.debug_wrapper==1 :
1034              txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1035          txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1036          txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
# Line 1452 | Line 1047 | class Cmssw(JobType):
1047          txt += '    fi\n'
1048            #### Patch to check input data reading for CMSSW16x. Hopefully we'll remove it asap
1049          txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1050 <        txt += '      echo ">>> Executable succeded  $executable_exit_status"\n'
1051 <        if (self.datasetPath and not (self.dataset_pu or self.useParent)) :
1050 >        txt += '        echo ">>> Executable succeded  $executable_exit_status"\n'
1051 >        ## This can no longer work given the changes to the job arguments
1052 >        """
1053 >        if (self.datasetPath and not (self.dataset_pu or self.useParent==1)) :
1054            # VERIFY PROCESSED DATA
1055 <            txt += '      echo ">>> Verify list of processed files:"\n'
1056 <            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1057 <            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1058 <            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1059 <            txt += '      mv tmp.txt input-files.txt\n'
1060 <            txt += '      echo "cat input-files.txt"\n'
1061 <            txt += '      echo "----------------------"\n'
1062 <            txt += '      cat input-files.txt\n'
1063 <            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1064 <            txt += '      mv tmp.txt processed-files.txt\n'
1065 <            txt += '      echo "----------------------"\n'
1066 <            txt += '      echo "cat processed-files.txt"\n'
1067 <            txt += '      echo "----------------------"\n'
1068 <            txt += '      cat processed-files.txt\n'
1069 <            txt += '      echo "----------------------"\n'
1070 <            txt += '      diff -q input-files.txt processed-files.txt\n'
1071 <            txt += '      fileverify_status=$?\n'
1072 <            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1073 <            txt += '         executable_exit_status=30001\n'
1074 <            txt += '         echo "ERROR ==> not all input files processed"\n'
1075 <            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1076 <            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1077 <            txt += '      fi\n'
1078 <        txt += '    elif [ $executable_exit_status -ne 0 ] || [ $executable_exit_status -ne 50015 ] || [ $executable_exit_status -ne 50017 ];then\n'
1482 <        txt += '      echo ">>> Executable failed  $executable_exit_status"\n'
1483 <        txt += '      func_exit\n'
1055 >            txt += '        echo ">>> Verify list of processed files:"\n'
1056 >            txt += '        echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1057 >            txt += '        python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1058 >            txt += '        cat input-files.txt  | sort | uniq > tmp.txt\n'
1059 >            txt += '        mv tmp.txt input-files.txt\n'
1060 >            txt += '        echo "cat input-files.txt"\n'
1061 >            txt += '        echo "----------------------"\n'
1062 >            txt += '        cat input-files.txt\n'
1063 >            txt += '        cat processed-files.txt | sort | uniq > tmp.txt\n'
1064 >            txt += '        mv tmp.txt processed-files.txt\n'
1065 >            txt += '        echo "----------------------"\n'
1066 >            txt += '        echo "cat processed-files.txt"\n'
1067 >            txt += '        echo "----------------------"\n'
1068 >            txt += '        cat processed-files.txt\n'
1069 >            txt += '        echo "----------------------"\n'
1070 >            txt += '        diff -qbB input-files.txt processed-files.txt\n'
1071 >            txt += '        fileverify_status=$?\n'
1072 >            txt += '        if [ $fileverify_status -ne 0 ]; then\n'
1073 >            txt += '            executable_exit_status=30001\n'
1074 >            txt += '            echo "ERROR ==> not all input files processed"\n'
1075 >            txt += '            echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1076 >            txt += '            echo "      ==> diff input-files.txt processed-files.txt"\n'
1077 >            txt += '        fi\n'
1078 >        """
1079          txt += '    fi\n'
1485        txt += '\n'
1080          txt += 'else\n'
1081          txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1082          txt += 'fi\n'
1083          txt += '\n'
1084 +        txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
1085 +        txt += '    echo ">>> Executable failed  $executable_exit_status"\n'
1086 +        txt += '    echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1087 +        txt += '    echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1088 +        txt += '    job_exit_code=$executable_exit_status\n'
1089 +        txt += '    func_exit\n'
1090 +        txt += 'fi\n\n'
1091          txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1092          txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1093          txt += 'job_exit_code=$executable_exit_status\n'
# Line 1499 | Line 1100 | class Cmssw(JobType):
1100      def getParams(self):
1101          return self._params
1102  
1103 <    def uniquelist(self, old):
1503 <        """
1504 <        remove duplicates from a list
1505 <        """
1506 <        nd={}
1507 <        for e in old:
1508 <            nd[e]=0
1509 <        return nd.keys()
1510 <
1511 <    def outList(self):
1103 >    def outList(self,list=False):
1104          """
1105          check the dimension of the output files
1106          """
# Line 1517 | Line 1109 | class Cmssw(JobType):
1109          listOutFiles = []
1110          stdout = 'CMSSW_$NJob.stdout'
1111          stderr = 'CMSSW_$NJob.stderr'
1112 +        if len(self.output_file) <= 0:
1113 +            msg ="WARNING: no output files name have been defined!!\n"
1114 +            msg+="\tno output files will be reported back/staged\n"
1115 +            common.logger.info(msg)
1116          if (self.return_data == 1):
1117              for file in (self.output_file+self.output_file_sandbox):
1118                  listOutFiles.append(numberFile(file, '$NJob'))
# Line 1530 | Line 1126 | class Cmssw(JobType):
1126          txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1127          txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1128          txt += 'export filesToCheck\n'
1129 +
1130 +        if list : return self.output_file
1131          return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines