root/cvsroot/COMP/CRAB/python/cms_cmssw.py

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.52 by slacapra, Tue Oct 17 11:54:02 2006 UTC vs.
Revision 1.128 by fanzago, Thu Oct 11 16:23:44 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 < import math
5 > from BlackWhiteListParser import BlackWhiteListParser
6   import common
7 import PsetManipulator  
8
9 import DBSInfo
10 import DataDiscovery
11 import DataLocation
7   import Scram
8  
9 < import glob, os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12      def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
21        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18  
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
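Editor's note: the MaxTarBallSize setting added above follows the usual pattern in this class of reading an optional config key and falling back to a default when it is missing. A minimal standalone sketch of that pattern, using a plain dict in place of the real cfg_params object:

    # Sketch only: optional config value with a default, as done above for
    # EDG.maxtarballsize (value in MB).
    def get_max_tarball_size(cfg_params, default=9.5):
        try:
            return float(cfg_params['EDG.maxtarballsize'])
        except KeyError:
            return default

    assert get_max_tarball_size({}) == 9.5
    assert get_max_tarball_size({'EDG.maxtarballsize': '20'}) == 20.0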
27          # number of jobs requested to be created, limit obj splitting
28          self.ncjobs = ncjobs
29  
30          log = common.logger
31          
32          self.scram = Scram.Scram(cfg_params)
31        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40          self.pset = ''      #script use case Da
41          self.datasetPath = '' #script use case Da
42  
# Line 40 | Line 44 | class Cmssw(JobType):
44          self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +        
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 +            raise CrabException(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +        
61 +        common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
63  
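Editor's note: the arch/version guard added above parses the release name by splitting on underscores and refuses task creation for slc4 with pre-1_5 releases, or slc3 with 1_5 and later. A small sketch of the same check, assuming version strings of the form CMSSW_<major>_<minor>_<patch>:

    # Sketch of the guard above; returns True when the combination is allowed.
    def arch_is_supported(version, arch):
        a = version.split("_")
        major, minor = int(a[1]), int(a[2])
        if major == 1 and minor < 5 and arch.startswith('slc4'):
            return False
        if major == 1 and minor >= 5 and arch.startswith('slc3'):
            return False
        return True

    assert arch_is_supported("CMSSW_1_3_1", "slc3_ia32_gcc323")
    assert not arch_is_supported("CMSSW_1_6_0", "slc3_ia32_gcc323")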
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67 +        try:
68 +            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 +        except KeyError:
70 +            self.use_dbs_1 = 0
71 +            
72          try:
73              tmp =  cfg_params['CMSSW.datasetpath']
74              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
# Line 62 | Line 88 | class Cmssw(JobType):
88              self.setParam_('dataset', 'None')
89              self.setParam_('owner', 'None')
90          else:
91 <            datasetpath_split = self.datasetPath.split("/")
92 <            self.setParam_('dataset', datasetpath_split[1])
93 <            self.setParam_('owner', datasetpath_split[-1])
94 <
91 >            try:
92 >                datasetpath_split = self.datasetPath.split("/")
93 >                # standard style
94 >                self.setParam_('datasetFull', self.datasetPath)
95 >                if self.use_dbs_1 == 1 :
96 >                    self.setParam_('dataset', datasetpath_split[1])
97 >                    self.setParam_('owner', datasetpath_split[-1])
98 >                else:
99 >                    self.setParam_('dataset', datasetpath_split[1])
100 >                    self.setParam_('owner', datasetpath_split[2])
101 >            except:
102 >                self.setParam_('dataset', self.datasetPath)
103 >                self.setParam_('owner', self.datasetPath)
104 >                
105          self.setTaskid_()
106          self.setParam_('taskId', self.cfg_params['taskId'])
107  
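Editor's note: the dataset/owner parameters above come from splitting the dataset path on '/': 'dataset' is always the first component, while 'owner' is the last component in DBS-1 mode and the second component otherwise. A hedged illustration (the example path is hypothetical):

    # Sketch of the parameter extraction above.
    def dataset_and_owner(dataset_path, use_dbs_1=False):
        parts = dataset_path.split("/")
        dataset = parts[1]
        owner = parts[-1] if use_dbs_1 else parts[2]
        return dataset, owner

    assert dataset_and_owner("/PrimaryDS/ProcessedDS/GEN-SIM") == ("PrimaryDS", "ProcessedDS")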
# Line 97 | Line 133 | class Cmssw(JobType):
133              raise CrabException("PSet file missing. Cannot run cmsRun ")
134  
135          # output files
136 <        try:
137 <            self.output_file = []
136 >        ## stuff which must be returned always via sandbox
137 >        self.output_file_sandbox = []
138  
139 <            # add fjr report by default
140 <            self.output_file.append(self.fjrFileName)
139 >        # add fjr report by default via sandbox
140 >        self.output_file_sandbox.append(self.fjrFileName)
141  
142 +        # other output files to be returned via sandbox or copied to SE
143 +        try:
144 +            self.output_file = []
145              tmp = cfg_params['CMSSW.output_file']
146              if tmp != '':
147                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 112 | Line 151 | class Cmssw(JobType):
151                      self.output_file.append(tmp)
152                      pass
153              else:
154 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
154 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155                  pass
156              pass
157          except KeyError:
158 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
158 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159              pass
160  
161          # script_exe file as additional file in inputSandbox
# Line 124 | Line 163 | class Cmssw(JobType):
163              self.scriptExe = cfg_params['USER.script_exe']
164              if self.scriptExe != '':
165                 if not os.path.isfile(self.scriptExe):
166 <                  msg ="WARNING. file "+self.scriptExe+" not found"
166 >                  msg ="ERROR. file "+self.scriptExe+" not found"
167                    raise CrabException(msg)
168                 self.additional_inbox_files.append(string.strip(self.scriptExe))
169          except KeyError:
170              self.scriptExe = ''
171 +
172          #CarlosDaniele
173          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 <           msg ="WARNING. script_exe  not defined"
174 >           msg ="Error. script_exe  not defined"
175             raise CrabException(msg)
176  
177          ## additional input files
# Line 141 | Line 181 | class Cmssw(JobType):
181                  tmp = string.strip(tmp)
182                  dirname = ''
183                  if not tmp[0]=="/": dirname = "."
184 <                files = glob.glob(os.path.join(dirname, tmp))
184 >                files = []
185 >                if string.find(tmp,"*")>-1:
186 >                    files = glob.glob(os.path.join(dirname, tmp))
187 >                    if len(files)==0:
188 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
189 >                else:
190 >                    files.append(tmp)
191                  for file in files:
192                      if not os.path.exists(file):
193                          raise CrabException("Additional input file not found: "+file)
194                      pass
195 +                    # fname = string.split(file, '/')[-1]
196 +                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 +                    # shutil.copyfile(file, storedFile)
198                      self.additional_inbox_files.append(string.strip(file))
199                  pass
200              pass
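Editor's note: the additional_input_files handling above only globs entries that actually contain a wildcard and fails early when a pattern matches nothing. A standalone sketch of that expansion step (CrabException is replaced by a generic exception here, and any filenames used with it are hypothetical):

    import glob, os

    # Sketch of the wildcard expansion used above for USER.additional_input_files.
    def expand_additional_file(entry):
        dirname = '' if entry.startswith('/') else '.'
        if '*' in entry:
            files = glob.glob(os.path.join(dirname, entry))
            if not files:
                raise RuntimeError("No additional input file found with this pattern: " + entry)
        else:
            files = [entry]
        return files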
# Line 204 | Line 253 | class Cmssw(JobType):
253          except KeyError:
254              self.sourceSeedVtx = None
255              common.logger.debug(5,"No vertex seed given")
256 +
257 +        try:
258 +            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259 +        except KeyError:
260 +            self.sourceSeedG4 = None
261 +            common.logger.debug(5,"No g4 sim hits seed given")
262 +
263 +        try:
264 +            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265 +        except KeyError:
266 +            self.sourceSeedMix = None
267 +            common.logger.debug(5,"No mix seed given")
268 +
269 +        try:
270 +            self.firstRun = int(cfg_params['CMSSW.first_run'])
271 +        except KeyError:
272 +            self.firstRun = None
273 +            common.logger.debug(5,"No first run given")
274          if self.pset != None: #CarlosDaniele
275 <            self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
275 >            ver = string.split(self.version,"_")
276 >            if (int(ver[1])>=1 and int(ver[2])>=5):
277 >                import PsetManipulator150 as pp
278 >            else:
279 >                import PsetManipulator as pp
280 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
281  
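Editor's note: the conditional import above picks PsetManipulator150 for releases 1_5_X and newer, otherwise the older PsetManipulator, based on the same underscore-split version string. A sketch of just the selection logic (module names are taken from the code above; the helper itself is illustrative):

    # Sketch: choose the pset-manipulator module name from the release.
    def pset_module_name(version):
        ver = version.split("_")
        if int(ver[1]) >= 1 and int(ver[2]) >= 5:
            return "PsetManipulator150"
        return "PsetManipulator"

    assert pset_module_name("CMSSW_1_6_7") == "PsetManipulator150"
    assert pset_module_name("CMSSW_1_3_1") == "PsetManipulator"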
282          #DBSDLS-start
283          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
# Line 227 | Line 299 | class Cmssw(JobType):
299                  self.jobSplittingForScript()
300              else:
301                  self.jobSplittingNoInput()
302 <        else: self.jobSplittingByBlocks(blockSites)
302 >        else:
303 >            self.jobSplittingByBlocks(blockSites)
304  
305          # modify Pset
306          if self.pset != None: #CarlosDaniele
307              try:
308                  if (self.datasetPath): # standard job
309                      # allow to process a fraction of events in a file
310 <                    self.PsetEdit.inputModule("INPUT")
311 <                    self.PsetEdit.maxEvent("INPUTMAXEVENTS")
312 <                    self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
310 >                    PsetEdit.inputModule("INPUT")
311 >                    PsetEdit.maxEvent("INPUTMAXEVENTS")
312 >                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
313                  else:  # pythia like job
314 <                    self.PsetEdit.maxEvent(self.eventsPerJob)
314 >                    PsetEdit.maxEvent(self.eventsPerJob)
315 >                    if (self.firstRun):
316 >                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
317                      if (self.sourceSeed) :
318 <                        self.PsetEdit.pythiaSeed("INPUT")
318 >                        PsetEdit.pythiaSeed("INPUT")
319                          if (self.sourceSeedVtx) :
320 <                            self.PsetEdit.pythiaSeedVtx("INPUTVTX")
320 >                            PsetEdit.vtxSeed("INPUTVTX")
321 >                        if (self.sourceSeedG4) :
322 >                            PsetEdit.g4Seed("INPUTG4")
323 >                        if (self.sourceSeedMix) :
324 >                            PsetEdit.mixSeed("INPUTMIX")
325                  # add FrameworkJobReport to parameter-set
326 <                self.PsetEdit.addCrabFJR(self.fjrFileName)
327 <                self.PsetEdit.psetWriter(self.configFilename())
326 >                PsetEdit.addCrabFJR(self.fjrFileName)
327 >                PsetEdit.psetWriter(self.configFilename())
328              except:
329                  msg='Error while manipulating ParameterSet: exiting...'
330                  raise CrabException(msg)
331  
332      def DataDiscoveryAndLocation(self, cfg_params):
333  
334 +        import DataDiscovery
335 +        import DataDiscovery_DBS2
336 +        import DataLocation
337          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
338  
339          datasetPath=self.datasetPath
340  
259        ## TODO
260        dataTiersList = ""
261        dataTiers = dataTiersList.split(',')
262
341          ## Contact the DBS
342 <        common.logger.message("Contacting DBS...")
342 >        common.logger.message("Contacting Data Discovery Services ...")
343          try:
344 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, dataTiers, cfg_params)
344 >
345 >            if self.use_dbs_1 == 1 :
346 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 >            else :
348 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
349              self.pubdata.fetchDBSInfo()
350  
351          except DataDiscovery.NotExistingDatasetError, ex :
352              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
353              raise CrabException(msg)
272
354          except DataDiscovery.NoDataTierinProvenanceError, ex :
355              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
356              raise CrabException(msg)
357          except DataDiscovery.DataDiscoveryError, ex:
358 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
358 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
359 >            raise CrabException(msg)
360 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362 >            raise CrabException(msg)
363 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365 >            raise CrabException(msg)
366 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368              raise CrabException(msg)
279
280        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
281        ## self.DBSPaths=self.pubdata.getDBSPaths()
282        common.logger.message("Required data are :"+self.datasetPath)
369  
370          self.filesbyblock=self.pubdata.getFiles()
371          self.eventsbyblock=self.pubdata.getEventsPerBlock()
372          self.eventsbyfile=self.pubdata.getEventsPerFile()
287        # print str(self.filesbyblock)
288        # print 'self.eventsbyfile',len(self.eventsbyfile)
289        # print str(self.eventsbyfile)
373  
374          ## get max number of events
375          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
293        common.logger.message("The number of available events is %s\n"%self.maxEvents)
376  
295        common.logger.message("Contacting DLS...")
377          ## Contact the DLS and build a list of sites hosting the fileblocks
378          try:
379              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
# Line 305 | Line 386 | class Cmssw(JobType):
386          sites = dataloc.getSites()
387          allSites = []
388          listSites = sites.values()
389 <        for list in listSites:
390 <            for oneSite in list:
389 >        for listSite in listSites:
390 >            for oneSite in listSite:
391                  allSites.append(oneSite)
392          allSites = self.uniquelist(allSites)
393  
394 <        common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
395 <        common.logger.debug(6, "List of Sites: "+str(allSites))
394 >        # screen output
395 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
396 >
397          return sites
398      
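Editor's note: DataDiscoveryAndLocation above flattens the per-block site lists returned by the location lookup into a single de-duplicated allSites list. A compact sketch of that flatten-and-dedup step (uniquelist itself is not shown in this diff, so an order-preserving dedup is assumed):

    # Sketch of the site flattening done above.
    def flatten_sites(sites_by_block):
        all_sites = []
        for site_list in sites_by_block.values():
            for site in site_list:
                if site not in all_sites:
                    all_sites.append(site)
        return all_sites

    assert flatten_sites({'block1': ['T2_A', 'T2_B'], 'block2': ['T2_B']}) == ['T2_A', 'T2_B']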
399      def jobSplittingByBlocks(self, blockSites):
# Line 373 | Line 455 | class Cmssw(JobType):
455          jobCount = 0
456          list_of_lists = []
457  
458 +        # list tracking which jobs belong to which block
459 +        jobsOfBlock = {}
460 +
461          # ---- Iterate over the blocks in the dataset until ---- #
462          # ---- we've met the requested total # of events    ---- #
463          while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
464              block = blocks[blockCount]
465              blockCount += 1
466 +            if block not in jobsOfBlock.keys() :
467 +                jobsOfBlock[block] = []
468              
469 <
470 <            numEventsInBlock = self.eventsbyblock[block]
471 <            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
469 >            if self.eventsbyblock.has_key(block) :
470 >                numEventsInBlock = self.eventsbyblock[block]
471 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
472              
473 <            files = self.filesbyblock[block]
474 <            numFilesInBlock = len(files)
475 <            if (numFilesInBlock <= 0):
476 <                continue
477 <            fileCount = 0
478 <
479 <            # ---- New block => New job ---- #
480 <            parString = "\\{"
481 <            # counter for number of events in files currently worked on
482 <            filesEventCount = 0
483 <            # flag if next while loop should touch new file
484 <            newFile = 1
485 <            # job event counter
486 <            jobSkipEventCount = 0
473 >                files = self.filesbyblock[block]
474 >                numFilesInBlock = len(files)
475 >                if (numFilesInBlock <= 0):
476 >                    continue
477 >                fileCount = 0
478 >
479 >                # ---- New block => New job ---- #
480 >                parString = "\\{"
481 >                # counter for number of events in files currently worked on
482 >                filesEventCount = 0
483 >                # flag if next while loop should touch new file
484 >                newFile = 1
485 >                # job event counter
486 >                jobSkipEventCount = 0
487              
488 <            # ---- Iterate over the files in the block until we've met the requested ---- #
489 <            # ---- total # of events or we've gone over all the files in this block  ---- #
490 <            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
491 <                file = files[fileCount]
492 <                if newFile :
493 <                    try:
494 <                        numEventsInFile = self.eventsbyfile[file]
495 <                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
496 <                        # increase filesEventCount
497 <                        filesEventCount += numEventsInFile
498 <                        # Add file to current job
499 <                        parString += '\\\"' + file + '\\\"\,'
500 <                        newFile = 0
501 <                    except KeyError:
502 <                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")
488 >                # ---- Iterate over the files in the block until we've met the requested ---- #
489 >                # ---- total # of events or we've gone over all the files in this block  ---- #
490 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
491 >                    file = files[fileCount]
492 >                    if newFile :
493 >                        try:
494 >                            numEventsInFile = self.eventsbyfile[file]
495 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
496 >                            # increase filesEventCount
497 >                            filesEventCount += numEventsInFile
498 >                            # Add file to current job
499 >                            parString += '\\\"' + file + '\\\"\,'
500 >                            newFile = 0
501 >                        except KeyError:
502 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503                          
504  
505 <                # if less events in file remain than eventsPerJobRequested
506 <                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
507 <                    # if last file in block
508 <                    if ( fileCount == numFilesInBlock-1 ) :
509 <                        # end job using last file, use remaining events in block
505 >                    # if less events in file remain than eventsPerJobRequested
506 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
507 >                        # if last file in block
508 >                        if ( fileCount == numFilesInBlock-1 ) :
509 >                            # end job using last file, use remaining events in block
510 >                            # close job and touch new file
511 >                            fullString = parString[:-2]
512 >                            fullString += '\\}'
513 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
514 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
515 >                            self.jobDestination.append(blockSites[block])
516 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
517 >                            # fill jobs of block dictionary
518 >                            jobsOfBlock[block].append(jobCount+1)
519 >                            # reset counter
520 >                            jobCount = jobCount + 1
521 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
522 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
523 >                            jobSkipEventCount = 0
524 >                            # reset file
525 >                            parString = "\\{"
526 >                            filesEventCount = 0
527 >                            newFile = 1
528 >                            fileCount += 1
529 >                        else :
530 >                            # go to next file
531 >                            newFile = 1
532 >                            fileCount += 1
533 >                    # if events in file equal to eventsPerJobRequested
534 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
535                          # close job and touch new file
536                          fullString = parString[:-2]
537                          fullString += '\\}'
538 <                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
539 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
538 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
539 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
540                          self.jobDestination.append(blockSites[block])
541                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
542 +                        jobsOfBlock[block].append(jobCount+1)
543                          # reset counter
544                          jobCount = jobCount + 1
545 <                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
546 <                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
545 >                        totalEventCount = totalEventCount + eventsPerJobRequested
546 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
547                          jobSkipEventCount = 0
548                          # reset file
549                          parString = "\\{"
550                          filesEventCount = 0
551                          newFile = 1
552                          fileCount += 1
553 +                        
554 +                    # if more events in file remain than eventsPerJobRequested
555                      else :
556 <                        # go to next file
557 <                        newFile = 1
558 <                        fileCount += 1
559 <                # if events in file equal to eventsPerJobRequested
560 <                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
561 <                    # close job and touch new file
562 <                    fullString = parString[:-2]
563 <                    fullString += '\\}'
564 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
565 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
566 <                    self.jobDestination.append(blockSites[block])
567 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
568 <                    # reset counter
569 <                    jobCount = jobCount + 1
570 <                    totalEventCount = totalEventCount + eventsPerJobRequested
571 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
572 <                    jobSkipEventCount = 0
573 <                    # reset file
574 <                    parString = "\\{"
575 <                    filesEventCount = 0
576 <                    newFile = 1
462 <                    fileCount += 1
463 <                    
464 <                # if more events in file remain than eventsPerJobRequested
465 <                else :
466 <                    # close job but don't touch new file
467 <                    fullString = parString[:-2]
468 <                    fullString += '\\}'
469 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
470 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
471 <                    self.jobDestination.append(blockSites[block])
472 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
473 <                    # increase counter
474 <                    jobCount = jobCount + 1
475 <                    totalEventCount = totalEventCount + eventsPerJobRequested
476 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
477 <                    # calculate skip events for last file
478 <                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
479 <                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
480 <                    # remove all but the last file
481 <                    filesEventCount = self.eventsbyfile[file]
482 <                    parString = "\\{"
483 <                    parString += '\\\"' + file + '\\\"\,'
484 <                pass # END if
485 <            pass # END while (iterate over files in the block)
556 >                        # close job but don't touch new file
557 >                        fullString = parString[:-2]
558 >                        fullString += '\\}'
559 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
560 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
561 >                        self.jobDestination.append(blockSites[block])
562 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
563 >                        jobsOfBlock[block].append(jobCount+1)
564 >                        # increase counter
565 >                        jobCount = jobCount + 1
566 >                        totalEventCount = totalEventCount + eventsPerJobRequested
567 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
568 >                        # calculate skip events for last file
569 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
570 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
571 >                        # remove all but the last file
572 >                        filesEventCount = self.eventsbyfile[file]
573 >                        parString = "\\{"
574 >                        parString += '\\\"' + file + '\\\"\,'
575 >                    pass # END if
576 >                pass # END while (iterate over files in the block)
577          pass # END while (iterate over blocks in the dataset)
578          self.ncjobs = self.total_number_of_jobs = jobCount
579          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
580              common.logger.message("Could not run on all requested events because some blocks are not hosted at allowed sites.")
581 <        common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
581 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
582 >        
583 >        # screen output
584 >        screenOutput = "List of jobs and available destination sites:\n\n"
585 >
586 >        # keep track of blocks with no sites, to print a warning at the end
587 >        noSiteBlock = []
588 >        bloskNoSite = []
589 >
590 >        blockCounter = 0
591 >        for block in blocks:
592 >            if block in jobsOfBlock.keys() :
593 >                blockCounter += 1
594 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
595 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
596 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
597 >                    bloskNoSite.append( blockCounter )
598          
599 +        common.logger.message(screenOutput)
600 +        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
601 +            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
602 +            virgola = ""
603 +            if len(bloskNoSite) > 1:
604 +                virgola = ","
605 +            for block in bloskNoSite:
606 +                msg += ' ' + str(block) + virgola
607 +            msg += '\n               Related jobs:\n                 '
608 +            virgola = ""
609 +            if len(noSiteBlock) > 1:
610 +                virgola = ","
611 +            for range_jobs in noSiteBlock:
612 +                msg += str(range_jobs) + virgola
613 +            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
614 +            common.logger.message(msg)
615 +
616          self.list_of_args = list_of_lists
617          return
618  
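Editor's note: the per-block screen output above relies on a spanRanges helper to compress job lists into readable ranges; its implementation is not part of this diff. A plausible stand-in, purely as an assumption about its behaviour:

    # Hypothetical stand-in for spanRanges: [1, 2, 3, 5, 7, 8] -> "1-3,5,7-8".
    def span_ranges(job_numbers):
        jobs = sorted(job_numbers)
        out = []
        start = prev = jobs[0]
        for j in jobs[1:]:
            if j == prev + 1:
                prev = j
                continue
            out.append(str(start) if start == prev else "%d-%d" % (start, prev))
            start = prev = j
        out.append(str(start) if start == prev else "%d-%d" % (start, prev))
        return ",".join(out)

    assert span_ranges([1, 2, 3, 5, 7, 8]) == "1-3,5,7-8"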
# Line 506 | Line 630 | class Cmssw(JobType):
630              raise CrabException(msg)
631  
632          if (self.selectEventsPerJob):
633 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
633 >            if (self.selectTotalNumberEvents):
634 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
635 >            elif(self.selectNumberOfJobs) :  
636 >                self.total_number_of_jobs =self.theNumberOfJobs
637 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
638 >
639          elif (self.selectNumberOfJobs) :
640              self.total_number_of_jobs = self.theNumberOfJobs
641              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
642 <
642 >
643          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
644  
645          # is there any remainder?
# Line 528 | Line 657 | class Cmssw(JobType):
657              ## Since there is no input, any site is good
658             # self.jobDestination.append(["Any"])
659              self.jobDestination.append([""]) #must be empty to write correctly the xml
660 +            args=[]
661 +            if (self.firstRun):
662 +                    ## pythia first run
663 +                #self.list_of_args.append([(str(self.firstRun)+str(i))])
664 +                args.append(str(self.firstRun)+str(i))
665 +            else:
666 +                ## no first run
667 +                #self.list_of_args.append([str(i)])
668 +                args.append(str(i))
669              if (self.sourceSeed):
670 +                args.append(str(self.sourceSeed)+str(i))
671                  if (self.sourceSeedVtx):
672 <                    ## pythia + vtx random seed
673 <                    self.list_of_args.append([
674 <                                              str(self.sourceSeed)+str(i),
675 <                                              str(self.sourceSeedVtx)+str(i)
676 <                                              ])
677 <                else:
678 <                    ## only pythia random seed
679 <                    self.list_of_args.append([(str(self.sourceSeed)+str(i))])
680 <            else:
681 <                ## no random seed
682 <                self.list_of_args.append([str(i)])
683 <        #print self.list_of_args
672 >                    ## + vtx random seed
673 >                    args.append(str(self.sourceSeedVtx)+str(i))
674 >                if (self.sourceSeedG4):
675 >                    ## + G4 random seed
676 >                    args.append(str(self.sourceSeedG4)+str(i))
677 >                if (self.sourceSeedMix):    
678 >                    ## + Mix random seed
679 >                    args.append(str(self.sourceSeedMix)+str(i))
680 >                pass
681 >            pass
682 >            self.list_of_args.append(args)
683 >        pass
684 >            
685 >        # print self.list_of_args
686  
687          return
688  
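Editor's note: in jobSplittingNoInput above, each job's argument list starts with either firstRun plus the job index or just the job index, followed by whichever seeds are configured, each with the index appended. A small sketch of that assembly (parameter names mirror the attributes above; the values are examples):

    # Sketch of the per-job argument construction above.
    def build_job_args(i, first_run=None, seed=None, vtx=None, g4=None, mix=None):
        args = [str(first_run) + str(i) if first_run else str(i)]
        if seed:
            args.append(str(seed) + str(i))
            for extra in (vtx, g4, mix):
                if extra:
                    args.append(str(extra) + str(i))
        return args

    assert build_job_args(3, first_run=100, seed=12345, vtx=98765) == ['1003', '123453', '987653']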
# Line 606 | Line 747 | class Cmssw(JobType):
747          """
748          
749          # if it exist, just return it
750 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
750 >        #
751 >        # Marco. Let's start to use relative path for Boss XML files
752 >        #
753 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
754          if os.path.exists(self.tgzNameWithPath):
755              return self.tgzNameWithPath
756  
# Line 620 | Line 764 | class Cmssw(JobType):
764          # First of all declare the user Scram area
765          swArea = self.scram.getSWArea_()
766          #print "swArea = ", swArea
767 <        swVersion = self.scram.getSWVersion()
768 <        #print "swVersion = ", swVersion
767 >        # swVersion = self.scram.getSWVersion()
768 >        # print "swVersion = ", swVersion
769          swReleaseTop = self.scram.getReleaseTop_()
770          #print "swReleaseTop = ", swReleaseTop
771          
# Line 629 | Line 773 | class Cmssw(JobType):
773          if swReleaseTop == '' or swArea == swReleaseTop:
774              return
775  
776 <        filesToBeTarred = []
777 <        ## First find the executable
778 <        if (self.executable != ''):
779 <            exeWithPath = self.scram.findFile_(executable)
780 < #           print exeWithPath
781 <            if ( not exeWithPath ):
782 <                raise CrabException('User executable '+executable+' not found')
783 <
784 <            ## then check if it's private or not
785 <            if exeWithPath.find(swReleaseTop) == -1:
786 <                # the exe is private, so we must ship
787 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
788 <                path = swArea+'/'
789 <                exe = string.replace(exeWithPath, path,'')
790 <                filesToBeTarred.append(exe)
791 <                pass
792 <            else:
793 <                # the exe is from release, we'll find it on WN
794 <                pass
795 <
796 <        ## Now get the libraries: only those in local working area
797 <        libDir = 'lib'
798 <        lib = swArea+'/' +libDir
799 <        common.logger.debug(5,"lib "+lib+" to be tarred")
800 <        if os.path.exists(lib):
801 <            filesToBeTarred.append(libDir)
802 <
803 <        ## Now check if module dir is present
804 <        moduleDir = 'module'
805 <        if os.path.isdir(swArea+'/'+moduleDir):
806 <            filesToBeTarred.append(moduleDir)
807 <
808 <        ## Now check if the Data dir is present
809 <        dataDir = 'src/Data/'
810 <        if os.path.isdir(swArea+'/'+dataDir):
811 <            filesToBeTarred.append(dataDir)
812 <
813 <        ## copy ProdAgent dir to swArea
814 <        cmd = '\cp -rf ' + os.environ['CRABDIR'] + '/ProdAgentApi ' + swArea
815 <        cmd_out = runCommand(cmd)
816 <        if cmd_out != '':
817 <            common.logger.message('ProdAgentApi directory could not be copied to local CMSSW project directory.')
818 <            common.logger.message('No FrameworkJobreport parsing is possible on the WorkerNode.')
819 <
820 <        ## Now check if the Data dir is present
821 <        paDir = 'ProdAgentApi'
822 <        if os.path.isdir(swArea+'/'+paDir):
823 <            filesToBeTarred.append(paDir)
824 <
825 <        ## Create the tar-ball
826 <        if len(filesToBeTarred)>0:
827 <            cwd = os.getcwd()
828 <            os.chdir(swArea)
829 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
830 <            for line in filesToBeTarred:
831 <                tarcmd = tarcmd + line + ' '
832 <            cout = runCommand(tarcmd)
833 <            if not cout:
834 <                raise CrabException('Could not create tar-ball')
835 <            os.chdir(cwd)
836 <        else:
837 <            common.logger.debug(5,"No files to be tarred")
776 >        import tarfile
777 >        try: # create tar ball
778 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
779 >            ## First find the executable
780 >            if (self.executable != ''):
781 >                exeWithPath = self.scram.findFile_(executable)
782 >                if ( not exeWithPath ):
783 >                    raise CrabException('User executable '+executable+' not found')
784 >    
785 >                ## then check if it's private or not
786 >                if exeWithPath.find(swReleaseTop) == -1:
787 >                    # the exe is private, so we must ship
788 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
789 >                    path = swArea+'/'
790 >                    # distinguish case when script is in user project area or given by full path somewhere else
791 >                    if exeWithPath.find(path) >= 0 :
792 >                        exe = string.replace(exeWithPath, path,'')
793 >                        tar.add(path+exe,os.path.basename(executable))
794 >                    else :
795 >                        tar.add(exeWithPath,os.path.basename(executable))
796 >                    pass
797 >                else:
798 >                    # the exe is from release, we'll find it on WN
799 >                    pass
800 >    
801 >            ## Now get the libraries: only those in local working area
802 >            libDir = 'lib'
803 >            lib = swArea+'/' +libDir
804 >            common.logger.debug(5,"lib "+lib+" to be tarred")
805 >            if os.path.exists(lib):
806 >                tar.add(lib,libDir)
807 >    
808 >            ## Now check if module dir is present
809 >            moduleDir = 'module'
810 >            module = swArea + '/' + moduleDir
811 >            if os.path.isdir(module):
812 >                tar.add(module,moduleDir)
813 >
814 >            ## Now check if any data dir(s) is present
815 >            swAreaLen=len(swArea)
816 >            for root, dirs, files in os.walk(swArea):
817 >                if "data" in dirs:
818 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
819 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
820 >
821 >            ## Add ProdAgent dir to tar
822 >            paDir = 'ProdAgentApi'
823 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
824 >            if os.path.isdir(pa):
825 >                tar.add(pa,paDir)
826 >
827 >            ### FEDE FOR DBS PUBLICATION
828 >            ## Add PRODCOMMON dir to tar
829 >            prodcommonDir = 'ProdCommon'
830 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
831 >            if os.path.isdir(prodcommonPath):
832 >                tar.add(prodcommonPath,prodcommonDir)
833 >            #############################    
834 >        
835 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
836 >            tar.close()
837 >        except :
838 >            raise CrabException('Could not create tar-ball')
839 >
840 >        ## check for tarball size
841 >        tarballinfo = os.stat(self.tgzNameWithPath)
842 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
843 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
844 >
845 >        ## create tar-ball with ML stuff
846 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
847 >        try:
848 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
849 >            path=os.environ['CRABDIR'] + '/python/'
850 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
851 >                tar.add(path+file,file)
852 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
853 >            tar.close()
854 >        except :
855 >            raise CrabException('Could not create ML files tar-ball')
856          
857          return
858          
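Editor's note: getTarBall above now builds the input sandbox with the tarfile module and rejects it if it exceeds the EDG.maxtarballsize limit. A minimal sketch of that create-then-check step, with hypothetical file names (the real method also adds lib/, module/, data directories, ProdAgentApi and ProdCommon, and raises CrabException instead of a generic error):

    import os, tarfile

    # Sketch: create a gzipped tarball and enforce a size limit in MB.
    def make_checked_tarball(tgz_path, files, max_mb=9.5):
        tar = tarfile.open(tgz_path, "w:gz")
        for f in files:
            tar.add(f, os.path.basename(f))
        tar.close()
        size_mb = os.stat(tgz_path).st_size / 1024.0 / 1024.0
        if size_mb > max_mb:
            raise RuntimeError("Input sandbox of %.1f MB exceeds the %.1f MB limit" % (size_mb, max_mb))
        return tgz_path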
859 +    def additionalInputFileTgz(self):
860 +        """
861 +        Put all additional files into a tar ball and return its name
862 +        """
863 +        import tarfile
864 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
865 +        tar = tarfile.open(tarName, "w:gz")
866 +        for file in self.additional_inbox_files:
867 +            tar.add(file,string.split(file,'/')[-1])
868 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
869 +        tar.close()
870 +        return tarName
871 +
872      def wsSetupEnvironment(self, nj):
873          """
874          Returns part of a job script which prepares
# Line 705 | Line 880 | class Cmssw(JobType):
880          ## OLI_Daniele at this level  middleware already known
881  
882          txt += 'if [ $middleware == LCG ]; then \n'
883 +        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
884 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
885 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
886          txt += self.wsSetupCMSLCGEnvironment_()
887          txt += 'elif [ $middleware == OSG ]; then\n'
888          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
889          txt += '    echo "Created working directory: $WORKING_DIR"\n'
890          txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
891          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
892 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
893 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
892 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
893 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
895          txt += '        rm -f $RUNTIME_AREA/$repo \n'
896          txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
897          txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 723 | Line 901 | class Cmssw(JobType):
901          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
902          txt += '    cd $WORKING_DIR\n'
903          txt += self.wsSetupCMSOSGEnvironment_()
904 +        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
905 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
906          txt += 'fi\n'
907  
908          # Prepare JobType-specific part
# Line 745 | Line 925 | class Cmssw(JobType):
925          txt += '        cd $RUNTIME_AREA\n'
926          txt += '        /bin/rm -rf $WORKING_DIR\n'
927          txt += '        if [ -d $WORKING_DIR ] ;then\n'
928 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
928 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
932          txt += '            rm -f $RUNTIME_AREA/$repo \n'
933          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
934          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 758 | Line 938 | class Cmssw(JobType):
938          txt += 'fi \n'
939          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
940          txt += 'cd '+self.version+'\n'
941 +        ########## FEDE FOR DBS2 ######################
942 +        txt += 'SOFTWARE_DIR=`pwd`\n'
943 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
944 +        ###############################################
945          ### needed grep for bug in scramv1 ###
946 +        txt += scram+' runtime -sh\n'
947          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
948 +        txt += 'echo $PATH\n'
949  
950          # Handle the arguments:
951          txt += "\n"
# Line 781 | Line 967 | class Cmssw(JobType):
967          txt += '        cd $RUNTIME_AREA\n'
968          txt += '        /bin/rm -rf $WORKING_DIR\n'
969          txt += '        if [ -d $WORKING_DIR ] ;then\n'
970 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
970 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
974          txt += '            rm -f $RUNTIME_AREA/$repo \n'
975          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
976          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
# Line 796 | Line 982 | class Cmssw(JobType):
982  
983          # Prepare job-specific part
984          job = common.job_list[nj]
985 +        ### FEDE FOR DBS OUTPUT PUBLICATION
986 +        if (self.datasetPath):
987 +            txt += '\n'
988 +            txt += 'DatasetPath='+self.datasetPath+'\n'
989 +
990 +            datasetpath_split = self.datasetPath.split("/")
991 +            
992 +            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
993 +            txt += 'DataTier='+datasetpath_split[2]+'\n'
994 +            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
995 +            txt += 'ApplicationFamily=cmsRun\n'
996 +
997 +        else:
998 +            txt += 'DatasetPath=MCDataTier\n'
999 +            txt += 'PrimaryDataset=null\n'
1000 +            txt += 'DataTier=null\n'
1001 +            #txt += 'ProcessedDataset=null\n'
1002 +            txt += 'ApplicationFamily=MCDataTier\n'
1003          if self.pset != None: #CarlosDaniele
1004              pset = os.path.basename(job.configFilename())
1005              txt += '\n'
1006 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1007              if (self.datasetPath): # standard job
1008                  #txt += 'InputFiles=$2\n'
1009                  txt += 'InputFiles=${args[1]}\n'
1010                  txt += 'MaxEvents=${args[2]}\n'
1011                  txt += 'SkipEvents=${args[3]}\n'
1012                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
1013 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
1013 >                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1014                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1015 <                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
1015 >                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1016                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1017 <                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
1017 >                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1018              else:  # pythia like job
1019 +                seedIndex=1
1020 +                if (self.firstRun):
1021 +                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1022 +                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1023 +                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1024 +                    seedIndex=seedIndex+1
1025 +
1026                  if (self.sourceSeed):
1027 < #                    txt += 'Seed=$2\n'
1028 <                    txt += 'Seed=${args[1]}\n'
1029 <                    txt += 'echo "Seed: <$Seed>"\n'
1030 <                    txt += 'sed "s#\<INPUT\>#$Seed#" $RUNTIME_AREA/'+pset+' > tmp.cfg\n'
1027 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1028 >                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1029 >                    seedIndex=seedIndex+1
1030 >                    ## the following seeds are not always present
1031                      if (self.sourceSeedVtx):
1032 < #                        txt += 'VtxSeed=$3\n'
821 <                        txt += 'VtxSeed=${args[2]}\n'
1032 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1033                          txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1034 <                        txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp.cfg > pset.cfg\n'
1035 <                    else:
1036 <                        txt += 'mv tmp.cfg pset.cfg\n'
1037 <                else:
1038 <                    txt += '# Copy untouched pset\n'
1039 <                    txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
1040 <
1034 >                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1035 >                        seedIndex += 1
1036 >                    if (self.sourceSeedG4):
1037 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1038 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1039 >                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1040 >                        seedIndex += 1
1041 >                    if (self.sourceSeedMix):
1042 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1043 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1044 >                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1045 >                        seedIndex += 1
1046 >                    pass
1047 >                pass
1048 >            txt += 'mv -f '+pset+' pset.cfg\n'
1049  
1050          if len(self.additional_inbox_files) > 0:
1051 <            for file in self.additional_inbox_files:
1052 <                relFile = file.split("/")[-1]
1053 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
835 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
836 <                txt += '   chmod +x '+relFile+'\n'
837 <                txt += 'fi\n'
1051 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1052 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1053 >            txt += 'fi\n'
1054              pass
1055  
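Editor's note: the wrapper script fragments above now edit the shipped pset in place with repeated sed "s#PLACEHOLDER#$VALUE#" pset > tmp && mv -f tmp pset substitutions, one per placeholder (INPUT, INPUTMAXEVENTS, INPUTSKIPEVENTS, the seeds, and so on; the generated sed uses \< \> word boundaries for the bare INPUT token). The same substitution expressed in Python, purely to illustrate what the generated shell does; the placeholder spellings in the example are illustrative:

    # Illustration only: replace per-job placeholders in a pset-like string.
    def fill_placeholders(pset_text, values):
        for placeholder, value in values.items():
            pset_text = pset_text.replace(placeholder, str(value))
        return pset_text

    example = "untracked int32 maxEvents = INPUTMAXEVENTS\nuntracked uint32 skipEvents = INPUTSKIPEVENTS"
    filled = fill_placeholders(example, {"INPUTMAXEVENTS": 500, "INPUTSKIPEVENTS": 1000})
    assert "500" in filled and "1000" in filled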
1056          if self.pset != None: #CarlosDaniele
# Line 845 | Line 1061 | class Cmssw(JobType):
1061              txt += 'cat pset.cfg\n'
1062              txt += 'echo "****** end pset.cfg ********"\n'
1063              txt += '\n'
1064 +            ### FEDE FOR DBS OUTPUT PUBLICATION
1065 +            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1066 +            txt += 'echo "PSETHASH = $PSETHASH" \n'
1067 +            ##############
1068 +            txt += '\n'
1069              # txt += 'echo "***** cat pset1.cfg *********"\n'
1070              # txt += 'cat pset1.cfg\n'
1071              # txt += 'echo "****** end pset1.cfg ********"\n'
1072          return txt
1073  
1074 <    def wsBuildExe(self, nj):
1074 >    def wsBuildExe(self, nj=0):
1075          """
1076          Put in the script the commands to build an executable
1077          or a library.
# Line 886 | Line 1107 | class Cmssw(JobType):
1107              txt += '   echo "Successful untar" \n'
1108              txt += 'fi \n'
1109              txt += '\n'
1110 <            txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1110 >            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1111              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1112 <            txt += '   export PYTHONPATH=ProdAgentApi\n'
1112 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1113 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1114 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1115 >            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1116              txt += 'else\n'
1117 <            txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1117 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1118 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1119 >            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1120 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1121 >            ###################  
1122              txt += 'fi\n'
1123              txt += '\n'
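            # Note: $SOFTWARE_DIR is assumed to be defined earlier in the generated
            # wrapper by the environment-setup fragment; ProdAgentApi and ProdCommon
            # are added to PYTHONPATH because ModifyJobReport.py (used for DBS output
            # publication) imports from those packages.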
1124  
# Line 905 | Line 1133 | class Cmssw(JobType):
1133          """
1134          
1135      def executableName(self):
1136 <        if self.pset == None: #CarlosDaniele
1136 >        if self.scriptExe: #CarlosDaniele
1137              return "sh "
1138          else:
1139              return self.executable
1140  
1141      def executableArgs(self):
1142 <        if self.pset == None:#CarlosDaniele
1142 >        if self.scriptExe:#CarlosDaniele
1143              return   self.scriptExe + " $NJob"
1144 <        else:
1145 <            return " -p pset.cfg"
1144 >        else:
1145 >            # if >= CMSSW_1_5_X, add -e
1146 >            version_array = self.scram.getSWVersion().split('_')
1147 >            major = 0
1148 >            minor = 0
1149 >            try:
1150 >                major = int(version_array[1])
1151 >                minor = int(version_array[2])
1152 >            except:
1153 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1154 >                raise CrabException(msg)
 1155 >            if major > 1 or (major == 1 and minor >= 5):
1156 >                return " -e -p pset.cfg"
1157 >            else:
1158 >                return " -p pset.cfg"
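        # Note: for releases from CMSSW_1_5_X onwards the wrapper adds "-e" to the
        # cmsRun arguments, presumably to enable the framework job report that is
        # later collected as crab_fjr_$NJob.xml; older releases are only given
        # "-p pset.cfg".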
1159  
1160      def inputSandbox(self, nj):
1161          """
1162          Returns a list of filenames to be put in JDL input sandbox.
1163          """
1164          inp_box = []
1165 <        # dict added to delete duplicate from input sandbox file list
1166 <        seen = {}
1165 >        # # dict added to delete duplicate from input sandbox file list
1166 >        # seen = {}
1167          ## code
1168          if os.path.isfile(self.tgzNameWithPath):
1169              inp_box.append(self.tgzNameWithPath)
1170 +        if os.path.isfile(self.MLtgzfile):
1171 +            inp_box.append(self.MLtgzfile)
1172          ## config
1173 <        if not self.pset is None: #CarlosDaniele
1174 <            inp_box.append(common.job_list[nj].configFilename())
1173 >        if not self.pset is None:
1174 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1175          ## additional input files
1176 <        #for file in self.additional_inbox_files:
1177 <        #    inp_box.append(common.work_space.cwdDir()+file)
1176 >        tgz = self.additionalInputFileTgz()
1177 >        inp_box.append(tgz)
1178          return inp_box
1179  
1180      def outputSandbox(self, nj):
# Line 940 | Line 1183 | class Cmssw(JobType):
1183          """
1184          out_box = []
1185  
943        stdout=common.job_list[nj].stdout()
944        stderr=common.job_list[nj].stderr()
945
1186          ## User Declared output files
1187 <        for out in self.output_file:
1187 >        for out in (self.output_file+self.output_file_sandbox):
1188              n_out = nj + 1
1189              out_box.append(self.numberFile_(out,str(n_out)))
 1190          return out_box
951        return []
1191  
1192      def prepareSteeringCards(self):
1193          """
# Line 964 | Line 1203 | class Cmssw(JobType):
1203          txt = '\n'
1204          txt += '# directory content\n'
1205          txt += 'ls \n'
1206 <        file_list = ''
1207 <        for fileWithSuffix in self.output_file:
1206 >
1207 >        txt += 'output_exit_status=0\n'
1208 >        
1209 >        for fileWithSuffix in (self.output_file_sandbox):
1210              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
970            file_list=file_list+output_file_num+' '
1211              txt += '\n'
1212              txt += '# check output file\n'
1213 <            txt += 'ls '+fileWithSuffix+'\n'
1214 <            txt += 'ls_result=$?\n'
1215 <            #txt += 'exe_result=$?\n'
1216 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1217 <            txt += '   echo "ERROR: Problem with output file"\n'
1218 <            #txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
979 <            #txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
980 <            #txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
981 <            ### OLI_DANIELE
1213 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1214 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1215 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1216 >            txt += 'else\n'
1217 >            txt += '    exit_status=60302\n'
1218 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1219              if common.scheduler.boss_scheduler_name == 'condor_g':
1220                  txt += '    if [ $middleware == OSG ]; then \n'
1221                  txt += '        echo "prepare dummy output file"\n'
1222                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1223                  txt += '    fi \n'
1224 +            txt += 'fi\n'
1225 +        
1226 +        for fileWithSuffix in (self.output_file):
1227 +            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1228 +            txt += '\n'
1229 +            txt += '# check output file\n'
1230 +            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1231 +            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1232 +            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1233              txt += 'else\n'
1234 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1234 >            txt += '    exit_status=60302\n'
1235 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1236 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1237 >            txt += '    output_exit_status=$exit_status\n'
1238 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1239 >                txt += '    if [ $middleware == OSG ]; then \n'
1240 >                txt += '        echo "prepare dummy output file"\n'
1241 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1242 >                txt += '    fi \n'
1243              txt += 'fi\n'
1244 <      
1245 <        txt += 'cd $RUNTIME_AREA\n'
1246 <        file_list=file_list[:-1]
1247 <        txt += 'file_list="'+file_list+'"\n'
1244 >        file_list = []
1245 >        for fileWithSuffix in (self.output_file):
 1246 >            file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1247 >            
1248 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1249          txt += 'cd $RUNTIME_AREA\n'
995        ### OLI_DANIELE
996        txt += 'if [ $middleware == OSG ]; then\n'  
997        txt += '    cd $RUNTIME_AREA\n'
998        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
999        txt += '    /bin/rm -rf $WORKING_DIR\n'
1000        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1001        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1002        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1003        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1004        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1005        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1006        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1007        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1008        txt += '    fi\n'
1009        txt += 'fi\n'
1010        txt += '\n'
1250          return txt
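    # Note: the fragment generated above checks that each declared output file exists
    # in the working directory, moves it to $RUNTIME_AREA and copies it under a
    # per-job name (see numberFile_); a missing file sets exit code 60302 and, for
    # the real output files, is propagated through output_exit_status.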
1251  
1252      def numberFile_(self, file, txt):
# Line 1018 | Line 1257 | class Cmssw(JobType):
1257          # take away last extension
1258          name = p[0]
1259          for x in p[1:-1]:
1260 <           name=name+"."+x
1260 >            name=name+"."+x
1261          # add "_txt"
1262          if len(p)>1:
1263 <          ext = p[len(p)-1]
1264 <          result = name + '_' + txt + "." + ext
1263 >            ext = p[len(p)-1]
1264 >            result = name + '_' + txt + "." + ext
1265          else:
1266 <          result = name + '_' + txt
1266 >            result = name + '_' + txt
1267          
1268          return result
1269  
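    # Note: numberFile_ inserts the job tag before the last extension, e.g.
    #   numberFile_('histo.root', '3')          -> 'histo_3.root'
    #   numberFile_('out.ntuple.root', '$NJob') -> 'out.ntuple_$NJob.root'
    #   numberFile_('logfile', '3')             -> 'logfile_3'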
1270 <    def getRequirements(self):
1270 >    def getRequirements(self, nj=[]):
1271          """
1272          return job requirements to add to jdl files
1273          """
# Line 1037 | Line 1276 | class Cmssw(JobType):
1276              req='Member("VO-cms-' + \
1277                   self.version + \
1278                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1279 +        ## SL add requirement for OS version only if SL4
1280 +        #reSL4 = re.compile( r'slc4' )
1281 +        if self.executable_arch: # and reSL4.search(self.executable_arch):
1282 +            req+=' && Member("VO-cms-' + \
1283 +                 self.executable_arch + \
1284 +                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1285  
1286          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
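        # Note: the resulting Requirements expression selects CEs advertising both the
        # CMSSW release tag (VO-cms-<version>) and, when known, the SCRAM architecture
        # tag (VO-cms-<arch>) in GlueHostApplicationSoftwareRunTimeEnvironment, and
        # demands outbound network connectivity.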
1287  
# Line 1056 | Line 1301 | class Cmssw(JobType):
1301          txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1302          txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1303          txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1304 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1305          txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1306          txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1307          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1308 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1309          txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1310          txt += '   else\n'
1311          txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
# Line 1074 | Line 1321 | class Cmssw(JobType):
1321          txt += '       cd $RUNTIME_AREA\n'
1322          txt += '       /bin/rm -rf $WORKING_DIR\n'
1323          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1324 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1326 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1328 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1329 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1324 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1326 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1328 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1329 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1331          txt += '       fi\n'
1332          txt += '\n'
1333          txt += '       exit 1\n'
# Line 1135 | Line 1382 | class Cmssw(JobType):
1382          txt += '       fi\n'
1383          txt += '   fi\n'
1384          txt += '   \n'
1138        txt += '   string=`cat /etc/redhat-release`\n'
1139        txt += '   echo $string\n'
1140        txt += '   if [[ $string = *alhalla* ]]; then\n'
1141        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1142        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1143        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1144        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1145        txt += '   else\n'
1146        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1147        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1148        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1149        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1150        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1151        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1152        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1153        txt += '       exit 1\n'
1154        txt += '   fi\n'
1385          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1386          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1387          return txt
1388  
1389 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1390 +    def modifyReport(self, nj):
1391 +        """
 1392 +        insert the part of the script that modifies the FrameworkJobReport
1393 +        """
1394 +
1395 +        txt = ''
1396 +        try:
1397 +            publish_data = int(self.cfg_params['USER.publish_data'])          
1398 +        except KeyError:
1399 +            publish_data = 0
1400 +        if (publish_data == 1):  
1401 +            txt += 'echo "Modify Job Report" \n'
1402 +            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1403 +            ################ FEDE FOR DBS2 #############################################
1404 +            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1405 +            #############################################################################
1406 +            #try:
1407 +            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1408 +            #except KeyError:
1409 +            #    publish_data = 0
1410 +
1411 +            txt += 'if [ -z "$SE" ]; then\n'
1412 +            txt += '    SE="" \n'
1413 +            txt += 'fi \n'
1414 +            txt += 'if [ -z "$SE_PATH" ]; then\n'
1415 +            txt += '    SE_PATH="" \n'
1416 +            txt += 'fi \n'
1417 +            txt += 'echo "SE = $SE"\n'
1418 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1419 +
1420 +        #if (publish_data == 1):  
1421 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1422 +            processedDataset = self.cfg_params['USER.publish_data_name']
1423 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1424 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1425 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1426 +            #### FEDE: added slash in LFN ##############
1427 +            txt += '    FOR_LFN=/copy_problems/ \n'
1428 +            txt += 'else \n'
1429 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1430 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1431 +            txt += '    FOR_LFN=/store$tmp \n'
1432 +            txt += 'fi \n'
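            # Note: the awk expression above splits $SE_PATH on the literal string
            # 'store', so an illustrative SE_PATH of /pnfs/some.node/data/store/user/fred/
            # yields FOR_LFN=/store/user/fred/; when $SE_PATH is empty the output is
            # filed under /copy_problems/ instead.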
1433 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1434 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1435 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1436 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1437 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1438 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1439 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1440 +      
1441 +            txt += 'modifyReport_result=$?\n'
1442 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1443 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1444 +            txt += '    exit_status=1\n'
1445 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1446 +            txt += 'else\n'
1447 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1448 +            txt += 'fi\n'
1449 +        else:
1450 +            txt += 'echo "no data publication required"\n'
1451 +            #txt += 'ProcessedDataset=no_data_to_publish \n'
1452 +            #### FEDE: added slash in LFN ##############
1453 +            #txt += 'FOR_LFN=/local/ \n'
1454 +            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1455 +            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1456 +        return txt
1457 +
1458 +    def cleanEnv(self):
1459 +        ### OLI_DANIELE
1460 +        txt = ''
1461 +        txt += 'if [ $middleware == OSG ]; then\n'  
1462 +        txt += '    cd $RUNTIME_AREA\n'
1463 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1464 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1465 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
 1466 +        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
 1467 +        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
 1468 +        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
 1469 +        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1470 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1471 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1472 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1473 +        txt += '    fi\n'
1474 +        txt += 'fi\n'
1475 +        txt += '\n'
1476 +        return txt
1477 +
1478      def setParam_(self, param, value):
1479          self._params[param] = value
1480  
# Line 1168 | Line 1487 | class Cmssw(JobType):
1487      def getTaskid(self):
1488          return self._taskId
1489  
1171 #######################################################################
1490      def uniquelist(self, old):
1491          """
1492          remove duplicates from a list
# Line 1177 | Line 1495 | class Cmssw(JobType):
1495          for e in old:
1496              nd[e]=0
1497          return nd.keys()
1498 +
1499 +
1500 +    def checkOut(self, limit):
1501 +        """
 1502 +        check the size of the output files against the output sandbox limit
1503 +        """
1504 +        txt = 'echo "*****************************************"\n'
1505 +        txt += 'echo "** Starting output sandbox limit check **"\n'
1506 +        txt += 'echo "*****************************************"\n'
1507 +        allOutFiles = ""
1508 +        listOutFiles = []
1509 +        for fileOut in (self.output_file+self.output_file_sandbox):
 1510 +            if fileOut.find('crab_fjr') == -1:
 1511 +                allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
 1512 +                listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1513 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1514 +        txt += 'ls -gGhrta;\n'
1515 +        txt += 'sum=0;\n'
1516 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1517 +        txt += '    if [ -e $file ]; then\n'
1518 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1519 +        txt += '        sum=`expr $sum + $tt`\n'
1520 +        txt += '    else\n'
1521 +        txt += '        echo "WARNING: output file $file not found!"\n'
1522 +        txt += '    fi\n'
1523 +        txt += 'done\n'
1524 +        txt += 'echo "Total Output dimension: $sum";\n'
1525 +        txt += 'limit='+str(limit)+';\n'
1526 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1527 +        txt += 'if [ $limit -lt $sum ]; then\n'
 1528 +        txt += '    echo "WARNING: output files are too big - something will be lost;"\n'
1529 +        txt += '    echo "         checking the output file sizes..."\n'
1530 +        """
1531 +        txt += '    dim=0;\n'
1532 +        txt += '    exclude=0;\n'
1533 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1534 +        txt += '        sumTemp=0;\n'
1535 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1536 +        txt += '            if [ $file != $file2 ]; then\n'
1537 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1538 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1539 +        txt += '            fi\n'
1540 +        txt += '        done\n'
1541 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1542 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1543 +        txt += '                dim=$sumTemp;\n'
1544 +        txt += '                exclude=$file;\n'
1545 +        txt += '            fi\n'
1546 +        txt += '        fi\n'
1547 +        txt += '    done\n'
1548 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1549 +        """
1550 +        txt += '    tot=0;\n'
1551 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1552 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1553 +        txt += '        tot=`expr $tot + $tt`;\n'
1554 +        txt += '        if [ $limit -lt $tot ]; then\n'
1555 +        txt += '            tot=`expr $tot - $tt`;\n'
 1556 +        txt += '            fileLast=$file2;\n'
1557 +        txt += '            break;\n'
1558 +        txt += '        fi\n'
1559 +        txt += '    done\n'
 1560 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n'
1561 +        txt += '    flag=0;\n'    
1562 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1563 +        txt += '        if [ $fileLast = $filess ]; then\n'
1564 +        txt += '            flag=1;\n'
1565 +        txt += '        fi\n'
1566 +        txt += '        if [ $flag -eq 1 ]; then\n'
1567 +        txt += '            rm -f $filess;\n'
1568 +        txt += '        fi\n'
1569 +        txt += '    done\n'
1570 +        txt += '    ls -agGhrt;\n'
 1571 +        txt += '    echo "WARNING: output files are too big: they cannot all fit in the output_sandbox.";\n'
1572 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1573 +        txt += '    exit_status=70000;\n'
 1574 +        txt += 'else\n'
1575 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1576 +        txt += 'fi\n'
1577 +        txt += 'echo "*****************************************"\n'
1578 +        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1579 +        txt += 'echo "*****************************************"\n'
1580 +        return txt
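    # Note: the fragment generated by checkOut() sums the sizes of the expected output
    # files (third field of `ls -gG`), compares the total with the given limit and, if
    # the limit is exceeded, deletes every file from the first one that pushes the
    # running total over the limit onwards, setting exit status 70000; the
    # triple-quoted block above is an alternative selection strategy left inactive.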
