ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.116.2.1 by fanzago, Tue Oct 9 10:46:56 2007 UTC vs.
Revision 1.205 by slacapra, Thu Jun 5 16:34:04 2008 UTC

# Line 5 | Line 5 | from crab_util import *
5   from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8 + from LFNBaseName import *
9  
10   import os, string, glob
11  
# Line 13 | Line 14 | class Cmssw(JobType):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16  
17 +        self.argsList = []
18 +
19          self._params = {}
20          self.cfg_params = cfg_params
18
21          # init BlackWhiteListParser
22          self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
23  
24 <        try:
23 <            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 <        except KeyError:
25 <            self.MaxTarBallSize = 9.5
24 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
25  
26          # number of jobs requested to be created, limit obj splitting
27          self.ncjobs = ncjobs
28  
29          log = common.logger
30 <        
30 >
31          self.scram = Scram.Scram(cfg_params)
32          self.additional_inbox_files = []
33          self.scriptExe = ''
34          self.executable = ''
35          self.executable_arch = self.scram.getArch()
36          self.tgz_name = 'default.tgz'
38        self.additional_tgz_name = 'additional.tgz'
37          self.scriptName = 'CMSSW.sh'
38 <        self.pset = ''      #scrip use case Da  
39 <        self.datasetPath = '' #scrip use case Da
38 >        self.pset = ''
39 >        self.datasetPath = ''
40  
41          # set FJR file name
42          self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 <        
46 <        #
47 <        # Try to block creation in case of arch/version mismatch
48 <        #
49 <
50 <        a = string.split(self.version, "_")
51 <
52 <        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
54 <            raise CrabException(msg)
57 <        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
45 >        version_array = self.version.split('_')
46 >        self.CMSSW_major = 0
47 >        self.CMSSW_minor = 0
48 >        self.CMSSW_patch = 0
49 >        try:
50 >            self.CMSSW_major = int(version_array[1])
51 >            self.CMSSW_minor = int(version_array[2])
52 >            self.CMSSW_patch = int(version_array[3])
53 >        except:
54 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
55              raise CrabException(msg)
60        
61        common.taskDB.setDict('codeVersion',self.version)
62        self.setParam_('application', self.version)
56  
57          ### collect Data cards
58  
59 <        ## get DBS mode
60 <        try:
68 <            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 <        except KeyError:
70 <            self.use_dbs_1 = 0
71 <            
72 <        try:
73 <            tmp =  cfg_params['CMSSW.datasetpath']
74 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75 <            if string.lower(tmp)=='none':
76 <                self.datasetPath = None
77 <                self.selectNoInput = 1
78 <            else:
79 <                self.datasetPath = tmp
80 <                self.selectNoInput = 0
81 <        except KeyError:
82 <            msg = "Error: datasetpath not defined "  
59 >        if not cfg_params.has_key('CMSSW.datasetpath'):
60 >            msg = "Error: datasetpath not defined "
61              raise CrabException(msg)
62 <
63 <        # ML monitoring
64 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
65 <        if not self.datasetPath:
66 <            self.setParam_('dataset', 'None')
89 <            self.setParam_('owner', 'None')
62 >        tmp =  cfg_params['CMSSW.datasetpath']
63 >        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
64 >        if string.lower(tmp)=='none':
65 >            self.datasetPath = None
66 >            self.selectNoInput = 1
67          else:
68 <            try:
69 <                datasetpath_split = self.datasetPath.split("/")
93 <                # standard style
94 <                self.setParam_('datasetFull', self.datasetPath)
95 <                if self.use_dbs_1 == 1 :
96 <                    self.setParam_('dataset', datasetpath_split[1])
97 <                    self.setParam_('owner', datasetpath_split[-1])
98 <                else:
99 <                    self.setParam_('dataset', datasetpath_split[1])
100 <                    self.setParam_('owner', datasetpath_split[2])
101 <            except:
102 <                self.setParam_('dataset', self.datasetPath)
103 <                self.setParam_('owner', self.datasetPath)
104 <                
105 <        self.setTaskid_()
106 <        self.setParam_('taskId', self.cfg_params['taskId'])
68 >            self.datasetPath = tmp
69 >            self.selectNoInput = 0
70  
71          self.dataTiers = []
72 <
72 >        self.debugWrap = ''
73 >        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
74 >        if self.debug_wrapper: self.debugWrap='--debug'
75          ## now the application
76 <        try:
77 <            self.executable = cfg_params['CMSSW.executable']
113 <            self.setParam_('exe', self.executable)
114 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
116 <            log.debug(3,msg)
117 <        except KeyError:
118 <            self.executable = 'cmsRun'
119 <            self.setParam_('exe', self.executable)
120 <            msg = "User executable not defined. Use cmsRun"
121 <            log.debug(3,msg)
122 <            pass
76 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
77 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
78  
79 <        try:
125 <            self.pset = cfg_params['CMSSW.pset']
126 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if self.pset.lower() != 'none' :
128 <                if (not os.path.exists(self.pset)):
129 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 <            else:
131 <                self.pset = None
132 <        except KeyError:
79 >        if not cfg_params.has_key('CMSSW.pset'):
80              raise CrabException("PSet file missing. Cannot run cmsRun ")
81 +        self.pset = cfg_params['CMSSW.pset']
82 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
83 +        if self.pset.lower() != 'none' :
84 +            if (not os.path.exists(self.pset)):
85 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
86 +        else:
87 +            self.pset = None
88  
89          # output files
90          ## stuff which must be returned always via sandbox
# Line 140 | Line 94 | class Cmssw(JobType):
94          self.output_file_sandbox.append(self.fjrFileName)
95  
96          # other output files to be returned via sandbox or copied to SE
97 <        try:
98 <            self.output_file = []
99 <            tmp = cfg_params['CMSSW.output_file']
100 <            if tmp != '':
101 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
102 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
103 <                for tmp in tmpOutFiles:
104 <                    tmp=string.strip(tmp)
151 <                    self.output_file.append(tmp)
152 <                    pass
153 <            else:
154 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
97 >        self.output_file = []
98 >        tmp = cfg_params.get('CMSSW.output_file',None)
99 >        if tmp :
100 >            tmpOutFiles = string.split(tmp,',')
101 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
102 >            for tmp in tmpOutFiles:
103 >                tmp=string.strip(tmp)
104 >                self.output_file.append(tmp)
105                  pass
106 <            pass
157 <        except KeyError:
106 >        else:
107              log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
108 <            pass
108 >        pass
109  
110          # script_exe file as additional file in inputSandbox
111 <        try:
112 <            self.scriptExe = cfg_params['USER.script_exe']
113 <            if self.scriptExe != '':
114 <               if not os.path.isfile(self.scriptExe):
115 <                  msg ="ERROR. file "+self.scriptExe+" not found"
116 <                  raise CrabException(msg)
168 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
169 <        except KeyError:
170 <            self.scriptExe = ''
111 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
112 >        if self.scriptExe :
113 >            if not os.path.isfile(self.scriptExe):
114 >                msg ="ERROR. file "+self.scriptExe+" not found"
115 >                raise CrabException(msg)
116 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
117  
172        #CarlosDaniele
118          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
119 <           msg ="Error. script_exe  not defined"
120 <           raise CrabException(msg)
119 >            msg ="Error. script_exe  not defined"
120 >            raise CrabException(msg)
121 >
122 >        # use parent files...
123 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
124  
125          ## additional input files
126 <        try:
126 >        if cfg_params.has_key('USER.additional_input_files'):
127              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
128              for tmp in tmpAddFiles:
129                  tmp = string.strip(tmp)
# Line 192 | Line 140 | class Cmssw(JobType):
140                      if not os.path.exists(file):
141                          raise CrabException("Additional input file not found: "+file)
142                      pass
195                    # fname = string.split(file, '/')[-1]
196                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197                    # shutil.copyfile(file, storedFile)
143                      self.additional_inbox_files.append(string.strip(file))
144                  pass
145              pass
146              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
147 <        except KeyError:
203 <            pass
204 <
205 <        # files per job
206 <        try:
207 <            if (cfg_params['CMSSW.files_per_jobs']):
208 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209 <        except KeyError:
210 <            pass
147 >        pass
148  
149          ## Events per job
150 <        try:
150 >        if cfg_params.has_key('CMSSW.events_per_job'):
151              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
152              self.selectEventsPerJob = 1
153 <        except KeyError:
153 >        else:
154              self.eventsPerJob = -1
155              self.selectEventsPerJob = 0
156 <    
156 >
157          ## number of jobs
158 <        try:
158 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
159              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
160              self.selectNumberOfJobs = 1
161 <        except KeyError:
161 >        else:
162              self.theNumberOfJobs = 0
163              self.selectNumberOfJobs = 0
164  
165 <        try:
165 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
166              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
167              self.selectTotalNumberEvents = 1
168 <        except KeyError:
168 >            if self.selectNumberOfJobs  == 1:
169 >                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
170 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
171 >                    raise CrabException(msg)
172 >        else:
173              self.total_number_of_events = 0
174              self.selectTotalNumberEvents = 0
175  
176 <        if self.pset != None: #CarlosDaniele
176 >        if self.pset != None:
177               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
178                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
179                   raise CrabException(msg)
# Line 241 | Line 182 | class Cmssw(JobType):
182                   msg = 'Must specify  number_of_jobs.'
183                   raise CrabException(msg)
184  
185 <        ## source seed for pythia
186 <        try:
187 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
188 <        except KeyError:
189 <            self.sourceSeed = None
190 <            common.logger.debug(5,"No seed given")
191 <
192 <        try:
193 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
194 <        except KeyError:
195 <            self.sourceSeedVtx = None
196 <            common.logger.debug(5,"No vertex seed given")
185 >        ## New method of dealing with seeds
186 >        self.incrementSeeds = []
187 >        self.preserveSeeds = []
188 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
189 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
190 >            for tmp in tmpList:
191 >                tmp.strip()
192 >                self.preserveSeeds.append(tmp)
193 >        if cfg_params.has_key('CMSSW.increment_seeds'):
194 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
195 >            for tmp in tmpList:
196 >                tmp.strip()
197 >                self.incrementSeeds.append(tmp)
198 >
199 >        ## Old method of dealing with seeds
200 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
201 >        ## remove
202 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
203 >        if self.sourceSeed:
204 >            print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
205 >            self.incrementSeeds.append('sourceSeed')
206 >            self.incrementSeeds.append('theSource')
207 >
208 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
209 >        if self.sourceSeedVtx:
210 >            print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
211 >            self.incrementSeeds.append('VtxSmeared')
212 >
213 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
214 >        if self.sourceSeedG4:
215 >            print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
216 >            self.incrementSeeds.append('g4SimHits')
217 >
218 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
219 >        if self.sourceSeedMix:
220 >            print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
221 >            self.incrementSeeds.append('mix')
222  
223 <        try:
258 <            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259 <        except KeyError:
260 <            self.sourceSeedG4 = None
261 <            common.logger.debug(5,"No g4 sim hits seed given")
262 <
263 <        try:
264 <            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265 <        except KeyError:
266 <            self.sourceSeedMix = None
267 <            common.logger.debug(5,"No mix seed given")
223 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
224  
269        try:
270            self.firstRun = int(cfg_params['CMSSW.first_run'])
271        except KeyError:
272            self.firstRun = None
273            common.logger.debug(5,"No first run given")
225          if self.pset != None: #CarlosDaniele
226 <            ver = string.split(self.version,"_")
276 <            if (int(ver[1])>=1 and int(ver[2])>=5):
277 <                import PsetManipulator150 as pp
278 <            else:
279 <                import PsetManipulator as pp
226 >            import PsetManipulator as pp
227              PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
228  
229 +        # Copy/return
230 +
231 +        self.copy_data = int(cfg_params.get('USER.copy_data',0))
232 +        self.return_data = int(cfg_params.get('USER.return_data',0))
233 +
234          #DBSDLS-start
235 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
235 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
236          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
237          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
238          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 289 | Line 241 | class Cmssw(JobType):
241          blockSites = {}
242          if self.datasetPath:
243              blockSites = self.DataDiscoveryAndLocation(cfg_params)
244 <        #DBSDLS-end          
244 >        #DBSDLS-end
245 >
246  
294        self.tgzNameWithPath = self.getTarBall(self.executable)
295    
247          ## Select Splitting
248 <        if self.selectNoInput:
249 <            if self.pset == None: #CarlosDaniele
248 >        if self.selectNoInput:
249 >            if self.pset == None:
250                  self.jobSplittingForScript()
251              else:
252                  self.jobSplittingNoInput()
# Line 303 | Line 254 | class Cmssw(JobType):
254              self.jobSplittingByBlocks(blockSites)
255  
256          # modify Pset
257 <        if self.pset != None: #CarlosDaniele
257 >        if self.pset != None:
258              try:
259 <                if (self.datasetPath): # standard job
260 <                    # allow to processa a fraction of events in a file
261 <                    PsetEdit.inputModule("INPUT")
262 <                    PsetEdit.maxEvent("INPUTMAXEVENTS")
312 <                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 <                else:  # pythia like job
314 <                    PsetEdit.maxEvent(self.eventsPerJob)
315 <                    if (self.firstRun):
316 <                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
317 <                    if (self.sourceSeed) :
318 <                        PsetEdit.pythiaSeed("INPUT")
319 <                        if (self.sourceSeedVtx) :
320 <                            PsetEdit.vtxSeed("INPUTVTX")
321 <                        if (self.sourceSeedG4) :
322 <                            PsetEdit.g4Seed("INPUTG4")
323 <                        if (self.sourceSeedMix) :
324 <                            PsetEdit.mixSeed("INPUTMIX")
325 <                # add FrameworkJobReport to parameter-set
326 <                PsetEdit.addCrabFJR(self.fjrFileName)
259 >                # Add FrameworkJobReport to parameter-set, set max events.
260 >                # Reset later for data jobs by writeCFG which does all modifications
261 >                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
262 >                PsetEdit.maxEvent(self.eventsPerJob)
263                  PsetEdit.psetWriter(self.configFilename())
264              except:
265 <                msg='Error while manipuliating ParameterSet: exiting...'
265 >                msg='Error while manipulating ParameterSet: exiting...'
266                  raise CrabException(msg)
267 +        self.tgzNameWithPath = self.getTarBall(self.executable)
268  
269      def DataDiscoveryAndLocation(self, cfg_params):
270  
271          import DataDiscovery
335        import DataDiscovery_DBS2
272          import DataLocation
273          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
274  
# Line 341 | Line 277 | class Cmssw(JobType):
277          ## Contact the DBS
278          common.logger.message("Contacting Data Discovery Services ...")
279          try:
280 <
345 <            if self.use_dbs_1 == 1 :
346 <                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 <            else :
348 <                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
280 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
281              self.pubdata.fetchDBSInfo()
282  
283          except DataDiscovery.NotExistingDatasetError, ex :
# Line 357 | Line 289 | class Cmssw(JobType):
289          except DataDiscovery.DataDiscoveryError, ex:
290              msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
291              raise CrabException(msg)
360        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362            raise CrabException(msg)
363        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365            raise CrabException(msg)
366        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368            raise CrabException(msg)
292  
293          self.filesbyblock=self.pubdata.getFiles()
294          self.eventsbyblock=self.pubdata.getEventsPerBlock()
295          self.eventsbyfile=self.pubdata.getEventsPerFile()
296 +        self.parentFiles=self.pubdata.getParent()
297  
298          ## get max number of events
299 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
299 >        self.maxEvents=self.pubdata.getMaxEvents()
300  
301          ## Contact the DLS and build a list of sites hosting the fileblocks
302          try:
# Line 381 | Line 305 | class Cmssw(JobType):
305          except DataLocation.DataLocationError , ex:
306              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
307              raise CrabException(msg)
308 <        
308 >
309  
310          sites = dataloc.getSites()
311          allSites = []
# Line 395 | Line 319 | class Cmssw(JobType):
319          common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
320  
321          return sites
322 <    
322 >
323      def jobSplittingByBlocks(self, blockSites):
324          """
325          Perform job splitting. Jobs run over an integer number of files
# Line 445 | Line 369 | class Cmssw(JobType):
369              totalNumberOfJobs = 999999999
370          else :
371              totalNumberOfJobs = self.ncjobs
448            
372  
373          blocks = blockSites.keys()
374          blockCount = 0
# Line 465 | Line 388 | class Cmssw(JobType):
388              blockCount += 1
389              if block not in jobsOfBlock.keys() :
390                  jobsOfBlock[block] = []
391 <            
391 >
392              if self.eventsbyblock.has_key(block) :
393                  numEventsInBlock = self.eventsbyblock[block]
394                  common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
395 <            
395 >
396                  files = self.filesbyblock[block]
397                  numFilesInBlock = len(files)
398                  if (numFilesInBlock <= 0):
# Line 477 | Line 400 | class Cmssw(JobType):
400                  fileCount = 0
401  
402                  # ---- New block => New job ---- #
403 <                parString = "\\{"
403 >                parString = ""
404                  # counter for number of events in files currently worked on
405                  filesEventCount = 0
406                  # flag if next while loop should touch new file
407                  newFile = 1
408                  # job event counter
409                  jobSkipEventCount = 0
410 <            
410 >
411                  # ---- Iterate over the files in the block until we've met the requested ---- #
412                  # ---- total # of events or we've gone over all the files in this block  ---- #
413 +                pString=''
414                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
415                      file = files[fileCount]
416 +                    if self.useParent:
417 +                        parent = self.parentFiles[file]
418 +                        for f in parent :
419 +                            pString += '\\\"' + f + '\\\"\,'
420 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
421 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
422                      if newFile :
423                          try:
424                              numEventsInFile = self.eventsbyfile[file]
# Line 500 | Line 430 | class Cmssw(JobType):
430                              newFile = 0
431                          except KeyError:
432                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503                        
433  
434 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
435                      # if less events in file remain than eventsPerJobRequested
436 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
436 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
437                          # if last file in block
438                          if ( fileCount == numFilesInBlock-1 ) :
439                              # end job using last file, use remaining events in block
440                              # close job and touch new file
441                              fullString = parString[:-2]
442 <                            fullString += '\\}'
443 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
442 >                            if self.useParent:
443 >                                fullParentString = pString[:-2]
444 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
445 >                            else:
446 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
447                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
448                              self.jobDestination.append(blockSites[block])
449                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 522 | Line 455 | class Cmssw(JobType):
455                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
456                              jobSkipEventCount = 0
457                              # reset file
458 <                            parString = "\\{"
458 >                            pString = ""
459 >                            parString = ""
460                              filesEventCount = 0
461                              newFile = 1
462                              fileCount += 1
# Line 534 | Line 468 | class Cmssw(JobType):
468                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
469                          # close job and touch new file
470                          fullString = parString[:-2]
471 <                        fullString += '\\}'
472 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
471 >                        if self.useParent:
472 >                            fullParentString = pString[:-2]
473 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
474 >                        else:
475 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
476                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
477                          self.jobDestination.append(blockSites[block])
478                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 546 | Line 483 | class Cmssw(JobType):
483                          eventsRemaining = eventsRemaining - eventsPerJobRequested
484                          jobSkipEventCount = 0
485                          # reset file
486 <                        parString = "\\{"
486 >                        pString = ""
487 >                        parString = ""
488                          filesEventCount = 0
489                          newFile = 1
490                          fileCount += 1
491 <                        
491 >
492                      # if more events in file remain than eventsPerJobRequested
493                      else :
494                          # close job but don't touch new file
495                          fullString = parString[:-2]
496 <                        fullString += '\\}'
497 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
496 >                        if self.useParent:
497 >                            fullParentString = pString[:-2]
498 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
499 >                        else:
500 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
501                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
502                          self.jobDestination.append(blockSites[block])
503                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 570 | Line 511 | class Cmssw(JobType):
511                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
512                          # remove all but the last file
513                          filesEventCount = self.eventsbyfile[file]
514 <                        parString = "\\{"
515 <                        parString += '\\\"' + file + '\\\"\,'
514 >                        if self.useParent:
515 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
516 >                        parString = '\\\"' + file + '\\\"\,'
517                      pass # END if
518                  pass # END while (iterate over files in the block)
519          pass # END while (iterate over blocks in the dataset)
# Line 579 | Line 521 | class Cmssw(JobType):
521          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
522              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
523          common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
524 <        
524 >
525          # screen output
526          screenOutput = "List of jobs and available destination sites:\n\n"
527  
528 +        # keep track of blocks with no sites to print a warning at the end
529 +        noSiteBlock = []
530 +        bloskNoSite = []
531 +
532          blockCounter = 0
533          for block in blocks:
534              if block in jobsOfBlock.keys() :
535                  blockCounter += 1
536 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
536 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
537 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
538 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
539 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
540 >                    bloskNoSite.append( blockCounter )
541  
542          common.logger.message(screenOutput)
543 +        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
544 +            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
545 +            virgola = ""
546 +            if len(bloskNoSite) > 1:
547 +                virgola = ","
548 +            for block in bloskNoSite:
549 +                msg += ' ' + str(block) + virgola
550 +            msg += '\n               Related jobs:\n                 '
551 +            virgola = ""
552 +            if len(noSiteBlock) > 1:
553 +                virgola = ","
554 +            for range_jobs in noSiteBlock:
555 +                msg += str(range_jobs) + virgola
556 +            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
557 +            if self.cfg_params.has_key('EDG.se_white_list'):
558 +                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
559 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
560 +                msg += 'Please check if the dataset is available at this site!)\n'
561 +            if self.cfg_params.has_key('EDG.ce_white_list'):
562 +                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
563 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
564 +                msg += 'Please check if the dataset is available at this site!)\n'
565 +
566 +            common.logger.message(msg)
567  
568          self.list_of_args = list_of_lists
569          return
# Line 599 | Line 573 | class Cmssw(JobType):
573          Perform job splitting based on number of event per job
574          """
575          common.logger.debug(5,'Splitting per events')
576 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
577 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
578 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
576 >
577 >        if (self.selectEventsPerJob):
578 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
579 >        if (self.selectNumberOfJobs):
580 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
581 >        if (self.selectTotalNumberEvents):
582 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
583  
584          if (self.total_number_of_events < 0):
585              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 610 | Line 588 | class Cmssw(JobType):
588          if (self.selectEventsPerJob):
589              if (self.selectTotalNumberEvents):
590                  self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
591 <            elif(self.selectNumberOfJobs) :  
591 >            elif(self.selectNumberOfJobs) :
592                  self.total_number_of_jobs =self.theNumberOfJobs
593 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
593 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
594  
595          elif (self.selectNumberOfJobs) :
596              self.total_number_of_jobs = self.theNumberOfJobs
597              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
598 <
598 >
599          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
600  
601          # is there any remainder?
# Line 633 | Line 611 | class Cmssw(JobType):
611          self.list_of_args = []
612          for i in range(self.total_number_of_jobs):
613              ## Since there is no input, any site is good
614 <           # self.jobDestination.append(["Any"])
637 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
614 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
615              args=[]
616              if (self.firstRun):
617 <                    ## pythia first run
641 <                #self.list_of_args.append([(str(self.firstRun)+str(i))])
617 >                ## pythia first run
618                  args.append(str(self.firstRun)+str(i))
643            else:
644                ## no first run
645                #self.list_of_args.append([str(i)])
646                args.append(str(i))
647            if (self.sourceSeed):
648                args.append(str(self.sourceSeed)+str(i))
649                if (self.sourceSeedVtx):
650                    ## + vtx random seed
651                    args.append(str(self.sourceSeedVtx)+str(i))
652                if (self.sourceSeedG4):
653                    ## + G4 random seed
654                    args.append(str(self.sourceSeedG4)+str(i))
655                if (self.sourceSeedMix):    
656                    ## + Mix random seed
657                    args.append(str(self.sourceSeedMix)+str(i))
658                pass
659            pass
619              self.list_of_args.append(args)
661        pass
662            
663        # print self.list_of_args
620  
621          return
622  
623  
624 <    def jobSplittingForScript(self):#CarlosDaniele
624 >    def jobSplittingForScript(self):
625          """
626          Perform job splitting based on number of job
627          """
# Line 681 | Line 637 | class Cmssw(JobType):
637          # argument is seed number.$i
638          self.list_of_args = []
639          for i in range(self.total_number_of_jobs):
684            ## Since there is no input, any site is good
685           # self.jobDestination.append(["Any"])
640              self.jobDestination.append([""])
687            ## no random seed
641              self.list_of_args.append([str(i)])
642          return
643  
644      def split(self, jobParams):
645 <
693 <        common.jobDB.load()
694 <        #### Fabio
645 >
646          njobs = self.total_number_of_jobs
647          arglist = self.list_of_args
648          # create the empty structure
649          for i in range(njobs):
650              jobParams.append("")
651 <        
651 >
652 >        listID=[]
653 >        listField=[]
654          for job in range(njobs):
655              jobParams[job] = arglist[job]
656 <            # print str(arglist[job])
657 <            # print jobParams[job]
658 <            common.jobDB.setArguments(job, jobParams[job])
659 <            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
660 <            common.jobDB.setDestination(job, self.jobDestination[job])
656 >            listID.append(job+1)
657 >            job_ToSave ={}
658 >            concString = ' '
659 >            argu=''
660 >            if len(jobParams[job]):
661 >                argu +=   concString.join(jobParams[job] )
662 >            job_ToSave['arguments']= str(job+1)+' '+argu
663 >            job_ToSave['dlsDestination']= self.jobDestination[job]
664 >            listField.append(job_ToSave)
665 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
666 >            +"                     Destination: "+str(self.jobDestination[job])
667 >            common.logger.debug(5,msg)
668 >        common._db.updateJob_(listID,listField)
669 >        self.argsList = (len(jobParams[0])+1)
670  
709        common.jobDB.save()
671          return
672 <    
712 <    def getJobTypeArguments(self, nj, sched):
713 <        result = ''
714 <        for i in common.jobDB.arguments(nj):
715 <            result=result+str(i)+" "
716 <        return result
717 <  
672 >
673      def numberOfJobs(self):
719        # Fabio
674          return self.total_number_of_jobs
675  
676      def getTarBall(self, exe):
677          """
678          Return the TarBall with lib and exe
679          """
726        
727        # if it exist, just return it
728        #
729        # Marco. Let's start to use relative path for Boss XML files
730        #
680          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
681          if os.path.exists(self.tgzNameWithPath):
682              return self.tgzNameWithPath
# Line 741 | Line 690 | class Cmssw(JobType):
690  
691          # First of all declare the user Scram area
692          swArea = self.scram.getSWArea_()
744        #print "swArea = ", swArea
745        # swVersion = self.scram.getSWVersion()
746        # print "swVersion = ", swVersion
693          swReleaseTop = self.scram.getReleaseTop_()
694 <        #print "swReleaseTop = ", swReleaseTop
749 <        
694 >
695          ## check if working area is release top
696          if swReleaseTop == '' or swArea == swReleaseTop:
697 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
698              return
699  
700          import tarfile
# Line 759 | Line 705 | class Cmssw(JobType):
705                  exeWithPath = self.scram.findFile_(executable)
706                  if ( not exeWithPath ):
707                      raise CrabException('User executable '+executable+' not found')
708 <    
708 >
709                  ## then check if it's private or not
710                  if exeWithPath.find(swReleaseTop) == -1:
711                      # the exe is private, so we must ship
# Line 768 | Line 714 | class Cmssw(JobType):
714                      # distinguish case when script is in user project area or given by full path somewhere else
715                      if exeWithPath.find(path) >= 0 :
716                          exe = string.replace(exeWithPath, path,'')
717 <                        tar.add(path+exe,os.path.basename(executable))
717 >                        tar.add(path+exe,exe)
718                      else :
719                          tar.add(exeWithPath,os.path.basename(executable))
720                      pass
721                  else:
722                      # the exe is from release, we'll find it on WN
723                      pass
724 <    
724 >
725              ## Now get the libraries: only those in local working area
726              libDir = 'lib'
727              lib = swArea+'/' +libDir
728              common.logger.debug(5,"lib "+lib+" to be tarred")
729              if os.path.exists(lib):
730                  tar.add(lib,libDir)
731 <    
731 >
732              ## Now check if module dir is present
733              moduleDir = 'module'
734              module = swArea + '/' + moduleDir
# Line 791 | Line 737 | class Cmssw(JobType):
737  
738              ## Now check if any data dir(s) is present
739              swAreaLen=len(swArea)
740 +            self.dataExist = False
741              for root, dirs, files in os.walk(swArea):
742                  if "data" in dirs:
743 +                    self.dataExist=True
744                      common.logger.debug(5,"data "+root+"/data"+" to be tarred")
745                      tar.add(root+"/data",root[swAreaLen:]+"/data")
746  
747 <            ## Add ProdAgent dir to tar
748 <            paDir = 'ProdAgentApi'
749 <            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
750 <            if os.path.isdir(pa):
751 <                tar.add(pa,paDir)
747 >            ### CMSSW ParameterSet
748 >            if not self.pset is None:
749 >                cfg_file = common.work_space.jobDir()+self.configFilename()
750 >                tar.add(cfg_file,self.configFilename())
751 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
752 >
753  
754 <            ### FEDE FOR DBS PUBLICATION
806 <            ## Add PRODCOMMON dir to tar
754 >            ## Add ProdCommon dir to tar
755              prodcommonDir = 'ProdCommon'
756 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
756 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
757              if os.path.isdir(prodcommonPath):
758                  tar.add(prodcommonPath,prodcommonDir)
811            #############################    
812        
759              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
760 +
761 +            ##### ML stuff
762 +            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
763 +            path=os.environ['CRABDIR'] + '/python/'
764 +            for file in ML_file_list:
765 +                tar.add(path+file,file)
766 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
767 +
768 +            ##### Utils
769 +            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
770 +            for file in Utils_file_list:
771 +                tar.add(path+file,file)
772 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
773 +
774 +            ##### AdditionalFiles
775 +            for file in self.additional_inbox_files:
776 +                tar.add(file,string.split(file,'/')[-1])
777 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
778 +
779              tar.close()
780          except :
781              raise CrabException('Could not create tar-ball')
# Line 821 | Line 786 | class Cmssw(JobType):
786              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
787  
788          ## create tar-ball with ML stuff
824        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
825        try:
826            tar = tarfile.open(self.MLtgzfile, "w:gz")
827            path=os.environ['CRABDIR'] + '/python/'
828            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
829                tar.add(path+file,file)
830            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
831            tar.close()
832        except :
833            raise CrabException('Could not create ML files tar-ball')
834        
835        return
836        
837    def additionalInputFileTgz(self):
838        """
839        Put all additional files into a tar ball and return its name
840        """
841        import tarfile
842        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
843        tar = tarfile.open(tarName, "w:gz")
844        for file in self.additional_inbox_files:
845            tar.add(file,string.split(file,'/')[-1])
846        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
847        tar.close()
848        return tarName
789  
790 <    def wsSetupEnvironment(self, nj):
790 >    def wsSetupEnvironment(self, nj=0):
791          """
792          Returns part of a job script which prepares
793          the execution environment for the job 'nj'.
794          """
795 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
796 +            psetName = 'pset.py'
797 +        else:
798 +            psetName = 'pset.cfg'
799          # Prepare JobType-independent part
800 <        txt = ''
801 <  
802 <        ## OLI_Daniele at this level  middleware already known
859 <
860 <        txt += 'if [ $middleware == LCG ]; then \n'
861 <        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
862 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
863 <        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
800 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
801 >        txt += 'echo ">>> setup environment"\n'
802 >        txt += 'if [ $middleware == LCG ]; then \n'
803          txt += self.wsSetupCMSLCGEnvironment_()
804          txt += 'elif [ $middleware == OSG ]; then\n'
805          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
806 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
807 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
808 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
809 <        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
871 <        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
872 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
873 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
874 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
875 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
876 <        txt += '        exit 1\n'
806 >        txt += '    if [ ! $? == 0 ] ;then\n'
807 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
808 >        txt += '        job_exit_code=10016\n'
809 >        txt += '        func_exit\n'
810          txt += '    fi\n'
811 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
812          txt += '\n'
813          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
814          txt += '    cd $WORKING_DIR\n'
815 <        txt += self.wsSetupCMSOSGEnvironment_()
816 <        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
883 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
815 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
816 >        txt += self.wsSetupCMSOSGEnvironment_()
817          txt += 'fi\n'
818  
819          # Prepare JobType-specific part
820          scram = self.scram.commandName()
821          txt += '\n\n'
822 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
822 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
823 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
824          txt += scram+' project CMSSW '+self.version+'\n'
825          txt += 'status=$?\n'
826          txt += 'if [ $status != 0 ] ; then\n'
827 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
828 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
829 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
896 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
897 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
898 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
899 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
900 <        ## OLI_Daniele
901 <        txt += '    if [ $middleware == OSG ]; then \n'
902 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
903 <        txt += '        cd $RUNTIME_AREA\n'
904 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
905 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
906 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
907 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
908 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
909 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
910 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
911 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
912 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
913 <        txt += '        fi\n'
914 <        txt += '    fi \n'
915 <        txt += '   exit 1 \n'
827 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
828 >        txt += '    job_exit_code=10034\n'
829 >        txt += '    func_exit\n'
830          txt += 'fi \n'
917        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
831          txt += 'cd '+self.version+'\n'
919        ########## FEDE FOR DBS2 ######################
832          txt += 'SOFTWARE_DIR=`pwd`\n'
833 <        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
922 <        ###############################################
923 <        ### needed grep for bug in scramv1 ###
924 <        txt += scram+' runtime -sh\n'
833 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
834          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
835 <        txt += 'echo $PATH\n'
836 <
835 >        txt += 'if [ $? != 0 ] ; then\n'
836 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
837 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
838 >        txt += '    job_exit_code=10034\n'
839 >        txt += '    func_exit\n'
840 >        txt += 'fi \n'
841          # Handle the arguments:
842          txt += "\n"
843          txt += "## number of arguments (first argument always jobnumber)\n"
844          txt += "\n"
845 < #        txt += "narg=$#\n"
933 <        txt += "if [ $nargs -lt 2 ]\n"
845 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
846          txt += "then\n"
847 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
848 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
849 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
938 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
939 <        txt += '    rm -f $RUNTIME_AREA/$repo \n'
940 <        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
941 <        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
942 <        ## OLI_Daniele
943 <        txt += '    if [ $middleware == OSG ]; then \n'
944 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
945 <        txt += '        cd $RUNTIME_AREA\n'
946 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
947 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
948 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
949 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
950 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
951 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
952 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
953 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
954 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
955 <        txt += '        fi\n'
956 <        txt += '    fi \n'
957 <        txt += "    exit 1\n"
847 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
848 >        txt += '    job_exit_code=50113\n'
849 >        txt += "    func_exit\n"
850          txt += "fi\n"
851          txt += "\n"
852  
853          # Prepare job-specific part
854          job = common.job_list[nj]
855 <        ### FEDE FOR DBS OUTPUT PUBLICATION
964 <        if (self.datasetPath):
855 >        if (self.datasetPath):
856              txt += '\n'
857              txt += 'DatasetPath='+self.datasetPath+'\n'
858  
859              datasetpath_split = self.datasetPath.split("/")
860 <            
860 >
861              txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
862              txt += 'DataTier='+datasetpath_split[2]+'\n'
972            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
863              txt += 'ApplicationFamily=cmsRun\n'
864  
865          else:
866              txt += 'DatasetPath=MCDataTier\n'
867              txt += 'PrimaryDataset=null\n'
868              txt += 'DataTier=null\n'
979            #txt += 'ProcessedDataset=null\n'
869              txt += 'ApplicationFamily=MCDataTier\n'
870 <        if self.pset != None: #CarlosDaniele
870 >        if self.pset != None:
871              pset = os.path.basename(job.configFilename())
872              txt += '\n'
873              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
874              if (self.datasetPath): # standard job
875 <                #txt += 'InputFiles=$2\n'
876 <                txt += 'InputFiles=${args[1]}\n'
877 <                txt += 'MaxEvents=${args[2]}\n'
878 <                txt += 'SkipEvents=${args[3]}\n'
875 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
876 >                if (self.useParent):  
877 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
878 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
879 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
880 >                else:
881 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
882 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
883                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
884 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
884 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
885                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
993                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
886                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
995                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
887              else:  # pythia like job
888 <                seedIndex=1
888 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
889 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
890 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
891 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
892                  if (self.firstRun):
893 <                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
893 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
894                      txt += 'echo "FirstRun: <$FirstRun>"\n'
1001                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1002                    seedIndex=seedIndex+1
895  
896 <                if (self.sourceSeed):
1005 <                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1006 <                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1007 <                    seedIndex=seedIndex+1
1008 <                    ## the following seeds are not always present
1009 <                    if (self.sourceSeedVtx):
1010 <                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1011 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1012 <                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1013 <                        seedIndex += 1
1014 <                    if (self.sourceSeedG4):
1015 <                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1016 <                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1017 <                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1018 <                        seedIndex += 1
1019 <                    if (self.sourceSeedMix):
1020 <                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1021 <                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1022 <                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1023 <                        seedIndex += 1
1024 <                    pass
1025 <                pass
1026 <            txt += 'mv -f '+pset+' pset.cfg\n'
896 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
897  
1028        if len(self.additional_inbox_files) > 0:
1029            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1030            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1031            txt += 'fi\n'
1032            pass
898  
899 <        if self.pset != None: #CarlosDaniele
900 <            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1036 <        
1037 <            txt += '\n'
1038 <            txt += 'echo "***** cat pset.cfg *********"\n'
1039 <            txt += 'cat pset.cfg\n'
1040 <            txt += 'echo "****** end pset.cfg ********"\n'
899 >        if self.pset != None:
900 >            # FUTURE: Can simplify for 2_1_x and higher
901              txt += '\n'
902 <            ### FEDE FOR DBS OUTPUT PUBLICATION
903 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
902 >            if self.debug_wrapper==True:
903 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
904 >                txt += 'cat ' + psetName + '\n'
905 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
906 >                txt += '\n'
907 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
908              txt += 'echo "PSETHASH = $PSETHASH" \n'
1045            ##############
909              txt += '\n'
1047            # txt += 'echo "***** cat pset1.cfg *********"\n'
1048            # txt += 'cat pset1.cfg\n'
1049            # txt += 'echo "****** end pset1.cfg ********"\n'
910          return txt
911  
912 <    def wsBuildExe(self, nj=0):
912 >    def wsUntarSoftware(self, nj=0):
913          """
914          Put in the script the commands to build an executable
915          or a library.
916          """
917  
918 <        txt = ""
918 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
919  
920          if os.path.isfile(self.tgzNameWithPath):
921 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
921 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
922              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
923 +            if  self.debug_wrapper:
924 +                txt += 'ls -Al \n'
925              txt += 'untar_status=$? \n'
926              txt += 'if [ $untar_status -ne 0 ]; then \n'
927 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
928 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
929 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1068 <            txt += '   if [ $middleware == OSG ]; then \n'
1069 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1070 <            txt += '       cd $RUNTIME_AREA\n'
1071 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
1072 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1073 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1074 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1075 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1076 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1077 <            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1078 <            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1079 <            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1080 <            txt += '       fi\n'
1081 <            txt += '   fi \n'
1082 <            txt += '   \n'
1083 <            txt += '   exit 1 \n'
927 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
928 >            txt += '   job_exit_code=$untar_status\n'
929 >            txt += '   func_exit\n'
930              txt += 'else \n'
931              txt += '   echo "Successful untar" \n'
932              txt += 'fi \n'
933              txt += '\n'
934 <            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
934 >            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
935              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
936 <            #### FEDE FOR DBS OUTPUT PUBLICATION
1091 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1092 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1093 <            #txt += '   export PYTHONPATH=ProdAgentApi\n'
936 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
937              txt += 'else\n'
938 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1096 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1097 <            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
938 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
939              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1099            ###################  
940              txt += 'fi\n'
941              txt += '\n'
942  
943              pass
944 <        
944 >
945 >        return txt
946 >
947 >    def wsBuildExe(self, nj=0):
948 >        """
949 >        Put in the script the commands to build an executable
950 >        or a library.
951 >        """
952 >
953 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
954 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
955 >
956 >        txt += 'rm -r lib/ module/ \n'
957 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
958 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
959 >        if self.dataExist == True:
960 >            txt += 'rm -r src/ \n'
961 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
962 >        if len(self.additional_inbox_files)>0:
963 >            for file in self.additional_inbox_files:
964 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
965 >        txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
966 >
967 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
968 >        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
969 >        txt += 'else\n'
970 >        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
971 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
972 >        txt += 'fi\n'
973 >        txt += '\n'
974 >
975          return txt
976  
977      def modifySteeringCards(self, nj):
978          """
979 <        modify the card provided by the user,
979 >        modify the card provided by the user,
980          writing a new card into share dir
981          """
982 <        
982 >
983      def executableName(self):
984 <        if self.scriptExe: #CarlosDaniele
984 >        if self.scriptExe:
985              return "sh "
986          else:
987              return self.executable
988  
989      def executableArgs(self):
990 +        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
991          if self.scriptExe:#CarlosDaniele
992              return   self.scriptExe + " $NJob"
993          else:
994 <            # if >= CMSSW_1_5_X, add -e
995 <            version_array = self.scram.getSWVersion().split('_')
996 <            major = 0
997 <            minor = 0
998 <            try:
999 <                major = int(version_array[1])
1000 <                minor = int(version_array[2])
1001 <            except:
1131 <                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1132 <                raise CrabException(msg)
1133 <            if major >= 1 and minor >= 5 :
1134 <                return " -e -p pset.cfg"
994 >            ex_args = ""
995 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
996 >            # Framework job report
997 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
998 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
999 >            # Type of config file
1000 >            if self.CMSSW_major >= 2 :
1001 >                ex_args += " -p pset.py"
1002              else:
1003 <                return " -p pset.cfg"
1003 >                ex_args += " -p pset.cfg"
1004 >            return ex_args
1005  
1006      def inputSandbox(self, nj):
1007          """
1008          Returns a list of filenames to be put in JDL input sandbox.
1009          """
1010          inp_box = []
1143        # # dict added to delete duplicate from input sandbox file list
1144        # seen = {}
1145        ## code
1011          if os.path.isfile(self.tgzNameWithPath):
1012              inp_box.append(self.tgzNameWithPath)
1013 <        if os.path.isfile(self.MLtgzfile):
1014 <            inp_box.append(self.MLtgzfile)
1150 <        ## config
1151 <        if not self.pset is None:
1152 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1153 <        ## additional input files
1154 <        tgz = self.additionalInputFileTgz()
1155 <        inp_box.append(tgz)
1013 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1014 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1015          return inp_box
1016  
1017      def outputSandbox(self, nj):
# Line 1163 | Line 1022 | class Cmssw(JobType):
1022  
1023          ## User Declared output files
1024          for out in (self.output_file+self.output_file_sandbox):
1025 <            n_out = nj + 1
1025 >            n_out = nj + 1
1026              out_box.append(self.numberFile_(out,str(n_out)))
1027          return out_box
1028  
# Line 1178 | Line 1037 | class Cmssw(JobType):
1037          Returns part of a job script which renames the produced files.
1038          """
1039  
1040 <        txt = '\n'
1041 <        txt += '# directory content\n'
1042 <        txt += 'ls \n'
1040 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1041 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1042 >        txt += 'echo ">>> current directory content:"\n'
1043 >        if self.debug_wrapper:
1044 >            txt += 'ls -Al\n'
1045 >        txt += '\n'
1046  
1047 <        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1047 >        for fileWithSuffix in (self.output_file):
1048              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1049              txt += '\n'
1050              txt += '# check output file\n'
1189            # txt += 'ls '+fileWithSuffix+'\n'
1190            # txt += 'ls_result=$?\n'
1051              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1052 <            ###### FEDE FOR OUTPUT DATA PUBLICATION ########
1053 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1054 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1055 <            ################################################
1052 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1053 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1054 >                #txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1055 >            else:
1056 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1057 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1058              txt += 'else\n'
1059 <            txt += '    exit_status=60302\n'
1060 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1061 <            ############# FEDE ADDED CHECK FOR OUTPUT #############
1200 <            ## MATTY's FIX: the exit option was interrupting the execution
1201 <            if fileWithSuffix in self.output_file:
1202 <                txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1203 <                txt += '    # exit $exit_status\n'
1204 <            #######################################################    
1205 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1059 >            txt += '    job_exit_code=60302\n'
1060 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1061 >            if common.scheduler.name().upper() == 'CONDOR_G':
1062                  txt += '    if [ $middleware == OSG ]; then \n'
1063                  txt += '        echo "prepare dummy output file"\n'
1064                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
# Line 1211 | Line 1067 | class Cmssw(JobType):
1067          file_list = []
1068          for fileWithSuffix in (self.output_file):
1069               file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1070 <            
1070 >
1071          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1072 +        txt += '\n'
1073 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1074 +        txt += 'echo ">>> current directory content:"\n'
1075 +        if self.debug_wrapper:
1076 +            txt += 'ls -Al\n'
1077 +        txt += '\n'
1078          txt += 'cd $RUNTIME_AREA\n'
1079 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1080          return txt
1081  
1082      def numberFile_(self, file, txt):
# Line 1231 | Line 1094 | class Cmssw(JobType):
1094              result = name + '_' + txt + "." + ext
1095          else:
1096              result = name + '_' + txt
1097 <        
1097 >
1098          return result
1099  
1100      def getRequirements(self, nj=[]):
1101          """
1102 <        return job requirements to add to jdl files
1102 >        return job requirements to add to jdl files
1103          """
1104          req = ''
1105          if self.version:
1106              req='Member("VO-cms-' + \
1107                   self.version + \
1108                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1109 <        ## SL add requirement for OS version only if SL4
1247 <        #reSL4 = re.compile( r'slc4' )
1248 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1109 >        if self.executable_arch:
1110              req+=' && Member("VO-cms-' + \
1111                   self.executable_arch + \
1112                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1113  
1114          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1115 +        if common.scheduler.name() == "glitecoll":
1116 +            req += ' && other.GlueCEStateStatus == "Production" '
1117  
1118          return req
1119  
1120      def configFilename(self):
1121          """ return the config filename """
1122 <        return self.name()+'.cfg'
1122 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1123 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1124 >          return self.name()+'.py'
1125 >        else:
1126 >          return self.name()+'.cfg'
1127  
1261    ### OLI_DANIELE
1128      def wsSetupCMSOSGEnvironment_(self):
1129          """
1130          Returns part of a job script which is prepares
1131          the execution environment and which is common for all CMS jobs.
1132          """
1133 <        txt = '\n'
1134 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1135 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1136 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1137 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1138 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1273 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1133 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1134 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1135 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1136 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1137 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1138 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1139          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1140 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1141 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1142 <        txt += '   else\n'
1143 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1144 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1145 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1281 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1282 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1283 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1284 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1285 <        txt += '       exit 1\n'
1286 <        txt += '\n'
1287 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1288 <        txt += '       cd $RUNTIME_AREA\n'
1289 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1290 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1291 <        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1292 <        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1293 <        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1294 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1295 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1296 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1297 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1298 <        txt += '       fi\n'
1299 <        txt += '\n'
1300 <        txt += '       exit 1\n'
1301 <        txt += '   fi\n'
1140 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1141 >        txt += '    else\n'
1142 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1143 >        txt += '        job_exit_code=10020\n'
1144 >        txt += '        func_exit\n'
1145 >        txt += '    fi\n'
1146          txt += '\n'
1147 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1148 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1147 >        txt += '    echo "==> setup cms environment ok"\n'
1148 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1149  
1150          return txt
1151 <
1308 <    ### OLI_DANIELE
1151 >
1152      def wsSetupCMSLCGEnvironment_(self):
1153          """
1154          Returns part of a job script which is prepares
1155          the execution environment and which is common for all CMS jobs.
1156          """
1157 <        txt  = '   \n'
1158 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1159 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1160 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1161 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1162 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1163 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1164 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1165 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1166 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1167 <        txt += '       exit 1\n'
1168 <        txt += '   else\n'
1169 <        txt += '       echo "Sourcing environment... "\n'
1170 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1171 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1172 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1173 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1174 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1175 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1176 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1177 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1178 <        txt += '           exit 1\n'
1179 <        txt += '       fi\n'
1180 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1181 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1182 <        txt += '       result=$?\n'
1183 <        txt += '       if [ $result -ne 0 ]; then\n'
1341 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1342 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1343 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1344 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1345 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1346 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1347 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1348 <        txt += '           exit 1\n'
1349 <        txt += '       fi\n'
1350 <        txt += '   fi\n'
1351 <        txt += '   \n'
1352 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1353 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1157 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1158 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1159 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1160 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1161 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1162 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1163 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1164 >        txt += '        job_exit_code=10031\n'
1165 >        txt += '        func_exit\n'
1166 >        txt += '    else\n'
1167 >        txt += '        echo "Sourcing environment... "\n'
1168 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1169 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1170 >        txt += '            job_exit_code=10020\n'
1171 >        txt += '            func_exit\n'
1172 >        txt += '        fi\n'
1173 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1174 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1175 >        txt += '        result=$?\n'
1176 >        txt += '        if [ $result -ne 0 ]; then\n'
1177 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1178 >        txt += '            job_exit_code=10032\n'
1179 >        txt += '            func_exit\n'
1180 >        txt += '        fi\n'
1181 >        txt += '    fi\n'
1182 >        txt += '    \n'
1183 >        txt += '    echo "==> setup cms environment ok"\n'
1184          return txt
1185  
1356    ### FEDE FOR DBS OUTPUT PUBLICATION
1186      def modifyReport(self, nj):
1187          """
1188 <        insert the part of the script that modifies the FrameworkJob Report
1188 >        insert the part of the script that modifies the FrameworkJob Report
1189          """
1190 +        txt = '\n#Written by cms_cmssw::modifyReport\n'
1191 +        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1192 +        if (publish_data == 1):
1193 +            processedDataset = self.cfg_params['USER.publish_data_name']
1194 +            LFNBaseName = LFNBase(processedDataset)
1195  
1196 <        txt = ''
1197 <        try:
1198 <            publish_data = int(self.cfg_params['USER.publish_data'])          
1199 <        except KeyError:
1200 <            publish_data = 0
1201 <        if (publish_data == 1):  
1202 <            txt += 'echo "Modify Job Report" \n'
1369 <            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1370 <            ################ FEDE FOR DBS2 #############################################
1371 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1372 <            #############################################################################
1373 <            #try:
1374 <            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1375 <            #except KeyError:
1376 <            #    publish_data = 0
1377 <
1378 <            txt += 'if [ -z "$SE" ]; then\n'
1379 <            txt += '    SE="" \n'
1380 <            txt += 'fi \n'
1381 <            txt += 'if [ -z "$SE_PATH" ]; then\n'
1382 <            txt += '    SE_PATH="" \n'
1383 <            txt += 'fi \n'
1384 <            txt += 'echo "SE = $SE"\n'
1385 <            txt += 'echo "SE_PATH = $SE_PATH"\n'
1196 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1197 >            txt += '    FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
1198 >            txt += 'else\n'
1199 >            txt += '    FOR_LFN=/copy_problems/ \n'
1200 >            txt += '    SE=""\n'
1201 >            txt += '    SE_PATH=""\n'
1202 >            txt += 'fi\n'
1203  
1204 <        #if (publish_data == 1):  
1205 <            #processedDataset = self.cfg_params['USER.processed_datasetname']
1389 <            processedDataset = self.cfg_params['USER.publish_data_name']
1204 >            txt += 'echo ">>> Modify Job Report:" \n'
1205 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1206              txt += 'ProcessedDataset='+processedDataset+'\n'
1391            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1392            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1393            #### FEDE: added slash in LFN ##############
1394            txt += '    FOR_LFN=/copy_problems/ \n'
1395            txt += 'else \n'
1396            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1397            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1398            txt += '    FOR_LFN=/store$tmp \n'
1399            txt += 'fi \n'
1207              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1208 +            txt += 'echo "SE = $SE"\n'
1209 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1210              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1211              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1212 <            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1213 <            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1405 <            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1406 <            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1407 <      
1212 >            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1213 >            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1214              txt += 'modifyReport_result=$?\n'
1409            txt += 'echo modifyReport_result = $modifyReport_result\n'
1215              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1216 <            txt += '    exit_status=1\n'
1217 <            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1216 >            txt += '    modifyReport_result=70500\n'
1217 >            txt += '    job_exit_code=$modifyReport_result\n'
1218 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1219 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1220              txt += 'else\n'
1221 <            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1221 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1222              txt += 'fi\n'
1416        else:
1417            txt += 'echo "no data publication required"\n'
1418            #txt += 'ProcessedDataset=no_data_to_publish \n'
1419            #### FEDE: added slash in LFN ##############
1420            #txt += 'FOR_LFN=/local/ \n'
1421            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1422            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1223          return txt
1224  
1225 <    def cleanEnv(self):
1226 <        ### OLI_DANIELE
1227 <        txt = ''
1228 <        txt += 'if [ $middleware == OSG ]; then\n'  
1229 <        txt += '    cd $RUNTIME_AREA\n'
1230 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1231 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1232 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1233 <        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1234 <        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1235 <        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1236 <        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1237 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1238 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1239 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1225 >    def wsParseFJR(self):
1226 >        """
1227 >        Parse the FrameworkJobReport to obtain useful infos
1228 >        """
1229 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1230 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1231 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1232 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1233 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1234 >        if self.debug_wrapper :
1235 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1236 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1237 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1238 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1239 >        txt += '            cat $RUNTIME_AREA/crab_fjr_NJob.xml\n'
1240 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1241 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1242 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1243 >        txt += '        else\n'
1244 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1245 >        txt += '        fi\n'
1246 >        txt += '    else\n'
1247 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1248          txt += '    fi\n'
1249 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1250 +
1251 +        if self.datasetPath:
1252 +          # VERIFY PROCESSED DATA
1253 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1254 +            txt += '      echo ">>> Verify list of processed files:"\n'
1255 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1256 +            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1257 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1258 +            txt += '      mv tmp.txt input-files.txt\n'
1259 +            txt += '      echo "cat input-files.txt"\n'
1260 +            txt += '      echo "----------------------"\n'
1261 +            txt += '      cat input-files.txt\n'
1262 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1263 +            txt += '      mv tmp.txt processed-files.txt\n'
1264 +            txt += '      echo "----------------------"\n'
1265 +            txt += '      echo "cat processed-files.txt"\n'
1266 +            txt += '      echo "----------------------"\n'
1267 +            txt += '      cat processed-files.txt\n'
1268 +            txt += '      echo "----------------------"\n'
1269 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1270 +            txt += '      fileverify_status=$?\n'
1271 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1272 +            txt += '         executable_exit_status=30001\n'
1273 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1274 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1275 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1276 +            txt += '      fi\n'
1277 +            txt += '    fi\n'
1278 +            txt += '\n'
1279 +        txt += 'else\n'
1280 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1281          txt += 'fi\n'
1282          txt += '\n'
1283 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1284 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1285 +        txt += 'job_exit_code=$executable_exit_status\n'
1286 +
1287          return txt
1288  
1289      def setParam_(self, param, value):
# Line 1448 | Line 1292 | class Cmssw(JobType):
1292      def getParams(self):
1293          return self._params
1294  
1451    def setTaskid_(self):
1452        self._taskId = self.cfg_params['taskId']
1453        
1454    def getTaskid(self):
1455        return self._taskId
1456
1295      def uniquelist(self, old):
1296          """
1297          remove duplicates from a list
# Line 1463 | Line 1301 | class Cmssw(JobType):
1301              nd[e]=0
1302          return nd.keys()
1303  
1304 <
1467 <    def checkOut(self, limit):
1304 >    def outList(self):
1305          """
1306          check the dimension of the output files
1307          """
1308 <        txt = 'echo "*****************************************"\n'
1309 <        txt += 'echo "** Starting output sandbox limit check **"\n'
1473 <        txt += 'echo "*****************************************"\n'
1474 <        allOutFiles = ""
1308 >        txt = ''
1309 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1310          listOutFiles = []
1311 <        for fileOut in (self.output_file+self.output_file_sandbox):
1312 <             if fileOut.find('crab_fjr') == -1:
1313 <                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1314 <                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1315 <        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1316 <        txt += 'ls -gGhrta;\n'
1317 <        txt += 'sum=0;\n'
1318 <        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1319 <        txt += '    if [ -e $file ]; then\n'
1320 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1321 <        txt += '        sum=`expr $sum + $tt`\n'
1322 <        txt += '    else\n'
1323 <        txt += '        echo "WARNING: output file $file not found!"\n'
1324 <        txt += '    fi\n'
1325 <        txt += 'done\n'
1491 <        txt += 'echo "Total Output dimension: $sum";\n'
1492 <        txt += 'limit='+str(limit)+';\n'
1493 <        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1494 <        txt += 'if [ $limit -lt $sum ]; then\n'
1495 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1496 <        txt += '    echo "         checking the output file sizes..."\n'
1497 <        """
1498 <        txt += '    dim=0;\n'
1499 <        txt += '    exclude=0;\n'
1500 <        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1501 <        txt += '        sumTemp=0;\n'
1502 <        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1503 <        txt += '            if [ $file != $file2 ]; then\n'
1504 <        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1505 <        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1506 <        txt += '            fi\n'
1507 <        txt += '        done\n'
1508 <        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1509 <        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1510 <        txt += '                dim=$sumTemp;\n'
1511 <        txt += '                exclude=$file;\n'
1512 <        txt += '            fi\n'
1513 <        txt += '        fi\n'
1514 <        txt += '    done\n'
1515 <        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1516 <        """
1517 <        txt += '    tot=0;\n'
1518 <        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1519 <        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1520 <        txt += '        tot=`expr $tot + $tt`;\n'
1521 <        txt += '        if [ $limit -lt $tot ]; then\n'
1522 <        txt += '            tot=`expr $tot - $tt`;\n'
1523 <        txt += '            fileLast=$file;\n'
1524 <        txt += '            break;\n'
1525 <        txt += '        fi\n'
1526 <        txt += '    done\n'
1527 <        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1528 <        txt += '    flag=0;\n'    
1529 <        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1530 <        txt += '        if [ $fileLast = $filess ]; then\n'
1531 <        txt += '            flag=1;\n'
1532 <        txt += '        fi\n'
1533 <        txt += '        if [ $flag -eq 1 ]; then\n'
1534 <        txt += '            rm -f $filess;\n'
1535 <        txt += '        fi\n'
1536 <        txt += '    done\n'
1537 <        txt += '    ls -agGhrt;\n'
1538 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1539 <        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1540 <        txt += '    exit_status=70000;\n'
1541 <        txt += 'else'
1542 <        txt += '    echo "Total Output dimension $sum is fine.";\n'
1543 <        txt += 'fi\n'
1544 <        txt += 'echo "*****************************************"\n'
1545 <        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1546 <        txt += 'echo "*****************************************"\n'
1311 >        stdout = 'CMSSW_$NJob.stdout'
1312 >        stderr = 'CMSSW_$NJob.stderr'
1313 >        if (self.return_data == 1):
1314 >            for file in (self.output_file+self.output_file_sandbox):
1315 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1316 >            listOutFiles.append(stdout)
1317 >            listOutFiles.append(stderr)
1318 >        else:
1319 >            for file in (self.output_file_sandbox):
1320 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1321 >            listOutFiles.append(stdout)
1322 >            listOutFiles.append(stderr)
1323 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1324 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1325 >        txt += 'export filesToCheck\n'
1326          return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines