ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.128 by fanzago, Thu Oct 11 16:23:44 2007 UTC vs.
Revision 1.211 by slacapra, Tue Jun 10 14:02:12 2008 UTC

# Line 5 | Line 5 | from crab_util import *
5   from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8 + from LFNBaseName import *
9  
10   import os, string, glob
11  
12   class Cmssw(JobType):
13 <    def __init__(self, cfg_params, ncjobs):
13 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16 +        self.skip_blocks = skip_blocks
17 +
18 +        self.argsList = []
19  
20          self._params = {}
21          self.cfg_params = cfg_params
18
22          # init BlackWhiteListParser
23          self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
24  
25 <        try:
23 <            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 <        except KeyError:
25 <            self.MaxTarBallSize = 9.5
25 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
26  
27          # number of jobs requested to be created, limit obj splitting
28          self.ncjobs = ncjobs
29  
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36          self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38        self.additional_tgz_name = 'additional.tgz'
38          self.scriptName = 'CMSSW.sh'
39 <        self.pset = ''      #scrip use case Da  
40 <        self.datasetPath = '' #scrip use case Da
39 >        self.pset = ''
40 >        self.datasetPath = ''
41  
42          # set FJR file name
43          self.fjrFileName = 'crab_fjr.xml'
44  
45          self.version = self.scram.getSWVersion()
46 <        
47 <        #
48 <        # Try to block creation in case of arch/version mismatch
49 <        #
50 <
51 <        a = string.split(self.version, "_")
52 <
53 <        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
54 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
55 <            raise CrabException(msg)
57 <        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
46 >        version_array = self.version.split('_')
47 >        self.CMSSW_major = 0
48 >        self.CMSSW_minor = 0
49 >        self.CMSSW_patch = 0
50 >        try:
51 >            self.CMSSW_major = int(version_array[1])
52 >            self.CMSSW_minor = int(version_array[2])
53 >            self.CMSSW_patch = int(version_array[3])
54 >        except:
55 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
56              raise CrabException(msg)
60        
61        common.taskDB.setDict('codeVersion',self.version)
62        self.setParam_('application', self.version)
57  
58          ### collect Data cards
59  
60 <        ## get DBS mode
61 <        try:
68 <            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 <        except KeyError:
70 <            self.use_dbs_1 = 0
71 <            
72 <        try:
73 <            tmp =  cfg_params['CMSSW.datasetpath']
74 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75 <            if string.lower(tmp)=='none':
76 <                self.datasetPath = None
77 <                self.selectNoInput = 1
78 <            else:
79 <                self.datasetPath = tmp
80 <                self.selectNoInput = 0
81 <        except KeyError:
82 <            msg = "Error: datasetpath not defined "  
60 >        if not cfg_params.has_key('CMSSW.datasetpath'):
61 >            msg = "Error: datasetpath not defined "
62              raise CrabException(msg)
63 <
64 <        # ML monitoring
65 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
66 <        if not self.datasetPath:
67 <            self.setParam_('dataset', 'None')
89 <            self.setParam_('owner', 'None')
63 >        tmp =  cfg_params['CMSSW.datasetpath']
64 >        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
65 >        if string.lower(tmp)=='none':
66 >            self.datasetPath = None
67 >            self.selectNoInput = 1
68          else:
69 <            try:
70 <                datasetpath_split = self.datasetPath.split("/")
93 <                # standard style
94 <                self.setParam_('datasetFull', self.datasetPath)
95 <                if self.use_dbs_1 == 1 :
96 <                    self.setParam_('dataset', datasetpath_split[1])
97 <                    self.setParam_('owner', datasetpath_split[-1])
98 <                else:
99 <                    self.setParam_('dataset', datasetpath_split[1])
100 <                    self.setParam_('owner', datasetpath_split[2])
101 <            except:
102 <                self.setParam_('dataset', self.datasetPath)
103 <                self.setParam_('owner', self.datasetPath)
104 <                
105 <        self.setTaskid_()
106 <        self.setParam_('taskId', self.cfg_params['taskId'])
69 >            self.datasetPath = tmp
70 >            self.selectNoInput = 0
71  
72          self.dataTiers = []
73 <
73 >        self.debugWrap = ''
74 >        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
75 >        if self.debug_wrapper: self.debugWrap='--debug'
76          ## now the application
77 <        try:
78 <            self.executable = cfg_params['CMSSW.executable']
113 <            self.setParam_('exe', self.executable)
114 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
116 <            log.debug(3,msg)
117 <        except KeyError:
118 <            self.executable = 'cmsRun'
119 <            self.setParam_('exe', self.executable)
120 <            msg = "User executable not defined. Use cmsRun"
121 <            log.debug(3,msg)
122 <            pass
77 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
78 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
79  
80 <        try:
125 <            self.pset = cfg_params['CMSSW.pset']
126 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if self.pset.lower() != 'none' :
128 <                if (not os.path.exists(self.pset)):
129 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 <            else:
131 <                self.pset = None
132 <        except KeyError:
80 >        if not cfg_params.has_key('CMSSW.pset'):
81              raise CrabException("PSet file missing. Cannot run cmsRun ")
82 +        self.pset = cfg_params['CMSSW.pset']
83 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
84 +        if self.pset.lower() != 'none' :
85 +            if (not os.path.exists(self.pset)):
86 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
87 +        else:
88 +            self.pset = None
89  
90          # output files
91          ## stuff which must be returned always via sandbox
# Line 140 | Line 95 | class Cmssw(JobType):
95          self.output_file_sandbox.append(self.fjrFileName)
96  
97          # other output files to be returned via sandbox or copied to SE
98 <        try:
99 <            self.output_file = []
100 <            tmp = cfg_params['CMSSW.output_file']
101 <            if tmp != '':
102 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
148 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
149 <                for tmp in tmpOutFiles:
150 <                    tmp=string.strip(tmp)
151 <                    self.output_file.append(tmp)
152 <                    pass
153 <            else:
154 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155 <                pass
156 <            pass
157 <        except KeyError:
98 >        self.output_file = []
99 >        tmp = cfg_params.get('CMSSW.output_file',None)
100 >        if tmp :
101 >            self.output_file = [x.strip() for x in tmp.split(',')]
102 >        else:
103              log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
104 <            pass
104 >        pass
105  
106          # script_exe file as additional file in inputSandbox
107 <        try:
108 <            self.scriptExe = cfg_params['USER.script_exe']
109 <            if self.scriptExe != '':
110 <               if not os.path.isfile(self.scriptExe):
111 <                  msg ="ERROR. file "+self.scriptExe+" not found"
112 <                  raise CrabException(msg)
168 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
169 <        except KeyError:
170 <            self.scriptExe = ''
107 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
108 >        if self.scriptExe :
109 >            if not os.path.isfile(self.scriptExe):
110 >                msg ="ERROR. file "+self.scriptExe+" not found"
111 >                raise CrabException(msg)
112 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
113  
172        #CarlosDaniele
114          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
115 <           msg ="Error. script_exe  not defined"
116 <           raise CrabException(msg)
115 >            msg ="Error. script_exe  not defined"
116 >            raise CrabException(msg)
117 >
118 >        # use parent files...
119 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
120  
121          ## additional input files
122 <        try:
122 >        if cfg_params.has_key('USER.additional_input_files'):
123              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
124              for tmp in tmpAddFiles:
125                  tmp = string.strip(tmp)
# Line 192 | Line 136 | class Cmssw(JobType):
136                      if not os.path.exists(file):
137                          raise CrabException("Additional input file not found: "+file)
138                      pass
195                    # fname = string.split(file, '/')[-1]
196                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197                    # shutil.copyfile(file, storedFile)
139                      self.additional_inbox_files.append(string.strip(file))
140                  pass
141              pass
142              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
143 <        except KeyError:
203 <            pass
204 <
205 <        # files per job
206 <        try:
207 <            if (cfg_params['CMSSW.files_per_jobs']):
208 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209 <        except KeyError:
210 <            pass
143 >        pass
144  
145          ## Events per job
146 <        try:
146 >        if cfg_params.has_key('CMSSW.events_per_job'):
147              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
148              self.selectEventsPerJob = 1
149 <        except KeyError:
149 >        else:
150              self.eventsPerJob = -1
151              self.selectEventsPerJob = 0
152 <    
152 >
153          ## number of jobs
154 <        try:
154 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
155              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
156              self.selectNumberOfJobs = 1
157 <        except KeyError:
157 >        else:
158              self.theNumberOfJobs = 0
159              self.selectNumberOfJobs = 0
160  
161 <        try:
161 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
162              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
163              self.selectTotalNumberEvents = 1
164 <        except KeyError:
164 >            if self.selectNumberOfJobs  == 1:
165 >                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
166 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
167 >                    raise CrabException(msg)
168 >        else:
169              self.total_number_of_events = 0
170              self.selectTotalNumberEvents = 0
171  
172 <        if self.pset != None: #CarlosDaniele
172 >        if self.pset != None:
173               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
174                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
175                   raise CrabException(msg)
# Line 241 | Line 178 | class Cmssw(JobType):
178                   msg = 'Must specify  number_of_jobs.'
179                   raise CrabException(msg)
180  
181 <        ## source seed for pythia
182 <        try:
183 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
184 <        except KeyError:
185 <            self.sourceSeed = None
186 <            common.logger.debug(5,"No seed given")
187 <
188 <        try:
189 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
190 <        except KeyError:
191 <            self.sourceSeedVtx = None
192 <            common.logger.debug(5,"No vertex seed given")
193 <
194 <        try:
195 <            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
196 <        except KeyError:
197 <            self.sourceSeedG4 = None
198 <            common.logger.debug(5,"No g4 sim hits seed given")
199 <
200 <        try:
201 <            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
202 <        except KeyError:
203 <            self.sourceSeedMix = None
204 <            common.logger.debug(5,"No mix seed given")
205 <
206 <        try:
207 <            self.firstRun = int(cfg_params['CMSSW.first_run'])
208 <        except KeyError:
209 <            self.firstRun = None
210 <            common.logger.debug(5,"No first run given")
211 <        if self.pset != None: #CarlosDaniele
212 <            ver = string.split(self.version,"_")
213 <            if (int(ver[1])>=1 and int(ver[2])>=5):
214 <                import PsetManipulator150 as pp
215 <            else:
216 <                import PsetManipulator as pp
217 <            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
181 >        ## New method of dealing with seeds
182 >        self.incrementSeeds = []
183 >        self.preserveSeeds = []
184 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
185 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
186 >            for tmp in tmpList:
187 >                tmp.strip()
188 >                self.preserveSeeds.append(tmp)
189 >        if cfg_params.has_key('CMSSW.increment_seeds'):
190 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
191 >            for tmp in tmpList:
192 >                tmp.strip()
193 >                self.incrementSeeds.append(tmp)
194 >
195 >        ## Old method of dealing with seeds
196 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
197 >        ## remove
198 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
199 >        if self.sourceSeed:
200 >            print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
201 >            self.incrementSeeds.append('sourceSeed')
202 >            self.incrementSeeds.append('theSource')
203 >
204 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
205 >        if self.sourceSeedVtx:
206 >            print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
207 >            self.incrementSeeds.append('VtxSmeared')
208 >
209 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
210 >        if self.sourceSeedG4:
211 >            print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
212 >            self.incrementSeeds.append('g4SimHits')
213 >
214 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
215 >        if self.sourceSeedMix:
216 >            print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
217 >            self.incrementSeeds.append('mix')
218 >
219 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
220 >
221 >
222 >        # Copy/return
223 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
224 >        self.return_data = int(cfg_params.get('USER.return_data',0))
225  
226          #DBSDLS-start
227 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
227 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
228          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
229          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
230          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 289 | Line 233 | class Cmssw(JobType):
233          blockSites = {}
234          if self.datasetPath:
235              blockSites = self.DataDiscoveryAndLocation(cfg_params)
236 <        #DBSDLS-end          
236 >        #DBSDLS-end
237  
294        self.tgzNameWithPath = self.getTarBall(self.executable)
295    
238          ## Select Splitting
239 <        if self.selectNoInput:
240 <            if self.pset == None: #CarlosDaniele
239 >        if self.selectNoInput:
240 >            if self.pset == None:
241                  self.jobSplittingForScript()
242              else:
243                  self.jobSplittingNoInput()
244          else:
245              self.jobSplittingByBlocks(blockSites)
246  
247 <        # modify Pset
248 <        if self.pset != None: #CarlosDaniele
249 <            try:
250 <                if (self.datasetPath): # standard job
251 <                    # allow to processa a fraction of events in a file
252 <                    PsetEdit.inputModule("INPUT")
253 <                    PsetEdit.maxEvent("INPUTMAXEVENTS")
254 <                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
255 <                else:  # pythia like job
247 >        # modify Pset only the first time
248 >        if isNew:
249 >            if self.pset != None:
250 >                import PsetManipulator as pp
251 >                PsetEdit = pp.PsetManipulator(self.pset)
252 >                try:
253 >                    # Add FrameworkJobReport to parameter-set, set max events.
254 >                    # Reset later for data jobs by writeCFG which does all modifications
255 >                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
256                      PsetEdit.maxEvent(self.eventsPerJob)
257 <                    if (self.firstRun):
258 <                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
259 <                    if (self.sourceSeed) :
260 <                        PsetEdit.pythiaSeed("INPUT")
261 <                        if (self.sourceSeedVtx) :
262 <                            PsetEdit.vtxSeed("INPUTVTX")
321 <                        if (self.sourceSeedG4) :
322 <                            PsetEdit.g4Seed("INPUTG4")
323 <                        if (self.sourceSeedMix) :
324 <                            PsetEdit.mixSeed("INPUTMIX")
325 <                # add FrameworkJobReport to parameter-set
326 <                PsetEdit.addCrabFJR(self.fjrFileName)
327 <                PsetEdit.psetWriter(self.configFilename())
328 <            except:
329 <                msg='Error while manipuliating ParameterSet: exiting...'
330 <                raise CrabException(msg)
257 >                    PsetEdit.psetWriter(self.configFilename())
258 >                except:
259 >                    msg='Error while manipulating ParameterSet: exiting...'
260 >                    raise CrabException(msg)
261 >            ## Prepare inputSandbox TarBall (only the first time)  
262 >            self.tgzNameWithPath = self.getTarBall(self.executable)
263  
264      def DataDiscoveryAndLocation(self, cfg_params):
265  
266          import DataDiscovery
335        import DataDiscovery_DBS2
267          import DataLocation
268          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
269  
# Line 341 | Line 272 | class Cmssw(JobType):
272          ## Contact the DBS
273          common.logger.message("Contacting Data Discovery Services ...")
274          try:
275 <
345 <            if self.use_dbs_1 == 1 :
346 <                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 <            else :
348 <                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
275 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
276              self.pubdata.fetchDBSInfo()
277  
278          except DataDiscovery.NotExistingDatasetError, ex :
# Line 357 | Line 284 | class Cmssw(JobType):
284          except DataDiscovery.DataDiscoveryError, ex:
285              msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
286              raise CrabException(msg)
360        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362            raise CrabException(msg)
363        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365            raise CrabException(msg)
366        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368            raise CrabException(msg)
287  
288          self.filesbyblock=self.pubdata.getFiles()
289          self.eventsbyblock=self.pubdata.getEventsPerBlock()
290          self.eventsbyfile=self.pubdata.getEventsPerFile()
291 +        self.parentFiles=self.pubdata.getParent()
292  
293          ## get max number of events
294 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
294 >        self.maxEvents=self.pubdata.getMaxEvents()
295  
296          ## Contact the DLS and build a list of sites hosting the fileblocks
297          try:
# Line 381 | Line 300 | class Cmssw(JobType):
300          except DataLocation.DataLocationError , ex:
301              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
302              raise CrabException(msg)
303 <        
303 >
304  
305          sites = dataloc.getSites()
306          allSites = []
# Line 395 | Line 314 | class Cmssw(JobType):
314          common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
315  
316          return sites
317 <    
317 >
318      def jobSplittingByBlocks(self, blockSites):
319          """
320          Perform job splitting. Jobs run over an integer number of files
# Line 445 | Line 364 | class Cmssw(JobType):
364              totalNumberOfJobs = 999999999
365          else :
366              totalNumberOfJobs = self.ncjobs
448            
367  
368          blocks = blockSites.keys()
369          blockCount = 0
# Line 465 | Line 383 | class Cmssw(JobType):
383              blockCount += 1
384              if block not in jobsOfBlock.keys() :
385                  jobsOfBlock[block] = []
386 <            
386 >
387              if self.eventsbyblock.has_key(block) :
388                  numEventsInBlock = self.eventsbyblock[block]
389                  common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
390 <            
390 >
391                  files = self.filesbyblock[block]
392                  numFilesInBlock = len(files)
393                  if (numFilesInBlock <= 0):
# Line 477 | Line 395 | class Cmssw(JobType):
395                  fileCount = 0
396  
397                  # ---- New block => New job ---- #
398 <                parString = "\\{"
398 >                parString = ""
399                  # counter for number of events in files currently worked on
400                  filesEventCount = 0
401                  # flag if next while loop should touch new file
402                  newFile = 1
403                  # job event counter
404                  jobSkipEventCount = 0
405 <            
405 >
406                  # ---- Iterate over the files in the block until we've met the requested ---- #
407                  # ---- total # of events or we've gone over all the files in this block  ---- #
408 +                pString=''
409                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
410                      file = files[fileCount]
411 +                    if self.useParent:
412 +                        parent = self.parentFiles[file]
413 +                        for f in parent :
414 +                            pString += '\\\"' + f + '\\\"\,'
415 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
416 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
417                      if newFile :
418                          try:
419                              numEventsInFile = self.eventsbyfile[file]
# Line 500 | Line 425 | class Cmssw(JobType):
425                              newFile = 0
426                          except KeyError:
427                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503                        
428  
429 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
430                      # if less events in file remain than eventsPerJobRequested
431 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
431 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
432                          # if last file in block
433                          if ( fileCount == numFilesInBlock-1 ) :
434                              # end job using last file, use remaining events in block
435                              # close job and touch new file
436                              fullString = parString[:-2]
437 <                            fullString += '\\}'
438 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
437 >                            if self.useParent:
438 >                                fullParentString = pString[:-2]
439 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
440 >                            else:
441 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
442                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
443                              self.jobDestination.append(blockSites[block])
444                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 522 | Line 450 | class Cmssw(JobType):
450                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
451                              jobSkipEventCount = 0
452                              # reset file
453 <                            parString = "\\{"
453 >                            pString = ""
454 >                            parString = ""
455                              filesEventCount = 0
456                              newFile = 1
457                              fileCount += 1
# Line 534 | Line 463 | class Cmssw(JobType):
463                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
464                          # close job and touch new file
465                          fullString = parString[:-2]
466 <                        fullString += '\\}'
467 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
466 >                        if self.useParent:
467 >                            fullParentString = pString[:-2]
468 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
469 >                        else:
470 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
471                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
472                          self.jobDestination.append(blockSites[block])
473                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 546 | Line 478 | class Cmssw(JobType):
478                          eventsRemaining = eventsRemaining - eventsPerJobRequested
479                          jobSkipEventCount = 0
480                          # reset file
481 <                        parString = "\\{"
481 >                        pString = ""
482 >                        parString = ""
483                          filesEventCount = 0
484                          newFile = 1
485                          fileCount += 1
486 <                        
486 >
487                      # if more events in file remain than eventsPerJobRequested
488                      else :
489                          # close job but don't touch new file
490                          fullString = parString[:-2]
491 <                        fullString += '\\}'
492 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
491 >                        if self.useParent:
492 >                            fullParentString = pString[:-2]
493 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
494 >                        else:
495 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
496                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
497                          self.jobDestination.append(blockSites[block])
498                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 570 | Line 506 | class Cmssw(JobType):
506                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
507                          # remove all but the last file
508                          filesEventCount = self.eventsbyfile[file]
509 <                        parString = "\\{"
510 <                        parString += '\\\"' + file + '\\\"\,'
509 >                        if self.useParent:
510 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
511 >                        parString = '\\\"' + file + '\\\"\,'
512                      pass # END if
513                  pass # END while (iterate over files in the block)
514          pass # END while (iterate over blocks in the dataset)
# Line 579 | Line 516 | class Cmssw(JobType):
516          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
517              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
518          common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
519 <        
519 >
520          # screen output
521          screenOutput = "List of jobs and available destination sites:\n\n"
522  
# Line 591 | Line 528 | class Cmssw(JobType):
528          for block in blocks:
529              if block in jobsOfBlock.keys() :
530                  blockCounter += 1
531 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
531 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
532 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
533                  if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
534 <                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
534 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
535                      bloskNoSite.append( blockCounter )
536 <        
536 >
537          common.logger.message(screenOutput)
538          if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
539              msg = 'WARNING: No sites are hosting any part of data for block:\n                '
# Line 611 | Line 549 | class Cmssw(JobType):
549              for range_jobs in noSiteBlock:
550                  msg += str(range_jobs) + virgola
551              msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
552 +            if self.cfg_params.has_key('EDG.se_white_list'):
553 +                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
554 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
555 +                msg += 'Please check if the dataset is available at this site!)\n'
556 +            if self.cfg_params.has_key('EDG.ce_white_list'):
557 +                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
558 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
559 +                msg += 'Please check if the dataset is available at this site!)\n'
560 +
561              common.logger.message(msg)
562  
563          self.list_of_args = list_of_lists
# Line 621 | Line 568 | class Cmssw(JobType):
568          Perform job splitting based on number of events per job
569          """
570          common.logger.debug(5,'Splitting per events')
571 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
572 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
573 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
571 >
572 >        if (self.selectEventsPerJob):
573 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
574 >        if (self.selectNumberOfJobs):
575 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
576 >        if (self.selectTotalNumberEvents):
577 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
578  
579          if (self.total_number_of_events < 0):
580              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 632 | Line 583 | class Cmssw(JobType):
583          if (self.selectEventsPerJob):
584              if (self.selectTotalNumberEvents):
585                  self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
586 <            elif(self.selectNumberOfJobs) :  
586 >            elif(self.selectNumberOfJobs) :
587                  self.total_number_of_jobs =self.theNumberOfJobs
588 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
588 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
589  
590          elif (self.selectNumberOfJobs) :
591              self.total_number_of_jobs = self.theNumberOfJobs
592              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
593 <
593 >
594          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
595  
596          # is there any remainder?
# Line 655 | Line 606 | class Cmssw(JobType):
606          self.list_of_args = []
607          for i in range(self.total_number_of_jobs):
608              ## Since there is no input, any site is good
609 <           # self.jobDestination.append(["Any"])
659 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
609 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
610              args=[]
611              if (self.firstRun):
612 <                    ## pythia first run
663 <                #self.list_of_args.append([(str(self.firstRun)+str(i))])
612 >                ## pythia first run
613                  args.append(str(self.firstRun)+str(i))
665            else:
666                ## no first run
667                #self.list_of_args.append([str(i)])
668                args.append(str(i))
669            if (self.sourceSeed):
670                args.append(str(self.sourceSeed)+str(i))
671                if (self.sourceSeedVtx):
672                    ## + vtx random seed
673                    args.append(str(self.sourceSeedVtx)+str(i))
674                if (self.sourceSeedG4):
675                    ## + G4 random seed
676                    args.append(str(self.sourceSeedG4)+str(i))
677                if (self.sourceSeedMix):    
678                    ## + Mix random seed
679                    args.append(str(self.sourceSeedMix)+str(i))
680                pass
681            pass
614              self.list_of_args.append(args)
683        pass
684            
685        # print self.list_of_args
615  
616          return
617  
618  
619 <    def jobSplittingForScript(self):#CarlosDaniele
619 >    def jobSplittingForScript(self):
620          """
621          Perform job splitting based on number of jobs
622          """
# Line 703 | Line 632 | class Cmssw(JobType):
632          # argument is seed number.$i
633          self.list_of_args = []
634          for i in range(self.total_number_of_jobs):
706            ## Since there is no input, any site is good
707           # self.jobDestination.append(["Any"])
635              self.jobDestination.append([""])
709            ## no random seed
636              self.list_of_args.append([str(i)])
637          return
638  
639 <    def split(self, jobParams):
640 <
715 <        common.jobDB.load()
716 <        #### Fabio
639 >    def split(self, jobParams,firstJobID):
640 >
641          njobs = self.total_number_of_jobs
642          arglist = self.list_of_args
643          # create the empty structure
644          for i in range(njobs):
645              jobParams.append("")
722        
723        for job in range(njobs):
724            jobParams[job] = arglist[job]
725            # print str(arglist[job])
726            # print jobParams[job]
727            common.jobDB.setArguments(job, jobParams[job])
728            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
729            common.jobDB.setDestination(job, self.jobDestination[job])
646  
647 <        common.jobDB.save()
647 >        listID=[]
648 >        listField=[]
649 >        for id in range(njobs):
650 >            job = id + int(firstJobID)
651 >            jobParams[id] = arglist[id]
652 >            listID.append(job+1)
653 >            job_ToSave ={}
654 >            concString = ' '
655 >            argu=''
656 >            if len(jobParams[id]):
657 >                argu +=   concString.join(jobParams[id] )
658 >            job_ToSave['arguments']= str(job+1)+' '+argu
659 >            job_ToSave['dlsDestination']= self.jobDestination[id]
660 >            listField.append(job_ToSave)
661 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
662 >            +"                     Destination: "+str(self.jobDestination[id])
663 >            common.logger.debug(5,msg)
664 >        common._db.updateJob_(listID,listField)
665 >        self.argsList = (len(jobParams[0])+1)
666 >
667          return
668 <    
734 <    def getJobTypeArguments(self, nj, sched):
735 <        result = ''
736 <        for i in common.jobDB.arguments(nj):
737 <            result=result+str(i)+" "
738 <        return result
739 <  
668 >
669      def numberOfJobs(self):
741        # Fabio
670          return self.total_number_of_jobs
671  
672      def getTarBall(self, exe):
673          """
674          Return the TarBall with lib and exe
675          """
748        
749        # if it exist, just return it
750        #
751        # Marco. Let's start to use relative path for Boss XML files
752        #
676          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
677          if os.path.exists(self.tgzNameWithPath):
678              return self.tgzNameWithPath
# Line 763 | Line 686 | class Cmssw(JobType):
686  
687          # First of all declare the user Scram area
688          swArea = self.scram.getSWArea_()
766        #print "swArea = ", swArea
767        # swVersion = self.scram.getSWVersion()
768        # print "swVersion = ", swVersion
689          swReleaseTop = self.scram.getReleaseTop_()
690 <        #print "swReleaseTop = ", swReleaseTop
771 <        
690 >
691          ## check if working area is release top
692          if swReleaseTop == '' or swArea == swReleaseTop:
693 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
694              return
695  
696          import tarfile
# Line 781 | Line 701 | class Cmssw(JobType):
701                  exeWithPath = self.scram.findFile_(executable)
702                  if ( not exeWithPath ):
703                      raise CrabException('User executable '+executable+' not found')
704 <    
704 >
705                  ## then check if it's private or not
706                  if exeWithPath.find(swReleaseTop) == -1:
707                      # the exe is private, so we must ship
# Line 790 | Line 710 | class Cmssw(JobType):
710                      # distinguish case when script is in user project area or given by full path somewhere else
711                      if exeWithPath.find(path) >= 0 :
712                          exe = string.replace(exeWithPath, path,'')
713 <                        tar.add(path+exe,os.path.basename(executable))
713 >                        tar.add(path+exe,exe)
714                      else :
715                          tar.add(exeWithPath,os.path.basename(executable))
716                      pass
717                  else:
718                      # the exe is from release, we'll find it on WN
719                      pass
720 <    
720 >
721              ## Now get the libraries: only those in local working area
722              libDir = 'lib'
723              lib = swArea+'/' +libDir
724              common.logger.debug(5,"lib "+lib+" to be tarred")
725              if os.path.exists(lib):
726                  tar.add(lib,libDir)
727 <    
727 >
728              ## Now check if module dir is present
729              moduleDir = 'module'
730              module = swArea + '/' + moduleDir
# Line 812 | Line 732 | class Cmssw(JobType):
732                  tar.add(module,moduleDir)
733  
734              ## Now check if any data dir(s) is present
735 <            swAreaLen=len(swArea)
736 <            for root, dirs, files in os.walk(swArea):
737 <                if "data" in dirs:
738 <                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
739 <                    tar.add(root+"/data",root[swAreaLen:]+"/data")
740 <
741 <            ## Add ProdAgent dir to tar
742 <            paDir = 'ProdAgentApi'
743 <            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
744 <            if os.path.isdir(pa):
745 <                tar.add(pa,paDir)
746 <
747 <            ### FEDE FOR DBS PUBLICATION
748 <            ## Add PRODCOMMON dir to tar
749 <            prodcommonDir = 'ProdCommon'
750 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
751 <            if os.path.isdir(prodcommonPath):
752 <                tar.add(prodcommonPath,prodcommonDir)
753 <            #############################    
754 <        
735 >            self.dataExist = False
736 >            srcArea=swArea+"/src/"
737 >            todo_list = [(i, i) for i in  os.listdir(srcArea)]
738 >            while len(todo_list):
739 >                entry, name = todo_list.pop()
740 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
741 >                    continue
742 >                if os.path.isdir(srcArea+entry):
743 >                    entryPath = entry + '/'
744 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(srcArea+entry)]
745 >                    if name == 'data':
746 >                        self.dataExist=True
747 >                        common.logger.debug(5,"data "+entry+" to be tarred")
748 >                        tar.add(entry)
749 >                    pass
750 >                pass
751 >
752 >            ### CMSSW ParameterSet
753 >            if not self.pset is None:
754 >                cfg_file = common.work_space.jobDir()+self.configFilename()
755 >                tar.add(cfg_file,self.configFilename())
756 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
757 >
758 >
759 >            ## Add ProdCommon dir to tar
760 >            prodcommonDir = './'
761 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
762 >            neededStuff = ['ProdCommon/FwkJobRep', 'IMProv']
763 >            for dir in neededStuff:
764 >                if os.path.isdir(prodcommonPath+dir):
765 >                    tar.add(prodcommonPath+dir,prodcommonDir+dir)
766 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
767 >
768 >            ##### ML stuff
769 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
770 >            path=os.environ['CRABDIR'] + '/python/'
771 >            for file in ML_file_list:
772 >                tar.add(path+file,file)
773 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
774 >
775 >            ##### Utils
776 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
777 >            for file in Utils_file_list:
778 >                tar.add(path+file,file)
779 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
780 >
781 >            ##### AdditionalFiles
782 >            for file in self.additional_inbox_files:
783 >                tar.add(file,string.split(file,'/')[-1])
784              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
785 +
786              tar.close()
787 <        except :
788 <            raise CrabException('Could not create tar-ball')
787 >        except TarError:
788 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
789  
790          ## check for tarball size
791          tarballinfo = os.stat(self.tgzNameWithPath)
# Line 843 | Line 793 | class Cmssw(JobType):
793              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
794  
795          ## create tar-ball with ML stuff
846        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
847        try:
848            tar = tarfile.open(self.MLtgzfile, "w:gz")
849            path=os.environ['CRABDIR'] + '/python/'
850            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
851                tar.add(path+file,file)
852            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
853            tar.close()
854        except :
855            raise CrabException('Could not create ML files tar-ball')
856        
857        return
858        
859    def additionalInputFileTgz(self):
860        """
861        Put all additional files into a tar ball and return its name
862        """
863        import tarfile
864        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
865        tar = tarfile.open(tarName, "w:gz")
866        for file in self.additional_inbox_files:
867            tar.add(file,string.split(file,'/')[-1])
868        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
869        tar.close()
870        return tarName
796  
797 <    def wsSetupEnvironment(self, nj):
797 >    def wsSetupEnvironment(self, nj=0):
798          """
799          Returns part of a job script which prepares
800          the execution environment for the job 'nj'.
801          """
802 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
803 +            psetName = 'pset.py'
804 +        else:
805 +            psetName = 'pset.cfg'
806          # Prepare JobType-independent part
807 <        txt = ''
808 <  
809 <        ## OLI_Daniele at this level  middleware already known
881 <
882 <        txt += 'if [ $middleware == LCG ]; then \n'
883 <        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
884 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
885 <        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
807 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
808 >        txt += 'echo ">>> setup environment"\n'
809 >        txt += 'if [ $middleware == LCG ]; then \n'
810          txt += self.wsSetupCMSLCGEnvironment_()
811          txt += 'elif [ $middleware == OSG ]; then\n'
812          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
813 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
814 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
815 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
816 <        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
893 <        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
895 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
896 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
897 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
898 <        txt += '        exit 1\n'
813 >        txt += '    if [ ! $? == 0 ] ;then\n'
814 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
815 >        txt += '        job_exit_code=10016\n'
816 >        txt += '        func_exit\n'
817          txt += '    fi\n'
818 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
819          txt += '\n'
820          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
821          txt += '    cd $WORKING_DIR\n'
822 <        txt += self.wsSetupCMSOSGEnvironment_()
823 <        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
905 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
822 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
823 >        txt += self.wsSetupCMSOSGEnvironment_()
824          txt += 'fi\n'
825  
826          # Prepare JobType-specific part
827          scram = self.scram.commandName()
828          txt += '\n\n'
829 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
829 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
830 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
831          txt += scram+' project CMSSW '+self.version+'\n'
832          txt += 'status=$?\n'
833          txt += 'if [ $status != 0 ] ; then\n'
834 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
835 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
836 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
918 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
919 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
920 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
921 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
922 <        ## OLI_Daniele
923 <        txt += '    if [ $middleware == OSG ]; then \n'
924 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
925 <        txt += '        cd $RUNTIME_AREA\n'
926 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
927 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
928 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
932 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
933 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
934 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
935 <        txt += '        fi\n'
936 <        txt += '    fi \n'
937 <        txt += '   exit 1 \n'
834 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
835 >        txt += '    job_exit_code=10034\n'
836 >        txt += '    func_exit\n'
837          txt += 'fi \n'
939        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
838          txt += 'cd '+self.version+'\n'
941        ########## FEDE FOR DBS2 ######################
839          txt += 'SOFTWARE_DIR=`pwd`\n'
840 <        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
944 <        ###############################################
945 <        ### needed grep for bug in scramv1 ###
946 <        txt += scram+' runtime -sh\n'
840 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
841          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
842 <        txt += 'echo $PATH\n'
843 <
842 >        txt += 'if [ $? != 0 ] ; then\n'
843 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
844 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
845 >        txt += '    job_exit_code=10034\n'
846 >        txt += '    func_exit\n'
847 >        txt += 'fi \n'
848          # Handle the arguments:
849          txt += "\n"
850          txt += "## number of arguments (first argument always jobnumber)\n"
851          txt += "\n"
852 < #        txt += "narg=$#\n"
955 <        txt += "if [ $nargs -lt 2 ]\n"
852 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
853          txt += "then\n"
854 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
855 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
856 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
960 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
961 <        txt += '    rm -f $RUNTIME_AREA/$repo \n'
962 <        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
963 <        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
964 <        ## OLI_Daniele
965 <        txt += '    if [ $middleware == OSG ]; then \n'
966 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
967 <        txt += '        cd $RUNTIME_AREA\n'
968 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
969 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
970 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
974 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
975 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
976 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
977 <        txt += '        fi\n'
978 <        txt += '    fi \n'
979 <        txt += "    exit 1\n"
854 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
855 >        txt += '    job_exit_code=50113\n'
856 >        txt += "    func_exit\n"
857          txt += "fi\n"
858          txt += "\n"
859  
860          # Prepare job-specific part
861          job = common.job_list[nj]
862 <        ### FEDE FOR DBS OUTPUT PUBLICATION
986 <        if (self.datasetPath):
862 >        if (self.datasetPath):
863              txt += '\n'
864              txt += 'DatasetPath='+self.datasetPath+'\n'
865  
866              datasetpath_split = self.datasetPath.split("/")
867 <            
867 >
868              txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
869              txt += 'DataTier='+datasetpath_split[2]+'\n'
994            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
870              txt += 'ApplicationFamily=cmsRun\n'
871  
872          else:
873              txt += 'DatasetPath=MCDataTier\n'
874              txt += 'PrimaryDataset=null\n'
875              txt += 'DataTier=null\n'
1001            #txt += 'ProcessedDataset=null\n'
876              txt += 'ApplicationFamily=MCDataTier\n'
877 <        if self.pset != None: #CarlosDaniele
877 >        if self.pset != None:
878              pset = os.path.basename(job.configFilename())
879              txt += '\n'
880              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
881              if (self.datasetPath): # standard job
882 <                #txt += 'InputFiles=$2\n'
883 <                txt += 'InputFiles=${args[1]}\n'
884 <                txt += 'MaxEvents=${args[2]}\n'
885 <                txt += 'SkipEvents=${args[3]}\n'
882 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
883 >                if (self.useParent):  
884 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
885 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
886 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
887 >                else:
888 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
889 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
890                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
891 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
891 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
892                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1015                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
893                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1017                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
894              else:  # pythia like job
895 <                seedIndex=1
895 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
896 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
897 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
898 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
899                  if (self.firstRun):
900 <                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
900 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
901                      txt += 'echo "FirstRun: <$FirstRun>"\n'
1023                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1024                    seedIndex=seedIndex+1
902  
903 <                if (self.sourceSeed):
1027 <                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1028 <                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1029 <                    seedIndex=seedIndex+1
1030 <                    ## the following seeds are not always present
1031 <                    if (self.sourceSeedVtx):
1032 <                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1033 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1034 <                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1035 <                        seedIndex += 1
1036 <                    if (self.sourceSeedG4):
1037 <                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1038 <                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1039 <                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1040 <                        seedIndex += 1
1041 <                    if (self.sourceSeedMix):
1042 <                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1043 <                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1044 <                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1045 <                        seedIndex += 1
1046 <                    pass
1047 <                pass
1048 <            txt += 'mv -f '+pset+' pset.cfg\n'
903 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
904  
1050        if len(self.additional_inbox_files) > 0:
1051            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1052            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1053            txt += 'fi\n'
1054            pass
905  
906 <        if self.pset != None: #CarlosDaniele
907 <            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1058 <        
1059 <            txt += '\n'
1060 <            txt += 'echo "***** cat pset.cfg *********"\n'
1061 <            txt += 'cat pset.cfg\n'
1062 <            txt += 'echo "****** end pset.cfg ********"\n'
906 >        if self.pset != None:
907 >            # FUTURE: Can simply for 2_1_x and higher
908              txt += '\n'
909 <            ### FEDE FOR DBS OUTPUT PUBLICATION
910 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
909 >            if self.debug_wrapper==True:
910 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
911 >                txt += 'cat ' + psetName + '\n'
912 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
913 >                txt += '\n'
914 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
915              txt += 'echo "PSETHASH = $PSETHASH" \n'
1067            ##############
916              txt += '\n'
1069            # txt += 'echo "***** cat pset1.cfg *********"\n'
1070            # txt += 'cat pset1.cfg\n'
1071            # txt += 'echo "****** end pset1.cfg ********"\n'
917          return txt
918  
919 <    def wsBuildExe(self, nj=0):
919 >    def wsUntarSoftware(self, nj=0):
920          """
921          Put in the script the commands to build an executable
922          or a library.
923          """
924  
925 <        txt = ""
925 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
926  
927          if os.path.isfile(self.tgzNameWithPath):
928 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
928 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
929              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
930 +            if  self.debug_wrapper:
931 +                txt += 'ls -Al \n'
932              txt += 'untar_status=$? \n'
933              txt += 'if [ $untar_status -ne 0 ]; then \n'
934 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
935 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
936 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1090 <            txt += '   if [ $middleware == OSG ]; then \n'
1091 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1092 <            txt += '       cd $RUNTIME_AREA\n'
1093 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
1094 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1095 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1096 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1097 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1098 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1099 <            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1100 <            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1101 <            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1102 <            txt += '       fi\n'
1103 <            txt += '   fi \n'
1104 <            txt += '   \n'
1105 <            txt += '   exit 1 \n'
934 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
935 >            txt += '   job_exit_code=$untar_status\n'
936 >            txt += '   func_exit\n'
937              txt += 'else \n'
938              txt += '   echo "Successful untar" \n'
939              txt += 'fi \n'
940              txt += '\n'
941 <            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
941 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
942              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
943 <            #### FEDE FOR DBS OUTPUT PUBLICATION
1113 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1114 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1115 <            #txt += '   export PYTHONPATH=ProdAgentApi\n'
943 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
944              txt += 'else\n'
945 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1118 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1119 <            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
945 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
946              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1121            ###################  
947              txt += 'fi\n'
948              txt += '\n'
949  
950              pass
951 <        
951 >
952          return txt
953  
954 <    def modifySteeringCards(self, nj):
954 >    def wsBuildExe(self, nj=0):
955          """
956 <        modify the card provided by the user,
957 <        writing a new card into share dir
956 >        Put in the script the commands to build an executable
957 >        or a library.
958          """
959 <        
959 >
960 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
961 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
962 >
963 >        txt += 'rm -r lib/ module/ \n'
964 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
965 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
966 >        if self.dataExist == True:
967 >            txt += 'rm -r src/ \n'
968 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
969 >        if len(self.additional_inbox_files)>0:
970 >            for file in self.additional_inbox_files:
971 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
972 >        txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
973 >        txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
974 >
975 >        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
976 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
977 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
978 >        txt += 'else\n'
979 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
980 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
981 >        txt += 'fi\n'
982 >        txt += '\n'
983 >
984 >        return txt
985 >
986 >
987      def executableName(self):
988 <        if self.scriptExe: #CarlosDaniele
988 >        if self.scriptExe:
989              return "sh "
990          else:
991              return self.executable
992  
993      def executableArgs(self):
994 +        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
995          if self.scriptExe:#CarlosDaniele
996              return   self.scriptExe + " $NJob"
997          else:
998 <            # if >= CMSSW_1_5_X, add -e
999 <            version_array = self.scram.getSWVersion().split('_')
1000 <            major = 0
1001 <            minor = 0
1002 <            try:
1003 <                major = int(version_array[1])
1004 <                minor = int(version_array[2])
1005 <            except:
1153 <                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1154 <                raise CrabException(msg)
1155 <            if major >= 1 and minor >= 5 :
1156 <                return " -e -p pset.cfg"
998 >            ex_args = ""
999 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
1000 >            # Framework job report
1001 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1002 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1003 >            # Type of config file
1004 >            if self.CMSSW_major >= 2 :
1005 >                ex_args += " -p pset.py"
1006              else:
1007 <                return " -p pset.cfg"
1007 >                ex_args += " -p pset.cfg"
1008 >            return ex_args
1009  
1010      def inputSandbox(self, nj):
1011          """
1012          Returns a list of filenames to be put in JDL input sandbox.
1013          """
1014          inp_box = []
1165        # # dict added to delete duplicate from input sandbox file list
1166        # seen = {}
1167        ## code
1015          if os.path.isfile(self.tgzNameWithPath):
1016              inp_box.append(self.tgzNameWithPath)
1017 <        if os.path.isfile(self.MLtgzfile):
1018 <            inp_box.append(self.MLtgzfile)
1172 <        ## config
1173 <        if not self.pset is None:
1174 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1175 <        ## additional input files
1176 <        tgz = self.additionalInputFileTgz()
1177 <        inp_box.append(tgz)
1017 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1018 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1019          return inp_box
1020  
1021      def outputSandbox(self, nj):
# Line 1185 | Line 1026 | class Cmssw(JobType):
1026  
1027          ## User Declared output files
1028          for out in (self.output_file+self.output_file_sandbox):
1029 <            n_out = nj + 1
1030 <            out_box.append(self.numberFile_(out,str(n_out)))
1029 >            n_out = nj + 1
1030 >            out_box.append(numberFile(out,str(n_out)))
1031          return out_box
1032  
1192    def prepareSteeringCards(self):
1193        """
1194        Make initial modifications of the user's steering card file.
1195        """
1196        return
1033  
1034      def wsRenameOutput(self, nj):
1035          """
1036          Returns part of a job script which renames the produced files.
1037          """
1038  
1039 <        txt = '\n'
1040 <        txt += '# directory content\n'
1041 <        txt += 'ls \n'
1042 <
1043 <        txt += 'output_exit_status=0\n'
1044 <        
1045 <        for fileWithSuffix in (self.output_file_sandbox):
1210 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1211 <            txt += '\n'
1212 <            txt += '# check output file\n'
1213 <            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1214 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1215 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1216 <            txt += 'else\n'
1217 <            txt += '    exit_status=60302\n'
1218 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1219 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1220 <                txt += '    if [ $middleware == OSG ]; then \n'
1221 <                txt += '        echo "prepare dummy output file"\n'
1222 <                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1223 <                txt += '    fi \n'
1224 <            txt += 'fi\n'
1225 <        
1039 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1040 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1041 >        txt += 'echo ">>> current directory content:"\n'
1042 >        if self.debug_wrapper:
1043 >            txt += 'ls -Al\n'
1044 >        txt += '\n'
1045 >
1046          for fileWithSuffix in (self.output_file):
1047 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1047 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
1048              txt += '\n'
1049              txt += '# check output file\n'
1050              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1051 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1052 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1051 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1052 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1053 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1054 >            else:
1055 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1056 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1057              txt += 'else\n'
1058 <            txt += '    exit_status=60302\n'
1059 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1060 <            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1237 <            txt += '    output_exit_status=$exit_status\n'
1238 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1058 >            txt += '    job_exit_code=60302\n'
1059 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1060 >            if common.scheduler.name().upper() == 'CONDOR_G':
1061                  txt += '    if [ $middleware == OSG ]; then \n'
1062                  txt += '        echo "prepare dummy output file"\n'
1063                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
# Line 1243 | Line 1065 | class Cmssw(JobType):
1065              txt += 'fi\n'
1066          file_list = []
1067          for fileWithSuffix in (self.output_file):
1068 <             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1069 <            
1068 >             file_list.append(numberFile(fileWithSuffix, '$NJob'))
1069 >
1070          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1071 +        txt += '\n'
1072 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1073 +        txt += 'echo ">>> current directory content:"\n'
1074 +        if self.debug_wrapper:
1075 +            txt += 'ls -Al\n'
1076 +        txt += '\n'
1077          txt += 'cd $RUNTIME_AREA\n'
1078 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1079          return txt
1080  
1252    def numberFile_(self, file, txt):
1253        """
1254        append _'txt' before last extension of a file
1255        """
1256        p = string.split(file,".")
1257        # take away last extension
1258        name = p[0]
1259        for x in p[1:-1]:
1260            name=name+"."+x
1261        # add "_txt"
1262        if len(p)>1:
1263            ext = p[len(p)-1]
1264            result = name + '_' + txt + "." + ext
1265        else:
1266            result = name + '_' + txt
1267        
1268        return result
1269
1081      def getRequirements(self, nj=[]):
1082          """
1083 <        return job requirements to add to jdl files
1083 >        return job requirements to add to jdl files
1084          """
1085          req = ''
1086          if self.version:
1087              req='Member("VO-cms-' + \
1088                   self.version + \
1089                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1090 <        ## SL add requirement for OS version only if SL4
1280 <        #reSL4 = re.compile( r'slc4' )
1281 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1090 >        if self.executable_arch:
1091              req+=' && Member("VO-cms-' + \
1092                   self.executable_arch + \
1093                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1094  
1095          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1096 +        if common.scheduler.name() == "glitecoll":
1097 +            req += ' && other.GlueCEStateStatus == "Production" '
1098  
1099          return req
1100  
1101      def configFilename(self):
1102          """ return the config filename """
1103 <        return self.name()+'.cfg'
1103 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1104 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1105 >          return self.name()+'.py'
1106 >        else:
1107 >          return self.name()+'.cfg'
1108  
1294    ### OLI_DANIELE
1109      def wsSetupCMSOSGEnvironment_(self):
1110          """
1111          Returns part of a job script which is prepares
1112          the execution environment and which is common for all CMS jobs.
1113          """
1114 <        txt = '\n'
1115 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1116 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1117 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1118 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1119 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1306 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1114 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1115 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1116 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1117 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1118 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1119 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1120          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1121 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1122 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1123 <        txt += '   else\n'
1124 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1125 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1126 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1314 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1315 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1316 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1317 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1318 <        txt += '       exit 1\n'
1319 <        txt += '\n'
1320 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1321 <        txt += '       cd $RUNTIME_AREA\n'
1322 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1323 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1324 <        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325 <        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1326 <        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1328 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1329 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1331 <        txt += '       fi\n'
1332 <        txt += '\n'
1333 <        txt += '       exit 1\n'
1334 <        txt += '   fi\n'
1121 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1122 >        txt += '    else\n'
1123 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1124 >        txt += '        job_exit_code=10020\n'
1125 >        txt += '        func_exit\n'
1126 >        txt += '    fi\n'
1127          txt += '\n'
1128 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1129 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1128 >        txt += '    echo "==> setup cms environment ok"\n'
1129 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1130  
1131          return txt
1132 <
1341 <    ### OLI_DANIELE
1132 >
1133      def wsSetupCMSLCGEnvironment_(self):
1134          """
1135          Returns part of a job script which is prepares
1136          the execution environment and which is common for all CMS jobs.
1137          """
1138 <        txt  = '   \n'
1139 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1140 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1141 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1142 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1143 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1144 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1145 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1146 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1147 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1148 <        txt += '       exit 1\n'
1149 <        txt += '   else\n'
1150 <        txt += '       echo "Sourcing environment... "\n'
1151 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1152 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1153 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1154 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1155 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1156 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1157 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1158 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1159 <        txt += '           exit 1\n'
1160 <        txt += '       fi\n'
1161 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1162 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1163 <        txt += '       result=$?\n'
1164 <        txt += '       if [ $result -ne 0 ]; then\n'
1374 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1375 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1376 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1377 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1378 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1379 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1380 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1381 <        txt += '           exit 1\n'
1382 <        txt += '       fi\n'
1383 <        txt += '   fi\n'
1384 <        txt += '   \n'
1385 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1386 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1138 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1139 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1140 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1141 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1142 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1143 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1144 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1145 >        txt += '        job_exit_code=10031\n'
1146 >        txt += '        func_exit\n'
1147 >        txt += '    else\n'
1148 >        txt += '        echo "Sourcing environment... "\n'
1149 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1150 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1151 >        txt += '            job_exit_code=10020\n'
1152 >        txt += '            func_exit\n'
1153 >        txt += '        fi\n'
1154 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1155 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1156 >        txt += '        result=$?\n'
1157 >        txt += '        if [ $result -ne 0 ]; then\n'
1158 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1159 >        txt += '            job_exit_code=10032\n'
1160 >        txt += '            func_exit\n'
1161 >        txt += '        fi\n'
1162 >        txt += '    fi\n'
1163 >        txt += '    \n'
1164 >        txt += '    echo "==> setup cms environment ok"\n'
1165          return txt
1166  
1389    ### FEDE FOR DBS OUTPUT PUBLICATION
1167      def modifyReport(self, nj):
1168          """
1169 <        insert the part of the script that modifies the FrameworkJob Report
1169 >        insert the part of the script that modifies the FrameworkJob Report
1170          """
1171 +        txt = '\n#Written by cms_cmssw::modifyReport\n'
1172 +        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1173 +        if (publish_data == 1):
1174 +            processedDataset = self.cfg_params['USER.publish_data_name']
1175 +            LFNBaseName = LFNBase(processedDataset)
1176  
1177 <        txt = ''
1178 <        try:
1179 <            publish_data = int(self.cfg_params['USER.publish_data'])          
1180 <        except KeyError:
1181 <            publish_data = 0
1182 <        if (publish_data == 1):  
1183 <            txt += 'echo "Modify Job Report" \n'
1402 <            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1403 <            ################ FEDE FOR DBS2 #############################################
1404 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1405 <            #############################################################################
1406 <            #try:
1407 <            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1408 <            #except KeyError:
1409 <            #    publish_data = 0
1410 <
1411 <            txt += 'if [ -z "$SE" ]; then\n'
1412 <            txt += '    SE="" \n'
1413 <            txt += 'fi \n'
1414 <            txt += 'if [ -z "$SE_PATH" ]; then\n'
1415 <            txt += '    SE_PATH="" \n'
1416 <            txt += 'fi \n'
1417 <            txt += 'echo "SE = $SE"\n'
1418 <            txt += 'echo "SE_PATH = $SE_PATH"\n'
1177 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1178 >            txt += '    FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
1179 >            txt += 'else\n'
1180 >            txt += '    FOR_LFN=/copy_problems/ \n'
1181 >            txt += '    SE=""\n'
1182 >            txt += '    SE_PATH=""\n'
1183 >            txt += 'fi\n'
1184  
1185 <        #if (publish_data == 1):  
1186 <            #processedDataset = self.cfg_params['USER.processed_datasetname']
1422 <            processedDataset = self.cfg_params['USER.publish_data_name']
1185 >            txt += 'echo ">>> Modify Job Report:" \n'
1186 >            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1187              txt += 'ProcessedDataset='+processedDataset+'\n'
1424            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1425            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1426            #### FEDE: added slash in LFN ##############
1427            txt += '    FOR_LFN=/copy_problems/ \n'
1428            txt += 'else \n'
1429            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1430            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1431            txt += '    FOR_LFN=/store$tmp \n'
1432            txt += 'fi \n'
1188              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1189 +            txt += 'echo "SE = $SE"\n'
1190 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1191              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1192              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1193 <            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1194 <            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1438 <            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1439 <            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1440 <      
1193 >            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1194 >            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1195              txt += 'modifyReport_result=$?\n'
1442            txt += 'echo modifyReport_result = $modifyReport_result\n'
1196              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1197 <            txt += '    exit_status=1\n'
1198 <            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1197 >            txt += '    modifyReport_result=70500\n'
1198 >            txt += '    job_exit_code=$modifyReport_result\n'
1199 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1200 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1201              txt += 'else\n'
1202 <            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1202 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1203              txt += 'fi\n'
1449        else:
1450            txt += 'echo "no data publication required"\n'
1451            #txt += 'ProcessedDataset=no_data_to_publish \n'
1452            #### FEDE: added slash in LFN ##############
1453            #txt += 'FOR_LFN=/local/ \n'
1454            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1455            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1204          return txt
1205  
1206 <    def cleanEnv(self):
1207 <        ### OLI_DANIELE
1208 <        txt = ''
1209 <        txt += 'if [ $middleware == OSG ]; then\n'  
1210 <        txt += '    cd $RUNTIME_AREA\n'
1211 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1212 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1213 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1214 <        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1215 <        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1216 <        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1217 <        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1218 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1219 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1220 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1206 >    def wsParseFJR(self):
1207 >        """
1208 >        Parse the FrameworkJobReport to obtain useful infos
1209 >        """
1210 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1211 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1212 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1213 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1214 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1215 >        if self.debug_wrapper :
1216 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1217 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1218 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1219 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1220 >        txt += '            cat $RUNTIME_AREA/crab_fjr_NJob.xml\n'
1221 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1222 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1223 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1224 >        txt += '        else\n'
1225 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1226 >        txt += '        fi\n'
1227 >        txt += '    else\n'
1228 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1229          txt += '    fi\n'
1230 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1231 +
1232 +        if self.datasetPath:
1233 +          # VERIFY PROCESSED DATA
1234 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1235 +            txt += '      echo ">>> Verify list of processed files:"\n'
1236 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1237 +            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1238 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1239 +            txt += '      mv tmp.txt input-files.txt\n'
1240 +            txt += '      echo "cat input-files.txt"\n'
1241 +            txt += '      echo "----------------------"\n'
1242 +            txt += '      cat input-files.txt\n'
1243 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1244 +            txt += '      mv tmp.txt processed-files.txt\n'
1245 +            txt += '      echo "----------------------"\n'
1246 +            txt += '      echo "cat processed-files.txt"\n'
1247 +            txt += '      echo "----------------------"\n'
1248 +            txt += '      cat processed-files.txt\n'
1249 +            txt += '      echo "----------------------"\n'
1250 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1251 +            txt += '      fileverify_status=$?\n'
1252 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1253 +            txt += '         executable_exit_status=30001\n'
1254 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1255 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1256 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1257 +            txt += '      fi\n'
1258 +            txt += '    fi\n'
1259 +            txt += '\n'
1260 +        txt += 'else\n'
1261 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1262          txt += 'fi\n'
1263          txt += '\n'
1264 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1265 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1266 +        txt += 'job_exit_code=$executable_exit_status\n'
1267 +
1268          return txt
1269  
1270      def setParam_(self, param, value):
# Line 1481 | Line 1273 | class Cmssw(JobType):
1273      def getParams(self):
1274          return self._params
1275  
1484    def setTaskid_(self):
1485        self._taskId = self.cfg_params['taskId']
1486        
1487    def getTaskid(self):
1488        return self._taskId
1489
1276      def uniquelist(self, old):
1277          """
1278          remove duplicates from a list
# Line 1496 | Line 1282 | class Cmssw(JobType):
1282              nd[e]=0
1283          return nd.keys()
1284  
1285 <
1500 <    def checkOut(self, limit):
1285 >    def outList(self):
1286          """
1287          check the dimension of the output files
1288          """
1289 <        txt = 'echo "*****************************************"\n'
1290 <        txt += 'echo "** Starting output sandbox limit check **"\n'
1506 <        txt += 'echo "*****************************************"\n'
1507 <        allOutFiles = ""
1289 >        txt = ''
1290 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1291          listOutFiles = []
1292 <        for fileOut in (self.output_file+self.output_file_sandbox):
1293 <             if fileOut.find('crab_fjr') == -1:
1294 <                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1295 <                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1296 <        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1297 <        txt += 'ls -gGhrta;\n'
1298 <        txt += 'sum=0;\n'
1299 <        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1300 <        txt += '    if [ -e $file ]; then\n'
1301 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1302 <        txt += '        sum=`expr $sum + $tt`\n'
1303 <        txt += '    else\n'
1304 <        txt += '        echo "WARNING: output file $file not found!"\n'
1305 <        txt += '    fi\n'
1306 <        txt += 'done\n'
1524 <        txt += 'echo "Total Output dimension: $sum";\n'
1525 <        txt += 'limit='+str(limit)+';\n'
1526 <        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1527 <        txt += 'if [ $limit -lt $sum ]; then\n'
1528 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1529 <        txt += '    echo "         checking the output file sizes..."\n'
1530 <        """
1531 <        txt += '    dim=0;\n'
1532 <        txt += '    exclude=0;\n'
1533 <        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1534 <        txt += '        sumTemp=0;\n'
1535 <        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1536 <        txt += '            if [ $file != $file2 ]; then\n'
1537 <        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1538 <        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1539 <        txt += '            fi\n'
1540 <        txt += '        done\n'
1541 <        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1542 <        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1543 <        txt += '                dim=$sumTemp;\n'
1544 <        txt += '                exclude=$file;\n'
1545 <        txt += '            fi\n'
1546 <        txt += '        fi\n'
1547 <        txt += '    done\n'
1548 <        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1549 <        """
1550 <        txt += '    tot=0;\n'
1551 <        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1552 <        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1553 <        txt += '        tot=`expr $tot + $tt`;\n'
1554 <        txt += '        if [ $limit -lt $tot ]; then\n'
1555 <        txt += '            tot=`expr $tot - $tt`;\n'
1556 <        txt += '            fileLast=$file;\n'
1557 <        txt += '            break;\n'
1558 <        txt += '        fi\n'
1559 <        txt += '    done\n'
1560 <        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1561 <        txt += '    flag=0;\n'    
1562 <        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1563 <        txt += '        if [ $fileLast = $filess ]; then\n'
1564 <        txt += '            flag=1;\n'
1565 <        txt += '        fi\n'
1566 <        txt += '        if [ $flag -eq 1 ]; then\n'
1567 <        txt += '            rm -f $filess;\n'
1568 <        txt += '        fi\n'
1569 <        txt += '    done\n'
1570 <        txt += '    ls -agGhrt;\n'
1571 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1572 <        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1573 <        txt += '    exit_status=70000;\n'
1574 <        txt += 'else'
1575 <        txt += '    echo "Total Output dimension $sum is fine.";\n'
1576 <        txt += 'fi\n'
1577 <        txt += 'echo "*****************************************"\n'
1578 <        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1579 <        txt += 'echo "*****************************************"\n'
1292 >        stdout = 'CMSSW_$NJob.stdout'
1293 >        stderr = 'CMSSW_$NJob.stderr'
1294 >        if (self.return_data == 1):
1295 >            for file in (self.output_file+self.output_file_sandbox):
1296 >                listOutFiles.append(numberFile(file, '$NJob'))
1297 >            listOutFiles.append(stdout)
1298 >            listOutFiles.append(stderr)
1299 >        else:
1300 >            for file in (self.output_file_sandbox):
1301 >                listOutFiles.append(numberFile(file, '$NJob'))
1302 >            listOutFiles.append(stdout)
1303 >            listOutFiles.append(stderr)
1304 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1305 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1306 >        txt += 'export filesToCheck\n'
1307          return txt

Diff Legend

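Removed lines (no marker; numbered with the old revision's line number, whereas unchanged context lines carry the new revision's line number)
+ Added lines (present only in the new revision)
< Changed lines (text as it was in the old revision)
> Changed lines (text as it is in the new revision)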