ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.128 by fanzago, Thu Oct 11 16:23:44 2007 UTC vs.
Revision 1.226 by ewv, Thu Jul 3 19:30:07 2008 UTC

# Line 5 | Line 5 | from crab_util import *
5   from BlackWhiteListParser import BlackWhiteListParser
6   import common
7   import Scram
8 + from LFNBaseName import *
9  
10   import os, string, glob
11  
12   class Cmssw(JobType):
13 <    def __init__(self, cfg_params, ncjobs):
13 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
14          JobType.__init__(self, 'CMSSW')
15          common.logger.debug(3,'CMSSW::__init__')
16 +        self.skip_blocks = skip_blocks
17 +
18 +        self.argsList = []
19  
20          self._params = {}
21          self.cfg_params = cfg_params
18
22          # init BlackWhiteListParser
23          self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
24  
25 <        try:
23 <            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 <        except KeyError:
25 <            self.MaxTarBallSize = 9.5
25 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
26  
27          # number of jobs requested to be created, limit obj splitting
28          self.ncjobs = ncjobs
29  
30          log = common.logger
31 <        
31 >
32          self.scram = Scram.Scram(cfg_params)
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36          self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38        self.additional_tgz_name = 'additional.tgz'
38          self.scriptName = 'CMSSW.sh'
39 <        self.pset = ''      #scrip use case Da  
40 <        self.datasetPath = '' #scrip use case Da
39 >        self.pset = ''
40 >        self.datasetPath = ''
41  
42          # set FJR file name
43          self.fjrFileName = 'crab_fjr.xml'
44  
45          self.version = self.scram.getSWVersion()
46 <        
47 <        #
48 <        # Try to block creation in case of arch/version mismatch
49 <        #
50 <
51 <        a = string.split(self.version, "_")
52 <
53 <        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
54 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
55 <            raise CrabException(msg)
57 <        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 <            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
46 >        version_array = self.version.split('_')
47 >        self.CMSSW_major = 0
48 >        self.CMSSW_minor = 0
49 >        self.CMSSW_patch = 0
50 >        try:
51 >            self.CMSSW_major = int(version_array[1])
52 >            self.CMSSW_minor = int(version_array[2])
53 >            self.CMSSW_patch = int(version_array[3])
54 >        except:
55 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
56              raise CrabException(msg)
60        
61        common.taskDB.setDict('codeVersion',self.version)
62        self.setParam_('application', self.version)
57  
58          ### collect Data cards
59  
60 <        ## get DBS mode
61 <        try:
68 <            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 <        except KeyError:
70 <            self.use_dbs_1 = 0
71 <            
72 <        try:
73 <            tmp =  cfg_params['CMSSW.datasetpath']
74 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75 <            if string.lower(tmp)=='none':
76 <                self.datasetPath = None
77 <                self.selectNoInput = 1
78 <            else:
79 <                self.datasetPath = tmp
80 <                self.selectNoInput = 0
81 <        except KeyError:
82 <            msg = "Error: datasetpath not defined "  
60 >        if not cfg_params.has_key('CMSSW.datasetpath'):
61 >            msg = "Error: datasetpath not defined "
62              raise CrabException(msg)
63  
64 <        # ML monitoring
65 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
66 <        if not self.datasetPath:
67 <            self.setParam_('dataset', 'None')
68 <            self.setParam_('owner', 'None')
64 >        ### Temporary: added to remove input file control in the case of PU
65 >        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
66 >
67 >        tmp =  cfg_params['CMSSW.datasetpath']
68 >        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 >        if string.lower(tmp)=='none':
70 >            self.datasetPath = None
71 >            self.selectNoInput = 1
72          else:
73 <            try:
74 <                datasetpath_split = self.datasetPath.split("/")
93 <                # standard style
94 <                self.setParam_('datasetFull', self.datasetPath)
95 <                if self.use_dbs_1 == 1 :
96 <                    self.setParam_('dataset', datasetpath_split[1])
97 <                    self.setParam_('owner', datasetpath_split[-1])
98 <                else:
99 <                    self.setParam_('dataset', datasetpath_split[1])
100 <                    self.setParam_('owner', datasetpath_split[2])
101 <            except:
102 <                self.setParam_('dataset', self.datasetPath)
103 <                self.setParam_('owner', self.datasetPath)
104 <                
105 <        self.setTaskid_()
106 <        self.setParam_('taskId', self.cfg_params['taskId'])
73 >            self.datasetPath = tmp
74 >            self.selectNoInput = 0
75  
76          self.dataTiers = []
77 <
77 >        self.debugWrap = ''
78 >        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
79 >        if self.debug_wrapper: self.debugWrap='--debug'
80          ## now the application
81 <        try:
82 <            self.executable = cfg_params['CMSSW.executable']
113 <            self.setParam_('exe', self.executable)
114 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
115 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
116 <            log.debug(3,msg)
117 <        except KeyError:
118 <            self.executable = 'cmsRun'
119 <            self.setParam_('exe', self.executable)
120 <            msg = "User executable not defined. Use cmsRun"
121 <            log.debug(3,msg)
122 <            pass
81 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
82 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
83  
84 <        try:
125 <            self.pset = cfg_params['CMSSW.pset']
126 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if self.pset.lower() != 'none' :
128 <                if (not os.path.exists(self.pset)):
129 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 <            else:
131 <                self.pset = None
132 <        except KeyError:
84 >        if not cfg_params.has_key('CMSSW.pset'):
85              raise CrabException("PSet file missing. Cannot run cmsRun ")
86 +        self.pset = cfg_params['CMSSW.pset']
87 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
88 +        if self.pset.lower() != 'none' :
89 +            if (not os.path.exists(self.pset)):
90 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
91 +        else:
92 +            self.pset = None
93  
94          # output files
95          ## stuff which must be returned always via sandbox
# Line 140 | Line 99 | class Cmssw(JobType):
99          self.output_file_sandbox.append(self.fjrFileName)
100  
101          # other output files to be returned via sandbox or copied to SE
102 <        try:
103 <            self.output_file = []
104 <            tmp = cfg_params['CMSSW.output_file']
105 <            if tmp != '':
106 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
107 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
108 <                for tmp in tmpOutFiles:
109 <                    tmp=string.strip(tmp)
151 <                    self.output_file.append(tmp)
152 <                    pass
153 <            else:
154 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155 <                pass
156 <            pass
157 <        except KeyError:
158 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159 <            pass
102 >        outfileflag = False
103 >        self.output_file = []
104 >        tmp = cfg_params.get('CMSSW.output_file',None)
105 >        if tmp :
106 >            self.output_file = [x.strip() for x in tmp.split(',')]
107 >            outfileflag = True #output found
108 >        #else:
109 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
110  
111          # script_exe file as additional file in inputSandbox
112 <        try:
113 <            self.scriptExe = cfg_params['USER.script_exe']
114 <            if self.scriptExe != '':
115 <               if not os.path.isfile(self.scriptExe):
116 <                  msg ="ERROR. file "+self.scriptExe+" not found"
117 <                  raise CrabException(msg)
168 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
169 <        except KeyError:
170 <            self.scriptExe = ''
112 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
113 >        if self.scriptExe :
114 >            if not os.path.isfile(self.scriptExe):
115 >                msg ="ERROR. file "+self.scriptExe+" not found"
116 >                raise CrabException(msg)
117 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
118  
172        #CarlosDaniele
119          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
120 <           msg ="Error. script_exe  not defined"
121 <           raise CrabException(msg)
120 >            msg ="Error. script_exe  not defined"
121 >            raise CrabException(msg)
122 >
123 >        # use parent files...
124 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
125  
126          ## additional input files
127 <        try:
127 >        if cfg_params.has_key('USER.additional_input_files'):
128              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
129              for tmp in tmpAddFiles:
130                  tmp = string.strip(tmp)
# Line 192 | Line 141 | class Cmssw(JobType):
141                      if not os.path.exists(file):
142                          raise CrabException("Additional input file not found: "+file)
143                      pass
195                    # fname = string.split(file, '/')[-1]
196                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197                    # shutil.copyfile(file, storedFile)
144                      self.additional_inbox_files.append(string.strip(file))
145                  pass
146              pass
147              common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
148 <        except KeyError:
203 <            pass
204 <
205 <        # files per job
206 <        try:
207 <            if (cfg_params['CMSSW.files_per_jobs']):
208 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209 <        except KeyError:
210 <            pass
148 >        pass
149  
150          ## Events per job
151 <        try:
151 >        if cfg_params.has_key('CMSSW.events_per_job'):
152              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
153              self.selectEventsPerJob = 1
154 <        except KeyError:
154 >        else:
155              self.eventsPerJob = -1
156              self.selectEventsPerJob = 0
157 <    
157 >
158          ## number of jobs
159 <        try:
159 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
160              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
161              self.selectNumberOfJobs = 1
162 <        except KeyError:
162 >        else:
163              self.theNumberOfJobs = 0
164              self.selectNumberOfJobs = 0
165  
166 <        try:
166 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
167              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
168              self.selectTotalNumberEvents = 1
169 <        except KeyError:
169 >            if self.selectNumberOfJobs  == 1:
170 >                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
171 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
172 >                    raise CrabException(msg)
173 >        else:
174              self.total_number_of_events = 0
175              self.selectTotalNumberEvents = 0
176  
177 <        if self.pset != None: #CarlosDaniele
177 >        if self.pset != None:
178               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
179                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
180                   raise CrabException(msg)
# Line 241 | Line 183 | class Cmssw(JobType):
183                   msg = 'Must specify  number_of_jobs.'
184                   raise CrabException(msg)
185  
186 <        ## source seed for pythia
187 <        try:
188 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
189 <        except KeyError:
190 <            self.sourceSeed = None
191 <            common.logger.debug(5,"No seed given")
192 <
193 <        try:
194 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
195 <        except KeyError:
196 <            self.sourceSeedVtx = None
197 <            common.logger.debug(5,"No vertex seed given")
198 <
199 <        try:
200 <            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
201 <        except KeyError:
202 <            self.sourceSeedG4 = None
203 <            common.logger.debug(5,"No g4 sim hits seed given")
204 <
205 <        try:
206 <            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
207 <        except KeyError:
208 <            self.sourceSeedMix = None
209 <            common.logger.debug(5,"No mix seed given")
210 <
211 <        try:
212 <            self.firstRun = int(cfg_params['CMSSW.first_run'])
213 <        except KeyError:
214 <            self.firstRun = None
215 <            common.logger.debug(5,"No first run given")
216 <        if self.pset != None: #CarlosDaniele
217 <            ver = string.split(self.version,"_")
218 <            if (int(ver[1])>=1 and int(ver[2])>=5):
219 <                import PsetManipulator150 as pp
220 <            else:
221 <                import PsetManipulator as pp
222 <            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
186 >        ## New method of dealing with seeds
187 >        self.incrementSeeds = []
188 >        self.preserveSeeds = []
189 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
190 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
191 >            for tmp in tmpList:
192 >                tmp.strip()
193 >                self.preserveSeeds.append(tmp)
194 >        if cfg_params.has_key('CMSSW.increment_seeds'):
195 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
196 >            for tmp in tmpList:
197 >                tmp.strip()
198 >                self.incrementSeeds.append(tmp)
199 >
200 >        ## Old method of dealing with seeds
201 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
202 >        ## remove
203 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
204 >        if self.sourceSeed:
205 >            print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
206 >            self.incrementSeeds.append('sourceSeed')
207 >            self.incrementSeeds.append('theSource')
208 >
209 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
210 >        if self.sourceSeedVtx:
211 >            print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
212 >            self.incrementSeeds.append('VtxSmeared')
213 >
214 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
215 >        if self.sourceSeedG4:
216 >            print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
217 >            self.incrementSeeds.append('g4SimHits')
218 >
219 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
220 >        if self.sourceSeedMix:
221 >            print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
222 >            self.incrementSeeds.append('mix')
223 >
224 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
225 >
226 >
227 >        # Copy/return
228 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
229 >        self.return_data = int(cfg_params.get('USER.return_data',0))
230  
231          #DBSDLS-start
232 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
232 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
233          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
234          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
235          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 289 | Line 238 | class Cmssw(JobType):
238          blockSites = {}
239          if self.datasetPath:
240              blockSites = self.DataDiscoveryAndLocation(cfg_params)
241 <        #DBSDLS-end          
241 >        #DBSDLS-end
242  
294        self.tgzNameWithPath = self.getTarBall(self.executable)
295    
243          ## Select Splitting
244 <        if self.selectNoInput:
245 <            if self.pset == None: #CarlosDaniele
244 >        if self.selectNoInput:
245 >            if self.pset == None:
246                  self.jobSplittingForScript()
247              else:
248                  self.jobSplittingNoInput()
249          else:
250              self.jobSplittingByBlocks(blockSites)
251  
252 <        # modify Pset
253 <        if self.pset != None: #CarlosDaniele
254 <            try:
255 <                if (self.datasetPath): # standard job
256 <                    # allow to processa a fraction of events in a file
257 <                    PsetEdit.inputModule("INPUT")
258 <                    PsetEdit.maxEvent("INPUTMAXEVENTS")
259 <                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
260 <                else:  # pythia like job
252 >        # modify Pset only the first time
253 >        if isNew:
254 >            if self.pset != None:
255 >                import PsetManipulator as pp
256 >                PsetEdit = pp.PsetManipulator(self.pset)
257 >                try:
258 >                    # Add FrameworkJobReport to parameter-set, set max events.
259 >                    # Reset later for data jobs by writeCFG which does all modifications
260 >                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
261                      PsetEdit.maxEvent(self.eventsPerJob)
262 <                    if (self.firstRun):
263 <                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
264 <                    if (self.sourceSeed) :
265 <                        PsetEdit.pythiaSeed("INPUT")
266 <                        if (self.sourceSeedVtx) :
267 <                            PsetEdit.vtxSeed("INPUTVTX")
268 <                        if (self.sourceSeedG4) :
269 <                            PsetEdit.g4Seed("INPUTG4")
270 <                        if (self.sourceSeedMix) :
271 <                            PsetEdit.mixSeed("INPUTMIX")
272 <                # add FrameworkJobReport to parameter-set
273 <                PsetEdit.addCrabFJR(self.fjrFileName)
274 <                PsetEdit.psetWriter(self.configFilename())
275 <            except:
276 <                msg='Error while manipuliating ParameterSet: exiting...'
277 <                raise CrabException(msg)
262 >                    PsetEdit.psetWriter(self.configFilename())
263 >                    ## If present, add TFileService to output files
264 >                    if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
265 >                        tfsOutput = PsetEdit.getTFileService()
266 >                        if tfsOutput:
267 >                            if tfsOutput in self.output_file:
268 >                                common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
269 >                            else:
270 >                                outfileflag = True #output found
271 >                                self.output_file.append(tfsOutput)
272 >                                common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
273 >                            pass
274 >                        pass
275 >                    ## If present and requested, add PoolOutputModule to output files
276 >                    if int(cfg_params.get('CMSSW.get_edm_output',0)):
277 >                        edmOutput = PsetEdit.getPoolOutputModule()
278 >                        if edmOutput:
279 >                            if edmOutput in self.output_file:
280 >                                common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
281 >                            else:
282 >                                self.output_file.append(edmOutput)
283 >                                common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
284 >                            pass
285 >                        pass
286 >                except CrabException:
287 >                    msg='Error while manipulating ParameterSet: exiting...'
288 >                    raise CrabException(msg)
289 >            ## Prepare inputSandbox TarBall (only the first time)
290 >            self.tgzNameWithPath = self.getTarBall(self.executable)
291  
292      def DataDiscoveryAndLocation(self, cfg_params):
293  
294          import DataDiscovery
335        import DataDiscovery_DBS2
295          import DataLocation
296          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
297  
# Line 341 | Line 300 | class Cmssw(JobType):
300          ## Contact the DBS
301          common.logger.message("Contacting Data Discovery Services ...")
302          try:
303 <
345 <            if self.use_dbs_1 == 1 :
346 <                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 <            else :
348 <                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
303 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
304              self.pubdata.fetchDBSInfo()
305  
306          except DataDiscovery.NotExistingDatasetError, ex :
# Line 357 | Line 312 | class Cmssw(JobType):
312          except DataDiscovery.DataDiscoveryError, ex:
313              msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
314              raise CrabException(msg)
360        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362            raise CrabException(msg)
363        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365            raise CrabException(msg)
366        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368            raise CrabException(msg)
315  
316          self.filesbyblock=self.pubdata.getFiles()
317          self.eventsbyblock=self.pubdata.getEventsPerBlock()
318          self.eventsbyfile=self.pubdata.getEventsPerFile()
319 +        self.parentFiles=self.pubdata.getParent()
320  
321          ## get max number of events
322 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
322 >        self.maxEvents=self.pubdata.getMaxEvents()
323  
324          ## Contact the DLS and build a list of sites hosting the fileblocks
325          try:
# Line 381 | Line 328 | class Cmssw(JobType):
328          except DataLocation.DataLocationError , ex:
329              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
330              raise CrabException(msg)
331 <        
331 >
332  
333          sites = dataloc.getSites()
334          allSites = []
# Line 395 | Line 342 | class Cmssw(JobType):
342          common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
343  
344          return sites
345 <    
345 >
346      def jobSplittingByBlocks(self, blockSites):
347          """
348          Perform job splitting. Jobs run over an integer number of files
# Line 445 | Line 392 | class Cmssw(JobType):
392              totalNumberOfJobs = 999999999
393          else :
394              totalNumberOfJobs = self.ncjobs
448            
395  
396          blocks = blockSites.keys()
397          blockCount = 0
# Line 465 | Line 411 | class Cmssw(JobType):
411              blockCount += 1
412              if block not in jobsOfBlock.keys() :
413                  jobsOfBlock[block] = []
414 <            
414 >
415              if self.eventsbyblock.has_key(block) :
416                  numEventsInBlock = self.eventsbyblock[block]
417                  common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
418 <            
418 >
419                  files = self.filesbyblock[block]
420                  numFilesInBlock = len(files)
421                  if (numFilesInBlock <= 0):
# Line 477 | Line 423 | class Cmssw(JobType):
423                  fileCount = 0
424  
425                  # ---- New block => New job ---- #
426 <                parString = "\\{"
426 >                parString = ""
427                  # counter for number of events in files currently worked on
428                  filesEventCount = 0
429                  # flag if next while loop should touch new file
430                  newFile = 1
431                  # job event counter
432                  jobSkipEventCount = 0
433 <            
433 >
434                  # ---- Iterate over the files in the block until we've met the requested ---- #
435                  # ---- total # of events or we've gone over all the files in this block  ---- #
436 +                pString=''
437                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
438                      file = files[fileCount]
439 +                    if self.useParent:
440 +                        parent = self.parentFiles[file]
441 +                        for f in parent :
442 +                            pString += '\\\"' + f + '\\\"\,'
443 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
444 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
445                      if newFile :
446                          try:
447                              numEventsInFile = self.eventsbyfile[file]
# Line 500 | Line 453 | class Cmssw(JobType):
453                              newFile = 0
454                          except KeyError:
455                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503                        
456  
457 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
458                      # if less events in file remain than eventsPerJobRequested
459 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
459 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
460                          # if last file in block
461                          if ( fileCount == numFilesInBlock-1 ) :
462                              # end job using last file, use remaining events in block
463                              # close job and touch new file
464                              fullString = parString[:-2]
465 <                            fullString += '\\}'
466 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
465 >                            if self.useParent:
466 >                                fullParentString = pString[:-2]
467 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
468 >                            else:
469 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
470                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
471                              self.jobDestination.append(blockSites[block])
472                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 522 | Line 478 | class Cmssw(JobType):
478                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
479                              jobSkipEventCount = 0
480                              # reset file
481 <                            parString = "\\{"
481 >                            pString = ""
482 >                            parString = ""
483                              filesEventCount = 0
484                              newFile = 1
485                              fileCount += 1
# Line 534 | Line 491 | class Cmssw(JobType):
491                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
492                          # close job and touch new file
493                          fullString = parString[:-2]
494 <                        fullString += '\\}'
495 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
494 >                        if self.useParent:
495 >                            fullParentString = pString[:-2]
496 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
497 >                        else:
498 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
499                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
500                          self.jobDestination.append(blockSites[block])
501                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 546 | Line 506 | class Cmssw(JobType):
506                          eventsRemaining = eventsRemaining - eventsPerJobRequested
507                          jobSkipEventCount = 0
508                          # reset file
509 <                        parString = "\\{"
509 >                        pString = ""
510 >                        parString = ""
511                          filesEventCount = 0
512                          newFile = 1
513                          fileCount += 1
514 <                        
514 >
515                      # if more events in file remain than eventsPerJobRequested
516                      else :
517                          # close job but don't touch new file
518                          fullString = parString[:-2]
519 <                        fullString += '\\}'
520 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
519 >                        if self.useParent:
520 >                            fullParentString = pString[:-2]
521 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
522 >                        else:
523 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
524                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
525                          self.jobDestination.append(blockSites[block])
526                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
# Line 570 | Line 534 | class Cmssw(JobType):
534                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
535                          # remove all but the last file
536                          filesEventCount = self.eventsbyfile[file]
537 <                        parString = "\\{"
538 <                        parString += '\\\"' + file + '\\\"\,'
537 >                        if self.useParent:
538 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
539 >                        parString = '\\\"' + file + '\\\"\,'
540                      pass # END if
541                  pass # END while (iterate over files in the block)
542          pass # END while (iterate over blocks in the dataset)
# Line 579 | Line 544 | class Cmssw(JobType):
544          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
545              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
546          common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
547 <        
547 >
548          # screen output
549          screenOutput = "List of jobs and available destination sites:\n\n"
550  
# Line 591 | Line 556 | class Cmssw(JobType):
556          for block in blocks:
557              if block in jobsOfBlock.keys() :
558                  blockCounter += 1
559 <                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
559 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
560 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
561                  if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
562 <                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
562 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
563                      bloskNoSite.append( blockCounter )
564 <        
564 >
565          common.logger.message(screenOutput)
566          if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
567              msg = 'WARNING: No sites are hosting any part of data for block:\n                '
# Line 611 | Line 577 | class Cmssw(JobType):
577              for range_jobs in noSiteBlock:
578                  msg += str(range_jobs) + virgola
579              msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
580 +            if self.cfg_params.has_key('EDG.se_white_list'):
581 +                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
582 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
583 +                msg += 'Please check if the dataset is available at this site!)\n'
584 +            if self.cfg_params.has_key('EDG.ce_white_list'):
585 +                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
586 +                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
587 +                msg += 'Please check if the dataset is available at this site!)\n'
588 +
589              common.logger.message(msg)
590  
591          self.list_of_args = list_of_lists
# Line 621 | Line 596 | class Cmssw(JobType):
596          Perform job splitting based on number of event per job
597          """
598          common.logger.debug(5,'Splitting per events')
599 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
600 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
601 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
599 >
600 >        if (self.selectEventsPerJob):
601 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
602 >        if (self.selectNumberOfJobs):
603 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
604 >        if (self.selectTotalNumberEvents):
605 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
606  
607          if (self.total_number_of_events < 0):
608              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 632 | Line 611 | class Cmssw(JobType):
611          if (self.selectEventsPerJob):
612              if (self.selectTotalNumberEvents):
613                  self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
614 <            elif(self.selectNumberOfJobs) :  
614 >            elif(self.selectNumberOfJobs) :
615                  self.total_number_of_jobs =self.theNumberOfJobs
616 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
616 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
617  
618          elif (self.selectNumberOfJobs) :
619              self.total_number_of_jobs = self.theNumberOfJobs
620              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
621 <
621 >
622          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
623  
624          # is there any remainder?
# Line 655 | Line 634 | class Cmssw(JobType):
634          self.list_of_args = []
635          for i in range(self.total_number_of_jobs):
636              ## Since there is no input, any site is good
637 <           # self.jobDestination.append(["Any"])
659 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
637 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
638              args=[]
639              if (self.firstRun):
640 <                    ## pythia first run
663 <                #self.list_of_args.append([(str(self.firstRun)+str(i))])
640 >                ## pythia first run
641                  args.append(str(self.firstRun)+str(i))
665            else:
666                ## no first run
667                #self.list_of_args.append([str(i)])
668                args.append(str(i))
669            if (self.sourceSeed):
670                args.append(str(self.sourceSeed)+str(i))
671                if (self.sourceSeedVtx):
672                    ## + vtx random seed
673                    args.append(str(self.sourceSeedVtx)+str(i))
674                if (self.sourceSeedG4):
675                    ## + G4 random seed
676                    args.append(str(self.sourceSeedG4)+str(i))
677                if (self.sourceSeedMix):    
678                    ## + Mix random seed
679                    args.append(str(self.sourceSeedMix)+str(i))
680                pass
681            pass
642              self.list_of_args.append(args)
683        pass
684            
685        # print self.list_of_args
643  
644          return
645  
646  
647 <    def jobSplittingForScript(self):#CarlosDaniele
647 >    def jobSplittingForScript(self):
648          """
649          Perform job splitting based on number of job
650          """
# Line 703 | Line 660 | class Cmssw(JobType):
660          # argument is seed number.$i
661          self.list_of_args = []
662          for i in range(self.total_number_of_jobs):
706            ## Since there is no input, any site is good
707           # self.jobDestination.append(["Any"])
663              self.jobDestination.append([""])
709            ## no random seed
664              self.list_of_args.append([str(i)])
665          return
666  
667 <    def split(self, jobParams):
668 <
715 <        common.jobDB.load()
716 <        #### Fabio
667 >    def split(self, jobParams,firstJobID):
668 >
669          njobs = self.total_number_of_jobs
670          arglist = self.list_of_args
671          # create the empty structure
672          for i in range(njobs):
673              jobParams.append("")
722        
723        for job in range(njobs):
724            jobParams[job] = arglist[job]
725            # print str(arglist[job])
726            # print jobParams[job]
727            common.jobDB.setArguments(job, jobParams[job])
728            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
729            common.jobDB.setDestination(job, self.jobDestination[job])
674  
675 <        common.jobDB.save()
675 >        listID=[]
676 >        listField=[]
677 >        for id in range(njobs):
678 >            job = id + int(firstJobID)
679 >            jobParams[id] = arglist[id]
680 >            listID.append(job+1)
681 >            job_ToSave ={}
682 >            concString = ' '
683 >            argu=''
684 >            if len(jobParams[id]):
685 >                argu +=   concString.join(jobParams[id] )
686 >            job_ToSave['arguments']= str(job+1)+' '+argu
687 >            job_ToSave['dlsDestination']= self.jobDestination[id]
688 >            listField.append(job_ToSave)
689 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
690 >            +"                     Destination: "+str(self.jobDestination[id])
691 >            common.logger.debug(5,msg)
692 >        common._db.updateJob_(listID,listField)
693 >        self.argsList = (len(jobParams[0])+1)
694 >
695          return
696 <    
734 <    def getJobTypeArguments(self, nj, sched):
735 <        result = ''
736 <        for i in common.jobDB.arguments(nj):
737 <            result=result+str(i)+" "
738 <        return result
739 <  
696 >
697      def numberOfJobs(self):
741        # Fabio
698          return self.total_number_of_jobs
699  
700      def getTarBall(self, exe):
701          """
702          Return the TarBall with lib and exe
703          """
748        
749        # if it exist, just return it
750        #
751        # Marco. Let's start to use relative path for Boss XML files
752        #
704          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
705          if os.path.exists(self.tgzNameWithPath):
706              return self.tgzNameWithPath
# Line 763 | Line 714 | class Cmssw(JobType):
714  
715          # First of all declare the user Scram area
716          swArea = self.scram.getSWArea_()
766        #print "swArea = ", swArea
767        # swVersion = self.scram.getSWVersion()
768        # print "swVersion = ", swVersion
717          swReleaseTop = self.scram.getReleaseTop_()
718 <        #print "swReleaseTop = ", swReleaseTop
771 <        
718 >
719          ## check if working area is release top
720          if swReleaseTop == '' or swArea == swReleaseTop:
721 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
722              return
723  
724          import tarfile
# Line 781 | Line 729 | class Cmssw(JobType):
729                  exeWithPath = self.scram.findFile_(executable)
730                  if ( not exeWithPath ):
731                      raise CrabException('User executable '+executable+' not found')
732 <    
732 >
733                  ## then check if it's private or not
734                  if exeWithPath.find(swReleaseTop) == -1:
735                      # the exe is private, so we must ship
# Line 790 | Line 738 | class Cmssw(JobType):
738                      # distinguish case when script is in user project area or given by full path somewhere else
739                      if exeWithPath.find(path) >= 0 :
740                          exe = string.replace(exeWithPath, path,'')
741 <                        tar.add(path+exe,os.path.basename(executable))
741 >                        tar.add(path+exe,exe)
742                      else :
743                          tar.add(exeWithPath,os.path.basename(executable))
744                      pass
745                  else:
746                      # the exe is from release, we'll find it on WN
747                      pass
748 <    
748 >
749              ## Now get the libraries: only those in local working area
750              libDir = 'lib'
751              lib = swArea+'/' +libDir
752              common.logger.debug(5,"lib "+lib+" to be tarred")
753              if os.path.exists(lib):
754                  tar.add(lib,libDir)
755 <    
755 >
756              ## Now check if module dir is present
757              moduleDir = 'module'
758              module = swArea + '/' + moduleDir
# Line 812 | Line 760 | class Cmssw(JobType):
760                  tar.add(module,moduleDir)
761  
762              ## Now check if any data dir(s) is present
763 <            swAreaLen=len(swArea)
764 <            for root, dirs, files in os.walk(swArea):
765 <                if "data" in dirs:
766 <                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
767 <                    tar.add(root+"/data",root[swAreaLen:]+"/data")
768 <
769 <            ## Add ProdAgent dir to tar
770 <            paDir = 'ProdAgentApi'
771 <            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
772 <            if os.path.isdir(pa):
773 <                tar.add(pa,paDir)
774 <
775 <            ### FEDE FOR DBS PUBLICATION
776 <            ## Add PRODCOMMON dir to tar
777 <            prodcommonDir = 'ProdCommon'
778 <            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
779 <            if os.path.isdir(prodcommonPath):
780 <                tar.add(prodcommonPath,prodcommonDir)
781 <            #############################    
782 <        
763 >            self.dataExist = False
764 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
765 >            while len(todo_list):
766 >                entry, name = todo_list.pop()
767 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
768 >                    continue
769 >                if os.path.isdir(swArea+"/src/"+entry):
770 >                    entryPath = entry + '/'
771 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
772 >                    if name == 'data':
773 >                        self.dataExist=True
774 >                        common.logger.debug(5,"data "+entry+" to be tarred")
775 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
776 >                    pass
777 >                pass
778 >
779 >            ### CMSSW ParameterSet
780 >            if not self.pset is None:
781 >                cfg_file = common.work_space.jobDir()+self.configFilename()
782 >                tar.add(cfg_file,self.configFilename())
783 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
784 >
785 >
786 >            ## Add ProdCommon dir to tar
787 >            prodcommonDir = './'
788 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
789 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
790 >            for file in neededStuff:
791 >                tar.add(prodcommonPath+file,prodcommonDir+file)
792 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
793 >
794 >            ##### ML stuff
795 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
796 >            path=os.environ['CRABDIR'] + '/python/'
797 >            for file in ML_file_list:
798 >                tar.add(path+file,file)
799              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
800 +
801 +            ##### Utils
802 +            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
803 +            for file in Utils_file_list:
804 +                tar.add(path+file,file)
805 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
806 +
807 +            ##### AdditionalFiles
808 +            for file in self.additional_inbox_files:
809 +                tar.add(file,string.split(file,'/')[-1])
810 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
811 +
812              tar.close()
813 <        except :
814 <            raise CrabException('Could not create tar-ball')
813 >        except IOError:
814 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
815 >        except tarfile.TarError:
816 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
817  
818          ## check for tarball size
819          tarballinfo = os.stat(self.tgzNameWithPath)
# Line 843 | Line 821 | class Cmssw(JobType):
821              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
822  
823          ## create tar-ball with ML stuff
846        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
847        try:
848            tar = tarfile.open(self.MLtgzfile, "w:gz")
849            path=os.environ['CRABDIR'] + '/python/'
850            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
851                tar.add(path+file,file)
852            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
853            tar.close()
854        except :
855            raise CrabException('Could not create ML files tar-ball')
856        
857        return
858        
859    def additionalInputFileTgz(self):
860        """
861        Put all additional files into a tar ball and return its name
862        """
863        import tarfile
864        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
865        tar = tarfile.open(tarName, "w:gz")
866        for file in self.additional_inbox_files:
867            tar.add(file,string.split(file,'/')[-1])
868        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
869        tar.close()
870        return tarName
824  
825 <    def wsSetupEnvironment(self, nj):
825 >    def wsSetupEnvironment(self, nj=0):
826          """
827          Returns part of a job script which prepares
828          the execution environment for the job 'nj'.
829          """
830 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
831 +            psetName = 'pset.py'
832 +        else:
833 +            psetName = 'pset.cfg'
834          # Prepare JobType-independent part
835 <        txt = ''
836 <  
837 <        ## OLI_Daniele at this level  middleware already known
881 <
882 <        txt += 'if [ $middleware == LCG ]; then \n'
883 <        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
884 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
885 <        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
835 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
836 >        txt += 'echo ">>> setup environment"\n'
837 >        txt += 'if [ $middleware == LCG ]; then \n'
838          txt += self.wsSetupCMSLCGEnvironment_()
839          txt += 'elif [ $middleware == OSG ]; then\n'
840          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
841 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
842 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
843 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
844 <        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
893 <        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
895 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
896 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
897 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
898 <        txt += '        exit 1\n'
841 >        txt += '    if [ ! $? == 0 ] ;then\n'
842 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
843 >        txt += '        job_exit_code=10016\n'
844 >        txt += '        func_exit\n'
845          txt += '    fi\n'
846 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
847          txt += '\n'
848          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
849          txt += '    cd $WORKING_DIR\n'
850 <        txt += self.wsSetupCMSOSGEnvironment_()
851 <        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
905 <        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
850 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
851 >        txt += self.wsSetupCMSOSGEnvironment_()
852          txt += 'fi\n'
853  
854          # Prepare JobType-specific part
855          scram = self.scram.commandName()
856          txt += '\n\n'
857 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
857 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
858 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
859          txt += scram+' project CMSSW '+self.version+'\n'
860          txt += 'status=$?\n'
861          txt += 'if [ $status != 0 ] ; then\n'
862 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
863 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
864 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
918 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
919 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
920 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
921 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
922 <        ## OLI_Daniele
923 <        txt += '    if [ $middleware == OSG ]; then \n'
924 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
925 <        txt += '        cd $RUNTIME_AREA\n'
926 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
927 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
928 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
932 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
933 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
934 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
935 <        txt += '        fi\n'
936 <        txt += '    fi \n'
937 <        txt += '   exit 1 \n'
862 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
863 >        txt += '    job_exit_code=10034\n'
864 >        txt += '    func_exit\n'
865          txt += 'fi \n'
939        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
866          txt += 'cd '+self.version+'\n'
941        ########## FEDE FOR DBS2 ######################
867          txt += 'SOFTWARE_DIR=`pwd`\n'
868 <        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
944 <        ###############################################
945 <        ### needed grep for bug in scramv1 ###
946 <        txt += scram+' runtime -sh\n'
868 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
869          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
870 <        txt += 'echo $PATH\n'
871 <
870 >        txt += 'if [ $? != 0 ] ; then\n'
871 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
872 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
873 >        txt += '    job_exit_code=10034\n'
874 >        txt += '    func_exit\n'
875 >        txt += 'fi \n'
876          # Handle the arguments:
877          txt += "\n"
878          txt += "## number of arguments (first argument always jobnumber)\n"
879          txt += "\n"
880 < #        txt += "narg=$#\n"
955 <        txt += "if [ $nargs -lt 2 ]\n"
880 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
881          txt += "then\n"
882 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
883 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
884 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
960 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
961 <        txt += '    rm -f $RUNTIME_AREA/$repo \n'
962 <        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
963 <        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
964 <        ## OLI_Daniele
965 <        txt += '    if [ $middleware == OSG ]; then \n'
966 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
967 <        txt += '        cd $RUNTIME_AREA\n'
968 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
969 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
970 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
974 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
975 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
976 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
977 <        txt += '        fi\n'
978 <        txt += '    fi \n'
979 <        txt += "    exit 1\n"
882 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
883 >        txt += '    job_exit_code=50113\n'
884 >        txt += "    func_exit\n"
885          txt += "fi\n"
886          txt += "\n"
887  
888          # Prepare job-specific part
889          job = common.job_list[nj]
890 <        ### FEDE FOR DBS OUTPUT PUBLICATION
986 <        if (self.datasetPath):
890 >        if (self.datasetPath):
891              txt += '\n'
892              txt += 'DatasetPath='+self.datasetPath+'\n'
893  
894              datasetpath_split = self.datasetPath.split("/")
895 <            
895 >
896              txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
897              txt += 'DataTier='+datasetpath_split[2]+'\n'
994            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
898              txt += 'ApplicationFamily=cmsRun\n'
899  
900          else:
901              txt += 'DatasetPath=MCDataTier\n'
902              txt += 'PrimaryDataset=null\n'
903              txt += 'DataTier=null\n'
1001            #txt += 'ProcessedDataset=null\n'
904              txt += 'ApplicationFamily=MCDataTier\n'
905 <        if self.pset != None: #CarlosDaniele
905 >        if self.pset != None:
906              pset = os.path.basename(job.configFilename())
907              txt += '\n'
908              txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
909              if (self.datasetPath): # standard job
910 <                #txt += 'InputFiles=$2\n'
911 <                txt += 'InputFiles=${args[1]}\n'
912 <                txt += 'MaxEvents=${args[2]}\n'
913 <                txt += 'SkipEvents=${args[3]}\n'
910 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
911 >                if (self.useParent):
912 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
913 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
914 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
915 >                else:
916 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
917 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
918                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
919 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
919 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
920                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1015                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
921                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1017                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
922              else:  # pythia like job
923 <                seedIndex=1
923 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
924 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
925 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
926 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
927                  if (self.firstRun):
928 <                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
928 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
929                      txt += 'echo "FirstRun: <$FirstRun>"\n'
1023                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1024                    seedIndex=seedIndex+1
930  
931 <                if (self.sourceSeed):
1027 <                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1028 <                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1029 <                    seedIndex=seedIndex+1
1030 <                    ## the following seeds are not always present
1031 <                    if (self.sourceSeedVtx):
1032 <                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1033 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1034 <                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1035 <                        seedIndex += 1
1036 <                    if (self.sourceSeedG4):
1037 <                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1038 <                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1039 <                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1040 <                        seedIndex += 1
1041 <                    if (self.sourceSeedMix):
1042 <                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1043 <                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1044 <                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1045 <                        seedIndex += 1
1046 <                    pass
1047 <                pass
1048 <            txt += 'mv -f '+pset+' pset.cfg\n'
931 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
932  
1050        if len(self.additional_inbox_files) > 0:
1051            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1052            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1053            txt += 'fi\n'
1054            pass
933  
934 <        if self.pset != None: #CarlosDaniele
935 <            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1058 <        
1059 <            txt += '\n'
1060 <            txt += 'echo "***** cat pset.cfg *********"\n'
1061 <            txt += 'cat pset.cfg\n'
1062 <            txt += 'echo "****** end pset.cfg ********"\n'
934 >        if self.pset != None:
935 >            # FUTURE: Can simplify for 2_1_x and higher
936              txt += '\n'
937 <            ### FEDE FOR DBS OUTPUT PUBLICATION
938 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
937 >            if self.debug_wrapper==True:
938 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
939 >                txt += 'cat ' + psetName + '\n'
940 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
941 >                txt += '\n'
942 >            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
943 >                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
944 >            else:
945 >                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
946              txt += 'echo "PSETHASH = $PSETHASH" \n'
1067            ##############
947              txt += '\n'
1069            # txt += 'echo "***** cat pset1.cfg *********"\n'
1070            # txt += 'cat pset1.cfg\n'
1071            # txt += 'echo "****** end pset1.cfg ********"\n'
948          return txt
949  
950 <    def wsBuildExe(self, nj=0):
950 >    def wsUntarSoftware(self, nj=0):
951          """
952          Put in the script the commands to build an executable
953          or a library.
954          """
955  
956 <        txt = ""
956 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
957  
958          if os.path.isfile(self.tgzNameWithPath):
959 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
959 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
960              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
961 +            if  self.debug_wrapper:
962 +                txt += 'ls -Al \n'
963              txt += 'untar_status=$? \n'
964              txt += 'if [ $untar_status -ne 0 ]; then \n'
965 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
966 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
967 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1090 <            txt += '   if [ $middleware == OSG ]; then \n'
1091 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1092 <            txt += '       cd $RUNTIME_AREA\n'
1093 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
1094 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1095 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1096 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1097 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1098 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1099 <            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1100 <            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1101 <            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1102 <            txt += '       fi\n'
1103 <            txt += '   fi \n'
1104 <            txt += '   \n'
1105 <            txt += '   exit 1 \n'
965 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
966 >            txt += '   job_exit_code=$untar_status\n'
967 >            txt += '   func_exit\n'
968              txt += 'else \n'
969              txt += '   echo "Successful untar" \n'
970              txt += 'fi \n'
971              txt += '\n'
972 <            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
972 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
973              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
974 <            #### FEDE FOR DBS OUTPUT PUBLICATION
1113 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1114 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1115 <            #txt += '   export PYTHONPATH=ProdAgentApi\n'
974 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
975              txt += 'else\n'
976 <            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1118 <            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1119 <            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
976 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
977              txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1121            ###################  
978              txt += 'fi\n'
979              txt += '\n'
980  
981              pass
982 <        
982 >
983          return txt
984  
985 <    def modifySteeringCards(self, nj):
985 >    def wsBuildExe(self, nj=0):
986          """
987 <        modify the card provided by the user,
988 <        writing a new card into share dir
987 >        Put in the script the commands to build an executable
988 >        or a library.
989          """
990 <        
990 >
991 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
992 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
993 >
994 >        txt += 'rm -r lib/ module/ \n'
995 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
996 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
997 >        if self.dataExist == True:
998 >            txt += 'rm -r src/ \n'
999 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
1000 >        if len(self.additional_inbox_files)>0:
1001 >            for file in self.additional_inbox_files:
1002 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
1003 >        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1004 >        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
1005 >
1006 >        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1007 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1008 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1009 >        txt += 'else\n'
1010 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1011 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1012 >        txt += 'fi\n'
1013 >        txt += '\n'
1014 >
1015 >        return txt
1016 >
1017 >
1018      def executableName(self):
1019 <        if self.scriptExe: #CarlosDaniele
1019 >        if self.scriptExe:
1020              return "sh "
1021          else:
1022              return self.executable
1023  
1024      def executableArgs(self):
1025 +        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
1026          if self.scriptExe:#CarlosDaniele
1027              return   self.scriptExe + " $NJob"
1028          else:
1029 <            # if >= CMSSW_1_5_X, add -e
1030 <            version_array = self.scram.getSWVersion().split('_')
1031 <            major = 0
1032 <            minor = 0
1033 <            try:
1034 <                major = int(version_array[1])
1035 <                minor = int(version_array[2])
1036 <            except:
1153 <                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1154 <                raise CrabException(msg)
1155 <            if major >= 1 and minor >= 5 :
1156 <                return " -e -p pset.cfg"
1029 >            ex_args = ""
1030 >            # FUTURE: This tests the CMSSW version. Can remove code as versions are deprecated
1031 >            # Framework job report
1032 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1033 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1034 >            # Type of config file
1035 >            if self.CMSSW_major >= 2 :
1036 >                ex_args += " -p pset.py"
1037              else:
1038 <                return " -p pset.cfg"
1038 >                ex_args += " -p pset.cfg"
1039 >            return ex_args
1040  
1041      def inputSandbox(self, nj):
1042          """
1043          Returns a list of filenames to be put in JDL input sandbox.
1044          """
1045          inp_box = []
1165        # # dict added to delete duplicate from input sandbox file list
1166        # seen = {}
1167        ## code
1046          if os.path.isfile(self.tgzNameWithPath):
1047              inp_box.append(self.tgzNameWithPath)
1048 <        if os.path.isfile(self.MLtgzfile):
1049 <            inp_box.append(self.MLtgzfile)
1172 <        ## config
1173 <        if not self.pset is None:
1174 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1175 <        ## additional input files
1176 <        tgz = self.additionalInputFileTgz()
1177 <        inp_box.append(tgz)
1048 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1049 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1050          return inp_box
1051  
1052      def outputSandbox(self, nj):
# Line 1185 | Line 1057 | class Cmssw(JobType):
1057  
1058          ## User Declared output files
1059          for out in (self.output_file+self.output_file_sandbox):
1060 <            n_out = nj + 1
1061 <            out_box.append(self.numberFile_(out,str(n_out)))
1060 >            n_out = nj + 1
1061 >            out_box.append(numberFile(out,str(n_out)))
1062          return out_box
1063  
1192    def prepareSteeringCards(self):
1193        """
1194        Make initial modifications of the user's steering card file.
1195        """
1196        return
1064  
1065      def wsRenameOutput(self, nj):
1066          """
1067          Returns part of a job script which renames the produced files.
1068          """
1069  
1070 <        txt = '\n'
1071 <        txt += '# directory content\n'
1072 <        txt += 'ls \n'
1073 <
1074 <        txt += 'output_exit_status=0\n'
1075 <        
1076 <        for fileWithSuffix in (self.output_file_sandbox):
1210 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1211 <            txt += '\n'
1212 <            txt += '# check output file\n'
1213 <            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1214 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1215 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1216 <            txt += 'else\n'
1217 <            txt += '    exit_status=60302\n'
1218 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1219 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1220 <                txt += '    if [ $middleware == OSG ]; then \n'
1221 <                txt += '        echo "prepare dummy output file"\n'
1222 <                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1223 <                txt += '    fi \n'
1224 <            txt += 'fi\n'
1225 <        
1070 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1071 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1072 >        txt += 'echo ">>> current directory content:"\n'
1073 >        if self.debug_wrapper:
1074 >            txt += 'ls -Al\n'
1075 >        txt += '\n'
1076 >
1077          for fileWithSuffix in (self.output_file):
1078 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1078 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
1079              txt += '\n'
1080              txt += '# check output file\n'
1081              txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1082 <            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1083 <            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1082 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1083 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1084 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1085 >            else:
1086 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1087 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1088              txt += 'else\n'
1089 <            txt += '    exit_status=60302\n'
1090 <            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1091 <            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1237 <            txt += '    output_exit_status=$exit_status\n'
1238 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1089 >            txt += '    job_exit_code=60302\n'
1090 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1091 >            if common.scheduler.name().upper() == 'CONDOR_G':
1092                  txt += '    if [ $middleware == OSG ]; then \n'
1093                  txt += '        echo "prepare dummy output file"\n'
1094                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
# Line 1243 | Line 1096 | class Cmssw(JobType):
1096              txt += 'fi\n'
1097          file_list = []
1098          for fileWithSuffix in (self.output_file):
1099 <             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1100 <            
1099 >             file_list.append(numberFile(fileWithSuffix, '$NJob'))
1100 >
1101          txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1102 +        txt += '\n'
1103 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1104 +        txt += 'echo ">>> current directory content:"\n'
1105 +        if self.debug_wrapper:
1106 +            txt += 'ls -Al\n'
1107 +        txt += '\n'
1108          txt += 'cd $RUNTIME_AREA\n'
1109 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1110          return txt
1111  
1252    def numberFile_(self, file, txt):
1253        """
1254        append _'txt' before last extension of a file
1255        """
1256        p = string.split(file,".")
1257        # take away last extension
1258        name = p[0]
1259        for x in p[1:-1]:
1260            name=name+"."+x
1261        # add "_txt"
1262        if len(p)>1:
1263            ext = p[len(p)-1]
1264            result = name + '_' + txt + "." + ext
1265        else:
1266            result = name + '_' + txt
1267        
1268        return result
1269
1112      def getRequirements(self, nj=[]):
1113          """
1114 <        return job requirements to add to jdl files
1114 >        return job requirements to add to jdl files
1115          """
1116          req = ''
1117          if self.version:
1118              req='Member("VO-cms-' + \
1119                   self.version + \
1120                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1121 <        ## SL add requirement for OS version only if SL4
1280 <        #reSL4 = re.compile( r'slc4' )
1281 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1121 >        if self.executable_arch:
1122              req+=' && Member("VO-cms-' + \
1123                   self.executable_arch + \
1124                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1125  
1126          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1127 +        if common.scheduler.name() == "glitecoll":
1128 +            req += ' && other.GlueCEStateStatus == "Production" '
1129  
1130          return req
1131  
1132      def configFilename(self):
1133          """ return the config filename """
1134 <        return self.name()+'.cfg'
1134 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1135 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1136 >          return self.name()+'.py'
1137 >        else:
1138 >          return self.name()+'.cfg'
1139  
1294    ### OLI_DANIELE
1140      def wsSetupCMSOSGEnvironment_(self):
1141          """
1142          Returns part of a job script which prepares
1143          the execution environment and which is common for all CMS jobs.
1144          """
1145 <        txt = '\n'
1146 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1147 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1148 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1149 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1150 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1306 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1145 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1146 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1147 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1148 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1149 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1150 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1151          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1152 <        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1153 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1154 <        txt += '   else\n'
1155 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1156 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1157 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1314 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1315 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1316 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1317 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1318 <        txt += '       exit 1\n'
1319 <        txt += '\n'
1320 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1321 <        txt += '       cd $RUNTIME_AREA\n'
1322 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1323 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1324 <        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325 <        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1326 <        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1328 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1329 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1331 <        txt += '       fi\n'
1332 <        txt += '\n'
1333 <        txt += '       exit 1\n'
1334 <        txt += '   fi\n'
1152 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1153 >        txt += '    else\n'
1154 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1155 >        txt += '        job_exit_code=10020\n'
1156 >        txt += '        func_exit\n'
1157 >        txt += '    fi\n'
1158          txt += '\n'
1159 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1160 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1159 >        txt += '    echo "==> setup cms environment ok"\n'
1160 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1161  
1162          return txt
1163 <
1341 <    ### OLI_DANIELE
1163 >
1164      def wsSetupCMSLCGEnvironment_(self):
1165          """
1166          Returns part of a job script which prepares
1167          the execution environment and which is common for all CMS jobs.
1168          """
1169 <        txt  = '   \n'
1170 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1171 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1172 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1173 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1174 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1175 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1176 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1177 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1178 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1179 <        txt += '       exit 1\n'
1180 <        txt += '   else\n'
1181 <        txt += '       echo "Sourcing environment... "\n'
1182 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1183 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1184 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1185 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1186 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1187 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1188 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1189 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1190 <        txt += '           exit 1\n'
1191 <        txt += '       fi\n'
1192 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1193 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1194 <        txt += '       result=$?\n'
1195 <        txt += '       if [ $result -ne 0 ]; then\n'
1374 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1375 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1376 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1377 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1378 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1379 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1380 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1381 <        txt += '           exit 1\n'
1382 <        txt += '       fi\n'
1383 <        txt += '   fi\n'
1384 <        txt += '   \n'
1385 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1386 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1169 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1170 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1171 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1172 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1173 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1174 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1175 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1176 >        txt += '        job_exit_code=10031\n'
1177 >        txt += '        func_exit\n'
1178 >        txt += '    else\n'
1179 >        txt += '        echo "Sourcing environment... "\n'
1180 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1181 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1182 >        txt += '            job_exit_code=10020\n'
1183 >        txt += '            func_exit\n'
1184 >        txt += '        fi\n'
1185 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1186 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1187 >        txt += '        result=$?\n'
1188 >        txt += '        if [ $result -ne 0 ]; then\n'
1189 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1190 >        txt += '            job_exit_code=10032\n'
1191 >        txt += '            func_exit\n'
1192 >        txt += '        fi\n'
1193 >        txt += '    fi\n'
1194 >        txt += '    \n'
1195 >        txt += '    echo "==> setup cms environment ok"\n'
1196          return txt
1197  
1389    ### FEDE FOR DBS OUTPUT PUBLICATION
1198      def modifyReport(self, nj):
1199          """
1200 <        insert the part of the script that modifies the FrameworkJob Report
1200 >        insert the part of the script that modifies the FrameworkJob Report
1201          """
1202 +        txt = '\n#Written by cms_cmssw::modifyReport\n'
1203 +        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1204 +        if (publish_data == 1):
1205 +            processedDataset = self.cfg_params['USER.publish_data_name']
1206 +            ### FEDE  for publication with LSF and CAF schedulers ####
1207 +            print "common.scheduler.name().upper() = ", common.scheduler.name().upper()
1208 +            if (common.scheduler.name().upper() == "CAF" or common.scheduler.name().upper() == "LSF"):
1209 +                print "chiamo LFNBaseName con localUser = true"
1210 +                LFNBaseName = LFNBase(processedDataset, LocalUser=True)
1211 +            else :    
1212 +                LFNBaseName = LFNBase(processedDataset)
1213 +            ####    
1214  
1215 <        txt = ''
1216 <        try:
1217 <            publish_data = int(self.cfg_params['USER.publish_data'])          
1218 <        except KeyError:
1219 <            publish_data = 0
1220 <        if (publish_data == 1):  
1221 <            txt += 'echo "Modify Job Report" \n'
1402 <            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1403 <            ################ FEDE FOR DBS2 #############################################
1404 <            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1405 <            #############################################################################
1406 <            #try:
1407 <            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1408 <            #except KeyError:
1409 <            #    publish_data = 0
1410 <
1411 <            txt += 'if [ -z "$SE" ]; then\n'
1412 <            txt += '    SE="" \n'
1413 <            txt += 'fi \n'
1414 <            txt += 'if [ -z "$SE_PATH" ]; then\n'
1415 <            txt += '    SE_PATH="" \n'
1416 <            txt += 'fi \n'
1417 <            txt += 'echo "SE = $SE"\n'
1418 <            txt += 'echo "SE_PATH = $SE_PATH"\n'
1215 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1216 >            txt += '    FOR_LFN=%s_${PSETHASH}/\n'%(LFNBaseName)
1217 >            txt += 'else\n'
1218 >            txt += '    FOR_LFN=/copy_problems/ \n'
1219 >            txt += '    SE=""\n'
1220 >            txt += '    SE_PATH=""\n'
1221 >            txt += 'fi\n'
1222  
1223 <        #if (publish_data == 1):  
1224 <            #processedDataset = self.cfg_params['USER.processed_datasetname']
1422 <            processedDataset = self.cfg_params['USER.publish_data_name']
1223 >            txt += 'echo ">>> Modify Job Report:" \n'
1224 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1225              txt += 'ProcessedDataset='+processedDataset+'\n'
1424            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1425            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1426            #### FEDE: added slash in LFN ##############
1427            txt += '    FOR_LFN=/copy_problems/ \n'
1428            txt += 'else \n'
1429            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1430            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1431            txt += '    FOR_LFN=/store$tmp \n'
1432            txt += 'fi \n'
1226              txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1227 +            txt += 'echo "SE = $SE"\n'
1228 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1229              txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1230              txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1231 <            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1232 <            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1438 <            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1439 <            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1440 <      
1231 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1232 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1233              txt += 'modifyReport_result=$?\n'
1442            txt += 'echo modifyReport_result = $modifyReport_result\n'
1234              txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1235 <            txt += '    exit_status=1\n'
1236 <            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1235 >            txt += '    modifyReport_result=70500\n'
1236 >            txt += '    job_exit_code=$modifyReport_result\n'
1237 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1238 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1239              txt += 'else\n'
1240 <            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1240 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1241              txt += 'fi\n'
1449        else:
1450            txt += 'echo "no data publication required"\n'
1451            #txt += 'ProcessedDataset=no_data_to_publish \n'
1452            #### FEDE: added slash in LFN ##############
1453            #txt += 'FOR_LFN=/local/ \n'
1454            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1455            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1242          return txt
1243  
1244 <    def cleanEnv(self):
1245 <        ### OLI_DANIELE
1246 <        txt = ''
1247 <        txt += 'if [ $middleware == OSG ]; then\n'  
1248 <        txt += '    cd $RUNTIME_AREA\n'
1249 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1250 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1251 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1252 <        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1253 <        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1254 <        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1255 <        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1256 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1257 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1258 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1244 >    def wsParseFJR(self):
1245 >        """
1246 >        Parse the FrameworkJobReport to obtain useful infos
1247 >        """
1248 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1249 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1250 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1251 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1252 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1253 >        if self.debug_wrapper :
1254 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1255 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1256 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1257 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1258 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1259 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1260 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1261 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1262 >        txt += '        else\n'
1263 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1264 >        txt += '        fi\n'
1265 >        txt += '    else\n'
1266 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1267          txt += '    fi\n'
1268 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1269 +
1270 +        if (self.datasetPath and not self.dataset_pu ):
1271 +          # VERIFY PROCESSED DATA
1272 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1273 +            txt += '      echo ">>> Verify list of processed files:"\n'
1274 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1275 +            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1276 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1277 +            txt += '      mv tmp.txt input-files.txt\n'
1278 +            txt += '      echo "cat input-files.txt"\n'
1279 +            txt += '      echo "----------------------"\n'
1280 +            txt += '      cat input-files.txt\n'
1281 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1282 +            txt += '      mv tmp.txt processed-files.txt\n'
1283 +            txt += '      echo "----------------------"\n'
1284 +            txt += '      echo "cat processed-files.txt"\n'
1285 +            txt += '      echo "----------------------"\n'
1286 +            txt += '      cat processed-files.txt\n'
1287 +            txt += '      echo "----------------------"\n'
1288 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1289 +            txt += '      fileverify_status=$?\n'
1290 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1291 +            txt += '         executable_exit_status=30001\n'
1292 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1293 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1294 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1295 +            txt += '      fi\n'
1296 +            txt += '    fi\n'
1297 +            txt += '\n'
1298 +        txt += 'else\n'
1299 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1300          txt += 'fi\n'
1301          txt += '\n'
1302 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1303 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1304 +        txt += 'job_exit_code=$executable_exit_status\n'
1305 +
1306          return txt
1307  
1308      def setParam_(self, param, value):
# Line 1481 | Line 1311 | class Cmssw(JobType):
1311      def getParams(self):
1312          return self._params
1313  
1484    def setTaskid_(self):
1485        self._taskId = self.cfg_params['taskId']
1486        
1487    def getTaskid(self):
1488        return self._taskId
1489
1314      def uniquelist(self, old):
1315          """
1316          remove duplicates from a list
# Line 1496 | Line 1320 | class Cmssw(JobType):
1320              nd[e]=0
1321          return nd.keys()
1322  
1323 <
1500 <    def checkOut(self, limit):
1323 >    def outList(self):
1324          """
1325          check the dimension of the output files
1326          """
1327 <        txt = 'echo "*****************************************"\n'
1328 <        txt += 'echo "** Starting output sandbox limit check **"\n'
1506 <        txt += 'echo "*****************************************"\n'
1507 <        allOutFiles = ""
1327 >        txt = ''
1328 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1329          listOutFiles = []
1330 <        for fileOut in (self.output_file+self.output_file_sandbox):
1331 <             if fileOut.find('crab_fjr') == -1:
1332 <                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1333 <                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1334 <        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1335 <        txt += 'ls -gGhrta;\n'
1336 <        txt += 'sum=0;\n'
1337 <        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1338 <        txt += '    if [ -e $file ]; then\n'
1339 <        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1340 <        txt += '        sum=`expr $sum + $tt`\n'
1341 <        txt += '    else\n'
1342 <        txt += '        echo "WARNING: output file $file not found!"\n'
1343 <        txt += '    fi\n'
1344 <        txt += 'done\n'
1524 <        txt += 'echo "Total Output dimension: $sum";\n'
1525 <        txt += 'limit='+str(limit)+';\n'
1526 <        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1527 <        txt += 'if [ $limit -lt $sum ]; then\n'
1528 <        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1529 <        txt += '    echo "         checking the output file sizes..."\n'
1530 <        """
1531 <        txt += '    dim=0;\n'
1532 <        txt += '    exclude=0;\n'
1533 <        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1534 <        txt += '        sumTemp=0;\n'
1535 <        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1536 <        txt += '            if [ $file != $file2 ]; then\n'
1537 <        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1538 <        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1539 <        txt += '            fi\n'
1540 <        txt += '        done\n'
1541 <        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1542 <        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1543 <        txt += '                dim=$sumTemp;\n'
1544 <        txt += '                exclude=$file;\n'
1545 <        txt += '            fi\n'
1546 <        txt += '        fi\n'
1547 <        txt += '    done\n'
1548 <        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1549 <        """
1550 <        txt += '    tot=0;\n'
1551 <        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1552 <        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1553 <        txt += '        tot=`expr $tot + $tt`;\n'
1554 <        txt += '        if [ $limit -lt $tot ]; then\n'
1555 <        txt += '            tot=`expr $tot - $tt`;\n'
1556 <        txt += '            fileLast=$file;\n'
1557 <        txt += '            break;\n'
1558 <        txt += '        fi\n'
1559 <        txt += '    done\n'
1560 <        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $file";\n'
1561 <        txt += '    flag=0;\n'    
1562 <        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1563 <        txt += '        if [ $fileLast = $filess ]; then\n'
1564 <        txt += '            flag=1;\n'
1565 <        txt += '        fi\n'
1566 <        txt += '        if [ $flag -eq 1 ]; then\n'
1567 <        txt += '            rm -f $filess;\n'
1568 <        txt += '        fi\n'
1569 <        txt += '    done\n'
1570 <        txt += '    ls -agGhrt;\n'
1571 <        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1572 <        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1573 <        txt += '    exit_status=70000;\n'
1574 <        txt += 'else'
1575 <        txt += '    echo "Total Output dimension $sum is fine.";\n'
1576 <        txt += 'fi\n'
1577 <        txt += 'echo "*****************************************"\n'
1578 <        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1579 <        txt += 'echo "*****************************************"\n'
1330 >        stdout = 'CMSSW_$NJob.stdout'
1331 >        stderr = 'CMSSW_$NJob.stderr'
1332 >        if (self.return_data == 1):
1333 >            for file in (self.output_file+self.output_file_sandbox):
1334 >                listOutFiles.append(numberFile(file, '$NJob'))
1335 >            listOutFiles.append(stdout)
1336 >            listOutFiles.append(stderr)
1337 >        else:
1338 >            for file in (self.output_file_sandbox):
1339 >                listOutFiles.append(numberFile(file, '$NJob'))
1340 >            listOutFiles.append(stdout)
1341 >            listOutFiles.append(stderr)
1342 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1343 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1344 >        txt += 'export filesToCheck\n'
1345          return txt

Diff Legend

Removed lines (no marker; numbered per the old revision)
+ Added lines
< Changed lines (old revision's version)
> Changed lines (new revision's version)