ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.68 by gutsche, Thu Feb 15 20:06:41 2007 UTC vs.
Revision 1.247 by fanzago, Mon Sep 29 17:19:16 2008 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import SEBlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DataDiscovery
9 import DataDiscovery_DBS2
10 import DataLocation
7   import Scram
8  
9 < import os, string, re, shutil
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params, ncjobs):
12 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15 +        self.skip_blocks = skip_blocks
16 +
17 +        self.argsList = []
18  
20        # Marco.
19          self._params = {}
20          self.cfg_params = cfg_params
21 +        # init BlackWhiteListParser
22 +        self.blackWhiteListParser = SEBlackWhiteListParser(cfg_params)
23 +
24 +        ### Temporary patch to automatically skip the ISB size check:
25 +        server=self.cfg_params.get('CRAB.server_name',None)
26 +        size = 9.5
27 +        if server: size = 99999
28 +        ### D.S.
29 +        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',size))
30  
31          # number of jobs requested to be created, limit obj splitting
32          self.ncjobs = ncjobs
33  
34          log = common.logger
35 <        
35 >
36          self.scram = Scram.Scram(cfg_params)
37          self.additional_inbox_files = []
38          self.scriptExe = ''
39          self.executable = ''
40 +        self.executable_arch = self.scram.getArch()
41          self.tgz_name = 'default.tgz'
42          self.scriptName = 'CMSSW.sh'
43 <        self.pset = ''      #scrip use case Da  
44 <        self.datasetPath = '' #scrip use case Da
43 >        self.pset = ''
44 >        self.datasetPath = ''
45  
46          # set FJR file name
47          self.fjrFileName = 'crab_fjr.xml'
48  
49          self.version = self.scram.getSWVersion()
50 <        common.taskDB.setDict('codeVersion',self.version)
51 <        self.setParam_('application', self.version)
50 >        version_array = self.version.split('_')
51 >        self.CMSSW_major = 0
52 >        self.CMSSW_minor = 0
53 >        self.CMSSW_patch = 0
54 >        try:
55 >            self.CMSSW_major = int(version_array[1])
56 >            self.CMSSW_minor = int(version_array[2])
57 >            self.CMSSW_patch = int(version_array[3])
58 >        except:
59 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
60 >            raise CrabException(msg)
61  
62          ### collect Data cards
63  
64 <        ## get DBS mode
65 <        self.use_dbs_2 = 0
49 <        try:
50 <            self.use_dbs_2 = int(self.cfg_params['CMSSW.use_dbs_2'])
51 <        except KeyError:
52 <            self.use_dbs_2 = 0
53 <            
54 <        try:
55 <            tmp =  cfg_params['CMSSW.datasetpath']
56 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
57 <            if string.lower(tmp)=='none':
58 <                self.datasetPath = None
59 <                self.selectNoInput = 1
60 <            else:
61 <                self.datasetPath = tmp
62 <                self.selectNoInput = 0
63 <        except KeyError:
64 <            msg = "Error: datasetpath not defined "  
64 >        if not cfg_params.has_key('CMSSW.datasetpath'):
65 >            msg = "Error: datasetpath not defined "
66              raise CrabException(msg)
67  
68 <        # ML monitoring
69 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
69 <        if not self.datasetPath:
70 <            self.setParam_('dataset', 'None')
71 <            self.setParam_('owner', 'None')
72 <        else:
73 <            datasetpath_split = self.datasetPath.split("/")
74 <            self.setParam_('dataset', datasetpath_split[1])
75 <            self.setParam_('owner', datasetpath_split[-1])
68 >        ### Temporary: added to remove input file control in the case of PU
69 >        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
70  
71 <        self.setTaskid_()
72 <        self.setParam_('taskId', self.cfg_params['taskId'])
71 >        tmp =  cfg_params['CMSSW.datasetpath']
72 >        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
73  
74 <        self.dataTiers = []
74 >        if tmp =='':
75 >            msg = "Error: datasetpath not defined "
76 >            raise CrabException(msg)
77 >        elif string.lower(tmp)=='none':
78 >            self.datasetPath = None
79 >            self.selectNoInput = 1
80 >        else:
81 >            self.datasetPath = tmp
82 >            self.selectNoInput = 0
83  
84 +        self.dataTiers = []
85 +        self.debugWrap = ''
86 +        self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
87 +        if self.debug_wrapper: self.debugWrap='--debug'
88          ## now the application
89 <        try:
90 <            self.executable = cfg_params['CMSSW.executable']
85 <            self.setParam_('exe', self.executable)
86 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
87 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
88 <            log.debug(3,msg)
89 <        except KeyError:
90 <            self.executable = 'cmsRun'
91 <            self.setParam_('exe', self.executable)
92 <            msg = "User executable not defined. Use cmsRun"
93 <            log.debug(3,msg)
94 <            pass
89 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
90 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
91  
92 <        try:
97 <            self.pset = cfg_params['CMSSW.pset']
98 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
99 <            if self.pset.lower() != 'none' :
100 <                if (not os.path.exists(self.pset)):
101 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
102 <            else:
103 <                self.pset = None
104 <        except KeyError:
92 >        if not cfg_params.has_key('CMSSW.pset'):
93              raise CrabException("PSet file missing. Cannot run cmsRun ")
94 +        self.pset = cfg_params['CMSSW.pset']
95 +        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
96 +        if self.pset.lower() != 'none' :
97 +            if (not os.path.exists(self.pset)):
98 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
99 +        else:
100 +            self.pset = None
101  
102          # output files
103          ## stuff which must be returned always via sandbox
# Line 112 | Line 107 | class Cmssw(JobType):
107          self.output_file_sandbox.append(self.fjrFileName)
108  
109          # other output files to be returned via sandbox or copied to SE
110 <        try:
111 <            self.output_file = []
112 <            tmp = cfg_params['CMSSW.output_file']
113 <            if tmp != '':
114 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
115 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
116 <                for tmp in tmpOutFiles:
117 <                    tmp=string.strip(tmp)
123 <                    self.output_file.append(tmp)
124 <                    pass
125 <            else:
126 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
127 <                pass
128 <            pass
129 <        except KeyError:
130 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
131 <            pass
110 >        outfileflag = False
111 >        self.output_file = []
112 >        tmp = cfg_params.get('CMSSW.output_file',None)
113 >        if tmp :
114 >            self.output_file = [x.strip() for x in tmp.split(',')]
115 >            outfileflag = True #output found
116 >        #else:
117 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
118  
119          # script_exe file as additional file in inputSandbox
120 <        try:
121 <            self.scriptExe = cfg_params['USER.script_exe']
122 <            if self.scriptExe != '':
123 <               if not os.path.isfile(self.scriptExe):
124 <                  msg ="ERROR. file "+self.scriptExe+" not found"
125 <                  raise CrabException(msg)
126 <               self.additional_inbox_files.append(string.strip(self.scriptExe))
141 <        except KeyError:
142 <            self.scriptExe = ''
143 <        #CarlosDaniele
120 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
121 >        if self.scriptExe :
122 >            if not os.path.isfile(self.scriptExe):
123 >                msg ="ERROR. file "+self.scriptExe+" not found"
124 >                raise CrabException(msg)
125 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
126 >
127          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
128 <           msg ="WARNING. script_exe  not defined"
129 <           raise CrabException(msg)
128 >            msg ="Error. script_exe  not defined"
129 >            raise CrabException(msg)
130 >
131 >        # use parent files...
132 >        self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
133  
134          ## additional input files
135 <        try:
135 >        if cfg_params.has_key('USER.additional_input_files'):
136              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
137 <            common.logger.debug(5,"Additional input files: "+str(tmpAddFiles))
138 <            for tmpFile in tmpAddFiles:
139 <                tmpFile = string.strip(tmpFile)
140 <                if not os.path.exists(tmpFile):
141 <                    raise CrabException("Additional input file not found: "+tmpFile)
137 >            for tmp in tmpAddFiles:
138 >                tmp = string.strip(tmp)
139 >                dirname = ''
140 >                if not tmp[0]=="/": dirname = "."
141 >                files = []
142 >                if string.find(tmp,"*")>-1:
143 >                    files = glob.glob(os.path.join(dirname, tmp))
144 >                    if len(files)==0:
145 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
146 >                else:
147 >                    files.append(tmp)
148 >                for file in files:
149 >                    if not os.path.exists(file):
150 >                        raise CrabException("Additional input file not found: "+file)
151                      pass
152 <                storedFile = common.work_space.shareDir()+ tmpFile
158 <                shutil.copyfile(tmpFile, storedFile)
159 <                self.additional_inbox_files.append(string.strip(storedFile))
152 >                    self.additional_inbox_files.append(string.strip(file))
153                  pass
161            common.logger.debug(5,"Inbox files so far : "+str(self.additional_inbox_files))
162            pass
163        except KeyError:
164            pass
165
166        # files per job
167        try:
168            if (cfg_params['CMSSW.files_per_jobs']):
169                raise CrabException("files_per_jobs no longer supported.  Quitting.")
170        except KeyError:
154              pass
155 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
156 +        pass
157  
158          ## Events per job
159 <        try:
159 >        if cfg_params.has_key('CMSSW.events_per_job'):
160              self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
161              self.selectEventsPerJob = 1
162 <        except KeyError:
162 >        else:
163              self.eventsPerJob = -1
164              self.selectEventsPerJob = 0
165 <    
165 >
166          ## number of jobs
167 <        try:
167 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
168              self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
169              self.selectNumberOfJobs = 1
170 <        except KeyError:
170 >        else:
171              self.theNumberOfJobs = 0
172              self.selectNumberOfJobs = 0
173  
174 <        try:
174 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
175              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
176              self.selectTotalNumberEvents = 1
177 <        except KeyError:
177 >            if self.selectNumberOfJobs  == 1:
178 >                if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
179 >                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
180 >                    raise CrabException(msg)
181 >        else:
182              self.total_number_of_events = 0
183              self.selectTotalNumberEvents = 0
184  
185 <        if self.pset != None: #CarlosDaniele
185 >        if self.pset != None:
186               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
187                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
188                   raise CrabException(msg)
# Line 202 | Line 191 | class Cmssw(JobType):
191                   msg = 'Must specify  number_of_jobs.'
192                   raise CrabException(msg)
193  
194 <        ## source seed for pythia
195 <        try:
196 <            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
197 <        except KeyError:
198 <            self.sourceSeed = None
199 <            common.logger.debug(5,"No seed given")
194 >        ## New method of dealing with seeds
195 >        self.incrementSeeds = []
196 >        self.preserveSeeds = []
197 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
198 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
199 >            for tmp in tmpList:
200 >                tmp.strip()
201 >                self.preserveSeeds.append(tmp)
202 >        if cfg_params.has_key('CMSSW.increment_seeds'):
203 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
204 >            for tmp in tmpList:
205 >                tmp.strip()
206 >                self.incrementSeeds.append(tmp)
207 >
208 >        ## FUTURE: Can remove in CRAB 2.4.0
209 >        self.sourceSeed    = cfg_params.get('CMSSW.pythia_seed',None)
210 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
211 >        self.sourceSeedG4  = cfg_params.get('CMSSW.g4_seed',None)
212 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
213 >        if self.sourceSeed or self.sourceSeedVtx or self.sourceSeedG4 or self.sourceSeedMix:
214 >            msg = 'pythia_seed, vtx_seed, g4_seed, and mix_seed are no longer valid settings. You must use increment_seeds or preserve_seeds'
215 >            raise CrabException(msg)
216  
217 <        try:
218 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
219 <        except KeyError:
220 <            self.sourceSeedVtx = None
221 <            common.logger.debug(5,"No vertex seed given")
217 <        try:
218 <            self.firstRun = int(cfg_params['CMSSW.first_run'])
219 <        except KeyError:
220 <            self.firstRun = None
221 <            common.logger.debug(5,"No first run given")
222 <        if self.pset != None: #CarlosDaniele
223 <            self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
217 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
218 >
219 >        # Copy/return
220 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
221 >        self.return_data = int(cfg_params.get('USER.return_data',0))
222  
223          #DBSDLS-start
224 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
224 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
225          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
226          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
227          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
# Line 232 | Line 230 | class Cmssw(JobType):
230          blockSites = {}
231          if self.datasetPath:
232              blockSites = self.DataDiscoveryAndLocation(cfg_params)
233 <        #DBSDLS-end          
233 >        #DBSDLS-end
234  
237        self.tgzNameWithPath = self.getTarBall(self.executable)
238    
235          ## Select Splitting
236 <        if self.selectNoInput:
237 <            if self.pset == None: #CarlosDaniele
236 >        if self.selectNoInput:
237 >            if self.pset == None:
238                  self.jobSplittingForScript()
239              else:
240                  self.jobSplittingNoInput()
241 <        else:
241 >        elif (cfg_params.get('CMSSW.noblockboundary',0)):
242 >            self.jobSplittingNoBlockBoundary(blockSites)
243 >        else:
244              self.jobSplittingByBlocks(blockSites)
245  
246 <        # modify Pset
247 <        if self.pset != None: #CarlosDaniele
248 <            try:
249 <                if (self.datasetPath): # standard job
250 <                    # allow to processa a fraction of events in a file
251 <                    self.PsetEdit.inputModule("INPUT")
252 <                    self.PsetEdit.maxEvent("INPUTMAXEVENTS")
253 <                    self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
254 <                else:  # pythia like job
255 <                    self.PsetEdit.maxEvent(self.eventsPerJob)
256 <                    if (self.firstRun):
257 <                        self.PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
258 <                    if (self.sourceSeed) :
259 <                        self.PsetEdit.pythiaSeed("INPUT")
260 <                        if (self.sourceSeedVtx) :
261 <                            self.PsetEdit.pythiaSeedVtx("INPUTVTX")
262 <                # add FrameworkJobReport to parameter-set
263 <                self.PsetEdit.addCrabFJR(self.fjrFileName)
264 <                self.PsetEdit.psetWriter(self.configFilename())
265 <            except:
266 <                msg='Error while manipuliating ParameterSet: exiting...'
267 <                raise CrabException(msg)
246 >        # modify Pset only the first time
247 >        if isNew:
248 >            if self.pset != None:
249 >                import PsetManipulator as pp
250 >                PsetEdit = pp.PsetManipulator(self.pset)
251 >                try:
252 >                    # Add FrameworkJobReport to parameter-set, set max events.
253 >                    # Reset later for data jobs by writeCFG which does all modifications
254 >                    PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
255 >                    PsetEdit.maxEvent(self.eventsPerJob)
256 >                    PsetEdit.psetWriter(self.configFilename())
257 >                    ## If present, add TFileService to output files
258 >                    if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
259 >                        tfsOutput = PsetEdit.getTFileService()
260 >                        if tfsOutput:
261 >                            if tfsOutput in self.output_file:
262 >                                common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
263 >                            else:
264 >                                outfileflag = True #output found
265 >                                self.output_file.append(tfsOutput)
266 >                                common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
267 >                            pass
268 >                        pass
269 >                    ## If present and requested, add PoolOutputModule to output files
270 >                    if int(cfg_params.get('CMSSW.get_edm_output',0)):
271 >                        edmOutput = PsetEdit.getPoolOutputModule()
272 >                        if edmOutput:
273 >                            if edmOutput in self.output_file:
274 >                                common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
275 >                            else:
276 >                                self.output_file.append(edmOutput)
277 >                                common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
278 >                            pass
279 >                        pass
280 >                except CrabException:
281 >                    msg='Error while manipulating ParameterSet: exiting...'
282 >                    raise CrabException(msg)
283 >            ## Prepare inputSandbox TarBall (only the first time)
284 >            self.tgzNameWithPath = self.getTarBall(self.executable)
285  
286      def DataDiscoveryAndLocation(self, cfg_params):
287  
288 +        import DataDiscovery
289 +        import DataLocation
290          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
291  
292          datasetPath=self.datasetPath
293  
294          ## Contact the DBS
295 <        common.logger.message("Contacting DBS...")
295 >        common.logger.message("Contacting Data Discovery Services ...")
296          try:
297 <
281 <            if self.use_dbs_2 == 1 :
282 <                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
283 <            else :
284 <                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
297 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
298              self.pubdata.fetchDBSInfo()
299  
300          except DataDiscovery.NotExistingDatasetError, ex :
# Line 293 | Line 306 | class Cmssw(JobType):
306          except DataDiscovery.DataDiscoveryError, ex:
307              msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
308              raise CrabException(msg)
296        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
297            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
298            raise CrabException(msg)
299        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
300            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
301            raise CrabException(msg)
302        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
303            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
304            raise CrabException(msg)
305
306        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
307        common.logger.message("Required data are :"+self.datasetPath)
309  
310          self.filesbyblock=self.pubdata.getFiles()
311          self.eventsbyblock=self.pubdata.getEventsPerBlock()
312          self.eventsbyfile=self.pubdata.getEventsPerFile()
313 +        self.parentFiles=self.pubdata.getParent()
314  
315          ## get max number of events
316 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
315 <        common.logger.message("The number of available events is %s\n"%self.maxEvents)
316 >        self.maxEvents=self.pubdata.getMaxEvents()
317  
317        common.logger.message("Contacting DLS...")
318          ## Contact the DLS and build a list of sites hosting the fileblocks
319          try:
320              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
# Line 322 | Line 322 | class Cmssw(JobType):
322          except DataLocation.DataLocationError , ex:
323              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
324              raise CrabException(msg)
325 <        
325 >
326  
327          sites = dataloc.getSites()
328          allSites = []
# Line 332 | Line 332 | class Cmssw(JobType):
332                  allSites.append(oneSite)
333          allSites = self.uniquelist(allSites)
334  
335 <        common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
336 <        common.logger.debug(6, "List of Sites: "+str(allSites))
335 >        # screen output
336 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
337 >
338          return sites
339 <    
339 >
340      def jobSplittingByBlocks(self, blockSites):
341          """
342          Perform job splitting. Jobs run over an integer number of files
# Line 385 | Line 386 | class Cmssw(JobType):
386              totalNumberOfJobs = 999999999
387          else :
388              totalNumberOfJobs = self.ncjobs
388            
389  
390          blocks = blockSites.keys()
391          blockCount = 0
# Line 395 | Line 395 | class Cmssw(JobType):
395          jobCount = 0
396          list_of_lists = []
397  
398 +        # list tracking which jobs are in which jobs belong to which block
399 +        jobsOfBlock = {}
400 +
401          # ---- Iterate over the blocks in the dataset until ---- #
402          # ---- we've met the requested total # of events    ---- #
403          while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
404              block = blocks[blockCount]
405              blockCount += 1
406 <            
406 >            if block not in jobsOfBlock.keys() :
407 >                jobsOfBlock[block] = []
408  
409              if self.eventsbyblock.has_key(block) :
410                  numEventsInBlock = self.eventsbyblock[block]
411                  common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
412 <            
412 >
413                  files = self.filesbyblock[block]
414                  numFilesInBlock = len(files)
415                  if (numFilesInBlock <= 0):
# Line 413 | Line 417 | class Cmssw(JobType):
417                  fileCount = 0
418  
419                  # ---- New block => New job ---- #
420 <                parString = "\\{"
420 >                parString = ""
421                  # counter for number of events in files currently worked on
422                  filesEventCount = 0
423                  # flag if next while loop should touch new file
424                  newFile = 1
425                  # job event counter
426                  jobSkipEventCount = 0
427 <            
427 >
428                  # ---- Iterate over the files in the block until we've met the requested ---- #
429                  # ---- total # of events or we've gone over all the files in this block  ---- #
430 +                pString=''
431                  while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
432                      file = files[fileCount]
433 +                    if self.useParent:
434 +                        parent = self.parentFiles[file]
435 +                        for f in parent :
436 +                            pString += '\\\"' + f + '\\\"\,'
437 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
438 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
439                      if newFile :
440                          try:
441                              numEventsInFile = self.eventsbyfile[file]
# Line 436 | Line 447 | class Cmssw(JobType):
447                              newFile = 0
448                          except KeyError:
449                              common.logger.message("File "+str(file)+" has unknown number of events: skipping")
439                        
450  
451 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
452                      # if less events in file remain than eventsPerJobRequested
453 <                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
453 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
454                          # if last file in block
455                          if ( fileCount == numFilesInBlock-1 ) :
456                              # end job using last file, use remaining events in block
457                              # close job and touch new file
458                              fullString = parString[:-2]
459 <                            fullString += '\\}'
460 <                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
459 >                            if self.useParent:
460 >                                fullParentString = pString[:-2]
461 >                                list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
462 >                            else:
463 >                                list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
464                              common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
465                              self.jobDestination.append(blockSites[block])
466                              common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
467 +                            # fill jobs of block dictionary
468 +                            jobsOfBlock[block].append(jobCount+1)
469                              # reset counter
470                              jobCount = jobCount + 1
471                              totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
472                              eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
473                              jobSkipEventCount = 0
474                              # reset file
475 <                            parString = "\\{"
475 >                            pString = ""
476 >                            parString = ""
477                              filesEventCount = 0
478                              newFile = 1
479                              fileCount += 1
# Line 468 | Line 485 | class Cmssw(JobType):
485                      elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
486                          # close job and touch new file
487                          fullString = parString[:-2]
488 <                        fullString += '\\}'
489 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
488 >                        if self.useParent:
489 >                            fullParentString = pString[:-2]
490 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
491 >                        else:
492 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
493                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
494                          self.jobDestination.append(blockSites[block])
495                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
496 +                        jobsOfBlock[block].append(jobCount+1)
497                          # reset counter
498                          jobCount = jobCount + 1
499                          totalEventCount = totalEventCount + eventsPerJobRequested
500                          eventsRemaining = eventsRemaining - eventsPerJobRequested
501                          jobSkipEventCount = 0
502                          # reset file
503 <                        parString = "\\{"
503 >                        pString = ""
504 >                        parString = ""
505                          filesEventCount = 0
506                          newFile = 1
507                          fileCount += 1
508 <                        
508 >
509                      # if more events in file remain than eventsPerJobRequested
510                      else :
511                          # close job but don't touch new file
512                          fullString = parString[:-2]
513 <                        fullString += '\\}'
514 <                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
513 >                        if self.useParent:
514 >                            fullParentString = pString[:-2]
515 >                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
516 >                        else:
517 >                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
518                          common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
519                          self.jobDestination.append(blockSites[block])
520                          common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
521 +                        jobsOfBlock[block].append(jobCount+1)
522                          # increase counter
523                          jobCount = jobCount + 1
524                          totalEventCount = totalEventCount + eventsPerJobRequested
# Line 502 | Line 528 | class Cmssw(JobType):
528                          jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
529                          # remove all but the last file
530                          filesEventCount = self.eventsbyfile[file]
531 <                        parString = "\\{"
532 <                        parString += '\\\"' + file + '\\\"\,'
531 >                        if self.useParent:
532 >                            for f in parent : pString += '\\\"' + f + '\\\"\,'
533 >                        parString = '\\\"' + file + '\\\"\,'
534                      pass # END if
535                  pass # END while (iterate over files in the block)
536          pass # END while (iterate over blocks in the dataset)
537          self.ncjobs = self.total_number_of_jobs = jobCount
538          if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
539              common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
540 <        common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
541 <        
540 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
541 >
542 >        # screen output
543 >        screenOutput = "List of jobs and available destination sites:\n\n"
544 >
545 >        # keep trace of block with no sites to print a warning at the end
546 >        noSiteBlock = []
547 >        bloskNoSite = []
548 >
549 >        blockCounter = 0
550 >        for block in blocks:
551 >            if block in jobsOfBlock.keys() :
552 >                blockCounter += 1
553 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),
554 >                    ','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
555 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
556 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
557 >                    bloskNoSite.append( blockCounter )
558 >
559 >        common.logger.message(screenOutput)
560 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
561 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
562 >            virgola = ""
563 >            if len(bloskNoSite) > 1:
564 >                virgola = ","
565 >            for block in bloskNoSite:
566 >                msg += ' ' + str(block) + virgola
567 >            msg += '\n               Related jobs:\n                 '
568 >            virgola = ""
569 >            if len(noSiteBlock) > 1:
570 >                virgola = ","
571 >            for range_jobs in noSiteBlock:
572 >                msg += str(range_jobs) + virgola
573 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
574 >            if self.cfg_params.has_key('EDG.se_white_list'):
575 >                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
576 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
577 >                msg += 'Please check if the dataset is available at this site!)\n'
578 >            if self.cfg_params.has_key('EDG.ce_white_list'):
579 >                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
580 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
581 >                msg += 'Please check if the dataset is available at this site!)\n'
582 >
583 >            common.logger.message(msg)
584 >
585          self.list_of_args = list_of_lists
586          return
587  
588 +    def jobSplittingNoBlockBoundary(self,blockSites):
589 +        """
590 +        """
591 +        # ---- Handle the possible job splitting configurations ---- #
592 +        if (self.selectTotalNumberEvents):
593 +            totalEventsRequested = self.total_number_of_events
594 +        if (self.selectEventsPerJob):
595 +            eventsPerJobRequested = self.eventsPerJob
596 +            if (self.selectNumberOfJobs):
597 +                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
598 +                                                                                                          
599 +        # If user requested all the events in the dataset
600 +        if (totalEventsRequested == -1):
601 +            eventsRemaining=self.maxEvents
602 +        # If user requested more events than are in the dataset
603 +        elif (totalEventsRequested > self.maxEvents):
604 +            eventsRemaining = self.maxEvents
605 +            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
606 +        # If user requested less events than are in the dataset
607 +        else:
608 +            eventsRemaining = totalEventsRequested
609 +                                                                                                          
610 +        # If user requested more events per job than are in the dataset
611 +        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
612 +            eventsPerJobRequested = self.maxEvents
613 +                                                                                                          
614 +        # For user info at end
615 +        totalEventCount = 0
616 +
617 +        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
618 +            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
619 +                                                                                                          
620 +        if (self.selectNumberOfJobs):
621 +            common.logger.message("May not create the exact number_of_jobs requested.")
622 +                                                                                                          
623 +        if ( self.ncjobs == 'all' ) :
624 +            totalNumberOfJobs = 999999999
625 +        else :
626 +            totalNumberOfJobs = self.ncjobs
627 +                                                                                                          
628 +        blocks = blockSites.keys()
629 +        blockCount = 0
630 +        # Backup variable in case self.maxEvents counted events in a non-included block
631 +        numBlocksInDataset = len(blocks)
632 +                                                                                                          
633 +        jobCount = 0
634 +        list_of_lists = []
635 +
636 +        #AF
637 +        #AF do not reset input files and event count on block boundary
638 +        #AF
639 +        parString=""
640 +        filesEventCount = 0
641 +        #AF
642 +
643 +        # list tracking which jobs are in which jobs belong to which block
644 +        jobsOfBlock = {}
645 +        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
646 +            block = blocks[blockCount]
647 +            blockCount += 1
648 +            if block not in jobsOfBlock.keys() :
649 +                jobsOfBlock[block] = []
650 +
651 +            if self.eventsbyblock.has_key(block) :
652 +                numEventsInBlock = self.eventsbyblock[block]
653 +                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
654 +                files = self.filesbyblock[block]
655 +                numFilesInBlock = len(files)
656 +                if (numFilesInBlock <= 0):
657 +                    continue
658 +                fileCount = 0
659 +                #AF
660 +                #AF do not reset input files and event count of block boundary
661 +                #AF
662 +                ## ---- New block => New job ---- #
663 +                #parString = ""
664 +                # counter for number of events in files currently worked on
665 +                #filesEventCount = 0
666 +                #AF
667 +                # flag if next while loop should touch new file
668 +                newFile = 1
669 +                # job event counter
670 +                jobSkipEventCount = 0
671 +
672 +                # ---- Iterate over the files in the block until we've met the requested ---- #
673 +                # ---- total # of events or we've gone over all the files in this block  ---- #
674 +                pString=''
675 +                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
676 +                    file = files[fileCount]
677 +                    if self.useParent:
678 +                        parent = self.parentFiles[file]
679 +                        for f in parent :
680 +                            pString += '\\\"' + f + '\\\"\,'
681 +                        common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
682 +                        common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
683 +                    if newFile :
684 +                        try:
685 +                            numEventsInFile = self.eventsbyfile[file]
686 +                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
687 +                            # increase filesEventCount
688 +                            filesEventCount += numEventsInFile
689 +                            # Add file to current job
690 +                            parString += '\\\"' + file + '\\\"\,'
691 +                            newFile = 0
692 +                        except KeyError:
693 +                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
694 +                    eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining)
695 +                    #common.logger.message("AF filesEventCount %s - jobSkipEventCount %s "%(filesEventCount,jobSkipEventCount))  
696 +                    # if less events in file remain than eventsPerJobRequested
697 +                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested):
698 +                      #AF
699 +                      #AF skip fileboundary part
700 +                      #AF
701 +                            # go to next file
702 +                            newFile = 1
703 +                            fileCount += 1
704 +                    # if events in file equal to eventsPerJobRequested
705 +                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
706 +                        # close job and touch new file
707 +                        fullString = parString[:-2]
708 +                        if self.useParent:
709 +                            fullParentString = pString[:-2]
710 +                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
711 +                        else:
712 +                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
713 +                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
714 +                        self.jobDestination.append(blockSites[block])
715 +                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
716 +                        jobsOfBlock[block].append(jobCount+1)
717 +                        # reset counter
718 +                        jobCount = jobCount + 1
719 +                        totalEventCount = totalEventCount + eventsPerJobRequested
720 +                        eventsRemaining = eventsRemaining - eventsPerJobRequested
721 +                        jobSkipEventCount = 0
722 +                        # reset file
723 +                        pString = ""
724 +                        parString = ""
725 +                        filesEventCount = 0
726 +                        newFile = 1
727 +                        fileCount += 1
728 +
729 +                    # if more events in file remain than eventsPerJobRequested
730 +                    else :
731 +                        # close job but don't touch new file
732 +                        fullString = parString[:-2]
733 +                        if self.useParent:
734 +                            fullParentString = pString[:-2]
735 +                            list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
736 +                        else:
737 +                            list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
738 +                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
739 +                        self.jobDestination.append(blockSites[block])
740 +                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
741 +                        jobsOfBlock[block].append(jobCount+1)
742 +                        # increase counter
743 +                        jobCount = jobCount + 1
744 +                        totalEventCount = totalEventCount + eventsPerJobRequested
745 +                        eventsRemaining = eventsRemaining - eventsPerJobRequested
746 +                        # calculate skip events for last file
747 +                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
748 +                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
749 +                        # remove all but the last file
750 +                        filesEventCount = self.eventsbyfile[file]
751 +                        if self.useParent:
752 +                            for f in parent : pString += '\\\"' + f + '\\\"\,'
753 +                        parString = '\\\"' + file + '\\\"\,'
754 +                    pass # END if
755 +                pass # END while (iterate over files in the block)
756 +        pass # END while (iterate over blocks in the dataset)
757 +        self.ncjobs = self.total_number_of_jobs = jobCount
758 +        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
759 +            common.logger.message("eventsRemaining "+str(eventsRemaining))
760 +            common.logger.message("jobCount "+str(jobCount))
761 +            common.logger.message(" totalNumberOfJobs "+str(totalNumberOfJobs))
762 +            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
763 +        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
764 +
765 +        # screen output
766 +        screenOutput = "List of jobs and available destination sites:\n\n"
767 +
768 +        #AF
769 +        #AF   skip check on  block with no sites
770 +        #AF
771 +        self.list_of_args = list_of_lists
772 +
773 +        return
774 +
775 +
776 +
777      def jobSplittingNoInput(self):
778          """
779          Perform job splitting based on number of event per job
780          """
781          common.logger.debug(5,'Splitting per events')
782 <        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
783 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
784 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
782 >
783 >        if (self.selectEventsPerJob):
784 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
785 >        if (self.selectNumberOfJobs):
786 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
787 >        if (self.selectTotalNumberEvents):
788 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
789  
790          if (self.total_number_of_events < 0):
791              msg='Cannot split jobs per Events with "-1" as total number of events'
# Line 531 | Line 794 | class Cmssw(JobType):
794          if (self.selectEventsPerJob):
795              if (self.selectTotalNumberEvents):
796                  self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
797 <            elif(self.selectNumberOfJobs) :  
797 >            elif(self.selectNumberOfJobs) :
798                  self.total_number_of_jobs =self.theNumberOfJobs
799 <                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
799 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
800  
801          elif (self.selectNumberOfJobs) :
802              self.total_number_of_jobs = self.theNumberOfJobs
803              self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
804 <
804 >
805          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
806  
807          # is there any remainder?
# Line 554 | Line 817 | class Cmssw(JobType):
817          self.list_of_args = []
818          for i in range(self.total_number_of_jobs):
819              ## Since there is no input, any site is good
820 <           # self.jobDestination.append(["Any"])
821 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
559 <            args=''
820 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
821 >            args=[]
822              if (self.firstRun):
823 <                    ## pythia first run
824 <                #self.list_of_args.append([(str(self.firstRun)+str(i))])
825 <                args=args+(str(self.firstRun)+str(i))
564 <            else:
565 <                ## no first run
566 <                #self.list_of_args.append([str(i)])
567 <                args=args+str(i)
568 <            if (self.sourceSeed):
569 <                if (self.sourceSeedVtx):
570 <                    ## pythia + vtx random seed
571 <                    #self.list_of_args.append([
572 <                    #                          str(self.sourceSeed)+str(i),
573 <                    #                          str(self.sourceSeedVtx)+str(i)
574 <                    #                          ])
575 <                    args=args+str(',')+str(self.sourceSeed)+str(i)+str(',')+str(self.sourceSeedVtx)+str(i)
576 <                else:
577 <                    ## only pythia random seed
578 <                    #self.list_of_args.append([(str(self.sourceSeed)+str(i))])
579 <                    args=args +str(',')+str(self.sourceSeed)+str(i)
580 <            else:
581 <                ## no random seed
582 <                if str(args)=='': args=args+(str(self.firstRun)+str(i))
583 <            arguments=args.split(',')
584 <            if len(arguments)==3:self.list_of_args.append([str(arguments[0]),str(arguments[1]),str(arguments[2])])
585 <            elif len(arguments)==2:self.list_of_args.append([str(arguments[0]),str(arguments[1])])
586 <            else :self.list_of_args.append([str(arguments[0])])
587 <            
588 <     #   print self.list_of_args
823 >                ## pythia first run
824 >                args.append(str(self.firstRun)+str(i))
825 >            self.list_of_args.append(args)
826  
827          return
828  
829  
830 <    def jobSplittingForScript(self):#CarlosDaniele
830 >    def jobSplittingForScript(self):
831          """
832          Perform job splitting based on number of job
833          """
# Line 606 | Line 843 | class Cmssw(JobType):
843          # argument is seed number.$i
844          self.list_of_args = []
845          for i in range(self.total_number_of_jobs):
609            ## Since there is no input, any site is good
610           # self.jobDestination.append(["Any"])
846              self.jobDestination.append([""])
612            ## no random seed
847              self.list_of_args.append([str(i)])
848          return
849  
850 <    def split(self, jobParams):
851 <
618 <        common.jobDB.load()
619 <        #### Fabio
850 >    def split(self, jobParams,firstJobID):
851 >
852          njobs = self.total_number_of_jobs
853          arglist = self.list_of_args
854          # create the empty structure
855          for i in range(njobs):
856              jobParams.append("")
625        
626        for job in range(njobs):
627            jobParams[job] = arglist[job]
628            # print str(arglist[job])
629            # print jobParams[job]
630            common.jobDB.setArguments(job, jobParams[job])
631            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
632            common.jobDB.setDestination(job, self.jobDestination[job])
857  
858 <        common.jobDB.save()
858 >        listID=[]
859 >        listField=[]
860 >        for id in range(njobs):
861 >            job = id + int(firstJobID)
862 >            jobParams[id] = arglist[id]
863 >            listID.append(job+1)
864 >            job_ToSave ={}
865 >            concString = ' '
866 >            argu=''
867 >            if len(jobParams[id]):
868 >                argu +=   concString.join(jobParams[id] )
869 >            job_ToSave['arguments']= str(job+1)+' '+argu
870 >            job_ToSave['dlsDestination']= self.jobDestination[id]
871 >            listField.append(job_ToSave)
872 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
873 >            +"                     Destination: "+str(self.jobDestination[id])
874 >            common.logger.debug(5,msg)
875 >        common._db.updateJob_(listID,listField)
876 >        self.argsList = (len(jobParams[0])+1)
877 >
878          return
879 <    
637 <    def getJobTypeArguments(self, nj, sched):
638 <        result = ''
639 <        for i in common.jobDB.arguments(nj):
640 <            result=result+str(i)+" "
641 <        return result
642 <  
879 >
880      def numberOfJobs(self):
644        # Fabio
881          return self.total_number_of_jobs
882  
883      def getTarBall(self, exe):
884          """
885          Return the TarBall with lib and exe
886          """
887 <        
652 <        # if it exist, just return it
653 <        #
654 <        # Marco. Let's start to use relative path for Boss XML files
655 <        #
656 <        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
887 >        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
888          if os.path.exists(self.tgzNameWithPath):
889              return self.tgzNameWithPath
890  
# Line 666 | Line 897 | class Cmssw(JobType):
897  
898          # First of all declare the user Scram area
899          swArea = self.scram.getSWArea_()
669        #print "swArea = ", swArea
670        # swVersion = self.scram.getSWVersion()
671        # print "swVersion = ", swVersion
900          swReleaseTop = self.scram.getReleaseTop_()
901 <        #print "swReleaseTop = ", swReleaseTop
674 <        
901 >
902          ## check if working area is release top
903          if swReleaseTop == '' or swArea == swReleaseTop:
904 +            common.logger.debug(3,"swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
905              return
906  
907          import tarfile
# Line 684 | Line 912 | class Cmssw(JobType):
912                  exeWithPath = self.scram.findFile_(executable)
913                  if ( not exeWithPath ):
914                      raise CrabException('User executable '+executable+' not found')
915 <    
915 >
916                  ## then check if it's private or not
917                  if exeWithPath.find(swReleaseTop) == -1:
918                      # the exe is private, so we must ship
919                      common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
920                      path = swArea+'/'
921 <                    exe = string.replace(exeWithPath, path,'')
922 <                    tar.add(path+exe,exe)
921 >                    # distinguish case when script is in user project area or given by full path somewhere else
922 >                    if exeWithPath.find(path) >= 0 :
923 >                        exe = string.replace(exeWithPath, path,'')
924 >                        tar.add(path+exe,exe)
925 >                    else :
926 >                        tar.add(exeWithPath,os.path.basename(executable))
927                      pass
928                  else:
929                      # the exe is from release, we'll find it on WN
930                      pass
931 <    
931 >
932              ## Now get the libraries: only those in local working area
933              libDir = 'lib'
934              lib = swArea+'/' +libDir
935              common.logger.debug(5,"lib "+lib+" to be tarred")
936              if os.path.exists(lib):
937                  tar.add(lib,libDir)
938 <    
938 >
939              ## Now check if module dir is present
940              moduleDir = 'module'
941              module = swArea + '/' + moduleDir
# Line 711 | Line 943 | class Cmssw(JobType):
943                  tar.add(module,moduleDir)
944  
945              ## Now check if any data dir(s) is present
946 <            swAreaLen=len(swArea)
947 <            for root, dirs, files in os.walk(swArea):
948 <                if "data" in dirs:
949 <                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
950 <                    tar.add(root+"/data",root[swAreaLen:]+"/data")
951 <
952 <            ## Add ProdAgent dir to tar
953 <            paDir = 'ProdAgentApi'
954 <            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
955 <            if os.path.isdir(pa):
956 <                tar.add(pa,paDir)
957 <        
946 >            self.dataExist = False
947 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
948 >            while len(todo_list):
949 >                entry, name = todo_list.pop()
950 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
951 >                    continue
952 >                if os.path.isdir(swArea+"/src/"+entry):
953 >                    entryPath = entry + '/'
954 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
955 >                    if name == 'data':
956 >                        self.dataExist=True
957 >                        common.logger.debug(5,"data "+entry+" to be tarred")
958 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
959 >                    pass
960 >                pass
961 >
962 >            ### CMSSW ParameterSet
963 >            if not self.pset is None:
964 >                cfg_file = common.work_space.jobDir()+self.configFilename()
965 >                tar.add(cfg_file,self.configFilename())
966 >                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
967 >
968 >
969 >            ## Add ProdCommon dir to tar
970 >            prodcommonDir = './'
971 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
972 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
973 >                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage']
974 >            for file in neededStuff:
975 >                tar.add(prodcommonPath+file,prodcommonDir+file)
976              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
977 <            tar.close()
978 <        except :
979 <            raise CrabException('Could not create tar-ball')
730 <        
731 <        ## create tar-ball with ML stuff
732 <        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
733 <        try:
734 <            tar = tarfile.open(self.MLtgzfile, "w:gz")
977 >
978 >            ##### ML stuff
979 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
980              path=os.environ['CRABDIR'] + '/python/'
981 <            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
981 >            for file in ML_file_list:
982 >                tar.add(path+file,file)
983 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
984 >
985 >            ##### Utils
986 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
987 >            for file in Utils_file_list:
988                  tar.add(path+file,file)
989 <            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
989 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
990 >
991 >            ##### AdditionalFiles
992 >            for file in self.additional_inbox_files:
993 >                tar.add(file,string.split(file,'/')[-1])
994 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
995 >
996              tar.close()
997 <        except :
998 <            raise CrabException('Could not create ML files tar-ball')
999 <        
1000 <        return
1001 <        
1002 <    def wsSetupEnvironment(self, nj):
997 >        except IOError, exc:
998 >            common.logger.write(str(exc))
999 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
1000 >        except tarfile.TarError, exc:
1001 >            common.logger.write(str(exc))
1002 >            raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
1003 >
1004 >        ## check for tarball size
1005 >        tarballinfo = os.stat(self.tgzNameWithPath)
1006 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
1007 >            msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
1008 >               +'MB input sandbox limit \n'
1009 >            msg += '      and not supported by the direct GRID submission system.\n'
1010 >            msg += '      Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
1011 >            msg += '      For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
1012 >            raise CrabException(msg)
1013 >
1014 >        ## create tar-ball with ML stuff
1015 >
1016 >    def wsSetupEnvironment(self, nj=0):
1017          """
1018          Returns part of a job script which prepares
1019          the execution environment for the job 'nj'.
1020          """
1021 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1022 +            psetName = 'pset.py'
1023 +        else:
1024 +            psetName = 'pset.cfg'
1025          # Prepare JobType-independent part
1026 <        txt = ''
1027 <  
1028 <        ## OLI_Daniele at this level  middleware already known
754 <
755 <        txt += 'if [ $middleware == LCG ]; then \n'
1026 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
1027 >        txt += 'echo ">>> setup environment"\n'
1028 >        txt += 'if [ $middleware == LCG ]; then \n'
1029          txt += self.wsSetupCMSLCGEnvironment_()
1030          txt += 'elif [ $middleware == OSG ]; then\n'
1031          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
1032 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
1033 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
1034 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
1035 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
763 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
764 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
765 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
766 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
767 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
768 <        txt += '        exit 1\n'
1032 >        txt += '    if [ ! $? == 0 ] ;then\n'
1033 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
1034 >        txt += '        job_exit_code=10016\n'
1035 >        txt += '        func_exit\n'
1036          txt += '    fi\n'
1037 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
1038          txt += '\n'
1039          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
1040          txt += '    cd $WORKING_DIR\n'
1041 <        txt += self.wsSetupCMSOSGEnvironment_()
1041 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
1042 >        txt += self.wsSetupCMSOSGEnvironment_()
1043          txt += 'fi\n'
1044  
1045          # Prepare JobType-specific part
1046          scram = self.scram.commandName()
1047          txt += '\n\n'
1048 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
1048 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
1049 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
1050          txt += scram+' project CMSSW '+self.version+'\n'
1051          txt += 'status=$?\n'
1052          txt += 'if [ $status != 0 ] ; then\n'
1053 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
1054 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
1055 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
786 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
787 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
788 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
789 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
790 <        ## OLI_Daniele
791 <        txt += '    if [ $middleware == OSG ]; then \n'
792 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
793 <        txt += '        cd $RUNTIME_AREA\n'
794 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
795 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
796 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
797 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
798 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
799 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
800 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
801 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
802 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
803 <        txt += '        fi\n'
804 <        txt += '    fi \n'
805 <        txt += '   exit 1 \n'
1053 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
1054 >        txt += '    job_exit_code=10034\n'
1055 >        txt += '    func_exit\n'
1056          txt += 'fi \n'
807        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
1057          txt += 'cd '+self.version+'\n'
1058 <        ### needed grep for bug in scramv1 ###
1059 <        txt += scram+' runtime -sh\n'
1058 >        txt += 'SOFTWARE_DIR=`pwd`\n'
1059 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1060          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
1061 <        txt += 'echo $PATH\n'
1062 <
1061 >        txt += 'if [ $? != 0 ] ; then\n'
1062 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
1063 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
1064 >        txt += '    job_exit_code=10034\n'
1065 >        txt += '    func_exit\n'
1066 >        txt += 'fi \n'
1067          # Handle the arguments:
1068          txt += "\n"
1069          txt += "## number of arguments (first argument always jobnumber)\n"
1070          txt += "\n"
1071 < #        txt += "narg=$#\n"
819 <        txt += "if [ $nargs -lt 2 ]\n"
1071 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
1072          txt += "then\n"
1073 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
1074 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
1075 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
824 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
825 <        txt += '    rm -f $RUNTIME_AREA/$repo \n'
826 <        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
827 <        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
828 <        ## OLI_Daniele
829 <        txt += '    if [ $middleware == OSG ]; then \n'
830 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
831 <        txt += '        cd $RUNTIME_AREA\n'
832 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
833 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
834 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
835 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
836 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
837 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
838 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
839 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
840 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
841 <        txt += '        fi\n'
842 <        txt += '    fi \n'
843 <        txt += "    exit 1\n"
1073 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
1074 >        txt += '    job_exit_code=50113\n'
1075 >        txt += "    func_exit\n"
1076          txt += "fi\n"
1077          txt += "\n"
1078  
1079          # Prepare job-specific part
1080          job = common.job_list[nj]
1081 <        if self.pset != None: #CarlosDaniele
1081 >        if (self.datasetPath):
1082 >            self.primaryDataset = self.datasetPath.split("/")[1]
1083 >            DataTier = self.datasetPath.split("/")[2]
1084 >            txt += '\n'
1085 >            txt += 'DatasetPath='+self.datasetPath+'\n'
1086 >
1087 >            txt += 'PrimaryDataset='+self.primaryDataset +'\n'
1088 >            txt += 'DataTier='+DataTier+'\n'
1089 >            txt += 'ApplicationFamily=cmsRun\n'
1090 >
1091 >        else:
1092 >            self.primaryDataset = 'null'
1093 >            txt += 'DatasetPath=MCDataTier\n'
1094 >            txt += 'PrimaryDataset=null\n'
1095 >            txt += 'DataTier=null\n'
1096 >            txt += 'ApplicationFamily=MCDataTier\n'
1097 >        if self.pset != None:
1098              pset = os.path.basename(job.configFilename())
1099              txt += '\n'
1100 +            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1101              if (self.datasetPath): # standard job
1102 <                #txt += 'InputFiles=$2\n'
1103 <                txt += 'InputFiles=${args[1]}\n'
1104 <                txt += 'MaxEvents=${args[2]}\n'
1105 <                txt += 'SkipEvents=${args[3]}\n'
1102 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
1103 >                if (self.useParent):
1104 >                    txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
1105 >                    txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
1106 >                    txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
1107 >                else:
1108 >                    txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
1109 >                    txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
1110                  txt += 'echo "Inputfiles:<$InputFiles>"\n'
1111 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
1111 >                if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
1112                  txt += 'echo "MaxEvents:<$MaxEvents>"\n'
860                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
1113                  txt += 'echo "SkipEvents:<$SkipEvents>"\n'
862                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
1114              else:  # pythia like job
1115 <                if (self.sourceSeed):
1116 <                    txt += 'FirstRun=${args[1]}\n'
1115 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
1116 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
1117 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
1118 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
1119 >                if (self.firstRun):
1120 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
1121                      txt += 'echo "FirstRun: <$FirstRun>"\n'
867                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" $RUNTIME_AREA/'+pset+' > tmp_1.cfg\n'
868                else:
869                    txt += '# Copy untouched pset\n'
870                    txt += 'cp $RUNTIME_AREA/'+pset+' tmp_1.cfg\n'
871                if (self.sourceSeed):
872 #                    txt += 'Seed=$2\n'
873                    txt += 'Seed=${args[2]}\n'
874                    txt += 'echo "Seed: <$Seed>"\n'
875                    txt += 'sed "s#\<INPUT\>#$Seed#" tmp_1.cfg > tmp_2.cfg\n'
876                    if (self.sourceSeedVtx):
877 #                        txt += 'VtxSeed=$3\n'
878                        txt += 'VtxSeed=${args[3]}\n'
879                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
880                        txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp_2.cfg > pset.cfg\n'
881                    else:
882                        txt += 'mv tmp_2.cfg pset.cfg\n'
883                else:
884                    txt += 'mv tmp_1.cfg pset.cfg\n'
885                   # txt += '# Copy untouched pset\n'
886                   # txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
1122  
1123 +            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
1124  
1125 <        if len(self.additional_inbox_files) > 0:
1126 <            for file in self.additional_inbox_files:
1127 <                relFile = file.split("/")[-1]
892 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
893 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
894 <                txt += '   chmod +x '+relFile+'\n'
895 <                txt += 'fi\n'
896 <            pass
897 <
898 <        if self.pset != None: #CarlosDaniele
899 <            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
900 <        
1125 >
1126 >        if self.pset != None:
1127 >            # FUTURE: Can simply for 2_1_x and higher
1128              txt += '\n'
1129 <            txt += 'echo "***** cat pset.cfg *********"\n'
1130 <            txt += 'cat pset.cfg\n'
1131 <            txt += 'echo "****** end pset.cfg ********"\n'
1129 >            if self.debug_wrapper==True:
1130 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
1131 >                txt += 'cat ' + psetName + '\n'
1132 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
1133 >                txt += '\n'
1134 >            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1135 >                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
1136 >            else:
1137 >                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
1138 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1139              txt += '\n'
906            # txt += 'echo "***** cat pset1.cfg *********"\n'
907            # txt += 'cat pset1.cfg\n'
908            # txt += 'echo "****** end pset1.cfg ********"\n'
1140          return txt
1141  
1142 <    def wsBuildExe(self, nj=0):
1142 >    def wsUntarSoftware(self, nj=0):
1143          """
1144          Put in the script the commands to build an executable
1145          or a library.
1146          """
1147  
1148 <        txt = ""
1148 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
1149  
1150          if os.path.isfile(self.tgzNameWithPath):
1151 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1151 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1152              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1153 +            if  self.debug_wrapper:
1154 +                txt += 'ls -Al \n'
1155              txt += 'untar_status=$? \n'
1156              txt += 'if [ $untar_status -ne 0 ]; then \n'
1157 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
1158 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1159 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
927 <            txt += '   if [ $middleware == OSG ]; then \n'
928 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
929 <            txt += '       cd $RUNTIME_AREA\n'
930 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
931 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
932 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
933 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
934 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
935 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
936 <            txt += '           rm -f $RUNTIME_AREA/$repo \n'
937 <            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
938 <            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
939 <            txt += '       fi\n'
940 <            txt += '   fi \n'
941 <            txt += '   \n'
942 <            txt += '   exit 1 \n'
1157 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
1158 >            txt += '   job_exit_code=$untar_status\n'
1159 >            txt += '   func_exit\n'
1160              txt += 'else \n'
1161              txt += '   echo "Successful untar" \n'
1162              txt += 'fi \n'
1163              txt += '\n'
1164 <            txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
1164 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1165              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1166 <            txt += '   export PYTHONPATH=ProdAgentApi\n'
1166 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1167              txt += 'else\n'
1168 <            txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1168 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1169 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1170              txt += 'fi\n'
1171              txt += '\n'
1172  
1173              pass
1174 <        
1174 >
1175          return txt
1176  
1177 <    def modifySteeringCards(self, nj):
1177 >    def wsBuildExe(self, nj=0):
1178          """
1179 <        modify the card provided by the user,
1180 <        writing a new card into share dir
1179 >        Put in the script the commands to build an executable
1180 >        or a library.
1181          """
1182 <        
1182 >
1183 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
1184 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
1185 >
1186 >        txt += 'rm -r lib/ module/ \n'
1187 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
1188 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
1189 >        if self.dataExist == True:
1190 >            txt += 'rm -r src/ \n'
1191 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
1192 >        if len(self.additional_inbox_files)>0:
1193 >            for file in self.additional_inbox_files:
1194 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
1195 >        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1196 >        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
1197 >
1198 >        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1199 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1200 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
1201 >        txt += 'else\n'
1202 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1203 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1204 >        txt += 'fi\n'
1205 >        txt += '\n'
1206 >
1207 >        return txt
1208 >
1209 >
1210      def executableName(self):
1211 <        if self.pset == None: #CarlosDaniele
1211 >        if self.scriptExe:
1212              return "sh "
1213          else:
1214              return self.executable
1215  
1216      def executableArgs(self):
1217 <        if self.pset == None:#CarlosDaniele
1217 >        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
1218 >        if self.scriptExe:#CarlosDaniele
1219              return   self.scriptExe + " $NJob"
1220 <        else:
1221 <            return " -p pset.cfg"
1220 >        else:
1221 >            ex_args = ""
1222 >            # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
1223 >            # Framework job report
1224 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1225 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1226 >            # Type of config file
1227 >            if self.CMSSW_major >= 2 :
1228 >                ex_args += " -p pset.py"
1229 >            else:
1230 >                ex_args += " -p pset.cfg"
1231 >            return ex_args
1232  
1233      def inputSandbox(self, nj):
1234          """
1235          Returns a list of filenames to be put in JDL input sandbox.
1236          """
1237          inp_box = []
982        # # dict added to delete duplicate from input sandbox file list
983        # seen = {}
984        ## code
1238          if os.path.isfile(self.tgzNameWithPath):
1239              inp_box.append(self.tgzNameWithPath)
1240 <        if os.path.isfile(self.MLtgzfile):
988 <            inp_box.append(self.MLtgzfile)
989 <        ## config
990 <        if not self.pset is None: #CarlosDaniele
991 <            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
992 <        ## additional input files
993 <        #for file in self.additional_inbox_files:
994 <        #    inp_box.append(common.work_space.cwdDir()+file)
1240 >        inp_box.append(common.work_space.jobDir() + self.scriptName)
1241          return inp_box
1242  
1243      def outputSandbox(self, nj):
# Line 1002 | Line 1248 | class Cmssw(JobType):
1248  
1249          ## User Declared output files
1250          for out in (self.output_file+self.output_file_sandbox):
1251 <            n_out = nj + 1
1252 <            out_box.append(self.numberFile_(out,str(n_out)))
1251 >            n_out = nj + 1
1252 >            out_box.append(numberFile(out,str(n_out)))
1253          return out_box
1254  
1009    def prepareSteeringCards(self):
1010        """
1011        Make initial modifications of the user's steering card file.
1012        """
1013        return
1255  
1256      def wsRenameOutput(self, nj):
1257          """
1258          Returns part of a job script which renames the produced files.
1259          """
1260  
1261 <        txt = '\n'
1262 <        txt += '# directory content\n'
1263 <        txt += 'ls \n'
1261 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1262 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1263 >        txt += 'echo ">>> current directory content:"\n'
1264 >        if self.debug_wrapper:
1265 >            txt += 'ls -Al\n'
1266 >        txt += '\n'
1267  
1268 <        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1269 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1268 >        for fileWithSuffix in (self.output_file):
1269 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
1270              txt += '\n'
1271              txt += '# check output file\n'
1272 <            txt += 'ls '+fileWithSuffix+'\n'
1273 <            txt += 'ls_result=$?\n'
1274 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
1275 <            txt += '   echo "ERROR: Problem with output file"\n'
1276 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1272 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1273 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1274 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1275 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1276 >            else:
1277 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1278 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1279 >            txt += 'else\n'
1280 >            txt += '    job_exit_code=60302\n'
1281 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1282 >            if common.scheduler.name().upper() == 'CONDOR_G':
1283                  txt += '    if [ $middleware == OSG ]; then \n'
1284                  txt += '        echo "prepare dummy output file"\n'
1285                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1286                  txt += '    fi \n'
1037            txt += 'else\n'
1038            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1287              txt += 'fi\n'
1288 <      
1289 <        txt += 'cd $RUNTIME_AREA\n'
1290 <        txt += 'cd $RUNTIME_AREA\n'
1043 <        ### OLI_DANIELE
1044 <        txt += 'if [ $middleware == OSG ]; then\n'  
1045 <        txt += '    cd $RUNTIME_AREA\n'
1046 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1047 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1048 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1049 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1050 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1051 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1052 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1053 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1054 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1055 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1056 <        txt += '    fi\n'
1057 <        txt += 'fi\n'
1058 <        txt += '\n'
1059 <
1060 <        file_list = ''
1061 <        ## Add to filelist only files to be possibly copied to SE
1062 <        for fileWithSuffix in self.output_file:
1063 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1064 <            file_list=file_list+output_file_num+' '
1065 <        file_list=file_list[:-1]
1066 <        txt += 'file_list="'+file_list+'"\n'
1288 >        file_list = []
1289 >        for fileWithSuffix in (self.output_file):
1290 >             file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$NJob'))
1291  
1292 +        txt += 'file_list="'+string.join(file_list,',')+'"\n'
1293 +        txt += '\n'
1294 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1295 +        txt += 'echo ">>> current directory content:"\n'
1296 +        if self.debug_wrapper:
1297 +            txt += 'ls -Al\n'
1298 +        txt += '\n'
1299 +        txt += 'cd $RUNTIME_AREA\n'
1300 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1301          return txt
1302  
1070    def numberFile_(self, file, txt):
1071        """
1072        append _'txt' before last extension of a file
1073        """
1074        p = string.split(file,".")
1075        # take away last extension
1076        name = p[0]
1077        for x in p[1:-1]:
1078           name=name+"."+x
1079        # add "_txt"
1080        if len(p)>1:
1081          ext = p[len(p)-1]
1082          result = name + '_' + txt + "." + ext
1083        else:
1084          result = name + '_' + txt
1085        
1086        return result
1087
1303      def getRequirements(self, nj=[]):
1304          """
1305 <        return job requirements to add to jdl files
1305 >        return job requirements to add to jdl files
1306          """
1307          req = ''
1308          if self.version:
1309              req='Member("VO-cms-' + \
1310                   self.version + \
1311                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1312 +        if self.executable_arch:
1313 +            req+=' && Member("VO-cms-' + \
1314 +                 self.executable_arch + \
1315 +                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1316  
1317          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1318 +        if ( common.scheduler.name() == "glitecoll" ) or ( common.scheduler.name() == "glite"):
1319 +            req += ' && other.GlueCEStateStatus == "Production" '
1320  
1321          return req
1322  
1323      def configFilename(self):
1324          """ return the config filename """
1325 <        return self.name()+'.cfg'
1325 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1326 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1327 >          return self.name()+'.py'
1328 >        else:
1329 >          return self.name()+'.cfg'
1330  
1106    ### OLI_DANIELE
1331      def wsSetupCMSOSGEnvironment_(self):
1332          """
1333          Returns part of a job script which is prepares
1334          the execution environment and which is common for all CMS jobs.
1335          """
1336 <        txt = '\n'
1337 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1338 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1339 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1340 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1341 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1336 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1337 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1338 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1339 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1340 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1341 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1342          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1343 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1344 <        txt += '   else\n'
1345 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1346 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1347 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1348 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1125 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1126 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1127 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1128 <        txt += '       exit 1\n'
1129 <        txt += '\n'
1130 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1131 <        txt += '       cd $RUNTIME_AREA\n'
1132 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1133 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1134 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1135 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1136 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1137 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1138 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1139 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1140 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1141 <        txt += '       fi\n'
1142 <        txt += '\n'
1143 <        txt += '       exit 1\n'
1144 <        txt += '   fi\n'
1343 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1344 >        txt += '    else\n'
1345 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1346 >        txt += '        job_exit_code=10020\n'
1347 >        txt += '        func_exit\n'
1348 >        txt += '    fi\n'
1349          txt += '\n'
1350 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1351 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1350 >        txt += '    echo "==> setup cms environment ok"\n'
1351 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1352  
1353          return txt
1354 <
1151 <    ### OLI_DANIELE
1354 >
1355      def wsSetupCMSLCGEnvironment_(self):
1356          """
1357          Returns part of a job script which is prepares
1358          the execution environment and which is common for all CMS jobs.
1359          """
1360 <        txt  = '   \n'
1361 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1362 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1363 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1364 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1365 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1366 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1367 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1368 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1369 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1370 <        txt += '       exit 1\n'
1371 <        txt += '   else\n'
1372 <        txt += '       echo "Sourcing environment... "\n'
1373 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1374 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1375 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1376 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1377 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1378 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1379 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1380 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1381 <        txt += '           exit 1\n'
1382 <        txt += '       fi\n'
1383 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1384 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1385 <        txt += '       result=$?\n'
1386 <        txt += '       if [ $result -ne 0 ]; then\n'
1387 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1388 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1389 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1390 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1391 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1392 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1393 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1394 <        txt += '           exit 1\n'
1395 <        txt += '       fi\n'
1396 <        txt += '   fi\n'
1397 <        txt += '   \n'
1398 <        txt += '   string=`cat /etc/redhat-release`\n'
1399 <        txt += '   echo $string\n'
1400 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1401 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1402 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1403 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1404 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1405 <        txt += '   else\n'
1406 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1407 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1408 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1409 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1410 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1411 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1412 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1413 <        txt += '       exit 1\n'
1414 <        txt += '   fi\n'
1415 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1416 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1360 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1361 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1362 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1363 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1364 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1365 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1366 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1367 >        txt += '        job_exit_code=10031\n'
1368 >        txt += '        func_exit\n'
1369 >        txt += '    else\n'
1370 >        txt += '        echo "Sourcing environment... "\n'
1371 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1372 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1373 >        txt += '            job_exit_code=10020\n'
1374 >        txt += '            func_exit\n'
1375 >        txt += '        fi\n'
1376 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1377 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1378 >        txt += '        result=$?\n'
1379 >        txt += '        if [ $result -ne 0 ]; then\n'
1380 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1381 >        txt += '            job_exit_code=10032\n'
1382 >        txt += '            func_exit\n'
1383 >        txt += '        fi\n'
1384 >        txt += '    fi\n'
1385 >        txt += '    \n'
1386 >        txt += '    echo "==> setup cms environment ok"\n'
1387 >        return txt
1388 >
1389 >    def wsModifyReport(self, nj):
1390 >        """
1391 >        insert the part of the script that modifies the FrameworkJob Report
1392 >        """
1393 >        txt = '\n#Written by cms_cmssw::wsModifyReport\n'
1394 >        publish_data = int(self.cfg_params.get('USER.publish_data',0))
1395 >        if (publish_data == 1):
1396 >
1397 >            txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n'
1398 >            txt += '    FOR_LFN=$LFNBaseName/${PSETHASH}/\n'
1399 >            txt += 'else\n'
1400 >            txt += '    FOR_LFN=/copy_problems/ \n'
1401 >            txt += '    SE=""\n'
1402 >            txt += '    SE_PATH=""\n'
1403 >            txt += 'fi\n'
1404 >
1405 >            txt += 'echo ">>> Modify Job Report:" \n'
1406 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1407 >            txt += 'ProcessedDataset= $procDataset \n'
1408 >            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1409 >            txt += 'echo "SE = $SE"\n'
1410 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1411 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1412 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1413 >            args = '$RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier ' \
1414 >                   '$User -$ProcessedDataset-$PSETHASH $ApplicationFamily '+ \
1415 >                    '  $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH'
1416 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n'
1417 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n'
1418 >            txt += 'modifyReport_result=$?\n'
1419 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1420 >            txt += '    modifyReport_result=70500\n'
1421 >            txt += '    job_exit_code=$modifyReport_result\n'
1422 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1423 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1424 >            txt += 'else\n'
1425 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1426 >            txt += 'fi\n'
1427 >        return txt
1428 >
1429 >    def wsParseFJR(self):
1430 >        """
1431 >        Parse the FrameworkJobReport to obtain useful infos
1432 >        """
1433 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1434 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1435 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1436 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1437 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1438 >        if self.debug_wrapper :
1439 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1440 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1441 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1442 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1443 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1444 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1445 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1446 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1447 >        txt += '        else\n'
1448 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1449 >        txt += '        fi\n'
1450 >        txt += '    else\n'
1451 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1452 >        txt += '    fi\n'
1453 >          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1454 >        txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1455 >        txt += '      echo ">>> Executable succeded  $executable_exit_status"\n'
1456 >        if (self.datasetPath and not (self.dataset_pu or self.useParent)) :
1457 >          # VERIFY PROCESSED DATA
1458 >            txt += '      echo ">>> Verify list of processed files:"\n'
1459 >            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1460 >            txt += '      python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1461 >            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1462 >            txt += '      mv tmp.txt input-files.txt\n'
1463 >            txt += '      echo "cat input-files.txt"\n'
1464 >            txt += '      echo "----------------------"\n'
1465 >            txt += '      cat input-files.txt\n'
1466 >            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1467 >            txt += '      mv tmp.txt processed-files.txt\n'
1468 >            txt += '      echo "----------------------"\n'
1469 >            txt += '      echo "cat processed-files.txt"\n'
1470 >            txt += '      echo "----------------------"\n'
1471 >            txt += '      cat processed-files.txt\n'
1472 >            txt += '      echo "----------------------"\n'
1473 >            txt += '      diff -q input-files.txt processed-files.txt\n'
1474 >            txt += '      fileverify_status=$?\n'
1475 >            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1476 >            txt += '         executable_exit_status=30001\n'
1477 >            txt += '         echo "ERROR ==> not all input files processed"\n'
1478 >            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1479 >            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1480 >            txt += '      fi\n'
1481 >        txt += '    elif [ $executable_exit_status -ne 0 ] || [ $executable_exit_status -ne 50015 ] || [ $executable_exit_status -ne 50017 ];then\n'
1482 >        txt += '      echo ">>> Executable failed  $executable_exit_status"\n'
1483 >        txt += '      func_exit\n'
1484 >        txt += '    fi\n'
1485 >        txt += '\n'
1486 >        txt += 'else\n'
1487 >        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1488 >        txt += 'fi\n'
1489 >        txt += '\n'
1490 >        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1491 >        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1492 >        txt += 'job_exit_code=$executable_exit_status\n'
1493 >
1494          return txt
1495  
1496      def setParam_(self, param, value):
# Line 1219 | Line 1499 | class Cmssw(JobType):
1499      def getParams(self):
1500          return self._params
1501  
1222    def setTaskid_(self):
1223        self._taskId = self.cfg_params['taskId']
1224        
1225    def getTaskid(self):
1226        return self._taskId
1227
1228 #######################################################################
1502      def uniquelist(self, old):
1503          """
1504          remove duplicates from a list
# Line 1234 | Line 1507 | class Cmssw(JobType):
1507          for e in old:
1508              nd[e]=0
1509          return nd.keys()
1510 +
1511 +    def outList(self):
1512 +        """
1513 +        check the dimension of the output files
1514 +        """
1515 +        txt = ''
1516 +        txt += 'echo ">>> list of expected files on output sandbox"\n'
1517 +        listOutFiles = []
1518 +        stdout = 'CMSSW_$NJob.stdout'
1519 +        stderr = 'CMSSW_$NJob.stderr'
1520 +        if (self.return_data == 1):
1521 +            for file in (self.output_file+self.output_file_sandbox):
1522 +                listOutFiles.append(numberFile(file, '$NJob'))
1523 +            listOutFiles.append(stdout)
1524 +            listOutFiles.append(stderr)
1525 +        else:
1526 +            for file in (self.output_file_sandbox):
1527 +                listOutFiles.append(numberFile(file, '$NJob'))
1528 +            listOutFiles.append(stdout)
1529 +            listOutFiles.append(stderr)
1530 +        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1531 +        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1532 +        txt += 'export filesToCheck\n'
1533 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines