
Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.1 by slacapra, Thu Apr 6 16:18:17 2006 UTC vs.
Revision 1.108 by gutsche, Thu Jul 26 03:02:58 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6
7 import DataDiscovery
8 import DataLocation
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
16 <        self.analisys_common_info = {}
16 >        self._params = {}
17 >        self.cfg_params = cfg_params
18 >
19 >        # init BlackWhiteListParser
20 >        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 >
22 >        try:
23 >            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 >        except KeyError:
25 >            self.MaxTarBallSize = 9.5
26 >
27 >        # number of jobs requested to be created; limits the job splitting
28 >        self.ncjobs = ncjobs
29  
30          log = common.logger
31          
32          self.scram = Scram.Scram(cfg_params)
23        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      #script use case Da
41 +        self.datasetPath = '' #script use case Da
42 +
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 <        common.analisys_common_info['sw_version'] = self.version
47 >        common.taskDB.setDict('codeVersion',self.version)
48 >        self.setParam_('application', self.version)
49  
50          ### collect Data cards
51 +
52 +        ## get DBS mode
53          try:
54 <            self.owner = cfg_params['CMSSW.owner']
35 <            log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
36 <            self.dataset = cfg_params['CMSSW.dataset']
37 <            log.debug(6, "CMSSW::CMSSW(): dataset = "+self.dataset)
54 >            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
55          except KeyError:
56 <            msg = "Error: owner and/or dataset not defined "
56 >            self.use_dbs_1 = 0
57 >            
58 >        try:
59 >            tmp =  cfg_params['CMSSW.datasetpath']
60 >            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
61 >            if string.lower(tmp)=='none':
62 >                self.datasetPath = None
63 >                self.selectNoInput = 1
64 >            else:
65 >                self.datasetPath = tmp
66 >                self.selectNoInput = 0
67 >        except KeyError:
68 >            msg = "Error: datasetpath not defined "  
69              raise CrabException(msg)
70  
71 +        # ML monitoring
72 +        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
73 +        if not self.datasetPath:
74 +            self.setParam_('dataset', 'None')
75 +            self.setParam_('owner', 'None')
76 +        else:
77 +            try:
78 +                datasetpath_split = self.datasetPath.split("/")
79 +                # standard style
80 +                if self.use_dbs_1 == 1 :
81 +                    self.setParam_('dataset', datasetpath_split[1])
82 +                    self.setParam_('owner', datasetpath_split[-1])
83 +                else:
84 +                    self.setParam_('dataset', datasetpath_split[1])
85 +                    self.setParam_('owner', datasetpath_split[2])
86 +            except:
87 +                self.setParam_('dataset', self.datasetPath)
88 +                self.setParam_('owner', self.datasetPath)
89 +                
90 +        self.setTaskid_()
91 +        self.setParam_('taskId', self.cfg_params['taskId'])
92 +
93          self.dataTiers = []
43        try:
44            tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
45            for tmp in tmpDataTiers:
46                tmp=string.strip(tmp)
47                self.dataTiers.append(tmp)
48                pass
49            pass
50        except KeyError:
51            pass
52        log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
94  
95          ## now the application
96          try:
97              self.executable = cfg_params['CMSSW.executable']
98 +            self.setParam_('exe', self.executable)
99              log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
100              msg = "Default executable cmsRun overridden. Switch to " + self.executable
101              log.debug(3,msg)
102          except KeyError:
103              self.executable = 'cmsRun'
104 +            self.setParam_('exe', self.executable)
105              msg = "User executable not defined. Use cmsRun"
106              log.debug(3,msg)
107              pass
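
The constructor in the new revision reads every configuration card with the same try/except KeyError idiom shown above, falling back to a default for optional keys (EDG.maxtarballsize, CMSSW.use_dbs_1) and raising CrabException for mandatory ones (CMSSW.datasetpath, CMSSW.pset). A minimal standalone sketch of that idiom, using a hypothetical cfg_params dictionary and illustrative values only:

    cfg_params = {'CMSSW.executable': 'cmsRun'}     # hypothetical configuration dictionary

    # optional parameter: a missing key falls back to a default, as for CMSSW.use_dbs_1
    try:
        use_dbs_1 = int(cfg_params['CMSSW.use_dbs_1'])
    except KeyError:
        use_dbs_1 = 0

    # mandatory parameter: a missing key is turned into an exception, as for CMSSW.datasetpath
    try:
        executable = cfg_params['CMSSW.executable']
    except KeyError:
        raise RuntimeError('executable not defined')    # stands in for CrabException

    assert executable == 'cmsRun' and use_dbs_1 == 0
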
# Line 66 | Line 109 | class Cmssw(JobType):
109          try:
110              self.pset = cfg_params['CMSSW.pset']
111              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
112 <            if (not os.path.exists(self.pset)):
113 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
112 >            if self.pset.lower() != 'none' :
113 >                if (not os.path.exists(self.pset)):
114 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
115 >            else:
116 >                self.pset = None
117          except KeyError:
118              raise CrabException("PSet file missing. Cannot run cmsRun ")
119  
120          # output files
121 +        ## stuff which must be returned always via sandbox
122 +        self.output_file_sandbox = []
123 +
124 +        # add fjr report by default via sandbox
125 +        self.output_file_sandbox.append(self.fjrFileName)
126 +
127 +        # other output files to be returned via sandbox or copied to SE
128          try:
129              self.output_file = []
77
130              tmp = cfg_params['CMSSW.output_file']
131              if tmp != '':
132                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 83 | Line 135 | class Cmssw(JobType):
135                      tmp=string.strip(tmp)
136                      self.output_file.append(tmp)
137                      pass
86
138              else:
139 <                log.message("No output file defined: only stdout/err will be available")
139 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
140                  pass
141              pass
142          except KeyError:
143 <            log.message("No output file defined: only stdout/err will be available")
143 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
144              pass
145  
146          # script_exe file as additional file in inputSandbox
147          try:
148 <           self.scriptExe = cfg_params['CMSSW.script_exe']
149 <           self.additional_inbox_files.append(self.scriptExe)
148 >            self.scriptExe = cfg_params['USER.script_exe']
149 >            if self.scriptExe != '':
150 >               if not os.path.isfile(self.scriptExe):
151 >                  msg ="ERROR. file "+self.scriptExe+" not found"
152 >                  raise CrabException(msg)
153 >               self.additional_inbox_files.append(string.strip(self.scriptExe))
154          except KeyError:
155 <           pass
156 <        if self.scriptExe != '':
157 <           if os.path.isfile(self.scriptExe):
158 <              pass
159 <           else:
160 <              log.message("WARNING. file "+self.scriptExe+" not found")
161 <              sys.exit()
107 <                  
155 >            self.scriptExe = ''
156 >
157 >        #CarlosDaniele
158 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
159 >           msg ="Error. script_exe  not defined"
160 >           raise CrabException(msg)
161 >
162          ## additional input files
163          try:
164 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
164 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
165              for tmp in tmpAddFiles:
166 <                tmp=string.strip(tmp)
167 <                self.additional_inbox_files.append(tmp)
166 >                tmp = string.strip(tmp)
167 >                dirname = ''
168 >                if not tmp[0]=="/": dirname = "."
169 >                files = []
170 >                if string.find(tmp,"*")>-1:
171 >                    files = glob.glob(os.path.join(dirname, tmp))
172 >                    if len(files)==0:
173 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
174 >                else:
175 >                    files.append(tmp)
176 >                for file in files:
177 >                    if not os.path.exists(file):
178 >                        raise CrabException("Additional input file not found: "+file)
179 >                    pass
180 >                    # fname = string.split(file, '/')[-1]
181 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
182 >                    # shutil.copyfile(file, storedFile)
183 >                    self.additional_inbox_files.append(string.strip(file))
184                  pass
185              pass
186 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
187          except KeyError:
188              pass
189  
190 +        # files per job
191 +        try:
192 +            if (cfg_params['CMSSW.files_per_jobs']):
193 +                raise CrabException("files_per_jobs no longer supported.  Quitting.")
194 +        except KeyError:
195 +            pass
196 +
197 +        ## Events per job
198 +        try:
199 +            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
200 +            self.selectEventsPerJob = 1
201 +        except KeyError:
202 +            self.eventsPerJob = -1
203 +            self.selectEventsPerJob = 0
204 +    
205 +        ## number of jobs
206 +        try:
207 +            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
208 +            self.selectNumberOfJobs = 1
209 +        except KeyError:
210 +            self.theNumberOfJobs = 0
211 +            self.selectNumberOfJobs = 0
212 +
213          try:
214              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
215 +            self.selectTotalNumberEvents = 1
216          except KeyError:
217 <            msg = 'Must define total_number_of_events and job_number_of_events'
218 <            raise CrabException(msg)
219 <            
220 < #Marco: FirstEvent is nolonger used inside PSet
221 < #        try:
222 < #            self.first = int(cfg_params['CMSSW.first_event'])
223 < #        except KeyError:
224 < #            self.first = 0
225 < #            pass
226 < #        log.debug(6, "Orca::Orca(): total number of events = "+`self.total_number_of_events`)
227 <        #log.debug(6, "Orca::Orca(): events per job = "+`self.job_number_of_events`)
228 < #        log.debug(6, "Orca::Orca(): first event = "+`self.first`)
229 <        
230 <        CEBlackList = []
231 <        try:
137 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
138 <            for tmp in tmpBad:
139 <                tmp=string.strip(tmp)
140 <                CEBlackList.append(tmp)
217 >            self.total_number_of_events = 0
218 >            self.selectTotalNumberEvents = 0
219 >
220 >        if self.pset != None: #CarlosDaniele
221 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
222 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
223 >                 raise CrabException(msg)
224 >        else:
225 >             if (self.selectNumberOfJobs == 0):
226 >                 msg = 'Must specify  number_of_jobs.'
227 >                 raise CrabException(msg)
228 >
229 >        ## source seed for pythia
230 >        try:
231 >            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
232          except KeyError:
233 <            pass
233 >            self.sourceSeed = None
234 >            common.logger.debug(5,"No seed given")
235  
236 <        self.reCEBlackList=[]
237 <        for bad in CEBlackList:
146 <            self.reCEBlackList.append(re.compile( bad ))
147 <
148 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
149 <
150 <        CEWhiteList = []
151 <        try:
152 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
153 <            #tmpGood = ['cern']
154 <            for tmp in tmpGood:
155 <                tmp=string.strip(tmp)
156 <                #if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch
157 <                CEWhiteList.append(tmp)
236 >        try:
237 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
238          except KeyError:
239 <            pass
239 >            self.sourceSeedVtx = None
240 >            common.logger.debug(5,"No vertex seed given")
241  
242 <        #print 'CEWhiteList: ',CEWhiteList
243 <        self.reCEWhiteList=[]
244 <        for Good in CEWhiteList:
245 <            self.reCEWhiteList.append(re.compile( Good ))
242 >        try:
243 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
244 >        except KeyError:
245 >            self.sourceSeedG4 = None
246 >            common.logger.debug(5,"No g4 sim hits seed given")
247  
248 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
248 >        try:
249 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
250 >        except KeyError:
251 >            self.sourceSeedMix = None
252 >            common.logger.debug(5,"No mix seed given")
253 >
254 >        try:
255 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
256 >        except KeyError:
257 >            self.firstRun = None
258 >            common.logger.debug(5,"No first run given")
259 >        if self.pset != None: #CarlosDaniele
260 >            ver = string.split(self.version,"_")
261 >            if (int(ver[1])>=1 and int(ver[2])>=5):
262 >                import PsetManipulator150 as pp
263 >            else:
264 >                import PsetManipulator as pp
265 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
266  
267          #DBSDLS-start
268          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
269          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
270          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
271 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
272          ## Perform the data location and discovery (based on DBS/DLS)
273 <        self.DataDiscoveryAndLocation(cfg_params)
273 >        ## SL: Don't if NONE is specified as input (pythia use case)
274 >        blockSites = {}
275 >        if self.datasetPath:
276 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
277          #DBSDLS-end          
278  
279          self.tgzNameWithPath = self.getTarBall(self.executable)
280 +    
281 +        ## Select Splitting
282 +        if self.selectNoInput:
283 +            if self.pset == None: #CarlosDaniele
284 +                self.jobSplittingForScript()
285 +            else:
286 +                self.jobSplittingNoInput()
287 +        else:
288 +            self.jobSplittingByBlocks(blockSites)
289 +
290 +        # modify Pset
291 +        if self.pset != None: #CarlosDaniele
292 +            try:
293 +                if (self.datasetPath): # standard job
294 +                    # allow to process a fraction of events in a file
295 +                    PsetEdit.inputModule("INPUT")
296 +                    PsetEdit.maxEvent("INPUTMAXEVENTS")
297 +                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
298 +                else:  # pythia like job
299 +                    PsetEdit.maxEvent(self.eventsPerJob)
300 +                    if (self.firstRun):
301 +                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
302 +                    if (self.sourceSeed) :
303 +                        PsetEdit.pythiaSeed("INPUT")
304 +                        if (self.sourceSeedVtx) :
305 +                            PsetEdit.vtxSeed("INPUTVTX")
306 +                        if (self.sourceSeedG4) :
307 +                            self.PsetEdit.g4Seed("INPUTG4")
308 +                        if (self.sourceSeedMix) :
309 +                            self.PsetEdit.mixSeed("INPUTMIX")
310 +                # add FrameworkJobReport to parameter-set
311 +                PsetEdit.addCrabFJR(self.fjrFileName)
312 +                PsetEdit.psetWriter(self.configFilename())
313 +            except:
314 +                msg='Error while manipulating ParameterSet: exiting...'
315 +                raise CrabException(msg)
316  
317      def DataDiscoveryAndLocation(self, cfg_params):
318  
319 <        fun = "CMSSW::DataDiscoveryAndLocation()"
319 >        import DataDiscovery
320 >        import DataDiscovery_DBS2
321 >        import DataLocation
322 >        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
323 >
324 >        datasetPath=self.datasetPath
325  
326          ## Contact the DBS
327 +        common.logger.message("Contacting Data Discovery Services ...")
328          try:
329 <            self.pubdata=DataDiscovery.DataDiscovery(self.owner,
330 <                                                     self.dataset,
331 <                                                     self.dataTiers,
332 <                                                     cfg_params)
329 >
330 >            if self.use_dbs_1 == 1 :
331 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
332 >            else :
333 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
334              self.pubdata.fetchDBSInfo()
335  
336          except DataDiscovery.NotExistingDatasetError, ex :
337              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
338              raise CrabException(msg)
193
339          except DataDiscovery.NoDataTierinProvenanceError, ex :
340              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
341              raise CrabException(msg)
342          except DataDiscovery.DataDiscoveryError, ex:
343 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
343 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
344 >            raise CrabException(msg)
345 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
346 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
347 >            raise CrabException(msg)
348 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
349 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
350 >            raise CrabException(msg)
351 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
352 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
353              raise CrabException(msg)
354  
355 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
356 <        self.DBSPaths=self.pubdata.getDBSPaths()
357 <        common.logger.message("Required data are : ")
204 <        for path in self.DBSPaths:
205 <            common.logger.message(" --> "+path )
355 >        self.filesbyblock=self.pubdata.getFiles()
356 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
357 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
358  
359          ## get max number of events
208        common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
360          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
210        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
211
212        ## get fileblocks corresponding to the required data
213        fb=self.pubdata.getFileBlocks()
214        common.logger.debug(5,"fileblocks are %s"%fb)
361  
362          ## Contact the DLS and build a list of sites hosting the fileblocks
363          try:
364 <            dataloc=DataLocation.DataLocation(self.pubdata.getFileBlocks(),cfg_params)
364 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
365              dataloc.fetchDLSInfo()
366          except DataLocation.DataLocationError , ex:
367              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
368              raise CrabException(msg)
369          
224        allsites=dataloc.getSites()
225        common.logger.debug(5,"sites are %s"%allsites)
226        sites=self.checkBlackList(allsites)
227        common.logger.debug(5,"sites are (after black list) %s"%sites)
228        sites=self.checkWhiteList(sites)
229        common.logger.debug(5,"sites are (after white list) %s"%sites)
370  
371 <        if len(sites)==0:
372 <            msg = 'No sites hosting all the needed data! Exiting... '
373 <            raise CrabException(msg)
374 <        common.logger.message("List of Sites hosting the data : "+str(sites))
375 <        common.logger.debug(6, "List of Sites: "+str(sites))
376 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
377 <        return
371 >        sites = dataloc.getSites()
372 >        allSites = []
373 >        listSites = sites.values()
374 >        for listSite in listSites:
375 >            for oneSite in listSite:
376 >                allSites.append(oneSite)
377 >        allSites = self.uniquelist(allSites)
378 >
379 >        # screen output
380 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
381 >
382 >        return sites
383 >    
384 >    def jobSplittingByBlocks(self, blockSites):
385 >        """
386 >        Perform job splitting. Jobs run over an integer number of files
387 >        and no more than one block.
388 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
389 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
390 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
391 >                  self.maxEvents, self.filesbyblock
392 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
393 >              self.total_number_of_jobs - Total # of jobs
394 >              self.list_of_args - File(s) job will run on (a list of lists)
395 >        """
396 >
397 >        # ---- Handle the possible job splitting configurations ---- #
398 >        if (self.selectTotalNumberEvents):
399 >            totalEventsRequested = self.total_number_of_events
400 >        if (self.selectEventsPerJob):
401 >            eventsPerJobRequested = self.eventsPerJob
402 >            if (self.selectNumberOfJobs):
403 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
404 >
405 >        # If user requested all the events in the dataset
406 >        if (totalEventsRequested == -1):
407 >            eventsRemaining=self.maxEvents
408 >        # If user requested more events than are in the dataset
409 >        elif (totalEventsRequested > self.maxEvents):
410 >            eventsRemaining = self.maxEvents
411 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
412 >        # If user requested less events than are in the dataset
413 >        else:
414 >            eventsRemaining = totalEventsRequested
415 >
416 >        # If user requested more events per job than are in the dataset
417 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
418 >            eventsPerJobRequested = self.maxEvents
419 >
420 >        # For user info at end
421 >        totalEventCount = 0
422 >
423 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
424 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
425 >
426 >        if (self.selectNumberOfJobs):
427 >            common.logger.message("May not create the exact number_of_jobs requested.")
428 >
429 >        if ( self.ncjobs == 'all' ) :
430 >            totalNumberOfJobs = 999999999
431 >        else :
432 >            totalNumberOfJobs = self.ncjobs
433 >            
434 >
435 >        blocks = blockSites.keys()
436 >        blockCount = 0
437 >        # Backup variable in case self.maxEvents counted events in a non-included block
438 >        numBlocksInDataset = len(blocks)
439 >
440 >        jobCount = 0
441 >        list_of_lists = []
442 >
443 >        # list tracking which jobs belong to which block
444 >        jobsOfBlock = {}
445 >
446 >        # ---- Iterate over the blocks in the dataset until ---- #
447 >        # ---- we've met the requested total # of events    ---- #
448 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
449 >            block = blocks[blockCount]
450 >            blockCount += 1
451 >            if block not in jobsOfBlock.keys() :
452 >                jobsOfBlock[block] = []
453 >            
454 >            if self.eventsbyblock.has_key(block) :
455 >                numEventsInBlock = self.eventsbyblock[block]
456 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
457 >            
458 >                files = self.filesbyblock[block]
459 >                numFilesInBlock = len(files)
460 >                if (numFilesInBlock <= 0):
461 >                    continue
462 >                fileCount = 0
463 >
464 >                # ---- New block => New job ---- #
465 >                parString = "\\{"
466 >                # counter for number of events in files currently worked on
467 >                filesEventCount = 0
468 >                # flag if next while loop should touch new file
469 >                newFile = 1
470 >                # job event counter
471 >                jobSkipEventCount = 0
472 >            
473 >                # ---- Iterate over the files in the block until we've met the requested ---- #
474 >                # ---- total # of events or we've gone over all the files in this block  ---- #
475 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
476 >                    file = files[fileCount]
477 >                    if newFile :
478 >                        try:
479 >                            numEventsInFile = self.eventsbyfile[file]
480 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
481 >                            # increase filesEventCount
482 >                            filesEventCount += numEventsInFile
483 >                            # Add file to current job
484 >                            parString += '\\\"' + file + '\\\"\,'
485 >                            newFile = 0
486 >                        except KeyError:
487 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
488 >                        
489 >
490 >                    # if less events in file remain than eventsPerJobRequested
491 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
492 >                        # if last file in block
493 >                        if ( fileCount == numFilesInBlock-1 ) :
494 >                            # end job using last file, use remaining events in block
495 >                            # close job and touch new file
496 >                            fullString = parString[:-2]
497 >                            fullString += '\\}'
498 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
499 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
500 >                            self.jobDestination.append(blockSites[block])
501 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
502 >                            # fill jobs of block dictionary
503 >                            jobsOfBlock[block].append(jobCount+1)
504 >                            # reset counter
505 >                            jobCount = jobCount + 1
506 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
507 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
508 >                            jobSkipEventCount = 0
509 >                            # reset file
510 >                            parString = "\\{"
511 >                            filesEventCount = 0
512 >                            newFile = 1
513 >                            fileCount += 1
514 >                        else :
515 >                            # go to next file
516 >                            newFile = 1
517 >                            fileCount += 1
518 >                    # if events in file equal to eventsPerJobRequested
519 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
520 >                        # close job and touch new file
521 >                        fullString = parString[:-2]
522 >                        fullString += '\\}'
523 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
524 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
525 >                        self.jobDestination.append(blockSites[block])
526 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
527 >                        jobsOfBlock[block].append(jobCount+1)
528 >                        # reset counter
529 >                        jobCount = jobCount + 1
530 >                        totalEventCount = totalEventCount + eventsPerJobRequested
531 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
532 >                        jobSkipEventCount = 0
533 >                        # reset file
534 >                        parString = "\\{"
535 >                        filesEventCount = 0
536 >                        newFile = 1
537 >                        fileCount += 1
538 >                        
539 >                    # if more events in file remain than eventsPerJobRequested
540 >                    else :
541 >                        # close job but don't touch new file
542 >                        fullString = parString[:-2]
543 >                        fullString += '\\}'
544 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
545 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
546 >                        self.jobDestination.append(blockSites[block])
547 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
548 >                        jobsOfBlock[block].append(jobCount+1)
549 >                        # increase counter
550 >                        jobCount = jobCount + 1
551 >                        totalEventCount = totalEventCount + eventsPerJobRequested
552 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
553 >                        # calculate skip events for last file
554 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
555 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
556 >                        # remove all but the last file
557 >                        filesEventCount = self.eventsbyfile[file]
558 >                        parString = "\\{"
559 >                        parString += '\\\"' + file + '\\\"\,'
560 >                    pass # END if
561 >                pass # END while (iterate over files in the block)
562 >        pass # END while (iterate over blocks in the dataset)
563 >        self.ncjobs = self.total_number_of_jobs = jobCount
564 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
565 >            common.logger.message("Could not run on all requested events because some blocks are not hosted at allowed sites.")
566 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
567          
568 <    def checkBlackList(self, allSites):
569 <        if len(self.reCEBlackList)==0: return allSites
570 <        sites = []
571 <        for site in allSites:
572 <            common.logger.debug(10,'Site '+site)
573 <            good=1
574 <            for re in self.reCEBlackList:
575 <                if re.search(site):
576 <                    common.logger.message('CE in black list, skipping site '+site)
577 <                    good=0
568 >        # screen output
569 >        screenOutput = "List of jobs and available destination sites:\n\n"
570 >
571 >        blockCounter = 0
572 >        for block in blocks:
573 >            if block in jobsOfBlock.keys() :
574 >                blockCounter += 1
575 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
576 >
577 >        common.logger.message(screenOutput)
578 >
579 >        self.list_of_args = list_of_lists
580 >        return
581 >
582 >    def jobSplittingNoInput(self):
583 >        """
584 >        Perform job splitting based on number of event per job
585 >        """
586 >        common.logger.debug(5,'Splitting per events')
587 >        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
588 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
589 >        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
590 >
591 >        if (self.total_number_of_events < 0):
592 >            msg='Cannot split jobs per Events with "-1" as total number of events'
593 >            raise CrabException(msg)
594 >
595 >        if (self.selectEventsPerJob):
596 >            if (self.selectTotalNumberEvents):
597 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
598 >            elif(self.selectNumberOfJobs) :  
599 >                self.total_number_of_jobs =self.theNumberOfJobs
600 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
601 >
602 >        elif (self.selectNumberOfJobs) :
603 >            self.total_number_of_jobs = self.theNumberOfJobs
604 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
605 >
606 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
607 >
608 >        # is there any remainder?
609 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
610 >
611 >        common.logger.debug(5,'Check  '+str(check))
612 >
613 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' events, for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
614 >        if check > 0:
615 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
616 >
617 >        # argument is seed number.$i
618 >        self.list_of_args = []
619 >        for i in range(self.total_number_of_jobs):
620 >            ## Since there is no input, any site is good
621 >           # self.jobDestination.append(["Any"])
622 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
623 >            args=[]
624 >            if (self.firstRun):
625 >                    ## pythia first run
626 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
627 >                args.append(str(self.firstRun)+str(i))
628 >            else:
629 >                ## no first run
630 >                #self.list_of_args.append([str(i)])
631 >                args.append(str(i))
632 >            if (self.sourceSeed):
633 >                args.append(str(self.sourceSeed)+str(i))
634 >                if (self.sourceSeedVtx):
635 >                    ## + vtx random seed
636 >                    args.append(str(self.sourceSeedVtx)+str(i))
637 >                if (self.sourceSeedG4):
638 >                    ## + G4 random seed
639 >                    args.append(str(self.sourceSeedG4)+str(i))
640 >                if (self.sourceSeedMix):    
641 >                    ## + Mix random seed
642 >                    args.append(str(self.sourceSeedMix)+str(i))
643                  pass
644 <            if good: sites.append(site)
645 <        if len(sites) == 0:
646 <            common.logger.debug(3,"No sites found after BlackList")
647 <        return sites
644 >            pass
645 >            self.list_of_args.append(args)
646 >        pass
647 >            
648 >        # print self.list_of_args
649  
650 <    def checkWhiteList(self, allsites):
650 >        return
651  
652 <        if len(self.reCEWhiteList)==0: return pubDBUrls
653 <        sites = []
654 <        for site in allsites:
655 <            #print 'connecting to the URL ',url
656 <            good=0
657 <            for re in self.reCEWhiteList:
658 <                if re.search(site):
659 <                    common.logger.debug(5,'CE in white list, adding site '+site)
660 <                    good=1
661 <                if not good: continue
662 <                sites.append(site)
663 <        if len(sites) == 0:
664 <            common.logger.message("No sites found after WhiteList\n")
665 <        else:
666 <            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
667 <        return sites
652 >
653 >    def jobSplittingForScript(self):#CarlosDaniele
654 >        """
655 >        Perform job splitting based on number of job
656 >        """
657 >        common.logger.debug(5,'Splitting per job')
658 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
659 >
660 >        self.total_number_of_jobs = self.theNumberOfJobs
661 >
662 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
663 >
664 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
665 >
666 >        # argument is seed number.$i
667 >        self.list_of_args = []
668 >        for i in range(self.total_number_of_jobs):
669 >            ## Since there is no input, any site is good
670 >           # self.jobDestination.append(["Any"])
671 >            self.jobDestination.append([""])
672 >            ## no random seed
673 >            self.list_of_args.append([str(i)])
674 >        return
675 >
676 >    def split(self, jobParams):
677 >
678 >        common.jobDB.load()
679 >        #### Fabio
680 >        njobs = self.total_number_of_jobs
681 >        arglist = self.list_of_args
682 >        # create the empty structure
683 >        for i in range(njobs):
684 >            jobParams.append("")
685 >        
686 >        for job in range(njobs):
687 >            jobParams[job] = arglist[job]
688 >            # print str(arglist[job])
689 >            # print jobParams[job]
690 >            common.jobDB.setArguments(job, jobParams[job])
691 >            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
692 >            common.jobDB.setDestination(job, self.jobDestination[job])
693 >
694 >        common.jobDB.save()
695 >        return
696 >    
697 >    def getJobTypeArguments(self, nj, sched):
698 >        result = ''
699 >        for i in common.jobDB.arguments(nj):
700 >            result=result+str(i)+" "
701 >        return result
702 >  
703 >    def numberOfJobs(self):
704 >        # Fabio
705 >        return self.total_number_of_jobs
706  
707      def getTarBall(self, exe):
708          """
# Line 277 | Line 710 | class Cmssw(JobType):
710          """
711          
712          # if it exist, just return it
713 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
713 >        #
714 >        # Marco. Let's start to use relative path for Boss XML files
715 >        #
716 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
717          if os.path.exists(self.tgzNameWithPath):
718              return self.tgzNameWithPath
719  
# Line 291 | Line 727 | class Cmssw(JobType):
727          # First of all declare the user Scram area
728          swArea = self.scram.getSWArea_()
729          #print "swArea = ", swArea
730 <        swVersion = self.scram.getSWVersion()
731 <        #print "swVersion = ", swVersion
730 >        # swVersion = self.scram.getSWVersion()
731 >        # print "swVersion = ", swVersion
732          swReleaseTop = self.scram.getReleaseTop_()
733          #print "swReleaseTop = ", swReleaseTop
734          
# Line 300 | Line 736 | class Cmssw(JobType):
736          if swReleaseTop == '' or swArea == swReleaseTop:
737              return
738  
739 <        filesToBeTarred = []
740 <        ## First find the executable
741 <        if (self.executable != ''):
742 <            exeWithPath = self.scram.findFile_(executable)
743 < #           print exeWithPath
744 <            if ( not exeWithPath ):
745 <                raise CrabException('User executable '+executable+' not found')
746 <
747 <            ## then check if it's private or not
748 <            if exeWithPath.find(swReleaseTop) == -1:
749 <                # the exe is private, so we must ship
750 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
751 <                path = swArea+'/'
752 <                exe = string.replace(exeWithPath, path,'')
753 <                filesToBeTarred.append(exe)
754 <                pass
755 <            else:
756 <                # the exe is from release, we'll find it on WN
757 <                pass
758 <
759 <        ## Now get the libraries: only those in local working area
760 <        libDir = 'lib'
761 <        lib = swArea+'/' +libDir
762 <        common.logger.debug(5,"lib "+lib+" to be tarred")
763 <        if os.path.exists(lib):
764 <            filesToBeTarred.append(libDir)
765 <
766 <        ## Now check if the Data dir is present
767 <        dataDir = 'src/Data/'
768 <        if os.path.isdir(swArea+'/'+dataDir):
769 <            filesToBeTarred.append(dataDir)
770 <
771 <        ## Create the tar-ball
772 <        if len(filesToBeTarred)>0:
773 <            cwd = os.getcwd()
774 <            os.chdir(swArea)
775 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
776 <            for line in filesToBeTarred:
777 <                tarcmd = tarcmd + line + ' '
778 <            cout = runCommand(tarcmd)
779 <            if not cout:
780 <                raise CrabException('Could not create tar-ball')
781 <            os.chdir(cwd)
782 <        else:
783 <            common.logger.debug(5,"No files to be to be tarred")
739 >        import tarfile
740 >        try: # create tar ball
741 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
742 >            ## First find the executable
743 >            if (self.executable != ''):
744 >                exeWithPath = self.scram.findFile_(executable)
745 >                if ( not exeWithPath ):
746 >                    raise CrabException('User executable '+executable+' not found')
747 >    
748 >                ## then check if it's private or not
749 >                if exeWithPath.find(swReleaseTop) == -1:
750 >                    # the exe is private, so we must ship
751 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
752 >                    path = swArea+'/'
753 >                    # distinguish case when script is in user project area or given by full path somewhere else
754 >                    if exeWithPath.find(path) >= 0 :
755 >                        exe = string.replace(exeWithPath, path,'')
756 >                        tar.add(path+exe,os.path.basename(executable))
757 >                    else :
758 >                        tar.add(exeWithPath,os.path.basename(executable))
759 >                    pass
760 >                else:
761 >                    # the exe is from release, we'll find it on WN
762 >                    pass
763 >    
764 >            ## Now get the libraries: only those in local working area
765 >            libDir = 'lib'
766 >            lib = swArea+'/' +libDir
767 >            common.logger.debug(5,"lib "+lib+" to be tarred")
768 >            if os.path.exists(lib):
769 >                tar.add(lib,libDir)
770 >    
771 >            ## Now check if module dir is present
772 >            moduleDir = 'module'
773 >            module = swArea + '/' + moduleDir
774 >            if os.path.isdir(module):
775 >                tar.add(module,moduleDir)
776 >
777 >            ## Now check if any data dir(s) is present
778 >            swAreaLen=len(swArea)
779 >            for root, dirs, files in os.walk(swArea):
780 >                if "data" in dirs:
781 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
782 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
783 >
784 >            ## Add ProdAgent dir to tar
785 >            paDir = 'ProdAgentApi'
786 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
787 >            if os.path.isdir(pa):
788 >                tar.add(pa,paDir)
789 >
790 >            ### FEDE FOR DBS PUBLICATION
791 >            ## Add PRODCOMMON dir to tar
792 >            prodcommonDir = 'ProdCommon'
793 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
794 >            if os.path.isdir(prodcommonPath):
795 >                tar.add(prodcommonPath,prodcommonDir)
796 >            #############################    
797 >        
798 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
799 >            tar.close()
800 >        except :
801 >            raise CrabException('Could not create tar-ball')
802 >
803 >        ## check for tarball size
804 >        tarballinfo = os.stat(self.tgzNameWithPath)
805 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
806 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
807 >
808 >        ## create tar-ball with ML stuff
809 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
810 >        try:
811 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
812 >            path=os.environ['CRABDIR'] + '/python/'
813 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
814 >                tar.add(path+file,file)
815 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
816 >            tar.close()
817 >        except :
818 >            raise CrabException('Could not create ML files tar-ball')
819          
820          return
821          
822 +    def additionalInputFileTgz(self):
823 +        """
824 +        Put all additional files into a tar ball and return its name
825 +        """
826 +        import tarfile
827 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
828 +        tar = tarfile.open(tarName, "w:gz")
829 +        for file in self.additional_inbox_files:
830 +            tar.add(file,string.split(file,'/')[-1])
831 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
832 +        tar.close()
833 +        return tarName
834 +
835      def wsSetupEnvironment(self, nj):
836          """
837          Returns part of a job script which prepares
838          the execution environment for the job 'nj'.
839          """
840          # Prepare JobType-independent part
841 <        txt = self.wsSetupCMSEnvironment_()
841 >        txt = ''
842 >  
843 >        ## OLI_Daniele: at this level the middleware is already known
844 >
845 >        txt += 'echo "### First set SCRAM ARCH and BUILD_ARCH ###"\n'
846 >        txt += 'echo "Setting SCRAM_ARCH='+self.executable_arch+'"\n'
847 >        txt += 'export SCRAM_ARCH='+self.executable_arch+'\n'
848 >        txt += 'export BUILD_ARCH='+self.executable_arch+'\n'
849 >        txt += 'if [ $middleware == LCG ]; then \n'
850 >        txt += self.wsSetupCMSLCGEnvironment_()
851 >        txt += 'elif [ $middleware == OSG ]; then\n'
852 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
853 >        txt += '    echo "Created working directory: $WORKING_DIR"\n'
854 >        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
855 >        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
856 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
857 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
858 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
859 >        txt += '        rm -f $RUNTIME_AREA/$repo \n'
860 >        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
861 >        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
862 >        txt += '        exit 1\n'
863 >        txt += '    fi\n'
864 >        txt += '\n'
865 >        txt += '    echo "Change to working directory: $WORKING_DIR"\n'
866 >        txt += '    cd $WORKING_DIR\n'
867 >        txt += self.wsSetupCMSOSGEnvironment_()
868 >        txt += 'fi\n'
869  
870          # Prepare JobType-specific part
871          scram = self.scram.commandName()
# Line 363 | Line 874 | class Cmssw(JobType):
874          txt += scram+' project CMSSW '+self.version+'\n'
875          txt += 'status=$?\n'
876          txt += 'if [ $status != 0 ] ; then\n'
877 <        txt += '   echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
878 <        txt += '   echo "JOB_EXIT_STATUS = 5"\n'
879 <        txt += '   echo "SanityCheckCode = 5" | tee -a $RUNTIME_AREA/$repo\n'
877 >        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
878 >        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
879 >        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
880          txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
881 <        txt += '   exit 5 \n'
881 >        txt += '   rm -f $RUNTIME_AREA/$repo \n'
882 >        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
883 >        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
884 >        ## OLI_Daniele
885 >        txt += '    if [ $middleware == OSG ]; then \n'
886 >        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
887 >        txt += '        cd $RUNTIME_AREA\n'
888 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
889 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
890 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
891 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
892 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
893 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
894 >        txt += '            rm -f $RUNTIME_AREA/$repo \n'
895 >        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
896 >        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
897 >        txt += '        fi\n'
898 >        txt += '    fi \n'
899 >        txt += '   exit 1 \n'
900          txt += 'fi \n'
901          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
902          txt += 'cd '+self.version+'\n'
903 +        ########## FEDE FOR DBS2 ######################
904 +        txt += 'SOFTWARE_DIR=`pwd`\n'
905 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
906 +        ###############################################
907          ### needed grep for bug in scramv1 ###
908 +        txt += scram+' runtime -sh\n'
909          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
910 +        txt += 'echo $PATH\n'
911  
912          # Handle the arguments:
913          txt += "\n"
914 <        txt += "## ARGUMNETS: $1 Job Number\n"
380 <        # txt += "## ARGUMNETS: $2 First Event for this job\n"
381 <        # txt += "## ARGUMNETS: $3 Max Event for this job\n"
914 >        txt += "## number of arguments (first argument always jobnumber)\n"
915          txt += "\n"
916 <        txt += "narg=$#\n"
917 <        txt += "if [ $narg -lt 1 ]\n"
916 > #        txt += "narg=$#\n"
917 >        txt += "if [ $nargs -lt 2 ]\n"
918          txt += "then\n"
919 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
920 <        txt += '    echo "JOB_EXIT_STATUS = 1"\n'
921 <        txt += '    echo "SanityCheckCode = 1" | tee -a $RUNTIME_AREA/$repo\n'
919 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
920 >        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
921 >        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
922          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
923 +        txt += '    rm -f $RUNTIME_AREA/$repo \n'
924 +        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
925 +        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
926 +        ## OLI_Daniele
927 +        txt += '    if [ $middleware == OSG ]; then \n'
928 +        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
929 +        txt += '        cd $RUNTIME_AREA\n'
930 +        txt += '        /bin/rm -rf $WORKING_DIR\n'
931 +        txt += '        if [ -d $WORKING_DIR ] ;then\n'
932 +        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
933 +        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
934 +        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
935 +        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
936 +        txt += '            rm -f $RUNTIME_AREA/$repo \n'
937 +        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
938 +        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
939 +        txt += '        fi\n'
940 +        txt += '    fi \n'
941          txt += "    exit 1\n"
942          txt += "fi\n"
943          txt += "\n"
393        txt += "NJob=$1\n"
394        # txt += "FirstEvent=$2\n"
395        # txt += "MaxEvents=$3\n"
944  
945          # Prepare job-specific part
946          job = common.job_list[nj]
947 <        pset = os.path.basename(job.configFilename())
948 <        txt += '\n'
949 <        txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
950 <        # txt += 'if [ -e $RUNTIME_AREA/orcarc_$CE ] ; then\n'
951 <        # txt += '  cat $RUNTIME_AREA/orcarc_$CE .orcarc >> .orcarc_tmp\n'
952 <        # txt += '  mv .orcarc_tmp .orcarc\n'
953 <        # txt += 'fi\n'
954 <        # txt += 'if [ -e $RUNTIME_AREA/init_$CE.sh ] ; then\n'
955 <        # txt += '  cp $RUNTIME_AREA/init_$CE.sh init.sh\n'
956 <        # txt += 'fi\n'
947 >        ### FEDE FOR DBS OUTPUT PUBLICATION
948 >        if (self.datasetPath):
949 >            txt += '\n'
950 >            txt += 'DatasetPath='+self.datasetPath+'\n'
951 >
952 >            datasetpath_split = self.datasetPath.split("/")
953 >            
954 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
955 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
956 >            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
957 >            txt += 'ApplicationFamily=cmsRun\n'
958 >
959 >        else:
960 >            txt += 'DatasetPath=MCDataTier\n'
961 >            txt += 'PrimaryDataset=null\n'
962 >            txt += 'DataTier=null\n'
963 >            #txt += 'ProcessedDataset=null\n'
964 >            txt += 'ApplicationFamily=MCDataTier\n'
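A note on the split used in the branch above: because the dataset path starts with a slash, split("/") returns an empty first element, so index 1 and index 2 are the first two path components. A short sketch with a made-up path (the real value comes from CMSSW.datasetpath):

    # illustrative only -- not a real dataset path
    datasetpath = '/ExamplePrimary/ExampleSecond/ExampleThird'
    parts = datasetpath.split('/')
    # parts == ['', 'ExamplePrimary', 'ExampleSecond', 'ExampleThird']
    # parts[1] is written to the wrapper as PrimaryDataset, parts[2] as DataTier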
965 >        if self.pset != None: #CarlosDaniele
966 >            pset = os.path.basename(job.configFilename())
967 >            txt += '\n'
968 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
969 >            if (self.datasetPath): # standard job
970 >                #txt += 'InputFiles=$2\n'
971 >                txt += 'InputFiles=${args[1]}\n'
972 >                txt += 'MaxEvents=${args[2]}\n'
973 >                txt += 'SkipEvents=${args[3]}\n'
974 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
975 >                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
976 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
977 >                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
978 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
979 >                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
980 >            else:  # pythia like job
981 >                seedIndex=1
982 >                if (self.firstRun):
983 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
984 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
985 >                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
986 >                    seedIndex=seedIndex+1
987 >
988 >                if (self.sourceSeed):
989 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
990 >                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
991 >                    seedIndex=seedIndex+1
992 >                    ## the following seeds are not always present
993 >                    if (self.sourceSeedVtx):
994 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
995 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
996 >                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
997 >                        seedIndex += 1
998 >                    if (self.sourceSeedG4):
999 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1000 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1001 >                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1002 >                        seedIndex += 1
1003 >                    if (self.sourceSeedMix):
1004 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1005 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1006 >                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1007 >                        seedIndex += 1
1008 >                    pass
1009 >                pass
1010 >            txt += 'mv -f '+pset+' pset.cfg\n'
1011  
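The sed lines generated above fill the placeholder tokens left in the job pset ({'INPUT'}, INPUTMAXEVENTS, INPUTSKIPEVENTS and the seed markers) with the per-job arguments at run time. A minimal Python sketch of the same substitution, handy for checking a pset offline; the file name and values here are illustrative, not taken from this revision:

    # rough offline equivalent of the generated "sed ... > tmp && mv -f tmp pset.cfg" lines
    def fill_pset(text, input_files, max_events, skip_events):
        text = text.replace("{'INPUT'}", input_files)            # comma-separated input file list
        text = text.replace('INPUTMAXEVENTS', str(max_events))
        text = text.replace('INPUTSKIPEVENTS', str(skip_events))
        return text

    filled = fill_pset(open('pset.cfg').read(), '"file:a.root","file:b.root"', 1000, 0)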
1012          if len(self.additional_inbox_files) > 0:
1013 <            for file in self.additional_inbox_files:
1014 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
1015 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
414 <                txt += '   chmod +x '+file+'\n'
415 <                txt += 'fi\n'
1013 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1014 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1015 >            txt += 'fi\n'
1016              pass
1017  
1018 <        # txt += '\n'
1019 <        # txt += 'chmod +x ./init.sh\n'
1020 <        # txt += './init.sh\n'
1021 <        # txt += 'exitStatus=$?\n'
1022 <        # txt += 'if [ $exitStatus != 0 ] ; then\n'
1023 <        # txt += '  echo "SET_EXE_ENV 1 ==> ERROR StageIn init script failed"\n'
1024 <        # txt += '  echo "JOB_EXIT_STATUS = $exitStatus" \n'
1025 <        # txt += '  echo "SanityCheckCode = $exitStatus" | tee -a $RUNTIME_AREA/$repo\n'
1026 <        # txt += '  dumpStatus $RUNTIME_AREA/$repo\n'
1027 <        # txt += '  exit $exitStatus\n'
1028 <        # txt += 'fi\n'
1029 <        # txt += "echo 'SET_EXE_ENV 0 ==> job setup ok'\n"
1030 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1031 <
1032 <        # txt += 'echo "FirstEvent=$FirstEvent" >> .orcarc\n'
1033 <        # txt += 'echo "MaxEvents=$MaxEvents" >> .orcarc\n'
1034 <        # if self.ML:
435 <        #     txt += 'echo "MonalisaJobId=$NJob" >> .orcarc\n'
1018 >        if self.pset != None: #CarlosDaniele
1019 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1020 >        
1021 >            txt += '\n'
1022 >            txt += 'echo "***** cat pset.cfg *********"\n'
1023 >            txt += 'cat pset.cfg\n'
1024 >            txt += 'echo "****** end pset.cfg ********"\n'
1025 >            txt += '\n'
1026 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1027 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1028 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1029 >            ##############
1030 >            txt += '\n'
1031 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1032 >            # txt += 'cat pset1.cfg\n'
1033 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1034 >        return txt
1035  
1036 <        txt += '\n'
1037 <        txt += 'echo "***** cat pset.cfg *********"\n'
1038 <        txt += 'cat pset.cfg\n'
1039 <        txt += 'echo "****** end pset.cfg ********"\n'
1036 >    def wsBuildExe(self, nj=0):
1037 >        """
1038 >        Put in the script the commands to build an executable
1039 >        or a library.
1040 >        """
1041 >
1042 >        txt = ""
1043 >
1044 >        if os.path.isfile(self.tgzNameWithPath):
1045 >            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1046 >            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1047 >            txt += 'untar_status=$? \n'
1048 >            txt += 'if [ $untar_status -ne 0 ]; then \n'
1049 >            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
1050 >            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1051 >            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1052 >            txt += '   if [ $middleware == OSG ]; then \n'
1053 >            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1054 >            txt += '       cd $RUNTIME_AREA\n'
1055 >            txt += '       /bin/rm -rf $WORKING_DIR\n'
1056 >            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1057 >            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1058 >            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1059 >            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1060 >            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1061 >            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1062 >            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1063 >            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1064 >            txt += '       fi\n'
1065 >            txt += '   fi \n'
1066 >            txt += '   \n'
1067 >            txt += '   exit 1 \n'
1068 >            txt += 'else \n'
1069 >            txt += '   echo "Successful untar" \n'
1070 >            txt += 'fi \n'
1071 >            txt += '\n'
1072 >            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1073 >            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1074 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1075 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1076 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1077 >            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1078 >            txt += 'else\n'
1079 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1080 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1081 >            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1082 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1083 >            ###################  
1084 >            txt += 'fi\n'
1085 >            txt += '\n'
1086 >
1087 >            pass
1088 >        
1089          return txt
1090  
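For reference, the PYTHONPATH branch emitted above amounts to prepending the unpacked ProdAgentApi and ProdCommon directories to whatever is already set on the worker node. A hedged Python sketch of the same logic (SOFTWARE_DIR is the release area exported earlier in the wrapper; the fallback default here is an assumption for the sketch only):

    import os

    software_dir = os.environ.get('SOFTWARE_DIR', '.')   # exported earlier by the job wrapper
    api_paths = software_dir + '/ProdAgentApi:' + software_dir + '/ProdCommon'
    if os.environ.get('PYTHONPATH'):
        os.environ['PYTHONPATH'] = api_paths + ':' + os.environ['PYTHONPATH']
    else:
        os.environ['PYTHONPATH'] = api_paths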
1091      def modifySteeringCards(self, nj):
# Line 447 | Line 1095 | class Cmssw(JobType):
1095          """
1096          
1097      def executableName(self):
1098 <        return self.executable
1098 >        if self.scriptExe: #CarlosDaniele
1099 >            return "sh "
1100 >        else:
1101 >            return self.executable
1102  
1103      def executableArgs(self):
1104 <        return "-p pset.cfg"
1104 >        if self.scriptExe:#CarlosDaniele
1105 >            return   self.scriptExe + " $NJob"
1106 >        else:
1107 >            # if >= CMSSW_1_5_X, add -e
1108 >            version_array = self.scram.getSWVersion().split('_')
1109 >            major = 0
1110 >            minor = 0
1111 >            try:
1112 >                major = int(version_array[1])
1113 >                minor = int(version_array[2])
1114 >            except:
1115 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1116 >                raise CrabException(msg)
1117 >            if major >= 1 and minor >= 5 :
1118 >                return " -e -p pset.cfg"
1119 >            else:
1120 >                return " -p pset.cfg"
1121  
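The version check above simply splits the release string on underscores; for example CMSSW_1_5_4 gives major 1 and minor 5, which selects the "-e" form of the cmsRun arguments. A standalone sketch of that parsing (the release string is an example, not read from any configuration):

    def parse_release(version):
        # 'CMSSW_1_5_4' -> (1, 5); raises if the string is not CMSSW_<major>_<minor>_...
        parts = version.split('_')
        return int(parts[1]), int(parts[2])

    major, minor = parse_release('CMSSW_1_5_4')
    if major >= 1 and minor >= 5:
        args = ' -e -p pset.cfg'
    else:
        args = ' -p pset.cfg'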
1122      def inputSandbox(self, nj):
1123          """
1124          Returns a list of filenames to be put in JDL input sandbox.
1125          """
1126          inp_box = []
1127 <        # dict added to delete duplicate from input sandbox file list
1128 <        seen = {}
1127 >        # # dict added to delete duplicate from input sandbox file list
1128 >        # seen = {}
1129          ## code
1130          if os.path.isfile(self.tgzNameWithPath):
1131              inp_box.append(self.tgzNameWithPath)
1132 +        if os.path.isfile(self.MLtgzfile):
1133 +            inp_box.append(self.MLtgzfile)
1134          ## config
1135 <        inp_box.append(common.job_list[nj].configFilename())
1135 >        if not self.pset is None:
1136 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1137          ## additional input files
1138 <        for file in self.additional_inbox_files:
1139 <            inp_box.append(common.work_space.cwdDir()+file)
470 <        #print "sono inputSandbox, inp_box = ", inp_box
1138 >        tgz = self.additionalInputFileTgz()
1139 >        inp_box.append(tgz)
1140          return inp_box
1141  
1142      def outputSandbox(self, nj):
# Line 476 | Line 1145 | class Cmssw(JobType):
1145          """
1146          out_box = []
1147  
479        stdout=common.job_list[nj].stdout()
480        stderr=common.job_list[nj].stderr()
481
1148          ## User Declared output files
1149 <        for out in self.output_file:
1149 >        for out in (self.output_file+self.output_file_sandbox):
1150              n_out = nj + 1
1151              out_box.append(self.numberFile_(out,str(n_out)))
1152          return out_box
487        return []
1153  
1154      def prepareSteeringCards(self):
1155          """
1156          Make initial modifications of the user's steering card file.
1157          """
493        infile = open(self.pset,'r')
494            
495        outfile = open(common.work_space.jobDir()+self.name()+'.cfg', 'w')
496          
497        outfile.write('\n\n##### The following cards have been created by CRAB: DO NOT TOUCH #####\n')
498
499        outfile.write('InputCollections=/System/'+self.owner+'/'+self.dataset+'/'+self.dataset+'\n')
500
501        infile.close()
502        outfile.close()
1158          return
1159  
1160      def wsRenameOutput(self, nj):
# Line 508 | Line 1163 | class Cmssw(JobType):
1163          """
1164  
1165          txt = '\n'
1166 <        file_list = ''
1167 <        for fileWithSuffix in self.output_file:
1166 >        txt += '# directory content\n'
1167 >        txt += 'ls \n'
1168 >
1169 >        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1170              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
514            file_list=file_list+output_file_num+','
515            txt += '\n'
516            txt += 'ls \n'
1171              txt += '\n'
1172 <            txt += 'ls '+fileWithSuffix+'\n'
1173 <            txt += 'exe_result=$?\n'
1174 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1175 <            txt += '   echo "ERROR: No output file to manage"\n'
1176 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
523 <            txt += '   echo "SanityCheckCode = $exe_result" | tee -a $RUNTIME_AREA/$repo\n'
524 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
525 <            txt += '   exit $exe_result \n'
1172 >            txt += '# check output file\n'
1173 >            # txt += 'ls '+fileWithSuffix+'\n'
1174 >            # txt += 'ls_result=$?\n'
1175 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1176 >            txt += '   mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1177              txt += 'else\n'
1178 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1178 >            txt += '   exit_status=60302\n'
1179 >            txt += '   echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1180 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1181 >                txt += '    if [ $middleware == OSG ]; then \n'
1182 >                txt += '        echo "prepare dummy output file"\n'
1183 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1184 >                txt += '    fi \n'
1185              txt += 'fi\n'
1186 <            txt += 'cd $RUNTIME_AREA\n'
1187 <                      
1188 <            pass
1186 >        file_list = []
1187 >        for fileWithSuffix in (self.output_file):
1188 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1189 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1190        
1191 <        file_list=file_list[:-1]
1192 <        txt += 'file_list='+file_list+'\n'
1191 >        txt += 'cd $RUNTIME_AREA\n'
1192 >        #### FEDE this is the cleanEnv function
1193 >        ### OLI_DANIELE
1194 >        #txt += 'if [ $middleware == OSG ]; then\n'  
1195 >        #txt += '    cd $RUNTIME_AREA\n'
1196 >        #txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1197 >        #txt += '    /bin/rm -rf $WORKING_DIR\n'
1198 >        #txt += '    if [ -d $WORKING_DIR ] ;then\n'
1199 >        #txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1200 >        #txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1201 >        #txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1202 >        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1203 >        #txt += '        rm -f $RUNTIME_AREA/$repo \n'
1204 >        #txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1205 >        #txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1206 >        #txt += '    fi\n'
1207 >        #txt += 'fi\n'
1208 >        #txt += '\n'
1209 >
1210 >
1211          return txt
1212  
1213      def numberFile_(self, file, txt):
# Line 542 | Line 1218 | class Cmssw(JobType):
1218          # take away last extension
1219          name = p[0]
1220          for x in p[1:-1]:
1221 <           name=name+"."+x
1221 >            name=name+"."+x
1222          # add "_txt"
1223          if len(p)>1:
1224 <          ext = p[len(p)-1]
1225 <          #result = name + '_' + str(txt) + "." + ext
550 <          result = name + '_' + txt + "." + ext
1224 >            ext = p[len(p)-1]
1225 >            result = name + '_' + txt + "." + ext
1226          else:
1227 <          #result = name + '_' + str(txt)
553 <          result = name + '_' + txt
1227 >            result = name + '_' + txt
1228          
1229          return result
1230  
1231 <    def getRequirements(self):
1231 >    def getRequirements(self, nj=[]):
1232          """
1233          return job requirements to add to jdl files
1234          """
1235          req = ''
1236 <        if common.analisys_common_info['sites']:
1237 <            if common.analisys_common_info['sw_version']:
1238 <                req='Member("VO-cms-' + \
1239 <                     common.analisys_common_info['sw_version'] + \
1240 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1241 <            if len(common.analisys_common_info['sites'])>0:
1242 <                req = req + ' && ('
1243 <                for i in range(len(common.analisys_common_info['sites'])):
1244 <                    req = req + 'other.GlueCEInfoHostName == "' \
1245 <                         + common.analisys_common_info['sites'][i] + '"'
1246 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1247 <                        req = req + ' || '
1248 <            req = req + ')'
575 <        #print "req = ", req
1236 >        if self.version:
1237 >            req='Member("VO-cms-' + \
1238 >                 self.version + \
1239 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1240 >        ## SL add requirement for OS version only if SL4
1241 >        reSL4 = re.compile( r'slc4' )
1242 >        if self.executable_arch and reSL4.search(self.executable_arch):
1243 >            req+=' && Member("VO-cms-' + \
1244 >                 self.executable_arch + \
1245 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1246 >
1247 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1248 >
1249          return req
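With, say, CMSSW_1_3_1 and an slc4 SCRAM architecture, the string assembled above would come out roughly as the following JDL requirement; both values are illustrative and not taken from any real task:

    # illustrative result only
    req = ('Member("VO-cms-CMSSW_1_3_1", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
           ' && Member("VO-cms-slc4_ia32_gcc345", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
           ' && (other.GlueHostNetworkAdapterOutboundIP)')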
1250 +
1251 +    def configFilename(self):
1252 +        """ return the config filename """
1253 +        return self.name()+'.cfg'
1254 +
1255 +    ### OLI_DANIELE
1256 +    def wsSetupCMSOSGEnvironment_(self):
1257 +        """
1258 +        Returns the part of the job script that prepares
1259 +        the execution environment and is common to all CMS jobs.
1260 +        """
1261 +        txt = '\n'
1262 +        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1263 +        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1264 +        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1265 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1266 +        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1267 +        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1268 +        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1269 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1270 +        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1271 +        txt += '   else\n'
1272 +        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1273 +        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1274 +        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1275 +        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1276 +        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1277 +        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1278 +        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1279 +        txt += '       exit 1\n'
1280 +        txt += '\n'
1281 +        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1282 +        txt += '       cd $RUNTIME_AREA\n'
1283 +        txt += '       /bin/rm -rf $WORKING_DIR\n'
1284 +        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1285 +        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1286 +        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1287 +        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1288 +        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1289 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1290 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1291 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1292 +        txt += '       fi\n'
1293 +        txt += '\n'
1294 +        txt += '       exit 1\n'
1295 +        txt += '   fi\n'
1296 +        txt += '\n'
1297 +        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1298 +        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1299 +
1300 +        return txt
1301 +
1302 +    ### OLI_DANIELE
1303 +    def wsSetupCMSLCGEnvironment_(self):
1304 +        """
1305 +        Returns the part of the job script that prepares
1306 +        the execution environment and is common to all CMS jobs.
1307 +        """
1308 +        txt  = '   \n'
1309 +        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1310 +        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1311 +        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1312 +        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1313 +        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1314 +        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1315 +        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1316 +        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1317 +        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1318 +        txt += '       exit 1\n'
1319 +        txt += '   else\n'
1320 +        txt += '       echo "Sourcing environment... "\n'
1321 +        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1322 +        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1323 +        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1324 +        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1325 +        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1326 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1327 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1328 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1329 +        txt += '           exit 1\n'
1330 +        txt += '       fi\n'
1331 +        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1332 +        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1333 +        txt += '       result=$?\n'
1334 +        txt += '       if [ $result -ne 0 ]; then\n'
1335 +        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1336 +        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1337 +        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1338 +        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1339 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1340 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1341 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1342 +        txt += '           exit 1\n'
1343 +        txt += '       fi\n'
1344 +        txt += '   fi\n'
1345 +        txt += '   \n'
1346 +        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1347 +        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1348 +        return txt
1349 +
1350 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1351 +    def modifyReport(self, nj):
1352 +        """
1353 +        insert the part of the script that modifies the FrameworkJob Report
1354 +        """
1355 +
1356 +        txt = ''
1357 +        txt += 'echo "Modify Job Report" \n'
1358 +        #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1359 +        ################ FEDE FOR DBS2 #############################################
1360 +        txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1361 +        #############################################################################
1362 +        try:
1363 +            publish_data = int(self.cfg_params['USER.publish_data'])          
1364 +        except KeyError:
1365 +            publish_data = 0
1366 +
1367 +        txt += 'if [ -z "$SE" ]; then\n'
1368 +        txt += '    SE="" \n'
1369 +        txt += 'fi \n'
1370 +        txt += 'if [ -z "$SE_PATH" ]; then\n'
1371 +        txt += '    SE_PATH="" \n'
1372 +        txt += 'fi \n'
1373 +        txt += 'echo "SE = $SE"\n'
1374 +        txt += 'echo "SE_PATH = $SE_PATH"\n'
1375 +
1376 +        if (publish_data == 1):  
1377 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1378 +            processedDataset = self.cfg_params['USER.publish_data_name']
1379 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1380 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1381 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1382 +            #### FEDE: added slash in LFN ##############
1383 +            txt += '    FOR_LFN=/copy_problems/ \n'
1384 +            txt += 'else \n'
1385 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1386 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1387 +            txt += '    FOR_LFN=/store$tmp \n'
1388 +            txt += 'fi \n'
1389 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1390 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1391 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1392 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1393 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1394 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1395 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1396 +      
1397 +            txt += 'modifyReport_result=$?\n'
1398 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1399 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1400 +            txt += '    exit_status=1\n'
1401 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1402 +            txt += 'else\n'
1403 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1404 +            txt += 'fi\n'
1405 +        else:
1406 +            txt += 'ProcessedDataset=no_data_to_publish \n'
1407 +            #### FEDE: added slash in LFN ##############
1408 +            txt += 'FOR_LFN=/local/ \n'
1409 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1410 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1411 +        return txt
1412 +
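The awk fragment above derives the publication LFN by cutting $SE_PATH at the first occurrence of 'store' and re-prefixing '/store'. A hedged Python equivalent for a path containing a single 'store'; the example path is made up:

    def se_path_to_lfn(se_path):
        # mirrors: tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'`; FOR_LFN=/store$tmp
        if not se_path:
            return '/copy_problems/'
        return '/store' + se_path.split('store', 1)[1]

    print se_path_to_lfn('/pnfs/an.example.site/data/cms/store/user/someuser/')
    # -> /store/user/someuser/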
1413 +    def cleanEnv(self):
1414 +        ### OLI_DANIELE
1415 +        txt = ''
1416 +        txt += 'if [ $middleware == OSG ]; then\n'  
1417 +        txt += '    cd $RUNTIME_AREA\n'
1418 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1419 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1420 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1421 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1422 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1423 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1424 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1425 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1426 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1427 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1428 +        txt += '    fi\n'
1429 +        txt += 'fi\n'
1430 +        txt += '\n'
1431 +        return txt
1432 +
1433 +    def setParam_(self, param, value):
1434 +        self._params[param] = value
1435 +
1436 +    def getParams(self):
1437 +        return self._params
1438 +
1439 +    def setTaskid_(self):
1440 +        self._taskId = self.cfg_params['taskId']
1441 +        
1442 +    def getTaskid(self):
1443 +        return self._taskId
1444 +
1445 +    def uniquelist(self, old):
1446 +        """
1447 +        remove duplicates from a list
1448 +        """
1449 +        nd={}
1450 +        for e in old:
1451 +            nd[e]=0
1452 +        return nd.keys()
