
Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.1 by slacapra, Thu Apr 6 16:18:17 2006 UTC vs.
Revision 1.112 by corvo, Thu Aug 9 16:53:21 2007 UTC

# Line 3 | Line 3 | from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5   import common
6
7 import DataDiscovery
8 import DataLocation
6   import Scram
7  
8 < import os, string, re
8 > import os, string, glob
9  
10   class Cmssw(JobType):
11 <    def __init__(self, cfg_params):
11 >    def __init__(self, cfg_params, ncjobs):
12          JobType.__init__(self, 'CMSSW')
13          common.logger.debug(3,'CMSSW::__init__')
14  
15 <        self.analisys_common_info = {}
15 >        self._params = {}
16 >        self.cfg_params = cfg_params
17 >
18 >        try:
19 >            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
20 >        except KeyError:
21 >            self.MaxTarBallSize = 9.5
22 >
23 >        # number of jobs requested to be created; limits the job splitting
24 >        self.ncjobs = ncjobs
25  
26          log = common.logger
27          
28          self.scram = Scram.Scram(cfg_params)
23        scramArea = ''
29          self.additional_inbox_files = []
30          self.scriptExe = ''
31          self.executable = ''
32 +        self.executable_arch = self.scram.getArch()
33          self.tgz_name = 'default.tgz'
34 +        self.additional_tgz_name = 'additional.tgz'
35 +        self.scriptName = 'CMSSW.sh'
36 +        self.pset = ''      #script use case Da
37 +        self.datasetPath = '' #script use case Da
38 +
39 +        # set FJR file name
40 +        self.fjrFileName = 'crab_fjr.xml'
41  
42          self.version = self.scram.getSWVersion()
43 <        common.analisys_common_info['sw_version'] = self.version
43 >        
44 >        #
45 >        # Try to block creation in case of arch/version mismatch
46 >        #
47 >
48 >        a = string.split(self.version, "_")
49 >
50 >        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
51 >            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
52 >            raise CrabException(msg)
53 >        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
54 >            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
55 >            raise CrabException(msg)
56 >        
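
The guard above parses the release name by splitting on underscores. A minimal sketch of that parsing, with hypothetical version and architecture values (RuntimeError standing in for CrabException):

    # Sketch: how the arch/version compatibility guard reads the release name.
    version = "CMSSW_1_6_7"            # e.g. self.version from Scram
    arch = "slc4_ia32_gcc345"          # e.g. self.executable_arch
    a = version.split("_")             # -> ['CMSSW', '1', '6', '7']
    major, minor = int(a[1]), int(a[2])
    if major == 1 and minor < 5 and arch.startswith("slc4"):
        raise RuntimeError("CMS does not support %s with %s" % (version, arch))
    if major == 1 and minor >= 5 and arch.startswith("slc3"):
        raise RuntimeError("CMS does not support %s with %s" % (version, arch))
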
57 >        common.taskDB.setDict('codeVersion',self.version)
58 >        self.setParam_('application', self.version)
59  
60          ### collect Data cards
61 +
62 +        ## get DBS mode
63 +        try:
64 +            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
65 +        except KeyError:
66 +            self.use_dbs_1 = 0
67 +            
68          try:
69 <            self.owner = cfg_params['CMSSW.owner']
70 <            log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
71 <            self.dataset = cfg_params['CMSSW.dataset']
72 <            log.debug(6, "CMSSW::CMSSW(): dataset = "+self.dataset)
69 >            tmp =  cfg_params['CMSSW.datasetpath']
70 >            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
71 >            if string.lower(tmp)=='none':
72 >                self.datasetPath = None
73 >                self.selectNoInput = 1
74 >            else:
75 >                self.datasetPath = tmp
76 >                self.selectNoInput = 0
77          except KeyError:
78 <            msg = "Error: owner and/or dataset not defined "
78 >            msg = "Error: datasetpath not defined "  
79              raise CrabException(msg)
80  
81 +        # ML monitoring
82 +        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
83 +        if not self.datasetPath:
84 +            self.setParam_('dataset', 'None')
85 +            self.setParam_('owner', 'None')
86 +        else:
87 +            try:
88 +                datasetpath_split = self.datasetPath.split("/")
89 +                # standard style
90 +                if self.use_dbs_1 == 1 :
91 +                    self.setParam_('dataset', datasetpath_split[1])
92 +                    self.setParam_('owner', datasetpath_split[-1])
93 +                else:
94 +                    self.setParam_('dataset', datasetpath_split[1])
95 +                    self.setParam_('owner', datasetpath_split[2])
96 +            except:
97 +                self.setParam_('dataset', self.datasetPath)
98 +                self.setParam_('owner', self.datasetPath)
99 +                
100 +        self.setTaskid_()
101 +        self.setParam_('taskId', self.cfg_params['taskId'])
102 +
103          self.dataTiers = []
43        try:
44            tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
45            for tmp in tmpDataTiers:
46                tmp=string.strip(tmp)
47                self.dataTiers.append(tmp)
48                pass
49            pass
50        except KeyError:
51            pass
52        log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
104  
105          ## now the application
106          try:
107              self.executable = cfg_params['CMSSW.executable']
108 +            self.setParam_('exe', self.executable)
109              log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
110              msg = "Default executable cmsRun overridden. Switch to " + self.executable
111              log.debug(3,msg)
112          except KeyError:
113              self.executable = 'cmsRun'
114 +            self.setParam_('exe', self.executable)
115              msg = "User executable not defined. Use cmsRun"
116              log.debug(3,msg)
117              pass
# Line 66 | Line 119 | class Cmssw(JobType):
119          try:
120              self.pset = cfg_params['CMSSW.pset']
121              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
122 <            if (not os.path.exists(self.pset)):
123 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
122 >            if self.pset.lower() != 'none' :
123 >                if (not os.path.exists(self.pset)):
124 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
125 >            else:
126 >                self.pset = None
127          except KeyError:
128              raise CrabException("PSet file missing. Cannot run cmsRun ")
129  
130          # output files
131 +        ## files which must always be returned via the sandbox
132 +        self.output_file_sandbox = []
133 +
134 +        # add fjr report by default via sandbox
135 +        self.output_file_sandbox.append(self.fjrFileName)
136 +
137 +        # other output files to be returned via sandbox or copied to SE
138          try:
139              self.output_file = []
77
140              tmp = cfg_params['CMSSW.output_file']
141              if tmp != '':
142                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 83 | Line 145 | class Cmssw(JobType):
145                      tmp=string.strip(tmp)
146                      self.output_file.append(tmp)
147                      pass
86
148              else:
149 <                log.message("No output file defined: only stdout/err will be available")
149 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
150                  pass
151              pass
152          except KeyError:
153 <            log.message("No output file defined: only stdout/err will be available")
153 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
154              pass
155  
156          # script_exe file as additional file in inputSandbox
157          try:
158 <           self.scriptExe = cfg_params['CMSSW.script_exe']
159 <           self.additional_inbox_files.append(self.scriptExe)
158 >            self.scriptExe = cfg_params['USER.script_exe']
159 >            if self.scriptExe != '':
160 >               if not os.path.isfile(self.scriptExe):
161 >                  msg ="ERROR. file "+self.scriptExe+" not found"
162 >                  raise CrabException(msg)
163 >               self.additional_inbox_files.append(string.strip(self.scriptExe))
164          except KeyError:
165 <           pass
166 <        if self.scriptExe != '':
167 <           if os.path.isfile(self.scriptExe):
168 <              pass
169 <           else:
170 <              log.message("WARNING. file "+self.scriptExe+" not found")
171 <              sys.exit()
107 <                  
165 >            self.scriptExe = ''
166 >
167 >        #CarlosDaniele
168 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
169 >           msg = "Error: script_exe not defined"
170 >           raise CrabException(msg)
171 >
172          ## additional input files
173          try:
174 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
174 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
175              for tmp in tmpAddFiles:
176 <                tmp=string.strip(tmp)
177 <                self.additional_inbox_files.append(tmp)
176 >                tmp = string.strip(tmp)
177 >                dirname = ''
178 >                if not tmp[0]=="/": dirname = "."
179 >                files = []
180 >                if string.find(tmp,"*")>-1:
181 >                    files = glob.glob(os.path.join(dirname, tmp))
182 >                    if len(files)==0:
183 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
184 >                else:
185 >                    files.append(tmp)
186 >                for file in files:
187 >                    if not os.path.exists(file):
188 >                        raise CrabException("Additional input file not found: "+file)
189 >                    pass
190 >                    # fname = string.split(file, '/')[-1]
191 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
192 >                    # shutil.copyfile(file, storedFile)
193 >                    self.additional_inbox_files.append(string.strip(file))
194                  pass
195              pass
196 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
197 +        except KeyError:
198 +            pass
199 +
200 +        # files per job
201 +        try:
202 +            if (cfg_params['CMSSW.files_per_jobs']):
203 +                raise CrabException("files_per_jobs no longer supported.  Quitting.")
204          except KeyError:
205              pass
206  
207 +        ## Events per job
208 +        try:
209 +            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
210 +            self.selectEventsPerJob = 1
211 +        except KeyError:
212 +            self.eventsPerJob = -1
213 +            self.selectEventsPerJob = 0
214 +    
215 +        ## number of jobs
216 +        try:
217 +            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
218 +            self.selectNumberOfJobs = 1
219 +        except KeyError:
220 +            self.theNumberOfJobs = 0
221 +            self.selectNumberOfJobs = 0
222 +
223          try:
224              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
225 +            self.selectTotalNumberEvents = 1
226          except KeyError:
227 <            msg = 'Must define total_number_of_events and job_number_of_events'
228 <            raise CrabException(msg)
229 <            
230 < #Marco: FirstEvent is nolonger used inside PSet
231 < #        try:
232 < #            self.first = int(cfg_params['CMSSW.first_event'])
233 < #        except KeyError:
234 < #            self.first = 0
235 < #            pass
236 < #        log.debug(6, "Orca::Orca(): total number of events = "+`self.total_number_of_events`)
237 <        #log.debug(6, "Orca::Orca(): events per job = "+`self.job_number_of_events`)
238 < #        log.debug(6, "Orca::Orca(): first event = "+`self.first`)
239 <        
240 <        CEBlackList = []
241 <        try:
137 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
138 <            for tmp in tmpBad:
139 <                tmp=string.strip(tmp)
140 <                CEBlackList.append(tmp)
227 >            self.total_number_of_events = 0
228 >            self.selectTotalNumberEvents = 0
229 >
230 >        if self.pset != None: #CarlosDaniele
231 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
232 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
233 >                 raise CrabException(msg)
234 >        else:
235 >             if (self.selectNumberOfJobs == 0):
236 >                 msg = 'Must specify number_of_jobs.'
237 >                 raise CrabException(msg)
238 >
239 >        ## source seed for pythia
240 >        try:
241 >            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
242          except KeyError:
243 <            pass
243 >            self.sourceSeed = None
244 >            common.logger.debug(5,"No seed given")
245  
246 <        self.reCEBlackList=[]
247 <        for bad in CEBlackList:
146 <            self.reCEBlackList.append(re.compile( bad ))
147 <
148 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
149 <
150 <        CEWhiteList = []
151 <        try:
152 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
153 <            #tmpGood = ['cern']
154 <            for tmp in tmpGood:
155 <                tmp=string.strip(tmp)
156 <                #if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch
157 <                CEWhiteList.append(tmp)
246 >        try:
247 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
248          except KeyError:
249 <            pass
249 >            self.sourceSeedVtx = None
250 >            common.logger.debug(5,"No vertex seed given")
251 >
252 >        try:
253 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
254 >        except KeyError:
255 >            self.sourceSeedG4 = None
256 >            common.logger.debug(5,"No g4 sim hits seed given")
257  
258 <        #print 'CEWhiteList: ',CEWhiteList
259 <        self.reCEWhiteList=[]
260 <        for Good in CEWhiteList:
261 <            self.reCEWhiteList.append(re.compile( Good ))
258 >        try:
259 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
260 >        except KeyError:
261 >            self.sourceSeedMix = None
262 >            common.logger.debug(5,"No mix seed given")
263  
264 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
264 >        try:
265 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
266 >        except KeyError:
267 >            self.firstRun = None
268 >            common.logger.debug(5,"No first run given")
269 >        if self.pset != None: #CarlosDaniele
270 >            ver = string.split(self.version,"_")
271 >            if (int(ver[1])>=1 and int(ver[2])>=5):
272 >                import PsetManipulator150 as pp
273 >            else:
274 >                import PsetManipulator as pp
275 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
276  
277          #DBSDLS-start
278          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
279          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
280          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
281 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
282          ## Perform the data location and discovery (based on DBS/DLS)
283 <        self.DataDiscoveryAndLocation(cfg_params)
283 >        ## SL: Don't if NONE is specified as input (pythia use case)
284 >        blockSites = {}
285 >        if self.datasetPath:
286 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
287          #DBSDLS-end          
288  
289          self.tgzNameWithPath = self.getTarBall(self.executable)
290 +    
291 +        ## Select Splitting
292 +        if self.selectNoInput:
293 +            if self.pset == None: #CarlosDaniele
294 +                self.jobSplittingForScript()
295 +            else:
296 +                self.jobSplittingNoInput()
297 +        else:
298 +            self.jobSplittingByBlocks(blockSites)
299 +
300 +        # modify Pset
301 +        if self.pset != None: #CarlosDaniele
302 +            try:
303 +                if (self.datasetPath): # standard job
304 +                    # allow processing a fraction of the events in a file
305 +                    PsetEdit.inputModule("INPUT")
306 +                    PsetEdit.maxEvent("INPUTMAXEVENTS")
307 +                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
308 +                else:  # pythia like job
309 +                    PsetEdit.maxEvent(self.eventsPerJob)
310 +                    if (self.firstRun):
311 +                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
312 +                    if (self.sourceSeed) :
313 +                        PsetEdit.pythiaSeed("INPUT")
314 +                        if (self.sourceSeedVtx) :
315 +                            PsetEdit.vtxSeed("INPUTVTX")
316 +                        if (self.sourceSeedG4) :
317 +                            PsetEdit.g4Seed("INPUTG4")
318 +                        if (self.sourceSeedMix) :
319 +                            PsetEdit.mixSeed("INPUTMIX")
320 +                # add FrameworkJobReport to parameter-set
321 +                PsetEdit.addCrabFJR(self.fjrFileName)
322 +                PsetEdit.psetWriter(self.configFilename())
323 +            except:
324 +                msg='Error while manipulating ParameterSet: exiting...'
325 +                raise CrabException(msg)
326  
327      def DataDiscoveryAndLocation(self, cfg_params):
328  
329 <        fun = "CMSSW::DataDiscoveryAndLocation()"
329 >        import DataDiscovery
330 >        import DataDiscovery_DBS2
331 >        import DataLocation
332 >        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
333 >
334 >        datasetPath=self.datasetPath
335  
336          ## Contact the DBS
337 +        common.logger.message("Contacting Data Discovery Services ...")
338          try:
339 <            self.pubdata=DataDiscovery.DataDiscovery(self.owner,
340 <                                                     self.dataset,
341 <                                                     self.dataTiers,
342 <                                                     cfg_params)
339 >
340 >            if self.use_dbs_1 == 1 :
341 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
342 >            else :
343 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
344              self.pubdata.fetchDBSInfo()
345  
346          except DataDiscovery.NotExistingDatasetError, ex :
347              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
348              raise CrabException(msg)
193
349          except DataDiscovery.NoDataTierinProvenanceError, ex :
350              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
351              raise CrabException(msg)
352          except DataDiscovery.DataDiscoveryError, ex:
353 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
353 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
354 >            raise CrabException(msg)
355 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
356 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
357 >            raise CrabException(msg)
358 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
359 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
360 >            raise CrabException(msg)
361 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
362 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
363              raise CrabException(msg)
364  
365 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
366 <        self.DBSPaths=self.pubdata.getDBSPaths()
367 <        common.logger.message("Required data are : ")
204 <        for path in self.DBSPaths:
205 <            common.logger.message(" --> "+path )
365 >        self.filesbyblock=self.pubdata.getFiles()
366 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
367 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
368  
369          ## get max number of events
208        common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
370          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
210        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
211
212        ## get fileblocks corresponding to the required data
213        fb=self.pubdata.getFileBlocks()
214        common.logger.debug(5,"fileblocks are %s"%fb)
371  
372          ## Contact the DLS and build a list of sites hosting the fileblocks
373          try:
374 <            dataloc=DataLocation.DataLocation(self.pubdata.getFileBlocks(),cfg_params)
374 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
375              dataloc.fetchDLSInfo()
376          except DataLocation.DataLocationError , ex:
377              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
378              raise CrabException(msg)
379          
380 <        allsites=dataloc.getSites()
381 <        common.logger.debug(5,"sites are %s"%allsites)
382 <        sites=self.checkBlackList(allsites)
383 <        common.logger.debug(5,"sites are (after black list) %s"%sites)
384 <        sites=self.checkWhiteList(sites)
385 <        common.logger.debug(5,"sites are (after white list) %s"%sites)
386 <
387 <        if len(sites)==0:
388 <            msg = 'No sites hosting all the needed data! Exiting... '
389 <            raise CrabException(msg)
390 <        common.logger.message("List of Sites hosting the data : "+str(sites))
391 <        common.logger.debug(6, "List of Sites: "+str(sites))
392 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
393 <        return
380 >
381 >        sites = dataloc.getSites()
382 >        allSites = []
383 >        listSites = sites.values()
384 >        for listSite in listSites:
385 >            for oneSite in listSite:
386 >                allSites.append(oneSite)
387 >        allSites = self.uniquelist(allSites)
388 >
389 >        # screen output
390 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
391 >
392 >        return sites
393 >    
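
DataDiscoveryAndLocation() now returns the block-to-sites dictionary; the flat, de-duplicated allSites list built just above is only informational. A sketch of that flattening with hypothetical block names and sites (set() used in place of the uniquelist helper):

    # Hypothetical DLS result: block name -> list of hosting sites
    sites = {"/PreProd/SIM/GEN-SIM#block1": ["se1.cern.ch", "se2.example.org"],
             "/PreProd/SIM/GEN-SIM#block2": ["se2.example.org"]}
    allSites = []
    for siteList in sites.values():
        for oneSite in siteList:
            allSites.append(oneSite)
    allSites = list(set(allSites))   # -> ['se1.cern.ch', 'se2.example.org'] (order not guaranteed)
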
394 >    def jobSplittingByBlocks(self, blockSites):
395 >        """
396 >        Perform job splitting. Jobs run over an integer number of files
397 >        and no more than one block.
398 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
399 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
400 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
401 >                  self.maxEvents, self.filesbyblock
402 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
403 >              self.total_number_of_jobs - Total # of jobs
404 >              self.list_of_args - File(s) job will run on (a list of lists)
405 >        """
406 >
407 >        # ---- Handle the possible job splitting configurations ---- #
408 >        if (self.selectTotalNumberEvents):
409 >            totalEventsRequested = self.total_number_of_events
410 >        if (self.selectEventsPerJob):
411 >            eventsPerJobRequested = self.eventsPerJob
412 >            if (self.selectNumberOfJobs):
413 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
414 >
415 >        # If user requested all the events in the dataset
416 >        if (totalEventsRequested == -1):
417 >            eventsRemaining=self.maxEvents
418 >        # If user requested more events than are in the dataset
419 >        elif (totalEventsRequested > self.maxEvents):
420 >            eventsRemaining = self.maxEvents
421 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
422 >        # If user requested less events than are in the dataset
423 >        else:
424 >            eventsRemaining = totalEventsRequested
425 >
426 >        # If user requested more events per job than are in the dataset
427 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
428 >            eventsPerJobRequested = self.maxEvents
429 >
430 >        # For user info at end
431 >        totalEventCount = 0
432 >
433 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
434 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
435 >
436 >        if (self.selectNumberOfJobs):
437 >            common.logger.message("May not create the exact number_of_jobs requested.")
438 >
439 >        if ( self.ncjobs == 'all' ) :
440 >            totalNumberOfJobs = 999999999
441 >        else :
442 >            totalNumberOfJobs = self.ncjobs
443 >            
444 >
445 >        blocks = blockSites.keys()
446 >        blockCount = 0
447 >        # Backup variable in case self.maxEvents counted events in a non-included block
448 >        numBlocksInDataset = len(blocks)
449 >
450 >        jobCount = 0
451 >        list_of_lists = []
452 >
453 >        # dictionary tracking which jobs belong to which block
454 >        jobsOfBlock = {}
455 >
456 >        # ---- Iterate over the blocks in the dataset until ---- #
457 >        # ---- we've met the requested total # of events    ---- #
458 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
459 >            block = blocks[blockCount]
460 >            blockCount += 1
461 >            if block not in jobsOfBlock.keys() :
462 >                jobsOfBlock[block] = []
463 >            
464 >            if self.eventsbyblock.has_key(block) :
465 >                numEventsInBlock = self.eventsbyblock[block]
466 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
467 >            
468 >                files = self.filesbyblock[block]
469 >                numFilesInBlock = len(files)
470 >                if (numFilesInBlock <= 0):
471 >                    continue
472 >                fileCount = 0
473 >
474 >                # ---- New block => New job ---- #
475 >                parString = "\\{"
476 >                # counter for number of events in files currently worked on
477 >                filesEventCount = 0
478 >                # flag if next while loop should touch new file
479 >                newFile = 1
480 >                # job event counter
481 >                jobSkipEventCount = 0
482 >            
483 >                # ---- Iterate over the files in the block until we've met the requested ---- #
484 >                # ---- total # of events or we've gone over all the files in this block  ---- #
485 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
486 >                    file = files[fileCount]
487 >                    if newFile :
488 >                        try:
489 >                            numEventsInFile = self.eventsbyfile[file]
490 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
491 >                            # increase filesEventCount
492 >                            filesEventCount += numEventsInFile
493 >                            # Add file to current job
494 >                            parString += '\\\"' + file + '\\\"\,'
495 >                            newFile = 0
496 >                        except KeyError:
497 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
498 >                        
499 >
500 >                    # if fewer events remain in the current file(s) than eventsPerJobRequested
501 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
502 >                        # if last file in block
503 >                        if ( fileCount == numFilesInBlock-1 ) :
504 >                            # end job using last file, use remaining events in block
505 >                            # close job and touch new file
506 >                            fullString = parString[:-2]
507 >                            fullString += '\\}'
508 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
509 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
510 >                            self.jobDestination.append(blockSites[block])
511 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
512 >                            # fill jobs of block dictionary
513 >                            jobsOfBlock[block].append(jobCount+1)
514 >                            # reset counter
515 >                            jobCount = jobCount + 1
516 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
517 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
518 >                            jobSkipEventCount = 0
519 >                            # reset file
520 >                            parString = "\\{"
521 >                            filesEventCount = 0
522 >                            newFile = 1
523 >                            fileCount += 1
524 >                        else :
525 >                            # go to next file
526 >                            newFile = 1
527 >                            fileCount += 1
528 >                    # if the events in the file(s) are equal to eventsPerJobRequested
529 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
530 >                        # close job and touch new file
531 >                        fullString = parString[:-2]
532 >                        fullString += '\\}'
533 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
534 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
535 >                        self.jobDestination.append(blockSites[block])
536 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
537 >                        jobsOfBlock[block].append(jobCount+1)
538 >                        # reset counter
539 >                        jobCount = jobCount + 1
540 >                        totalEventCount = totalEventCount + eventsPerJobRequested
541 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
542 >                        jobSkipEventCount = 0
543 >                        # reset file
544 >                        parString = "\\{"
545 >                        filesEventCount = 0
546 >                        newFile = 1
547 >                        fileCount += 1
548 >                        
549 >                    # if more events remain in the file(s) than eventsPerJobRequested
550 >                    else :
551 >                        # close job but don't touch new file
552 >                        fullString = parString[:-2]
553 >                        fullString += '\\}'
554 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
555 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
556 >                        self.jobDestination.append(blockSites[block])
557 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
558 >                        jobsOfBlock[block].append(jobCount+1)
559 >                        # increase counter
560 >                        jobCount = jobCount + 1
561 >                        totalEventCount = totalEventCount + eventsPerJobRequested
562 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
563 >                        # calculate skip events for last file
564 >                        # use filesEventCount (may span several files), jobSkipEventCount and eventsPerJobRequested
565 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
566 >                        # remove all but the last file
567 >                        filesEventCount = self.eventsbyfile[file]
568 >                        parString = "\\{"
569 >                        parString += '\\\"' + file + '\\\"\,'
570 >                    pass # END if
571 >                pass # END while (iterate over files in the block)
572 >        pass # END while (iterate over blocks in the dataset)
573 >        self.ncjobs = self.total_number_of_jobs = jobCount
574 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
575 >            common.logger.message("Could not run on all requested events because some blocks are not hosted at any allowed site.")
576 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
577          
578 <    def checkBlackList(self, allSites):
579 <        if len(self.reCEBlackList)==0: return allSites
580 <        sites = []
581 <        for site in allSites:
582 <            common.logger.debug(10,'Site '+site)
583 <            good=1
584 <            for re in self.reCEBlackList:
585 <                if re.search(site):
586 <                    common.logger.message('CE in black list, skipping site '+site)
587 <                    good=0
578 >        # screen output
579 >        screenOutput = "List of jobs and available destination sites:\n\n"
580 >
581 >        blockCounter = 0
582 >        for block in blocks:
583 >            if block in jobsOfBlock.keys() :
584 >                blockCounter += 1
585 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(blockSites[block]))
586 >
587 >       # common.logger.message(screenOutput)
588 >
589 >        self.list_of_args = list_of_lists
590 >        return
591 >
592 >    def jobSplittingNoInput(self):
593 >        """
594 >        Perform job splitting based on the number of events per job
595 >        """
596 >        common.logger.debug(5,'Splitting per events')
597 >        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
598 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
599 >        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
600 >
601 >        if (self.total_number_of_events < 0):
602 >            msg='Cannot split jobs per Events with "-1" as total number of events'
603 >            raise CrabException(msg)
604 >
605 >        if (self.selectEventsPerJob):
606 >            if (self.selectTotalNumberEvents):
607 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
608 >            elif(self.selectNumberOfJobs) :  
609 >                self.total_number_of_jobs =self.theNumberOfJobs
610 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
611 >
612 >        elif (self.selectNumberOfJobs) :
613 >            self.total_number_of_jobs = self.theNumberOfJobs
614 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
615 >
616 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
617 >
618 >        # is there any remainder?
619 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
620 >
621 >        common.logger.debug(5,'Check  '+str(check))
622 >
623 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' events, for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
624 >        if check > 0:
625 >            common.logger.message('Warning: asked for '+str(self.total_number_of_events)+' events, but only '+str(int(self.total_number_of_jobs)*self.eventsPerJob)+' can be processed')
626 >
627 >        # each argument is a base value (first run or seed) with the job index $i appended
628 >        self.list_of_args = []
629 >        for i in range(self.total_number_of_jobs):
630 >            ## Since there is no input, any site is good
631 >           # self.jobDestination.append(["Any"])
632 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
633 >            args=[]
634 >            if (self.firstRun):
635 >                    ## pythia first run
636 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
637 >                args.append(str(self.firstRun)+str(i))
638 >            else:
639 >                ## no first run
640 >                #self.list_of_args.append([str(i)])
641 >                args.append(str(i))
642 >            if (self.sourceSeed):
643 >                args.append(str(self.sourceSeed)+str(i))
644 >                if (self.sourceSeedVtx):
645 >                    ## + vtx random seed
646 >                    args.append(str(self.sourceSeedVtx)+str(i))
647 >                if (self.sourceSeedG4):
648 >                    ## + G4 random seed
649 >                    args.append(str(self.sourceSeedG4)+str(i))
650 >                if (self.sourceSeedMix):    
651 >                    ## + Mix random seed
652 >                    args.append(str(self.sourceSeedMix)+str(i))
653                  pass
654 <            if good: sites.append(site)
655 <        if len(sites) == 0:
656 <            common.logger.debug(3,"No sites found after BlackList")
657 <        return sites
654 >            pass
655 >            self.list_of_args.append(args)
656 >        pass
657 >            
658 >        # print self.list_of_args
659  
660 <    def checkWhiteList(self, allsites):
660 >        return
661  
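
A worked example of the per-event splitting arithmetic above, with hypothetical numbers:

    # events_per_job = 1000 and total_number_of_events = 10500 given in the cfg:
    total_number_of_events = 10500
    eventsPerJob = 1000
    total_number_of_jobs = int(total_number_of_events/eventsPerJob)         # 10
    check = total_number_of_events - (total_number_of_jobs*eventsPerJob)    # 500 left over
    # -> "10 jobs can be created, each for 1000 events, for a total of 10000 events"
    # -> "Warning: asked for 10500 events, but only 10000 can be processed"
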
662 <        if len(self.reCEWhiteList)==0: return pubDBUrls
663 <        sites = []
664 <        for site in allsites:
665 <            #print 'connecting to the URL ',url
666 <            good=0
667 <            for re in self.reCEWhiteList:
668 <                if re.search(site):
669 <                    common.logger.debug(5,'CE in white list, adding site '+site)
670 <                    good=1
671 <                if not good: continue
672 <                sites.append(site)
673 <        if len(sites) == 0:
674 <            common.logger.message("No sites found after WhiteList\n")
675 <        else:
676 <            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
677 <        return sites
662 >
663 >    def jobSplittingForScript(self):#CarlosDaniele
664 >        """
665 >        Perform job splitting based on the number of jobs
666 >        """
667 >        common.logger.debug(5,'Splitting per job')
668 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
669 >
670 >        self.total_number_of_jobs = self.theNumberOfJobs
671 >
672 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
673 >
674 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
675 >
676 >        # argument is seed number.$i
677 >        self.list_of_args = []
678 >        for i in range(self.total_number_of_jobs):
679 >            ## Since there is no input, any site is good
680 >           # self.jobDestination.append(["Any"])
681 >            self.jobDestination.append([""])
682 >            ## no random seed
683 >            self.list_of_args.append([str(i)])
684 >        return
685 >
686 >    def split(self, jobParams):
687 >
688 >        common.jobDB.load()
689 >        #### Fabio
690 >        njobs = self.total_number_of_jobs
691 >        arglist = self.list_of_args
692 >        # create the empty structure
693 >        for i in range(njobs):
694 >            jobParams.append("")
695 >        
696 >        for job in range(njobs):
697 >            jobParams[job] = arglist[job]
698 >            # print str(arglist[job])
699 >            # print jobParams[job]
700 >            common.jobDB.setArguments(job, jobParams[job])
701 >            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
702 >            common.jobDB.setDestination(job, self.jobDestination[job])
703 >
704 >        common.jobDB.save()
705 >        return
706 >    
707 >    def getJobTypeArguments(self, nj, sched):
708 >        result = ''
709 >        for i in common.jobDB.arguments(nj):
710 >            result=result+str(i)+" "
711 >        return result
712 >  
713 >    def numberOfJobs(self):
714 >        # Fabio
715 >        return self.total_number_of_jobs
716  
717      def getTarBall(self, exe):
718          """
# Line 277 | Line 720 | class Cmssw(JobType):
720          """
721          
722          # if it exists, just return it
723 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
723 >        #
724 >        # Marco. Let's start to use relative path for Boss XML files
725 >        #
726 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
727          if os.path.exists(self.tgzNameWithPath):
728              return self.tgzNameWithPath
729  
# Line 291 | Line 737 | class Cmssw(JobType):
737          # First of all declare the user Scram area
738          swArea = self.scram.getSWArea_()
739          #print "swArea = ", swArea
740 <        swVersion = self.scram.getSWVersion()
741 <        #print "swVersion = ", swVersion
740 >        # swVersion = self.scram.getSWVersion()
741 >        # print "swVersion = ", swVersion
742          swReleaseTop = self.scram.getReleaseTop_()
743          #print "swReleaseTop = ", swReleaseTop
744          
# Line 300 | Line 746 | class Cmssw(JobType):
746          if swReleaseTop == '' or swArea == swReleaseTop:
747              return
748  
749 <        filesToBeTarred = []
750 <        ## First find the executable
751 <        if (self.executable != ''):
752 <            exeWithPath = self.scram.findFile_(executable)
753 < #           print exeWithPath
754 <            if ( not exeWithPath ):
755 <                raise CrabException('User executable '+executable+' not found')
756 <
757 <            ## then check if it's private or not
758 <            if exeWithPath.find(swReleaseTop) == -1:
759 <                # the exe is private, so we must ship
760 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
761 <                path = swArea+'/'
762 <                exe = string.replace(exeWithPath, path,'')
763 <                filesToBeTarred.append(exe)
764 <                pass
765 <            else:
766 <                # the exe is from release, we'll find it on WN
767 <                pass
768 <
769 <        ## Now get the libraries: only those in local working area
770 <        libDir = 'lib'
771 <        lib = swArea+'/' +libDir
772 <        common.logger.debug(5,"lib "+lib+" to be tarred")
773 <        if os.path.exists(lib):
774 <            filesToBeTarred.append(libDir)
775 <
776 <        ## Now check if the Data dir is present
777 <        dataDir = 'src/Data/'
778 <        if os.path.isdir(swArea+'/'+dataDir):
779 <            filesToBeTarred.append(dataDir)
780 <
781 <        ## Create the tar-ball
782 <        if len(filesToBeTarred)>0:
783 <            cwd = os.getcwd()
784 <            os.chdir(swArea)
785 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
786 <            for line in filesToBeTarred:
787 <                tarcmd = tarcmd + line + ' '
788 <            cout = runCommand(tarcmd)
789 <            if not cout:
790 <                raise CrabException('Could not create tar-ball')
791 <            os.chdir(cwd)
792 <        else:
793 <            common.logger.debug(5,"No files to be to be tarred")
749 >        import tarfile
750 >        try: # create tar ball
751 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
752 >            ## First find the executable
753 >            if (self.executable != ''):
754 >                exeWithPath = self.scram.findFile_(executable)
755 >                if ( not exeWithPath ):
756 >                    raise CrabException('User executable '+executable+' not found')
757 >    
758 >                ## then check if it's private or not
759 >                if exeWithPath.find(swReleaseTop) == -1:
760 >                    # the exe is private, so we must ship
761 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
762 >                    path = swArea+'/'
763 >                    # distinguish case when script is in user project area or given by full path somewhere else
764 >                    if exeWithPath.find(path) >= 0 :
765 >                        exe = string.replace(exeWithPath, path,'')
766 >                        tar.add(path+exe,os.path.basename(executable))
767 >                    else :
768 >                        tar.add(exeWithPath,os.path.basename(executable))
769 >                    pass
770 >                else:
771 >                    # the exe is from release, we'll find it on WN
772 >                    pass
773 >    
774 >            ## Now get the libraries: only those in local working area
775 >            libDir = 'lib'
776 >            lib = swArea+'/' +libDir
777 >            common.logger.debug(5,"lib "+lib+" to be tarred")
778 >            if os.path.exists(lib):
779 >                tar.add(lib,libDir)
780 >    
781 >            ## Now check if module dir is present
782 >            moduleDir = 'module'
783 >            module = swArea + '/' + moduleDir
784 >            if os.path.isdir(module):
785 >                tar.add(module,moduleDir)
786 >
787 >            ## Now check if any data dir(s) is present
788 >            swAreaLen=len(swArea)
789 >            for root, dirs, files in os.walk(swArea):
790 >                if "data" in dirs:
791 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
792 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
793 >
794 >            ## Add ProdAgent dir to tar
795 >            paDir = 'ProdAgentApi'
796 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
797 >            if os.path.isdir(pa):
798 >                tar.add(pa,paDir)
799 >
800 >            ### FEDE FOR DBS PUBLICATION
801 >            ## Add PRODCOMMON dir to tar
802 >            prodcommonDir = 'ProdCommon'
803 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
804 >            if os.path.isdir(prodcommonPath):
805 >                tar.add(prodcommonPath,prodcommonDir)
806 >            #############################    
807 >        
808 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
809 >            tar.close()
810 >        except :
811 >            raise CrabException('Could not create tar-ball')
812 >
813 >        ## check for tarball size
814 >        tarballinfo = os.stat(self.tgzNameWithPath)
815 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
816 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
817 >
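
The sandbox limit is expressed in MB (9.5 by default, overridable with EDG.maxtarballsize), while os.stat() reports bytes; a brief sketch of the conversion done above, with a hypothetical tarball path:

    import os
    MaxTarBallSize = 9.5                              # MB, default from the constructor
    info = os.stat("share/default.tgz")               # hypothetical tarball location
    size_mb = float(info.st_size)/1024.0/1024.0
    if info.st_size > MaxTarBallSize*1024*1024:
        raise RuntimeError("Input sandbox size of %.1f MB exceeds the %.1f MB limit"
                           % (size_mb, MaxTarBallSize))
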
818 >        ## create tar-ball with ML stuff
819 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
820 >        try:
821 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
822 >            path=os.environ['CRABDIR'] + '/python/'
823 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
824 >                tar.add(path+file,file)
825 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
826 >            tar.close()
827 >        except :
828 >            raise CrabException('Could not create ML files tar-ball')
829          
830          return
831          
832 +    def additionalInputFileTgz(self):
833 +        """
834 +        Put all additional files into a tar ball and return its name
835 +        """
836 +        import tarfile
837 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
838 +        tar = tarfile.open(tarName, "w:gz")
839 +        for file in self.additional_inbox_files:
840 +            tar.add(file,string.split(file,'/')[-1])
841 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
842 +        tar.close()
843 +        return tarName
844 +
845      def wsSetupEnvironment(self, nj):
846          """
847          Returns part of a job script which prepares
848          the execution environment for the job 'nj'.
849          """
850          # Prepare JobType-independent part
851 <        txt = self.wsSetupCMSEnvironment_()
851 >        txt = ''
852 >  
853 >        ## OLI_Daniele: at this level the middleware is already known
854 >
855 >        txt += 'echo "### First set SCRAM_ARCH and BUILD_ARCH ###"\n'
856 >        txt += 'echo "Setting SCRAM_ARCH='+self.executable_arch+'"\n'
857 >        txt += 'export SCRAM_ARCH='+self.executable_arch+'\n'
858 >        txt += 'export BUILD_ARCH='+self.executable_arch+'\n'
859 >        txt += 'if [ $middleware == LCG ]; then \n'
860 >        txt += self.wsSetupCMSLCGEnvironment_()
861 >        txt += 'elif [ $middleware == OSG ]; then\n'
862 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
863 >        txt += '    echo "Created working directory: $WORKING_DIR"\n'
864 >        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
865 >        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
866 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
867 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
868 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
869 >        txt += '        rm -f $RUNTIME_AREA/$repo \n'
870 >        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
871 >        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
872 >        txt += '        exit 1\n'
873 >        txt += '    fi\n'
874 >        txt += '\n'
875 >        txt += '    echo "Change to working directory: $WORKING_DIR"\n'
876 >        txt += '    cd $WORKING_DIR\n'
877 >        txt += self.wsSetupCMSOSGEnvironment_()
878 >        txt += 'fi\n'
879  
880          # Prepare JobType-specific part
881          scram = self.scram.commandName()
# Line 363 | Line 884 | class Cmssw(JobType):
884          txt += scram+' project CMSSW '+self.version+'\n'
885          txt += 'status=$?\n'
886          txt += 'if [ $status != 0 ] ; then\n'
887 <        txt += '   echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
888 <        txt += '   echo "JOB_EXIT_STATUS = 5"\n'
889 <        txt += '   echo "SanityCheckCode = 5" | tee -a $RUNTIME_AREA/$repo\n'
887 >        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
888 >        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
889 >        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
890          txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
891 <        txt += '   exit 5 \n'
891 >        txt += '   rm -f $RUNTIME_AREA/$repo \n'
892 >        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
893 >        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
894 >        ## OLI_Daniele
895 >        txt += '    if [ $middleware == OSG ]; then \n'
896 >        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
897 >        txt += '        cd $RUNTIME_AREA\n'
898 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
899 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
900 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW '+self.version+' not found on `hostname`"\n'
901 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
902 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
903 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
904 >        txt += '            rm -f $RUNTIME_AREA/$repo \n'
905 >        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
906 >        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
907 >        txt += '        fi\n'
908 >        txt += '    fi \n'
909 >        txt += '   exit 1 \n'
910          txt += 'fi \n'
911          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
912          txt += 'cd '+self.version+'\n'
913 +        ########## FEDE FOR DBS2 ######################
914 +        txt += 'SOFTWARE_DIR=`pwd`\n'
915 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
916 +        ###############################################
917          ### needed grep for bug in scramv1 ###
918 +        txt += scram+' runtime -sh\n'
919          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
920 +        txt += 'echo $PATH\n'
921  
922          # Handle the arguments:
923          txt += "\n"
924 <        txt += "## ARGUMNETS: $1 Job Number\n"
380 <        # txt += "## ARGUMNETS: $2 First Event for this job\n"
381 <        # txt += "## ARGUMNETS: $3 Max Event for this job\n"
924 >        txt += "## number of arguments (first argument always jobnumber)\n"
925          txt += "\n"
926 <        txt += "narg=$#\n"
927 <        txt += "if [ $narg -lt 1 ]\n"
926 > #        txt += "narg=$#\n"
927 >        txt += "if [ $nargs -lt 2 ]\n"
928          txt += "then\n"
929 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
930 <        txt += '    echo "JOB_EXIT_STATUS = 1"\n'
931 <        txt += '    echo "SanityCheckCode = 1" | tee -a $RUNTIME_AREA/$repo\n'
929 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
930 >        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
931 >        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
932          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
933 +        txt += '    rm -f $RUNTIME_AREA/$repo \n'
934 +        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
935 +        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
936 +        ## OLI_Daniele
937 +        txt += '    if [ $middleware == OSG ]; then \n'
938 +        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
939 +        txt += '        cd $RUNTIME_AREA\n'
940 +        txt += '        /bin/rm -rf $WORKING_DIR\n'
941 +        txt += '        if [ -d $WORKING_DIR ] ;then\n'
942 +        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
943 +        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
944 +        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
945 +        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
946 +        txt += '            rm -f $RUNTIME_AREA/$repo \n'
947 +        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
948 +        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
949 +        txt += '        fi\n'
950 +        txt += '    fi \n'
951          txt += "    exit 1\n"
952          txt += "fi\n"
953          txt += "\n"
393        txt += "NJob=$1\n"
394        # txt += "FirstEvent=$2\n"
395        # txt += "MaxEvents=$3\n"
954  
955          # Prepare job-specific part
956          job = common.job_list[nj]
957 <        pset = os.path.basename(job.configFilename())
958 <        txt += '\n'
959 <        txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
960 <        # txt += 'if [ -e $RUNTIME_AREA/orcarc_$CE ] ; then\n'
961 <        # txt += '  cat $RUNTIME_AREA/orcarc_$CE .orcarc >> .orcarc_tmp\n'
962 <        # txt += '  mv .orcarc_tmp .orcarc\n'
963 <        # txt += 'fi\n'
964 <        # txt += 'if [ -e $RUNTIME_AREA/init_$CE.sh ] ; then\n'
965 <        # txt += '  cp $RUNTIME_AREA/init_$CE.sh init.sh\n'
966 <        # txt += 'fi\n'
957 >        ### FEDE FOR DBS OUTPUT PUBLICATION
958 >        if (self.datasetPath):
959 >            txt += '\n'
960 >            txt += 'DatasetPath='+self.datasetPath+'\n'
961 >
962 >            datasetpath_split = self.datasetPath.split("/")
963 >            
964 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
965 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
966 >            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
967 >            txt += 'ApplicationFamily=cmsRun\n'
968 >
969 >        else:
970 >            txt += 'DatasetPath=MCDataTier\n'
971 >            txt += 'PrimaryDataset=null\n'
972 >            txt += 'DataTier=null\n'
973 >            #txt += 'ProcessedDataset=null\n'
974 >            txt += 'ApplicationFamily=MCDataTier\n'
975 >        if self.pset != None: #CarlosDaniele
976 >            pset = os.path.basename(job.configFilename())
977 >            txt += '\n'
978 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
979 >            if (self.datasetPath): # standard job
980 >                #txt += 'InputFiles=$2\n'
981 >                txt += 'InputFiles=${args[1]}\n'
982 >                txt += 'MaxEvents=${args[2]}\n'
983 >                txt += 'SkipEvents=${args[3]}\n'
984 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
985 >                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
986 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
987 >                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
988 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
989 >                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
990 >            else:  # pythia like job
991 >                seedIndex=1
992 >                if (self.firstRun):
993 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
994 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
995 >                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
996 >                    seedIndex=seedIndex+1
997 >
998 >                if (self.sourceSeed):
999 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1000 >                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1001 >                    seedIndex=seedIndex+1
1002 >                    ## the following seeds are not always present
1003 >                    if (self.sourceSeedVtx):
1004 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1005 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1006 >                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1007 >                        seedIndex += 1
1008 >                    if (self.sourceSeedG4):
1009 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1010 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1011 >                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1012 >                        seedIndex += 1
1013 >                    if (self.sourceSeedMix):
1014 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1015 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1016 >                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1017 >                        seedIndex += 1
1018 >                    pass
1019 >                pass
1020 >            txt += 'mv -f '+pset+' pset.cfg\n'
1021  
1022          if len(self.additional_inbox_files) > 0:
1023 <            for file in self.additional_inbox_files:
1024 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
1025 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
414 <                txt += '   chmod +x '+file+'\n'
415 <                txt += 'fi\n'
1023 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1024 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1025 >            txt += 'fi\n'
1026              pass
1027  
1028 <        # txt += '\n'
1029 <        # txt += 'chmod +x ./init.sh\n'
1030 <        # txt += './init.sh\n'
1031 <        # txt += 'exitStatus=$?\n'
1032 <        # txt += 'if [ $exitStatus != 0 ] ; then\n'
1033 <        # txt += '  echo "SET_EXE_ENV 1 ==> ERROR StageIn init script failed"\n'
1034 <        # txt += '  echo "JOB_EXIT_STATUS = $exitStatus" \n'
1035 <        # txt += '  echo "SanityCheckCode = $exitStatus" | tee -a $RUNTIME_AREA/$repo\n'
1036 <        # txt += '  dumpStatus $RUNTIME_AREA/$repo\n'
1037 <        # txt += '  exit $exitStatus\n'
1038 <        # txt += 'fi\n'
1039 <        # txt += "echo 'SET_EXE_ENV 0 ==> job setup ok'\n"
1040 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1041 <
1042 <        # txt += 'echo "FirstEvent=$FirstEvent" >> .orcarc\n'
1043 <        # txt += 'echo "MaxEvents=$MaxEvents" >> .orcarc\n'
1044 <        # if self.ML:
435 <        #     txt += 'echo "MonalisaJobId=$NJob" >> .orcarc\n'
1028 >        if self.pset != None: #CarlosDaniele
1029 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1030 >        
1031 >            txt += '\n'
1032 >            txt += 'echo "***** cat pset.cfg *********"\n'
1033 >            txt += 'cat pset.cfg\n'
1034 >            txt += 'echo "****** end pset.cfg ********"\n'
1035 >            txt += '\n'
1036 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1037 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1038 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1039 >            ##############
1040 >            txt += '\n'
1041 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1042 >            # txt += 'cat pset1.cfg\n'
1043 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1044 >        return txt
1045  
1046 <        txt += '\n'
1047 <        txt += 'echo "***** cat pset.cfg *********"\n'
1048 <        txt += 'cat pset.cfg\n'
1049 <        txt += 'echo "****** end pset.cfg ********"\n'
1046 >    def wsBuildExe(self, nj=0):
1047 >        """
1048 >        Put in the script the commands to build an executable
1049 >        or a library.
1050 >        """
1051 >
1052 >        txt = ""
1053 >
1054 >        if os.path.isfile(self.tgzNameWithPath):
1055 >            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
1056 >            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1057 >            txt += 'untar_status=$? \n'
1058 >            txt += 'if [ $untar_status -ne 0 ]; then \n'
1059 >            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
1060 >            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
1061 >            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
1062 >            txt += '   if [ $middleware == OSG ]; then \n'
1063 >            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1064 >            txt += '       cd $RUNTIME_AREA\n'
1065 >            txt += '       /bin/rm -rf $WORKING_DIR\n'
1066 >            txt += '       if [ -d $WORKING_DIR ] ;then\n'
1067 >            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1068 >            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1069 >            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1070 >            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1071 >            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1072 >            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1073 >            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1074 >            txt += '       fi\n'
1075 >            txt += '   fi \n'
1076 >            txt += '   \n'
1077 >            txt += '   exit 1 \n'
1078 >            txt += 'else \n'
1079 >            txt += '   echo "Successful untar" \n'
1080 >            txt += 'fi \n'
1081 >            txt += '\n'
1082 >            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1083 >            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1084 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1085 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1086 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1087 >            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1088 >            txt += 'else\n'
1089 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1090 >            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1091 >            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1092 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1093 >            ###################  
1094 >            txt += 'fi\n'
1095 >            txt += '\n'
1096 >
1097 >            pass
1098 >        
1099          return txt
1100  
1101      def modifySteeringCards(self, nj):
# Line 447 | Line 1105 | class Cmssw(JobType):
1105          """
1106          
1107      def executableName(self):
1108 <        return self.executable
1108 >        if self.scriptExe: #CarlosDaniele
1109 >            return "sh "
1110 >        else:
1111 >            return self.executable
1112  
1113      def executableArgs(self):
1114 <        return "-p pset.cfg"
1114 >        if self.scriptExe:#CarlosDaniele
1115 >            return   self.scriptExe + " $NJob"
1116 >        else:
1117 >            return " -p pset.cfg"
1118  
1119      def inputSandbox(self, nj):
1120          """
1121          Returns a list of filenames to be put in JDL input sandbox.
1122          """
1123          inp_box = []
1124 <        # dict added to delete duplicate from input sandbox file list
1125 <        seen = {}
1124 >        # # dict added to delete duplicate from input sandbox file list
1125 >        # seen = {}
1126          ## code
1127          if os.path.isfile(self.tgzNameWithPath):
1128              inp_box.append(self.tgzNameWithPath)
1129 +        if os.path.isfile(self.MLtgzfile):
1130 +            inp_box.append(self.MLtgzfile)
1131          ## config
1132 <        inp_box.append(common.job_list[nj].configFilename())
1132 >        if not self.pset is None:
1133 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1134          ## additional input files
1135 <        for file in self.additional_inbox_files:
1136 <            inp_box.append(common.work_space.cwdDir()+file)
470 <        #print "sono inputSandbox, inp_box = ", inp_box
1135 >        tgz = self.additionalInputFileTgz()
1136 >        inp_box.append(tgz)
1137          return inp_box
1138  
1139      def outputSandbox(self, nj):
# Line 476 | Line 1142 | class Cmssw(JobType):
1142          """
1143          out_box = []
1144  
479        stdout=common.job_list[nj].stdout()
480        stderr=common.job_list[nj].stderr()
481
1145          ## User Declared output files
1146 <        for out in self.output_file:
1146 >        for out in (self.output_file+self.output_file_sandbox):
1147              n_out = nj + 1
1148              out_box.append(self.numberFile_(out,str(n_out)))
1149          return out_box
487        return []
1150  
1151      def prepareSteeringCards(self):
1152          """
1153          Make initial modifications of the user's steering card file.
1154          """
493        infile = open(self.pset,'r')
494            
495        outfile = open(common.work_space.jobDir()+self.name()+'.cfg', 'w')
496          
497        outfile.write('\n\n##### The following cards have been created by CRAB: DO NOT TOUCH #####\n')
498
499        outfile.write('InputCollections=/System/'+self.owner+'/'+self.dataset+'/'+self.dataset+'\n')
500
501        infile.close()
502        outfile.close()
1155          return
1156  
1157      def wsRenameOutput(self, nj):
# Line 508 | Line 1160 | class Cmssw(JobType):
1160          """
1161  
1162          txt = '\n'
1163 <        file_list = ''
1164 <        for fileWithSuffix in self.output_file:
1163 >        txt += '# directory content\n'
1164 >        txt += 'ls \n'
1165 >
1166 >        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1167              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
514            file_list=file_list+output_file_num+','
515            txt += '\n'
516            txt += 'ls \n'
1168              txt += '\n'
1169 <            txt += 'ls '+fileWithSuffix+'\n'
1170 <            txt += 'exe_result=$?\n'
1171 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1172 <            txt += '   echo "ERROR: No output file to manage"\n'
1173 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
523 <            txt += '   echo "SanityCheckCode = $exe_result" | tee -a $RUNTIME_AREA/$repo\n'
524 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
525 <            txt += '   exit $exe_result \n'
1169 >            txt += '# check output file\n'
1170 >            # txt += 'ls '+fileWithSuffix+'\n'
1171 >            # txt += 'ls_result=$?\n'
1172 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1173 >            txt += '   mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1174              txt += 'else\n'
1175 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1175 >            txt += '   exit_status=60302\n'
1176 >            txt += '   echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1177 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1178 >                txt += '    if [ $middleware == OSG ]; then \n'
1179 >                txt += '        echo "prepare dummy output file"\n'
1180 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1181 >                txt += '    fi \n'
1182              txt += 'fi\n'
1183 <            txt += 'cd $RUNTIME_AREA\n'
1184 <                      
1185 <            pass
1183 >        file_list = []
1184 >        for fileWithSuffix in (self.output_file):
1185 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1186 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1187        
1188 <        file_list=file_list[:-1]
1189 <        txt += 'file_list='+file_list+'\n'
1188 >        txt += 'cd $RUNTIME_AREA\n'
1189 >        #### FEDE this is the cleanEnv function
1190 >        ### OLI_DANIELE
1191 >        #txt += 'if [ $middleware == OSG ]; then\n'  
1192 >        #txt += '    cd $RUNTIME_AREA\n'
1193 >        #txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1194 >        #txt += '    /bin/rm -rf $WORKING_DIR\n'
1195 >        #txt += '    if [ -d $WORKING_DIR ] ;then\n'
1196 >        #txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1197 >        #txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1198 >        #txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1199 >        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1200 >        #txt += '        rm -f $RUNTIME_AREA/$repo \n'
1201 >        #txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1202 >        #txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1203 >        #txt += '    fi\n'
1204 >        #txt += 'fi\n'
1205 >        #txt += '\n'
1206 >
1207 >
1208          return txt
1209  
1210      def numberFile_(self, file, txt):
# Line 542 | Line 1215 | class Cmssw(JobType):
1215          # take away last extension
1216          name = p[0]
1217          for x in p[1:-1]:
1218 <           name=name+"."+x
1218 >            name=name+"."+x
1219          # add "_txt"
1220          if len(p)>1:
1221 <          ext = p[len(p)-1]
1222 <          #result = name + '_' + str(txt) + "." + ext
550 <          result = name + '_' + txt + "." + ext
1221 >            ext = p[len(p)-1]
1222 >            result = name + '_' + txt + "." + ext
1223          else:
1224 <          #result = name + '_' + str(txt)
553 <          result = name + '_' + txt
1224 >            result = name + '_' + txt
1225          
1226          return result
1227  
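
(Note, not part of the file: numberFile_ inserts the job index just before the last extension of an output file name. A standalone sketch of the same renaming, for illustration only:

def number_file(filename, tag):
    # Insert "_tag" before the final extension, or append it if there is none.
    parts = filename.split(".")
    if len(parts) > 1:
        return ".".join(parts[:-1]) + "_" + tag + "." + parts[-1]
    return filename + "_" + tag

# number_file("histo.root", "3")   -> "histo_3.root"
# number_file("output", "$NJob")   -> "output_$NJob"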
1228 <    def getRequirements(self):
1228 >    def getRequirements(self, nj=[]):
1229          """
1230          return job requirements to add to jdl files
1231          """
1232          req = ''
1233 <        if common.analisys_common_info['sites']:
1234 <            if common.analisys_common_info['sw_version']:
1235 <                req='Member("VO-cms-' + \
1236 <                     common.analisys_common_info['sw_version'] + \
1237 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1238 <            if len(common.analisys_common_info['sites'])>0:
1239 <                req = req + ' && ('
1240 <                for i in range(len(common.analisys_common_info['sites'])):
1241 <                    req = req + 'other.GlueCEInfoHostName == "' \
1242 <                         + common.analisys_common_info['sites'][i] + '"'
1243 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1244 <                        req = req + ' || '
1245 <            req = req + ')'
575 <        #print "req = ", req
1233 >        if self.version:
1234 >            req='Member("VO-cms-' + \
1235 >                 self.version + \
1236 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1237 >        ## SL add requirement for OS version only if SL4
1238 >        #reSL4 = re.compile( r'slc4' )
1239 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1240 >            req+=' && Member("VO-cms-' + \
1241 >                 self.executable_arch + \
1242 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1243 >
1244 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1245 >
1246          return req
1247 +
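
(Note, not part of the file: getRequirements assembles the JDL requirements string from the CMSSW version tag, the SCRAM architecture, and an outbound-IP check. A small sketch reproducing that string for hypothetical values; the version and architecture below are examples, not taken from this file:

def build_requirements(version, arch):
    # Same ClassAd expression the method above emits.
    req = 'Member("VO-cms-' + version + '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    if arch:
        req += ' && Member("VO-cms-' + arch + '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    req += ' && (other.GlueHostNetworkAdapterOutboundIP)'
    return req

# build_requirements("CMSSW_1_5_2", "slc4_ia32_gcc345")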
1248 +    def configFilename(self):
1249 +        """ return the config filename """
1250 +        return self.name()+'.cfg'
1251 +
1252 +    ### OLI_DANIELE
1253 +    def wsSetupCMSOSGEnvironment_(self):
1254 +        """
1255 +        Returns the part of the job script that prepares
1256 +        the execution environment and is common to all CMS jobs.
1257 +        """
1258 +        txt = '\n'
1259 +        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1260 +        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1261 +        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1262 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1263 +        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1264 +        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1265 +        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1266 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1267 +        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1268 +        txt += '   else\n'
1269 +        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1270 +        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1271 +        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1272 +        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1273 +        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1274 +        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1275 +        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1276 +        txt += '       exit 1\n'
1277 +        txt += '\n'
1278 +        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1279 +        txt += '       cd $RUNTIME_AREA\n'
1280 +        txt += '       /bin/rm -rf $WORKING_DIR\n'
1281 +        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1282 +        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1283 +        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1284 +        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1285 +        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1286 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1287 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1288 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1289 +        txt += '       fi\n'
1290 +        txt += '\n'
1291 +        txt += '       exit 1\n'
1292 +        txt += '   fi\n'
1293 +        txt += '\n'
1294 +        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1295 +        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1296 +
1297 +        return txt
1298 +
1299 +    ### OLI_DANIELE
1300 +    def wsSetupCMSLCGEnvironment_(self):
1301 +        """
1302 +        Returns the part of the job script that prepares
1303 +        the execution environment and is common to all CMS jobs.
1304 +        """
1305 +        txt  = '   \n'
1306 +        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1307 +        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1308 +        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1309 +        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1310 +        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1311 +        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1312 +        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1313 +        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1314 +        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1315 +        txt += '       exit 1\n'
1316 +        txt += '   else\n'
1317 +        txt += '       echo "Sourcing environment... "\n'
1318 +        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1319 +        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1320 +        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1321 +        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1322 +        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1323 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1324 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1325 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1326 +        txt += '           exit 1\n'
1327 +        txt += '       fi\n'
1328 +        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1329 +        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1330 +        txt += '       result=$?\n'
1331 +        txt += '       if [ $result -ne 0 ]; then\n'
1332 +        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1333 +        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1334 +        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1335 +        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1336 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1337 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1338 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1339 +        txt += '           exit 1\n'
1340 +        txt += '       fi\n'
1341 +        txt += '   fi\n'
1342 +        txt += '   \n'
1343 +        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1344 +        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1345 +        return txt
1346 +
1347 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1348 +    def modifyReport(self, nj):
1349 +        """
1350 +        insert the part of the script that modifies the FrameworkJob Report
1351 +        """
1352 +
1353 +        txt = ''
1354 +        txt += 'echo "Modify Job Report" \n'
1355 +        #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1356 +        ################ FEDE FOR DBS2 #############################################
1357 +        txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1358 +        #############################################################################
1359 +        try:
1360 +            publish_data = int(self.cfg_params['USER.publish_data'])          
1361 +        except KeyError:
1362 +            publish_data = 0
1363 +
1364 +        txt += 'if [ -z "$SE" ]; then\n'
1365 +        txt += '    SE="" \n'
1366 +        txt += 'fi \n'
1367 +        txt += 'if [ -z "$SE_PATH" ]; then\n'
1368 +        txt += '    SE_PATH="" \n'
1369 +        txt += 'fi \n'
1370 +        txt += 'echo "SE = $SE"\n'
1371 +        txt += 'echo "SE_PATH = $SE_PATH"\n'
1372 +
1373 +        if (publish_data == 1):  
1374 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1375 +            processedDataset = self.cfg_params['USER.publish_data_name']
1376 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1377 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1378 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1379 +            #### FEDE: added slash in LFN ##############
1380 +            txt += '    FOR_LFN=/copy_problems/ \n'
1381 +            txt += 'else \n'
1382 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1383 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1384 +            txt += '    FOR_LFN=/store$tmp \n'
1385 +            txt += 'fi \n'
1386 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1387 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1388 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1389 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1390 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1391 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1392 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1393 +      
1394 +            txt += 'modifyReport_result=$?\n'
1395 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1396 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1397 +            txt += '    exit_status=1\n'
1398 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1399 +            txt += 'else\n'
1400 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1401 +            txt += 'fi\n'
1402 +        else:
1403 +            txt += 'ProcessedDataset=no_data_to_publish \n'
1404 +            #### FEDE: added slash in LFN ##############
1405 +            txt += 'FOR_LFN=/local/ \n'
1406 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1407 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1408 +        return txt
1409 +
1410 +    def cleanEnv(self):
1411 +        ### OLI_DANIELE
1412 +        txt = ''
1413 +        txt += 'if [ $middleware == OSG ]; then\n'  
1414 +        txt += '    cd $RUNTIME_AREA\n'
1415 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1416 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1417 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1418 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1419 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1420 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1421 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1422 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1423 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1424 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1425 +        txt += '    fi\n'
1426 +        txt += 'fi\n'
1427 +        txt += '\n'
1428 +        return txt
1429 +
1430 +    def setParam_(self, param, value):
1431 +        self._params[param] = value
1432 +
1433 +    def getParams(self):
1434 +        return self._params
1435 +
1436 +    def setTaskid_(self):
1437 +        self._taskId = self.cfg_params['taskId']
1438 +        
1439 +    def getTaskid(self):
1440 +        return self._taskId
1441 +
1442 +    def uniquelist(self, old):
1443 +        """
1444 +        remove duplicates from a list
1445 +        """
1446 +        nd={}
1447 +        for e in old:
1448 +            nd[e]=0
1449 +        return nd.keys()
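
(Note, not part of the file: uniquelist drops duplicates by loading the entries into a dict and returning its keys, so the result order is arbitrary in this Python 2-era idiom. If order mattered, an order-preserving variant could look like the following hypothetical helper:

def unique_ordered(items):
    # Keep the first occurrence of each item, preserving input order.
    seen = {}
    result = []
    for item in items:
        if item not in seen:
            seen[item] = 1
            result.append(item)
    return result

# unique_ordered(['a.root', 'b.root', 'a.root']) -> ['a.root', 'b.root']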

Diff Legend

Removed lines (no marker)
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)