ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.7 by gutsche, Tue Jun 13 20:43:00 2006 UTC vs.
Revision 1.117 by fanzago, Fri Aug 17 10:45:26 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DBSInfo_EDM
9 import DataDiscovery_EDM
10 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
20        self.analisys_common_info = {}
21        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18  
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27 +        # number of jobs requested to be created, limit obj splitting
28 +        self.ncjobs = ncjobs
29 +
30          log = common.logger
31          
32          self.scram = Scram.Scram(cfg_params)
28        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      # script use case (Da)
41 +        self.datasetPath = '' # script use case (Da)
42  
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +        
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 +            raise CrabException(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +        
61 +        common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
37        common.analisys_common_info['sw_version'] = self.version
38        ### FEDE
39        common.analisys_common_info['copy_input_data'] = 0
40        common.analisys_common_info['events_management'] = 1
63  
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67          try:
68 <         #   self.owner = cfg_params['CMSSW.owner']
69 <         #   log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
70 <         #   self.dataset = cfg_params['CMSSW.dataset']
71 <            self.datasetPath = cfg_params['CMSSW.datasetpath']
72 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+self.datasetPath)
68 >            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 >        except KeyError:
70 >            self.use_dbs_1 = 0
71 >            
72 >        try:
73 >            tmp =  cfg_params['CMSSW.datasetpath']
74 >            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75 >            if string.lower(tmp)=='none':
76 >                self.datasetPath = None
77 >                self.selectNoInput = 1
78 >            else:
79 >                self.datasetPath = tmp
80 >                self.selectNoInput = 0
81          except KeyError:
50        #    msg = "Error: owner and/or dataset not defined "
82              msg = "Error: datasetpath not defined "  
83              raise CrabException(msg)
84  
85          # ML monitoring
86          # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
87 <        datasetpath_split = self.datasetPath.split("/")
88 <        self.setParam_('dataset', datasetpath_split[1])
89 <        self.setParam_('owner', datasetpath_split[-1])
90 <
91 <
92 <
87 >        if not self.datasetPath:
88 >            self.setParam_('dataset', 'None')
89 >            self.setParam_('owner', 'None')
90 >        else:
91 >            try:
92 >                datasetpath_split = self.datasetPath.split("/")
93 >                # standard style
94 >                if self.use_dbs_1 == 1 :
95 >                    self.setParam_('dataset', datasetpath_split[1])
96 >                    self.setParam_('owner', datasetpath_split[-1])
97 >                else:
98 >                    self.setParam_('dataset', datasetpath_split[1])
99 >                    self.setParam_('owner', datasetpath_split[2])
100 >            except:
101 >                self.setParam_('dataset', self.datasetPath)
102 >                self.setParam_('owner', self.datasetPath)
103 >                
104 >        self.setTaskid_()
105 >        self.setParam_('taskId', self.cfg_params['taskId'])
106  
107          self.dataTiers = []
64 #       try:
65 #           tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
66 #           for tmp in tmpDataTiers:
67 #               tmp=string.strip(tmp)
68 #               self.dataTiers.append(tmp)
69 #               pass
70 #           pass
71 #       except KeyError:
72 #           pass
73 #       log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
108  
109          ## now the application
110          try:
# Line 89 | Line 123 | class Cmssw(JobType):
123          try:
124              self.pset = cfg_params['CMSSW.pset']
125              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
126 <            if (not os.path.exists(self.pset)):
127 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
126 >            if self.pset.lower() != 'none' :
127 >                if (not os.path.exists(self.pset)):
128 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
129 >            else:
130 >                self.pset = None
131          except KeyError:
132              raise CrabException("PSet file missing. Cannot run cmsRun ")
133  
134          # output files
135 +        ## stuff which must be returned always via sandbox
136 +        self.output_file_sandbox = []
137 +
138 +        # add fjr report by default via sandbox
139 +        self.output_file_sandbox.append(self.fjrFileName)
140 +
141 +        # other output files to be returned via sandbox or copied to SE
142          try:
143              self.output_file = []
100
144              tmp = cfg_params['CMSSW.output_file']
145              if tmp != '':
146                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 107 | Line 150 | class Cmssw(JobType):
150                      self.output_file.append(tmp)
151                      pass
152              else:
153 <                log.message("No output file defined: only stdout/err will be available")
153 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
154                  pass
155              pass
156          except KeyError:
157 <            log.message("No output file defined: only stdout/err will be available")
157 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
158              pass
159  
160          # script_exe file as additional file in inputSandbox
161          try:
162 <           self.scriptExe = cfg_params['USER.script_exe']
163 <           self.additional_inbox_files.append(self.scriptExe)
162 >            self.scriptExe = cfg_params['USER.script_exe']
163 >            if self.scriptExe != '':
164 >               if not os.path.isfile(self.scriptExe):
165 >                  msg ="ERROR. file "+self.scriptExe+" not found"
166 >                  raise CrabException(msg)
167 >               self.additional_inbox_files.append(string.strip(self.scriptExe))
168          except KeyError:
169 <           pass
170 <        if self.scriptExe != '':
171 <           if os.path.isfile(self.scriptExe):
172 <              pass
173 <           else:
174 <              log.message("WARNING. file "+self.scriptExe+" not found")
175 <              sys.exit()
129 <                  
169 >            self.scriptExe = ''
170 >
171 >        #CarlosDaniele
172 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
173 >           msg ="Error. script_exe  not defined"
174 >           raise CrabException(msg)
175 >
176          ## additional input files
177          try:
178 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
178 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
179              for tmp in tmpAddFiles:
180 <                if not os.path.exists(tmp):
181 <                    raise CrabException("Additional input file not found: "+tmp)
182 <                tmp=string.strip(tmp)
183 <                self.additional_inbox_files.append(tmp)
180 >                tmp = string.strip(tmp)
181 >                dirname = ''
182 >                if not tmp[0]=="/": dirname = "."
183 >                files = []
184 >                if string.find(tmp,"*")>-1:
185 >                    files = glob.glob(os.path.join(dirname, tmp))
186 >                    if len(files)==0:
187 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
188 >                else:
189 >                    files.append(tmp)
190 >                for file in files:
191 >                    if not os.path.exists(file):
192 >                        raise CrabException("Additional input file not found: "+file)
193 >                    pass
194 >                    # fname = string.split(file, '/')[-1]
195 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
196 >                    # shutil.copyfile(file, storedFile)
197 >                    self.additional_inbox_files.append(string.strip(file))
198                  pass
199              pass
200 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
201          except KeyError:
202              pass
203  
204 +        # files per job
205          try:
206 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
206 >            if (cfg_params['CMSSW.files_per_jobs']):
207 >                raise CrabException("files_per_jobs no longer supported.  Quitting.")
208          except KeyError:
209 <            self.filesPerJob = 1
209 >            pass
210  
211 <        ## Max event   will be total_number_of_events ???  Daniele
211 >        ## Events per job
212          try:
213 <            self.maxEv = cfg_params['CMSSW.event_per_job']
213 >            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
214 >            self.selectEventsPerJob = 1
215          except KeyError:
216 <            self.maxEv = "-1"
217 <        ##  
216 >            self.eventsPerJob = -1
217 >            self.selectEventsPerJob = 0
218 >    
219 >        ## number of jobs
220 >        try:
221 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
222 >            self.selectNumberOfJobs = 1
223 >        except KeyError:
224 >            self.theNumberOfJobs = 0
225 >            self.selectNumberOfJobs = 0
226 >
227          try:
228              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
229 +            self.selectTotalNumberEvents = 1
230          except KeyError:
231 <            msg = 'Must define total_number_of_events'
232 <            raise CrabException(msg)
233 <        
234 <        CEBlackList = []
231 >            self.total_number_of_events = 0
232 >            self.selectTotalNumberEvents = 0
233 >
234 >        if self.pset != None: #CarlosDaniele
235 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
236 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
237 >                 raise CrabException(msg)
238 >        else:
239 >             if (self.selectNumberOfJobs == 0):
240 >                 msg = 'Must specify  number_of_jobs.'
241 >                 raise CrabException(msg)
242 >
243 >        ## source seed for pythia
244          try:
245 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
163 <            for tmp in tmpBad:
164 <                tmp=string.strip(tmp)
165 <                CEBlackList.append(tmp)
245 >            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
246          except KeyError:
247 <            pass
247 >            self.sourceSeed = None
248 >            common.logger.debug(5,"No seed given")
249  
250 <        self.reCEBlackList=[]
251 <        for bad in CEBlackList:
171 <            self.reCEBlackList.append(re.compile( bad ))
172 <
173 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
174 <
175 <        CEWhiteList = []
176 <        try:
177 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
178 <            for tmp in tmpGood:
179 <                tmp=string.strip(tmp)
180 <                CEWhiteList.append(tmp)
250 >        try:
251 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
252          except KeyError:
253 <            pass
253 >            self.sourceSeedVtx = None
254 >            common.logger.debug(5,"No vertex seed given")
255  
256 <        #print 'CEWhiteList: ',CEWhiteList
257 <        self.reCEWhiteList=[]
258 <        for Good in CEWhiteList:
259 <            self.reCEWhiteList.append(re.compile( Good ))
256 >        try:
257 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
258 >        except KeyError:
259 >            self.sourceSeedG4 = None
260 >            common.logger.debug(5,"No g4 sim hits seed given")
261  
262 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
262 >        try:
263 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
264 >        except KeyError:
265 >            self.sourceSeedMix = None
266 >            common.logger.debug(5,"No mix seed given")
267  
268 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
268 >        try:
269 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
270 >        except KeyError:
271 >            self.firstRun = None
272 >            common.logger.debug(5,"No first run given")
273 >        if self.pset != None: #CarlosDaniele
274 >            ver = string.split(self.version,"_")
275 >            if (int(ver[1])>=1 and int(ver[2])>=5):
276 >                import PsetManipulator150 as pp
277 >            else:
278 >                import PsetManipulator as pp
279 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
280  
281          #DBSDLS-start
282          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
283          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
284          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
285 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
286          ## Perform the data location and discovery (based on DBS/DLS)
287 <        self.DataDiscoveryAndLocation(cfg_params)
287 >        ## SL: Don't if NONE is specified as input (pythia use case)
288 >        blockSites = {}
289 >        if self.datasetPath:
290 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
291          #DBSDLS-end          
292  
293          self.tgzNameWithPath = self.getTarBall(self.executable)
294 +    
295 +        ## Select Splitting
296 +        if self.selectNoInput:
297 +            if self.pset == None: #CarlosDaniele
298 +                self.jobSplittingForScript()
299 +            else:
300 +                self.jobSplittingNoInput()
301 +        else:
302 +            self.jobSplittingByBlocks(blockSites)
303  
304 <        self.jobSplitting()  #Daniele job Splitting
305 <        self.PsetEdit.maxEvent(self.maxEv) #Daniele  
306 <        self.PsetEdit.inputModule("INPUT") #Daniele  
307 <        self.PsetEdit.psetWriter(self.configFilename())
308 <        
309 <
304 >        # modify Pset
305 >        if self.pset != None: #CarlosDaniele
306 >            try:
307 >                if (self.datasetPath): # standard job
308 >                    # allow to processa a fraction of events in a file
309 >                    PsetEdit.inputModule("INPUT")
310 >                    PsetEdit.maxEvent("INPUTMAXEVENTS")
311 >                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
312 >                else:  # pythia like job
313 >                    PsetEdit.maxEvent(self.eventsPerJob)
314 >                    if (self.firstRun):
315 >                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
316 >                    if (self.sourceSeed) :
317 >                        PsetEdit.pythiaSeed("INPUT")
318 >                        if (self.sourceSeedVtx) :
319 >                            PsetEdit.vtxSeed("INPUTVTX")
320 >                        if (self.sourceSeedG4) :
321 >                            self.PsetEdit.g4Seed("INPUTG4")
322 >                        if (self.sourceSeedMix) :
323 >                            self.PsetEdit.mixSeed("INPUTMIX")
324 >                # add FrameworkJobReport to parameter-set
325 >                PsetEdit.addCrabFJR(self.fjrFileName)
326 >                PsetEdit.psetWriter(self.configFilename())
327 >            except:
328 >                msg='Error while manipuliating ParameterSet: exiting...'
329 >                raise CrabException(msg)
330  
331      def DataDiscoveryAndLocation(self, cfg_params):
332  
333 +        import DataDiscovery
334 +        import DataDiscovery_DBS2
335 +        import DataLocation
336          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
337  
214        #datasetPath = "/"+self.owner+"/"+self.dataTiers[0]+"/"+self.dataset
215        
338          datasetPath=self.datasetPath
339  
218        ## TODO
219        dataTiersList = ""
220        dataTiers = dataTiersList.split(',')
221
340          ## Contact the DBS
341 +        common.logger.message("Contacting Data Discovery Services ...")
342          try:
343 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
343 >
344 >            if self.use_dbs_1 == 1 :
345 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
346 >            else :
347 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
348              self.pubdata.fetchDBSInfo()
349  
350 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
350 >        except DataDiscovery.NotExistingDatasetError, ex :
351              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
352              raise CrabException(msg)
353 <
231 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
353 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
354              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
355              raise CrabException(msg)
356 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
357 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
356 >        except DataDiscovery.DataDiscoveryError, ex:
357 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
358 >            raise CrabException(msg)
359 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
360 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
361 >            raise CrabException(msg)
362 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
363 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
364 >            raise CrabException(msg)
365 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
366 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
367              raise CrabException(msg)
368  
369 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
370 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
371 <        common.logger.message("Required data are :"+self.datasetPath)
241 <
242 <        filesbyblock=self.pubdata.getFiles()
243 <        self.AllInputFiles=filesbyblock.values()
244 <        self.files = self.AllInputFiles        
245 <
246 <        ## TEMP
247 <    #    self.filesTmp = filesbyblock.values()
248 <    #    self.files = []
249 <    #    locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
250 <    #    locPath=''
251 <    #    tmp = []
252 <    #    for file in self.filesTmp[0]:
253 <    #        tmp.append(locPath+file)
254 <    #    self.files.append(tmp)
255 <        ## END TEMP
369 >        self.filesbyblock=self.pubdata.getFiles()
370 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
371 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
372  
373          ## get max number of events
258        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
374          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
260        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
375  
376          ## Contact the DLS and build a list of sites hosting the fileblocks
377          try:
378 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
378 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
379              dataloc.fetchDLSInfo()
380 <        except DataLocation_EDM.DataLocationError , ex:
380 >        except DataLocation.DataLocationError , ex:
381              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
382              raise CrabException(msg)
383          
270        allsites=dataloc.getSites()
271        common.logger.debug(5,"sites are %s"%allsites)
272        sites=self.checkBlackList(allsites)
273        common.logger.debug(5,"sites are (after black list) %s"%sites)
274        sites=self.checkWhiteList(sites)
275        common.logger.debug(5,"sites are (after white list) %s"%sites)
276
277        if len(sites)==0:
278            msg = 'No sites hosting all the needed data! Exiting... '
279            raise CrabException(msg)
384  
385 <        common.logger.message("List of Sites hosting the data : "+str(sites))
386 <        common.logger.debug(6, "List of Sites: "+str(sites))
387 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
388 <        self.setParam_('TargetCE', ','.join(sites))
389 <        return
385 >        sites = dataloc.getSites()
386 >        allSites = []
387 >        listSites = sites.values()
388 >        for listSite in listSites:
389 >            for oneSite in listSite:
390 >                allSites.append(oneSite)
391 >        allSites = self.uniquelist(allSites)
392 >
393 >        # screen output
394 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
395 >
396 >        return sites
397      
398 <    def jobSplitting(self):
398 >    def jobSplittingByBlocks(self, blockSites):
399          """
400 <        first implemntation for job splitting  
401 <        """    
402 <      #  print 'eventi totali '+str(self.maxEvents)
403 <      #  print 'eventi totali richiesti dallo user '+str(self.total_number_of_events)
404 <        #print 'files per job '+str(self.filesPerJob)
405 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
406 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
407 <
408 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
409 <        n_tot_files = (len(self.files[0]))
410 <        ## SL: this is wrong if the files have different number of events
411 <        evPerFile = int(self.maxEvents)/n_tot_files
412 <        
413 <        common.logger.debug(5,'Events per File '+str(evPerFile))
414 <
415 <        ## if asked to process all events, do it
416 <        if self.total_number_of_events == -1:
417 <            self.total_number_of_events=self.maxEvents
418 <            self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
419 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
420 <        
400 >        Perform job splitting. Jobs run over an integer number of files
401 >        and no more than one block.
402 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
403 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
404 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
405 >                  self.maxEvents, self.filesbyblock
406 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
407 >              self.total_number_of_jobs - Total # of jobs
408 >              self.list_of_args - File(s) job will run on (a list of lists)
409 >        """
410 >
411 >        # ---- Handle the possible job splitting configurations ---- #
412 >        if (self.selectTotalNumberEvents):
413 >            totalEventsRequested = self.total_number_of_events
414 >        if (self.selectEventsPerJob):
415 >            eventsPerJobRequested = self.eventsPerJob
416 >            if (self.selectNumberOfJobs):
417 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
418 >
419 >        # If user requested all the events in the dataset
420 >        if (totalEventsRequested == -1):
421 >            eventsRemaining=self.maxEvents
422 >        # If user requested more events than are in the dataset
423 >        elif (totalEventsRequested > self.maxEvents):
424 >            eventsRemaining = self.maxEvents
425 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
426 >        # If user requested less events than are in the dataset
427          else:
428 <            self.total_number_of_files = int(self.total_number_of_events/evPerFile)
429 <            ## SL: if ask for less event than what is computed to be available on a
430 <            ##     file, process the first file anyhow.
431 <            if self.total_number_of_files == 0:
432 <                self.total_number_of_files = self.total_number_of_files + 1
428 >            eventsRemaining = totalEventsRequested
429 >
430 >        # If user requested more events per job than are in the dataset
431 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
432 >            eventsPerJobRequested = self.maxEvents
433  
434 <            common.logger.debug(5,'N files  '+str(self.total_number_of_files))
434 >        # For user info at end
435 >        totalEventCount = 0
436  
437 <            check = 0
437 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
438 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
439 >
440 >        if (self.selectNumberOfJobs):
441 >            common.logger.message("May not create the exact number_of_jobs requested.")
442 >
443 >        if ( self.ncjobs == 'all' ) :
444 >            totalNumberOfJobs = 999999999
445 >        else :
446 >            totalNumberOfJobs = self.ncjobs
447 >            
448 >
449 >        blocks = blockSites.keys()
450 >        blockCount = 0
451 >        # Backup variable in case self.maxEvents counted events in a non-included block
452 >        numBlocksInDataset = len(blocks)
453 >
454 >        jobCount = 0
455 >        list_of_lists = []
456 >
457 >        # list tracking which jobs are in which jobs belong to which block
458 >        jobsOfBlock = {}
459 >
460 >        # ---- Iterate over the blocks in the dataset until ---- #
461 >        # ---- we've met the requested total # of events    ---- #
462 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
463 >            block = blocks[blockCount]
464 >            blockCount += 1
465 >            if block not in jobsOfBlock.keys() :
466 >                jobsOfBlock[block] = []
467 >            
468 >            if self.eventsbyblock.has_key(block) :
469 >                numEventsInBlock = self.eventsbyblock[block]
470 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
471              
472 <            ## Compute the number of jobs
473 <            #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
474 <            self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
475 <            common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
472 >                files = self.filesbyblock[block]
473 >                numFilesInBlock = len(files)
474 >                if (numFilesInBlock <= 0):
475 >                    continue
476 >                fileCount = 0
477 >
478 >                # ---- New block => New job ---- #
479 >                parString = "\\{"
480 >                # counter for number of events in files currently worked on
481 >                filesEventCount = 0
482 >                # flag if next while loop should touch new file
483 >                newFile = 1
484 >                # job event counter
485 >                jobSkipEventCount = 0
486 >            
487 >                # ---- Iterate over the files in the block until we've met the requested ---- #
488 >                # ---- total # of events or we've gone over all the files in this block  ---- #
489 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
490 >                    file = files[fileCount]
491 >                    if newFile :
492 >                        try:
493 >                            numEventsInFile = self.eventsbyfile[file]
494 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
495 >                            # increase filesEventCount
496 >                            filesEventCount += numEventsInFile
497 >                            # Add file to current job
498 >                            parString += '\\\"' + file + '\\\"\,'
499 >                            newFile = 0
500 >                        except KeyError:
501 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
502 >                        
503 >
504 >                    # if less events in file remain than eventsPerJobRequested
505 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
506 >                        # if last file in block
507 >                        if ( fileCount == numFilesInBlock-1 ) :
508 >                            # end job using last file, use remaining events in block
509 >                            # close job and touch new file
510 >                            fullString = parString[:-2]
511 >                            fullString += '\\}'
512 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
513 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
514 >                            self.jobDestination.append(blockSites[block])
515 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
516 >                            # fill jobs of block dictionary
517 >                            jobsOfBlock[block].append(jobCount+1)
518 >                            # reset counter
519 >                            jobCount = jobCount + 1
520 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
521 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
522 >                            jobSkipEventCount = 0
523 >                            # reset file
524 >                            parString = "\\{"
525 >                            filesEventCount = 0
526 >                            newFile = 1
527 >                            fileCount += 1
528 >                        else :
529 >                            # go to next file
530 >                            newFile = 1
531 >                            fileCount += 1
532 >                    # if events in file equal to eventsPerJobRequested
533 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
534 >                        # close job and touch new file
535 >                        fullString = parString[:-2]
536 >                        fullString += '\\}'
537 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
538 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
539 >                        self.jobDestination.append(blockSites[block])
540 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
541 >                        jobsOfBlock[block].append(jobCount+1)
542 >                        # reset counter
543 >                        jobCount = jobCount + 1
544 >                        totalEventCount = totalEventCount + eventsPerJobRequested
545 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
546 >                        jobSkipEventCount = 0
547 >                        # reset file
548 >                        parString = "\\{"
549 >                        filesEventCount = 0
550 >                        newFile = 1
551 >                        fileCount += 1
552 >                        
553 >                    # if more events in file remain than eventsPerJobRequested
554 >                    else :
555 >                        # close job but don't touch new file
556 >                        fullString = parString[:-2]
557 >                        fullString += '\\}'
558 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
559 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
560 >                        self.jobDestination.append(blockSites[block])
561 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
562 >                        jobsOfBlock[block].append(jobCount+1)
563 >                        # increase counter
564 >                        jobCount = jobCount + 1
565 >                        totalEventCount = totalEventCount + eventsPerJobRequested
566 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
567 >                        # calculate skip events for last file
568 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
569 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
570 >                        # remove all but the last file
571 >                        filesEventCount = self.eventsbyfile[file]
572 >                        parString = "\\{"
573 >                        parString += '\\\"' + file + '\\\"\,'
574 >                    pass # END if
575 >                pass # END while (iterate over files in the block)
576 >        pass # END while (iterate over blocks in the dataset)
577 >        self.ncjobs = self.total_number_of_jobs = jobCount
578 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
579 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
580 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
581 >        
582 >        # screen output
583 >        screenOutput = "List of jobs and available destination sites:\n\n"
584 >
585 >        blockCounter = 0
586 >        for block in blocks:
587 >            if block in jobsOfBlock.keys() :
588 >                blockCounter += 1
589 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
590 >
591 >        common.logger.message(screenOutput)
592  
593 <            ## is there any remainder?
594 <            check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
593 >        self.list_of_args = list_of_lists
594 >        return
595 >
596 >    def jobSplittingNoInput(self):
597 >        """
598 >        Perform job splitting based on the number of events per job
599 >        """
600 >        common.logger.debug(5,'Splitting per events')
601 >        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
602 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
603 >        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
604 >
605 >        if (self.total_number_of_events < 0):
606 >            msg='Cannot split jobs per Events with "-1" as total number of events'
607 >            raise CrabException(msg)
608 >
609 >        if (self.selectEventsPerJob):
610 >            if (self.selectTotalNumberEvents):
611 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
612 >            elif(self.selectNumberOfJobs) :  
613 >                self.total_number_of_jobs =self.theNumberOfJobs
614 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
615 >
616 >        elif (self.selectNumberOfJobs) :
617 >            self.total_number_of_jobs = self.theNumberOfJobs
618 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
619 >
620 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
621  
622 <            common.logger.debug(5,'Check  '+str(check))
622 >        # is there any remainder?
623 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
624  
625 <            if check > 0:
332 <                self.total_number_of_jobs =  self.total_number_of_jobs + 1
333 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
625 >        common.logger.debug(5,'Check  '+str(check))
626  
627 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
627 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
628 >        if check > 0:
629 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
630 >
631 >        # argument is seed number.$i
632 >        self.list_of_args = []
633 >        for i in range(self.total_number_of_jobs):
634 >            ## Since there is no input, any site is good
635 >           # self.jobDestination.append(["Any"])
636 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
637 >            args=[]
638 >            if (self.firstRun):
639 >                    ## pythia first run
640 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
641 >                args.append(str(self.firstRun)+str(i))
642 >            else:
643 >                ## no first run
644 >                #self.list_of_args.append([str(i)])
645 >                args.append(str(i))
646 >            if (self.sourceSeed):
647 >                args.append(str(self.sourceSeed)+str(i))
648 >                if (self.sourceSeedVtx):
649 >                    ## + vtx random seed
650 >                    args.append(str(self.sourceSeedVtx)+str(i))
651 >                if (self.sourceSeedG4):
652 >                    ## + G4 random seed
653 >                    args.append(str(self.sourceSeedG4)+str(i))
654 >                if (self.sourceSeedMix):    
655 >                    ## + Mix random seed
656 >                    args.append(str(self.sourceSeedMix)+str(i))
657 >                pass
658              pass
659 +            self.list_of_args.append(args)
660 +        pass
661 +            
662 +        # print self.list_of_args
663  
664 <        list_of_lists = []
339 <        for i in xrange(0, int(n_tot_files), self.filesPerJob):
340 <            list_of_lists.append(self.files[0][i: i+self.filesPerJob])
664 >        return
665  
666 <        self.list_of_files = list_of_lists
667 <      
666 >
667 >    def jobSplittingForScript(self):#CarlosDaniele
668 >        """
669 >        Perform job splitting based on the number of jobs
670 >        """
671 >        common.logger.debug(5,'Splitting per job')
672 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
673 >
674 >        self.total_number_of_jobs = self.theNumberOfJobs
675 >
676 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
677 >
678 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
679 >
680 >        # argument is seed number.$i
681 >        self.list_of_args = []
682 >        for i in range(self.total_number_of_jobs):
683 >            ## Since there is no input, any site is good
684 >           # self.jobDestination.append(["Any"])
685 >            self.jobDestination.append([""])
686 >            ## no random seed
687 >            self.list_of_args.append([str(i)])
688          return
689  
690      def split(self, jobParams):
# Line 348 | Line 692 | class Cmssw(JobType):
692          common.jobDB.load()
693          #### Fabio
694          njobs = self.total_number_of_jobs
695 <        filelist = self.list_of_files
695 >        arglist = self.list_of_args
696          # create the empty structure
697          for i in range(njobs):
698              jobParams.append("")
699          
700          for job in range(njobs):
701 <            jobParams[job] = filelist[job]
701 >            jobParams[job] = arglist[job]
702 >            # print str(arglist[job])
703 >            # print jobParams[job]
704              common.jobDB.setArguments(job, jobParams[job])
705 +            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
706 +            common.jobDB.setDestination(job, self.jobDestination[job])
707  
708          common.jobDB.save()
709          return
710      
711      def getJobTypeArguments(self, nj, sched):
712 <        params = common.jobDB.arguments(nj)
713 <        #print params
714 <        parString = "\\{"
715 <        
368 <        for i in range(len(params) - 1):
369 <            parString += '\\\"' + params[i] + '\\\"\,'
370 <        
371 <        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
372 <        return parString
712 >        result = ''
713 >        for i in common.jobDB.arguments(nj):
714 >            result=result+str(i)+" "
715 >        return result
716    
717      def numberOfJobs(self):
718          # Fabio
376
719          return self.total_number_of_jobs
378
379
380
381    def checkBlackList(self, allSites):
382        if len(self.reCEBlackList)==0: return allSites
383        sites = []
384        for site in allSites:
385            common.logger.debug(10,'Site '+site)
386            good=1
387            for re in self.reCEBlackList:
388                if re.search(site):
389                    common.logger.message('CE in black list, skipping site '+site)
390                    good=0
391                pass
392            if good: sites.append(site)
393        if len(sites) == 0:
394            common.logger.debug(3,"No sites found after BlackList")
395        return sites
396
397    def checkWhiteList(self, allSites):
398
399        if len(self.reCEWhiteList)==0: return allSites
400        sites = []
401        for site in allSites:
402            good=0
403            for re in self.reCEWhiteList:
404                if re.search(site):
405                    common.logger.debug(5,'CE in white list, adding site '+site)
406                    good=1
407                if not good: continue
408                sites.append(site)
409        if len(sites) == 0:
410            common.logger.message("No sites found after WhiteList\n")
411        else:
412            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
413        return sites
720  
721      def getTarBall(self, exe):
722          """
# Line 418 | Line 724 | class Cmssw(JobType):
724          """
725          
726          # if it exist, just return it
727 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
727 >        #
728 >        # Marco. Let's start to use relative path for Boss XML files
729 >        #
730 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
731          if os.path.exists(self.tgzNameWithPath):
732              return self.tgzNameWithPath
733  
# Line 432 | Line 741 | class Cmssw(JobType):
741          # First of all declare the user Scram area
742          swArea = self.scram.getSWArea_()
743          #print "swArea = ", swArea
744 <        swVersion = self.scram.getSWVersion()
745 <        #print "swVersion = ", swVersion
744 >        # swVersion = self.scram.getSWVersion()
745 >        # print "swVersion = ", swVersion
746          swReleaseTop = self.scram.getReleaseTop_()
747          #print "swReleaseTop = ", swReleaseTop
748          
# Line 441 | Line 750 | class Cmssw(JobType):
750          if swReleaseTop == '' or swArea == swReleaseTop:
751              return
752  
753 <        filesToBeTarred = []
754 <        ## First find the executable
755 <        if (self.executable != ''):
756 <            exeWithPath = self.scram.findFile_(executable)
757 < #           print exeWithPath
758 <            if ( not exeWithPath ):
759 <                raise CrabException('User executable '+executable+' not found')
760 <
761 <            ## then check if it's private or not
762 <            if exeWithPath.find(swReleaseTop) == -1:
763 <                # the exe is private, so we must ship
764 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
765 <                path = swArea+'/'
766 <                exe = string.replace(exeWithPath, path,'')
767 <                filesToBeTarred.append(exe)
768 <                pass
769 <            else:
770 <                # the exe is from release, we'll find it on WN
771 <                pass
772 <
773 <        ## Now get the libraries: only those in local working area
774 <        libDir = 'lib'
775 <        lib = swArea+'/' +libDir
776 <        common.logger.debug(5,"lib "+lib+" to be tarred")
777 <        if os.path.exists(lib):
778 <            filesToBeTarred.append(libDir)
779 <
780 <        ## Now check if module dir is present
781 <        moduleDir = 'module'
782 <        if os.path.isdir(swArea+'/'+moduleDir):
783 <            filesToBeTarred.append(moduleDir)
784 <
785 <        ## Now check if the Data dir is present
786 <        dataDir = 'src/Data/'
787 <        if os.path.isdir(swArea+'/'+dataDir):
788 <            filesToBeTarred.append(dataDir)
789 <
790 <        ## Create the tar-ball
791 <        if len(filesToBeTarred)>0:
792 <            cwd = os.getcwd()
793 <            os.chdir(swArea)
794 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
795 <            for line in filesToBeTarred:
796 <                tarcmd = tarcmd + line + ' '
797 <            cout = runCommand(tarcmd)
798 <            if not cout:
799 <                raise CrabException('Could not create tar-ball')
800 <            os.chdir(cwd)
801 <        else:
802 <            common.logger.debug(5,"No files to be to be tarred")
753 >        import tarfile
754 >        try: # create tar ball
755 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
756 >            ## First find the executable
757 >            if (self.executable != ''):
758 >                exeWithPath = self.scram.findFile_(executable)
759 >                if ( not exeWithPath ):
760 >                    raise CrabException('User executable '+executable+' not found')
761 >    
762 >                ## then check if it's private or not
763 >                if exeWithPath.find(swReleaseTop) == -1:
764 >                    # the exe is private, so we must ship
765 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
766 >                    path = swArea+'/'
767 >                    # distinguish case when script is in user project area or given by full path somewhere else
768 >                    if exeWithPath.find(path) >= 0 :
769 >                        exe = string.replace(exeWithPath, path,'')
770 >                        tar.add(path+exe,os.path.basename(executable))
771 >                    else :
772 >                        tar.add(exeWithPath,os.path.basename(executable))
773 >                    pass
774 >                else:
775 >                    # the exe is from release, we'll find it on WN
776 >                    pass
777 >    
778 >            ## Now get the libraries: only those in local working area
779 >            libDir = 'lib'
780 >            lib = swArea+'/' +libDir
781 >            common.logger.debug(5,"lib "+lib+" to be tarred")
782 >            if os.path.exists(lib):
783 >                tar.add(lib,libDir)
784 >    
785 >            ## Now check if module dir is present
786 >            moduleDir = 'module'
787 >            module = swArea + '/' + moduleDir
788 >            if os.path.isdir(module):
789 >                tar.add(module,moduleDir)
790 >
791 >            ## Now check if any data dir(s) is present
792 >            swAreaLen=len(swArea)
793 >            for root, dirs, files in os.walk(swArea):
794 >                if "data" in dirs:
795 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
796 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
797 >
798 >            ## Add ProdAgent dir to tar
799 >            paDir = 'ProdAgentApi'
800 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
801 >            if os.path.isdir(pa):
802 >                tar.add(pa,paDir)
803 >
804 >            ### FEDE FOR DBS PUBLICATION
805 >            ## Add PRODCOMMON dir to tar
806 >            prodcommonDir = 'ProdCommon'
807 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
808 >            if os.path.isdir(prodcommonPath):
809 >                tar.add(prodcommonPath,prodcommonDir)
810 >            #############################    
811 >        
812 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
813 >            tar.close()
814 >        except :
815 >            raise CrabException('Could not create tar-ball')
816 >
817 >        ## check for tarball size
818 >        tarballinfo = os.stat(self.tgzNameWithPath)
819 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
820 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
821 >
822 >        ## create tar-ball with ML stuff
823 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
824 >        try:
825 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
826 >            path=os.environ['CRABDIR'] + '/python/'
827 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
828 >                tar.add(path+file,file)
829 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
830 >            tar.close()
831 >        except :
832 >            raise CrabException('Could not create ML files tar-ball')
833          
834          return
835          
836 +    def additionalInputFileTgz(self):
837 +        """
838 +        Put all additional files into a tar ball and return its name
839 +        """
840 +        import tarfile
841 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
842 +        tar = tarfile.open(tarName, "w:gz")
843 +        for file in self.additional_inbox_files:
844 +            tar.add(file,string.split(file,'/')[-1])
845 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
846 +        tar.close()
847 +        return tarName
848 +
849      def wsSetupEnvironment(self, nj):
850          """
851          Returns part of a job script which prepares
# Line 505 | Line 857 | class Cmssw(JobType):
857          ## OLI_Daniele at this level  middleware already known
858  
859          txt += 'if [ $middleware == LCG ]; then \n'
860 +        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
861 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
862 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
863          txt += self.wsSetupCMSLCGEnvironment_()
864          txt += 'elif [ $middleware == OSG ]; then\n'
865 <        txt += '    time=`date -u +"%s"`\n'
866 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
512 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
513 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
865 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
866 >        txt += '    echo "Created working directory: $WORKING_DIR"\n'
867          txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
868          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
869 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
870 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
871 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
869 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
870 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
871 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
872 >        txt += '        rm -f $RUNTIME_AREA/$repo \n'
873 >        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
874 >        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
875          txt += '        exit 1\n'
876          txt += '    fi\n'
877          txt += '\n'
878          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
879          txt += '    cd $WORKING_DIR\n'
880          txt += self.wsSetupCMSOSGEnvironment_()
881 +        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
882 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
883          txt += 'fi\n'
884  
885          # Prepare JobType-specific part
# Line 535 | Line 893 | class Cmssw(JobType):
893          txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
894          txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
895          txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
896 +        txt += '   rm -f $RUNTIME_AREA/$repo \n'
897 +        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
898 +        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
899          ## OLI_Daniele
900          txt += '    if [ $middleware == OSG ]; then \n'
901          txt += '        echo "Remove working directory: $WORKING_DIR"\n'
902          txt += '        cd $RUNTIME_AREA\n'
903          txt += '        /bin/rm -rf $WORKING_DIR\n'
904          txt += '        if [ -d $WORKING_DIR ] ;then\n'
905 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
906 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
907 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
908 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
905 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
906 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
907 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
908 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
909 >        txt += '            rm -f $RUNTIME_AREA/$repo \n'
910 >        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
911 >        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
912          txt += '        fi\n'
913          txt += '    fi \n'
914          txt += '   exit 1 \n'
915          txt += 'fi \n'
916          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
917          txt += 'cd '+self.version+'\n'
918 +        ########## FEDE FOR DBS2 ######################
919 +        txt += 'SOFTWARE_DIR=`pwd`\n'
920 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
921 +        ###############################################
922          ### needed grep for bug in scramv1 ###
923 +        txt += scram+' runtime -sh\n'
924          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
925 +        txt += 'echo $PATH\n'
926  
927          # Handle the arguments:
928          txt += "\n"
929          txt += "## number of arguments (first argument always jobnumber)\n"
930          txt += "\n"
931 <        txt += "narg=$#\n"
932 <        txt += "if [ $narg -lt 2 ]\n"
931 > #        txt += "narg=$#\n"
932 >        txt += "if [ $nargs -lt 2 ]\n"
933          txt += "then\n"
934 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
934 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
935          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
936          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
937          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
938 +        txt += '    rm -f $RUNTIME_AREA/$repo \n'
939 +        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
940 +        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
941          ## OLI_Daniele
942          txt += '    if [ $middleware == OSG ]; then \n'
943          txt += '        echo "Remove working directory: $WORKING_DIR"\n'
944          txt += '        cd $RUNTIME_AREA\n'
945          txt += '        /bin/rm -rf $WORKING_DIR\n'
946          txt += '        if [ -d $WORKING_DIR ] ;then\n'
947 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
948 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
949 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
950 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
947 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
948 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
949 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
950 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
951 >        txt += '            rm -f $RUNTIME_AREA/$repo \n'
952 >        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
953 >        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
954          txt += '        fi\n'
955          txt += '    fi \n'
956          txt += "    exit 1\n"
# Line 583 | Line 959 | class Cmssw(JobType):
959  
960          # Prepare job-specific part
961          job = common.job_list[nj]
962 <        pset = os.path.basename(job.configFilename())
963 <        txt += '\n'
964 <        txt += 'InputFiles=$2\n'
965 <        txt += 'echo "<$InputFiles>"\n'
966 <        #txt += 'echo sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' \n'
967 <        txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
968 <        #txt += 'sed "s#{\'INPUT\'}#${InputFiles}#" $RUNTIME_AREA/'+pset+' > pset1.cfg\n'
962 >        ### FEDE FOR DBS OUTPUT PUBLICATION
963 >        if (self.datasetPath):
964 >            txt += '\n'
965 >            txt += 'DatasetPath='+self.datasetPath+'\n'
966 >
967 >            datasetpath_split = self.datasetPath.split("/")
968 >            
969 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
970 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
971 >            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
972 >            txt += 'ApplicationFamily=cmsRun\n'
973 >
974 >        else:
975 >            txt += 'DatasetPath=MCDataTier\n'
976 >            txt += 'PrimaryDataset=null\n'
977 >            txt += 'DataTier=null\n'
978 >            #txt += 'ProcessedDataset=null\n'
979 >            txt += 'ApplicationFamily=MCDataTier\n'
980 >        if self.pset != None: #CarlosDaniele
981 >            pset = os.path.basename(job.configFilename())
982 >            txt += '\n'
983 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
984 >            if (self.datasetPath): # standard job
985 >                #txt += 'InputFiles=$2\n'
986 >                txt += 'InputFiles=${args[1]}\n'
987 >                txt += 'MaxEvents=${args[2]}\n'
988 >                txt += 'SkipEvents=${args[3]}\n'
989 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
990 >                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
991 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
992 >                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
993 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
994 >                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
995 >            else:  # pythia like job
996 >                seedIndex=1
997 >                if (self.firstRun):
998 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
999 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1000 >                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1001 >                    seedIndex=seedIndex+1
1002 >
1003 >                if (self.sourceSeed):
1004 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1005 >                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1006 >                    seedIndex=seedIndex+1
1007 >                    ## the following seeds are not always present
1008 >                    if (self.sourceSeedVtx):
1009 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1010 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1011 >                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1012 >                        seedIndex += 1
1013 >                    if (self.sourceSeedG4):
1014 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1015 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1016 >                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1017 >                        seedIndex += 1
1018 >                    if (self.sourceSeedMix):
1019 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1020 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1021 >                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1022 >                        seedIndex += 1
1023 >                    pass
1024 >                pass
1025 >            txt += 'mv -f '+pset+' pset.cfg\n'
1026  
1027          if len(self.additional_inbox_files) > 0:
1028 <            for file in self.additional_inbox_files:
1029 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
1030 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
598 <                txt += '   chmod +x '+file+'\n'
599 <                txt += 'fi\n'
1028 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1029 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1030 >            txt += 'fi\n'
1031              pass
1032  
1033 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1034 <
1035 <        txt += '\n'
1036 <        txt += 'echo "***** cat pset.cfg *********"\n'
1037 <        txt += 'cat pset.cfg\n'
1038 <        txt += 'echo "****** end pset.cfg ********"\n'
1039 <        txt += '\n'
1040 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
1041 <        # txt += 'cat pset1.cfg\n'
1042 <        # txt += 'echo "****** end pset1.cfg ********"\n'
1033 >        if self.pset != None: #CarlosDaniele
1034 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1035 >        
1036 >            txt += '\n'
1037 >            txt += 'echo "***** cat pset.cfg *********"\n'
1038 >            txt += 'cat pset.cfg\n'
1039 >            txt += 'echo "****** end pset.cfg ********"\n'
1040 >            txt += '\n'
1041 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1042 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1043 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1044 >            ##############
1045 >            txt += '\n'
1046 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1047 >            # txt += 'cat pset1.cfg\n'
1048 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1049          return txt
1050  
1051 <    def wsBuildExe(self, nj):
1051 >    def wsBuildExe(self, nj=0):
1052          """
1053          Put in the script the commands to build an executable
1054          or a library.
# Line 632 | Line 1069 | class Cmssw(JobType):
1069              txt += '       cd $RUNTIME_AREA\n'
1070              txt += '       /bin/rm -rf $WORKING_DIR\n'
1071              txt += '       if [ -d $WORKING_DIR ] ;then\n'
1072 <            txt += '        echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1073 <            txt += '        echo "JOB_EXIT_STATUS = 50999"\n'
1074 <            txt += '        echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1075 <            txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1072 >            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
1073 >            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1074 >            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1075 >            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1076 >            txt += '           rm -f $RUNTIME_AREA/$repo \n'
1077 >            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1078 >            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1079              txt += '       fi\n'
1080              txt += '   fi \n'
1081              txt += '   \n'
# Line 643 | Line 1083 | class Cmssw(JobType):
1083              txt += 'else \n'
1084              txt += '   echo "Successful untar" \n'
1085              txt += 'fi \n'
1086 +            txt += '\n'
1087 +            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1088 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1089 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1090 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1091 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1092 +            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1093 +            txt += 'else\n'
1094 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1095 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1096 +            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1097 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1098 +            ###################  
1099 +            txt += 'fi\n'
1100 +            txt += '\n'
1101 +
1102              pass
1103          
1104          return txt
# Line 654 | Line 1110 | class Cmssw(JobType):
1110          """
1111          
1112      def executableName(self):
1113 <        return self.executable
1113 >        if self.scriptExe: #CarlosDaniele
1114 >            return "sh "
1115 >        else:
1116 >            return self.executable
1117  
1118      def executableArgs(self):
1119 <        return " -p pset.cfg"
1119 >        if self.scriptExe:#CarlosDaniele
1120 >            return   self.scriptExe + " $NJob"
1121 >        else:
1122 >            # if >= CMSSW_1_5_X, add -e
1123 >            version_array = self.scram.getSWVersion().split('_')
1124 >            major = 0
1125 >            minor = 0
1126 >            try:
1127 >                major = int(version_array[1])
1128 >                minor = int(version_array[2])
1129 >            except:
1130 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1131 >                raise CrabException(msg)
1132 >            if major >= 1 and minor >= 5 :
1133 >                return " -e -p pset.cfg"
1134 >            else:
1135 >                return " -p pset.cfg"
1136  
1137      def inputSandbox(self, nj):
1138          """
1139          Returns a list of filenames to be put in JDL input sandbox.
1140          """
1141          inp_box = []
1142 <        # dict added to delete duplicate from input sandbox file list
1143 <        seen = {}
1142 >        # # dict added to delete duplicate from input sandbox file list
1143 >        # seen = {}
1144          ## code
1145          if os.path.isfile(self.tgzNameWithPath):
1146              inp_box.append(self.tgzNameWithPath)
1147 +        if os.path.isfile(self.MLtgzfile):
1148 +            inp_box.append(self.MLtgzfile)
1149          ## config
1150 <        inp_box.append(common.job_list[nj].configFilename())
1150 >        if not self.pset is None:
1151 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1152          ## additional input files
1153 <        #for file in self.additional_inbox_files:
1154 <        #    inp_box.append(common.work_space.cwdDir()+file)
1153 >        tgz = self.additionalInputFileTgz()
1154 >        inp_box.append(tgz)
1155          return inp_box
1156  
1157      def outputSandbox(self, nj):
# Line 682 | Line 1160 | class Cmssw(JobType):
1160          """
1161          out_box = []
1162  
685        stdout=common.job_list[nj].stdout()
686        stderr=common.job_list[nj].stderr()
687
1163          ## User Declared output files
1164 <        for out in self.output_file:
1164 >        for out in (self.output_file+self.output_file_sandbox):
1165              n_out = nj + 1
1166              out_box.append(self.numberFile_(out,str(n_out)))
1167          return out_box
693        return []
1168  
1169      def prepareSteeringCards(self):
1170          """
# Line 706 | Line 1180 | class Cmssw(JobType):
1180          txt = '\n'
1181          txt += '# directory content\n'
1182          txt += 'ls \n'
1183 <        file_list = ''
1184 <        for fileWithSuffix in self.output_file:
1183 >
1184 >        for fileWithSuffix in (self.output_file+self.output_file_sandbox):
1185              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
712            file_list=file_list+output_file_num+' '
1186              txt += '\n'
1187              txt += '# check output file\n'
1188 <            txt += 'ls '+fileWithSuffix+'\n'
1189 <            txt += 'exe_result=$?\n'
1190 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1191 <            txt += '   echo "ERROR: No output file to manage"\n'
1192 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1193 <            txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
1194 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
1195 <            ### OLI_DANIELE
1188 >            # txt += 'ls '+fileWithSuffix+'\n'
1189 >            # txt += 'ls_result=$?\n'
1190 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1191 >            ###### FEDE FOR OUTPUT DATA PUBLICATION ########
1192 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1193 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1194 >            ################################################
1195 >            txt += 'else\n'
1196 >            txt += '    exit_status=60302\n'
1197 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1198 >            ############# FEDE ADDED CHECK FOR OUTPUT #############
1199 >            if fileWithSuffix in self.output_file:
1200 >                txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1201 >                txt += '    exit $exit_status\n'
1202 >            #######################################################    
1203              if common.scheduler.boss_scheduler_name == 'condor_g':
1204                  txt += '    if [ $middleware == OSG ]; then \n'
1205                  txt += '        echo "prepare dummy output file"\n'
1206                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1207                  txt += '    fi \n'
728            txt += 'else\n'
729            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1208              txt += 'fi\n'
1209 <      
1209 >        file_list = []
1210 >        for fileWithSuffix in (self.output_file):
1211 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1212 >            
1213 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1214          txt += 'cd $RUNTIME_AREA\n'
733        file_list=file_list[:-1]
734        txt += 'file_list="'+file_list+'"\n'
735        ### OLI_DANIELE
736        txt += 'if [ $middleware == OSG ]; then\n'  
737        txt += '    cd $RUNTIME_AREA\n'
738        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
739        txt += '    /bin/rm -rf $WORKING_DIR\n'
740        txt += '    if [ -d $WORKING_DIR ] ;then\n'
741        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
742        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
743        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
744        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
745        txt += '    fi\n'
746        txt += 'fi\n'
747        txt += '\n'
1215          return txt
1216  
1217      def numberFile_(self, file, txt):
# Line 755 | Line 1222 | class Cmssw(JobType):
1222          # take away last extension
1223          name = p[0]
1224          for x in p[1:-1]:
1225 <           name=name+"."+x
1225 >            name=name+"."+x
1226          # add "_txt"
1227          if len(p)>1:
1228 <          ext = p[len(p)-1]
1229 <          #result = name + '_' + str(txt) + "." + ext
763 <          result = name + '_' + txt + "." + ext
1228 >            ext = p[len(p)-1]
1229 >            result = name + '_' + txt + "." + ext
1230          else:
1231 <          #result = name + '_' + str(txt)
766 <          result = name + '_' + txt
1231 >            result = name + '_' + txt
1232          
1233          return result
1234  
1235 <    def getRequirements(self):
1235 >    def getRequirements(self, nj=[]):
1236          """
1237          return job requirements to add to jdl files
1238          """
1239          req = ''
1240 <        if common.analisys_common_info['sites']:
1241 <            if common.analisys_common_info['sw_version']:
1242 <                req='Member("VO-cms-' + \
1243 <                     common.analisys_common_info['sw_version'] + \
1244 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1245 <            if len(common.analisys_common_info['sites'])>0:
1246 <                req = req + ' && ('
1247 <                for i in range(len(common.analisys_common_info['sites'])):
1248 <                    req = req + 'other.GlueCEInfoHostName == "' \
1249 <                         + common.analisys_common_info['sites'][i] + '"'
1250 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1251 <                        req = req + ' || '
1252 <            req = req + ')'
788 <        #print "req = ", req
1240 >        if self.version:
1241 >            req='Member("VO-cms-' + \
1242 >                 self.version + \
1243 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1244 >        ## SL add requirement for OS version only if SL4
1245 >        #reSL4 = re.compile( r'slc4' )
1246 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1247 >            req+=' && Member("VO-cms-' + \
1248 >                 self.executable_arch + \
1249 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1250 >
1251 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1252 >
1253          return req
1254  
1255      def configFilename(self):
# Line 802 | Line 1266 | class Cmssw(JobType):
1266          txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1267          txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1268          txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1269 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1270          txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1271 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1272 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1273 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1271 >        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1272 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1273 >        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1274 >        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1275          txt += '   else\n'
1276 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1276 >        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1277          txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1278          txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1279          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1280 +        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1281 +        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1282 +        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1283          txt += '       exit 1\n'
1284          txt += '\n'
1285          txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1286          txt += '       cd $RUNTIME_AREA\n'
1287          txt += '       /bin/rm -rf $WORKING_DIR\n'
1288          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1289 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1290 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1291 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1292 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1289 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1290 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1291 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1292 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1293 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1294 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1295 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1296          txt += '       fi\n'
1297          txt += '\n'
1298          txt += '       exit 1\n'
# Line 844 | Line 1316 | class Cmssw(JobType):
1316          txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1317          txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1318          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1319 +        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1320 +        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1321 +        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1322          txt += '       exit 1\n'
1323          txt += '   else\n'
1324          txt += '       echo "Sourcing environment... "\n'
# Line 852 | Line 1327 | class Cmssw(JobType):
1327          txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1328          txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1329          txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1330 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1331 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1332 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1333          txt += '           exit 1\n'
1334          txt += '       fi\n'
1335          txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
# Line 862 | Line 1340 | class Cmssw(JobType):
1340          txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1341          txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1342          txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1343 +        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1344 +        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1345 +        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1346          txt += '           exit 1\n'
1347          txt += '       fi\n'
1348          txt += '   fi\n'
1349          txt += '   \n'
869        txt += '   string=`cat /etc/redhat-release`\n'
870        txt += '   echo $string\n'
871        txt += '   if [[ $string = *alhalla* ]]; then\n'
872        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
873        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
874        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
875        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
876        txt += '   else\n'
877        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
878        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
879        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
880        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
881        txt += '       exit 1\n'
882        txt += '   fi\n'
1350          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1351          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1352          return txt
1353  
1354 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1355 +    def modifyReport(self, nj):
1356 +        """
1357 +        insert the part of the script that modifies the FrameworkJob Report
1358 +        """
1359 +
1360 +        txt = ''
1361 +        txt += 'echo "Modify Job Report" \n'
1362 +        #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1363 +        ################ FEDE FOR DBS2 #############################################
1364 +        txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1365 +        #############################################################################
1366 +        try:
1367 +            publish_data = int(self.cfg_params['USER.publish_data'])          
1368 +        except KeyError:
1369 +            publish_data = 0
1370 +
1371 +        txt += 'if [ -z "$SE" ]; then\n'
1372 +        txt += '    SE="" \n'
1373 +        txt += 'fi \n'
1374 +        txt += 'if [ -z "$SE_PATH" ]; then\n'
1375 +        txt += '    SE_PATH="" \n'
1376 +        txt += 'fi \n'
1377 +        txt += 'echo "SE = $SE"\n'
1378 +        txt += 'echo "SE_PATH = $SE_PATH"\n'
1379 +
1380 +        if (publish_data == 1):  
1381 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1382 +            processedDataset = self.cfg_params['USER.publish_data_name']
1383 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1384 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1385 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1386 +            #### FEDE: added slash in LFN ##############
1387 +            txt += '    FOR_LFN=/copy_problems/ \n'
1388 +            txt += 'else \n'
1389 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1390 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1391 +            txt += '    FOR_LFN=/store$tmp \n'
1392 +            txt += 'fi \n'
1393 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1394 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1395 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1396 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1397 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1398 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1399 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1400 +      
1401 +            txt += 'modifyReport_result=$?\n'
1402 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1403 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1404 +            txt += '    exit_status=1\n'
1405 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1406 +            txt += 'else\n'
1407 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1408 +            txt += 'fi\n'
1409 +        else:
1410 +            txt += 'ProcessedDataset=no_data_to_publish \n'
1411 +            #### FEDE: added slash in LFN ##############
1412 +            txt += 'FOR_LFN=/local/ \n'
1413 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1414 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1415 +        return txt
1416 +
1417 +    def cleanEnv(self):
1418 +        ### OLI_DANIELE
1419 +        txt = ''
1420 +        txt += 'if [ $middleware == OSG ]; then\n'  
1421 +        txt += '    cd $RUNTIME_AREA\n'
1422 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1423 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1424 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1425 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1426 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1427 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1428 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1429 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1430 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1431 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1432 +        txt += '    fi\n'
1433 +        txt += 'fi\n'
1434 +        txt += '\n'
1435 +        return txt
1436 +
1437      def setParam_(self, param, value):
1438          self._params[param] = value
1439  
1440      def getParams(self):
1441          return self._params
1442 +
1443 +    def setTaskid_(self):
1444 +        self._taskId = self.cfg_params['taskId']
1445 +        
1446 +    def getTaskid(self):
1447 +        return self._taskId
1448 +
1449 +    def uniquelist(self, old):
1450 +        """
1451 +        remove duplicates from a list
1452 +        """
1453 +        nd={}
1454 +        for e in old:
1455 +            nd[e]=0
1456 +        return nd.keys()

Diff Legend

(no marker, old line number) Removed lines
+ Added lines
< Changed lines (old revision text)
> Changed lines (new revision text)