ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.2 by slacapra, Wed Apr 26 15:31:06 2006 UTC vs.
Revision 1.169 by spiga, Tue Apr 1 14:53:36 2008 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6
7 import DataDiscovery
8 import DataLocation
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
16 <        self.analisys_common_info = {}
16 >        self.argsList = []
17 >
18 >        self._params = {}
19 >        self.cfg_params = cfg_params
20 >        # init BlackWhiteListParser
21 >        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
22 >
23 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
24 >
25 >        # number of jobs requested to be created, limit obj splitting
26 >        self.ncjobs = ncjobs
27  
28          log = common.logger
29 <        
29 >
30          self.scram = Scram.Scram(cfg_params)
23        scramArea = ''
31          self.additional_inbox_files = []
32          self.scriptExe = ''
33          self.executable = ''
34 +        self.executable_arch = self.scram.getArch()
35          self.tgz_name = 'default.tgz'
36 +        self.additional_tgz_name = 'additional.tgz'
37 +        self.scriptName = 'CMSSW.sh'
38 +        self.pset = ''      #scrip use case Da
39 +        self.datasetPath = '' #scrip use case Da
40 +
41 +        # set FJR file name
42 +        self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 <        common.analisys_common_info['sw_version'] = self.version
45 >
46 >        #
47 >        # Try to block creation in case of arch/version mismatch
48 >        #
49 >
50 > #        a = string.split(self.version, "_")
51 > #
52 > #        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 > #            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
54 > #            common.logger.message(msg)
55 > #        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
56 > #            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
57 > #            raise CrabException(msg)
58 > #
59 >
60 >        self.setParam_('application', self.version)
61  
62          ### collect Data cards
63 <        try:
64 <            self.owner = cfg_params['CMSSW.owner']
65 <            log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
36 <            self.dataset = cfg_params['CMSSW.dataset']
37 <            log.debug(6, "CMSSW::CMSSW(): dataset = "+self.dataset)
38 <        except KeyError:
39 <            msg = "Error: owner and/or dataset not defined "
63 >
64 >        if not cfg_params.has_key('CMSSW.datasetpath'):
65 >            msg = "Error: datasetpath not defined "
66              raise CrabException(msg)
67 +        tmp =  cfg_params['CMSSW.datasetpath']
68 +        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 +        if string.lower(tmp)=='none':
70 +            self.datasetPath = None
71 +            self.selectNoInput = 1
72 +        else:
73 +            self.datasetPath = tmp
74 +            self.selectNoInput = 0
75 +
76 +        # ML monitoring
77 +        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
78 +        if not self.datasetPath:
79 +            self.setParam_('dataset', 'None')
80 +            self.setParam_('owner', 'None')
81 +        else:
82 +            ## SL what is supposed to fail here?
83 +            try:
84 +                datasetpath_split = self.datasetPath.split("/")
85 +                # standard style
86 +                self.setParam_('datasetFull', self.datasetPath)
87 +                self.setParam_('dataset', datasetpath_split[1])
88 +                self.setParam_('owner', datasetpath_split[2])
89 +            except:
90 +                self.setParam_('dataset', self.datasetPath)
91 +                self.setParam_('owner', self.datasetPath)
92 +
93 +        self.setParam_('taskId', common._db.queryTask('name')) ## new BL--DS
94  
95          self.dataTiers = []
43        try:
44            tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
45            for tmp in tmpDataTiers:
46                tmp=string.strip(tmp)
47                self.dataTiers.append(tmp)
48                pass
49            pass
50        except KeyError:
51            pass
52        log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
96  
97          ## now the application
98 <        try:
99 <            self.executable = cfg_params['CMSSW.executable']
100 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
58 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
59 <            log.debug(3,msg)
60 <        except KeyError:
61 <            self.executable = 'cmsRun'
62 <            msg = "User executable not defined. Use cmsRun"
63 <            log.debug(3,msg)
64 <            pass
98 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
99 >        self.setParam_('exe', self.executable)
100 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102 <        try:
103 <            self.pset = cfg_params['CMSSW.pset']
104 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
102 >        if not cfg_params.has_key('CMSSW.pset'):
103 >            raise CrabException("PSet file missing. Cannot run cmsRun ")
104 >        self.pset = cfg_params['CMSSW.pset']
105 >        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
106 >        if self.pset.lower() != 'none' :
107              if (not os.path.exists(self.pset)):
108                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
109 <        except KeyError:
110 <            raise CrabException("PSet file missing. Cannot run cmsRun ")
109 >        else:
110 >            self.pset = None
111  
112          # output files
113 <        try:
114 <            self.output_file = []
113 >        ## stuff which must be returned always via sandbox
114 >        self.output_file_sandbox = []
115  
116 <            tmp = cfg_params['CMSSW.output_file']
117 <            if tmp != '':
80 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
81 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
82 <                for tmp in tmpOutFiles:
83 <                    tmp=string.strip(tmp)
84 <                    self.output_file.append(tmp)
85 <                    pass
116 >        # add fjr report by default via sandbox
117 >        self.output_file_sandbox.append(self.fjrFileName)
118  
119 <            else:
120 <                log.message("No output file defined: only stdout/err will be available")
119 >        # other output files to be returned via sandbox or copied to SE
120 >        self.output_file = []
121 >        tmp = cfg_params.get('CMSSW.output_file',None)
122 >        if tmp :
123 >            tmpOutFiles = string.split(tmp,',')
124 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
125 >            for tmp in tmpOutFiles:
126 >                tmp=string.strip(tmp)
127 >                self.output_file.append(tmp)
128                  pass
129 <            pass
130 <        except KeyError:
131 <            log.message("No output file defined: only stdout/err will be available")
93 <            pass
129 >        else:
130 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
131 >        pass
132  
133          # script_exe file as additional file in inputSandbox
134 <        try:
135 <           self.scriptExe = cfg_params['CMSSW.script_exe']
136 <           self.additional_inbox_files.append(self.scriptExe)
137 <        except KeyError:
138 <           pass
139 <        if self.scriptExe != '':
140 <           if os.path.isfile(self.scriptExe):
141 <              pass
142 <           else:
143 <              log.message("WARNING. file "+self.scriptExe+" not found")
144 <              sys.exit()
145 <                  
134 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
135 >        if self.scriptExe :
136 >           if not os.path.isfile(self.scriptExe):
137 >              msg ="ERROR. file "+self.scriptExe+" not found"
138 >              raise CrabException(msg)
139 >           self.additional_inbox_files.append(string.strip(self.scriptExe))
140 >
141 >        #CarlosDaniele
142 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
143 >           msg ="Error. script_exe  not defined"
144 >           raise CrabException(msg)
145 >
146          ## additional input files
147 <        try:
148 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
147 >        if cfg_params.has_key('USER.additional_input_files'):
148 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
149              for tmp in tmpAddFiles:
150 <                tmp=string.strip(tmp)
151 <                self.additional_inbox_files.append(tmp)
150 >                tmp = string.strip(tmp)
151 >                dirname = ''
152 >                if not tmp[0]=="/": dirname = "."
153 >                files = []
154 >                if string.find(tmp,"*")>-1:
155 >                    files = glob.glob(os.path.join(dirname, tmp))
156 >                    if len(files)==0:
157 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
158 >                else:
159 >                    files.append(tmp)
160 >                for file in files:
161 >                    if not os.path.exists(file):
162 >                        raise CrabException("Additional input file not found: "+file)
163 >                    pass
164 >                    # fname = string.split(file, '/')[-1]
165 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
166 >                    # shutil.copyfile(file, storedFile)
167 >                    self.additional_inbox_files.append(string.strip(file))
168                  pass
169              pass
170 <        except KeyError:
171 <            pass
170 >            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
171 >        pass
172  
173 <        try:
174 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
175 <        except KeyError:
176 <            msg = 'Must define total_number_of_events and job_number_of_events'
177 <            raise CrabException(msg)
178 <            
179 < #Marco: FirstEvent is nolonger used inside PSet
126 < #        try:
127 < #            self.first = int(cfg_params['CMSSW.first_event'])
128 < #        except KeyError:
129 < #            self.first = 0
130 < #            pass
131 < #        log.debug(6, "Orca::Orca(): total number of events = "+`self.total_number_of_events`)
132 <        #log.debug(6, "Orca::Orca(): events per job = "+`self.job_number_of_events`)
133 < #        log.debug(6, "Orca::Orca(): first event = "+`self.first`)
134 <        
135 <        CEBlackList = []
136 <        try:
137 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
138 <            for tmp in tmpBad:
139 <                tmp=string.strip(tmp)
140 <                CEBlackList.append(tmp)
141 <        except KeyError:
142 <            pass
173 >        ## Events per job
174 >        if cfg_params.has_key('CMSSW.events_per_job'):
175 >            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
176 >            self.selectEventsPerJob = 1
177 >        else:
178 >            self.eventsPerJob = -1
179 >            self.selectEventsPerJob = 0
180  
181 <        self.reCEBlackList=[]
182 <        for bad in CEBlackList:
183 <            self.reCEBlackList.append(re.compile( bad ))
181 >        ## number of jobs
182 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
183 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
184 >            self.selectNumberOfJobs = 1
185 >        else:
186 >            self.theNumberOfJobs = 0
187 >            self.selectNumberOfJobs = 0
188  
189 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
189 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
190 >            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
191 >            self.selectTotalNumberEvents = 1
192 >        else:
193 >            self.total_number_of_events = 0
194 >            self.selectTotalNumberEvents = 0
195  
196 <        CEWhiteList = []
197 <        try:
198 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
199 <            #tmpGood = ['cern']
200 <            for tmp in tmpGood:
201 <                tmp=string.strip(tmp)
202 <                #if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch
203 <                CEWhiteList.append(tmp)
204 <        except KeyError:
205 <            pass
196 >        if self.pset != None: #CarlosDaniele
197 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
198 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
199 >                 raise CrabException(msg)
200 >        else:
201 >             if (self.selectNumberOfJobs == 0):
202 >                 msg = 'Must specify  number_of_jobs.'
203 >                 raise CrabException(msg)
204 >
205 >        ## New method of dealing with seeds
206 >        self.incrementSeeds = []
207 >        self.preserveSeeds = []
208 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
209 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
210 >            for tmp in tmpList:
211 >                tmp.strip()
212 >                self.preserveSeeds.append(tmp)
213 >        if cfg_params.has_key('CMSSW.increment_seeds'):
214 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
215 >            for tmp in tmpList:
216 >                tmp.strip()
217 >                self.incrementSeeds.append(tmp)
218 >
219 >        ## Old method of dealing with seeds
220 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
221 >        ## remove
222 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
223 >        if self.sourceSeed:
224 >          print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
225 >          self.incrementSeeds.append('sourceSeed')
226 >
227 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
228 >        if self.sourceSeedVtx:
229 >          print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
230 >          self.incrementSeeds.append('VtxSmeared')
231 >
232 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
233 >        if self.sourceSeedG4:
234 >          print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
235 >          self.incrementSeeds.append('g4SimHits')
236 >
237 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
238 >        if self.sourceSeedMix:
239 >          print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
240 >          self.incrementSeeds.append('mix')
241 >
242 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
243 >
244 >        if self.pset != None: #CarlosDaniele
245 >            import PsetManipulator as pp
246 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
247  
248 <        #print 'CEWhiteList: ',CEWhiteList
162 <        self.reCEWhiteList=[]
163 <        for Good in CEWhiteList:
164 <            self.reCEWhiteList.append(re.compile( Good ))
248 >        # Copy/return
249  
250 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
250 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
251 >        self.return_data = int(cfg_params.get('USER.return_data',0))
252  
253          #DBSDLS-start
254 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
254 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
255          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
256          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
257 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
258          ## Perform the data location and discovery (based on DBS/DLS)
259 <        self.DataDiscoveryAndLocation(cfg_params)
260 <        #DBSDLS-end          
259 >        ## SL: Don't if NONE is specified as input (pythia use case)
260 >        blockSites = {}
261 >        if self.datasetPath:
262 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
263 >        #DBSDLS-end
264  
265          self.tgzNameWithPath = self.getTarBall(self.executable)
266  
267 +        ## Select Splitting
268 +        if self.selectNoInput:
269 +            if self.pset == None: #CarlosDaniele
270 +                self.jobSplittingForScript()
271 +            else:
272 +                self.jobSplittingNoInput()
273 +        else:
274 +            self.jobSplittingByBlocks(blockSites)
275 +
276 +        # modify Pset
277 +        if self.pset != None: #CarlosDaniele
278 +            try:
279 +                # Add FrameworkJobReport to parameter-set, set max events.
280 +                # Reset later for data jobs by writeCFG which does all modifications
281 +                PsetEdit.addCrabFJR(self.fjrFileName)
282 +                PsetEdit.maxEvent(self.eventsPerJob)
283 +                PsetEdit.psetWriter(self.configFilename())
284 +            except:
285 +                msg='Error while manipuliating ParameterSet: exiting...'
286 +                raise CrabException(msg)
287 +
288      def DataDiscoveryAndLocation(self, cfg_params):
289  
290 <        fun = "CMSSW::DataDiscoveryAndLocation()"
290 >        import DataDiscovery
291 >        import DataLocation
292 >        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
293 >
294 >        datasetPath=self.datasetPath
295  
296          ## Contact the DBS
297 +        common.logger.message("Contacting Data Discovery Services ...")
298          try:
299 <            self.pubdata=DataDiscovery.DataDiscovery(self.owner,
185 <                                                     self.dataset,
186 <                                                     self.dataTiers,
187 <                                                     cfg_params)
299 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
300              self.pubdata.fetchDBSInfo()
301  
302          except DataDiscovery.NotExistingDatasetError, ex :
303              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
304              raise CrabException(msg)
193
305          except DataDiscovery.NoDataTierinProvenanceError, ex :
306              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
307              raise CrabException(msg)
308          except DataDiscovery.DataDiscoveryError, ex:
309 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
309 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
310              raise CrabException(msg)
311  
312 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
313 <        self.DBSPaths=self.pubdata.getDBSPaths()
314 <        common.logger.message("Required data are : ")
204 <        for path in self.DBSPaths:
205 <            common.logger.message(" --> "+path )
312 >        self.filesbyblock=self.pubdata.getFiles()
313 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
314 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
315  
316          ## get max number of events
317 <        common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
209 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
210 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
211 <
212 <        ## get fileblocks corresponding to the required data
213 <        fb=self.pubdata.getFileBlocks()
214 <        common.logger.debug(5,"fileblocks are %s"%fb)
317 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
318  
319          ## Contact the DLS and build a list of sites hosting the fileblocks
320          try:
321 <            dataloc=DataLocation.DataLocation(self.pubdata.getFileBlocks(),cfg_params)
321 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
322              dataloc.fetchDLSInfo()
323          except DataLocation.DataLocationError , ex:
324              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
325              raise CrabException(msg)
223        
224        allsites=dataloc.getSites()
225        common.logger.debug(5,"sites are %s"%allsites)
226        sites=self.checkBlackList(allsites)
227        common.logger.debug(5,"sites are (after black list) %s"%sites)
228        sites=self.checkWhiteList(sites)
229        common.logger.debug(5,"sites are (after white list) %s"%sites)
326  
327 <        if len(sites)==0:
328 <            msg = 'No sites hosting all the needed data! Exiting... '
329 <            raise CrabException(msg)
330 <        common.logger.message("List of Sites hosting the data : "+str(sites))
331 <        common.logger.debug(6, "List of Sites: "+str(sites))
332 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
333 <        return
334 <        
335 <    def checkBlackList(self, allSites):
336 <        if len(self.reCEBlackList)==0: return allSites
337 <        sites = []
338 <        for site in allSites:
243 <            common.logger.debug(10,'Site '+site)
244 <            good=1
245 <            for re in self.reCEBlackList:
246 <                if re.search(site):
247 <                    common.logger.message('CE in black list, skipping site '+site)
248 <                    good=0
249 <                pass
250 <            if good: sites.append(site)
251 <        if len(sites) == 0:
252 <            common.logger.debug(3,"No sites found after BlackList")
327 >
328 >        sites = dataloc.getSites()
329 >        allSites = []
330 >        listSites = sites.values()
331 >        for listSite in listSites:
332 >            for oneSite in listSite:
333 >                allSites.append(oneSite)
334 >        allSites = self.uniquelist(allSites)
335 >
336 >        # screen output
337 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
338 >
339          return sites
340  
341 <    def checkWhiteList(self, allsites):
341 >  # to Be Removed  DS -- BL
342 >  #  def setArgsList(self, argsList):
343 >  #      self.argsList = argsList
344 >
345 >    def jobSplittingByBlocks(self, blockSites):
346 >        """
347 >        Perform job splitting. Jobs run over an integer number of files
348 >        and no more than one block.
349 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
350 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
351 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
352 >                  self.maxEvents, self.filesbyblock
353 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
354 >              self.total_number_of_jobs - Total # of jobs
355 >              self.list_of_args - File(s) job will run on (a list of lists)
356 >        """
357  
358 <        if len(self.reCEWhiteList)==0: return pubDBUrls
359 <        sites = []
360 <        for site in allsites:
361 <            #print 'connecting to the URL ',url
362 <            good=0
363 <            for re in self.reCEWhiteList:
364 <                if re.search(site):
365 <                    common.logger.debug(5,'CE in white list, adding site '+site)
366 <                    good=1
367 <                if not good: continue
368 <                sites.append(site)
369 <        if len(sites) == 0:
370 <            common.logger.message("No sites found after WhiteList\n")
358 >        # ---- Handle the possible job splitting configurations ---- #
359 >        if (self.selectTotalNumberEvents):
360 >            totalEventsRequested = self.total_number_of_events
361 >        if (self.selectEventsPerJob):
362 >            eventsPerJobRequested = self.eventsPerJob
363 >            if (self.selectNumberOfJobs):
364 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
365 >
366 >        # If user requested all the events in the dataset
367 >        if (totalEventsRequested == -1):
368 >            eventsRemaining=self.maxEvents
369 >        # If user requested more events than are in the dataset
370 >        elif (totalEventsRequested > self.maxEvents):
371 >            eventsRemaining = self.maxEvents
372 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
373 >        # If user requested less events than are in the dataset
374          else:
375 <            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
376 <        return sites
375 >            eventsRemaining = totalEventsRequested
376 >
377 >        # If user requested more events per job than are in the dataset
378 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
379 >            eventsPerJobRequested = self.maxEvents
380 >
381 >        # For user info at end
382 >        totalEventCount = 0
383 >
384 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
385 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
386 >
387 >        if (self.selectNumberOfJobs):
388 >            common.logger.message("May not create the exact number_of_jobs requested.")
389 >
390 >        if ( self.ncjobs == 'all' ) :
391 >            totalNumberOfJobs = 999999999
392 >        else :
393 >            totalNumberOfJobs = self.ncjobs
394 >
395 >
396 >        blocks = blockSites.keys()
397 >        blockCount = 0
398 >        # Backup variable in case self.maxEvents counted events in a non-included block
399 >        numBlocksInDataset = len(blocks)
400 >
401 >        jobCount = 0
402 >        list_of_lists = []
403 >
404 >        # list tracking which jobs are in which jobs belong to which block
405 >        jobsOfBlock = {}
406 >
407 >        # ---- Iterate over the blocks in the dataset until ---- #
408 >        # ---- we've met the requested total # of events    ---- #
409 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
410 >            block = blocks[blockCount]
411 >            blockCount += 1
412 >            if block not in jobsOfBlock.keys() :
413 >                jobsOfBlock[block] = []
414 >
415 >            if self.eventsbyblock.has_key(block) :
416 >                numEventsInBlock = self.eventsbyblock[block]
417 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
418 >
419 >                files = self.filesbyblock[block]
420 >                numFilesInBlock = len(files)
421 >                if (numFilesInBlock <= 0):
422 >                    continue
423 >                fileCount = 0
424 >
425 >                # ---- New block => New job ---- #
426 >                parString = ""
427 >                # counter for number of events in files currently worked on
428 >                filesEventCount = 0
429 >                # flag if next while loop should touch new file
430 >                newFile = 1
431 >                # job event counter
432 >                jobSkipEventCount = 0
433 >
434 >                # ---- Iterate over the files in the block until we've met the requested ---- #
435 >                # ---- total # of events or we've gone over all the files in this block  ---- #
436 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
437 >                    file = files[fileCount]
438 >                    if newFile :
439 >                        try:
440 >                            numEventsInFile = self.eventsbyfile[file]
441 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
442 >                            # increase filesEventCount
443 >                            filesEventCount += numEventsInFile
444 >                            # Add file to current job
445 >                            parString += '\\\"' + file + '\\\"\,'
446 >                            newFile = 0
447 >                        except KeyError:
448 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
449 >
450 >
451 >                    # if less events in file remain than eventsPerJobRequested
452 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
453 >                        # if last file in block
454 >                        if ( fileCount == numFilesInBlock-1 ) :
455 >                            # end job using last file, use remaining events in block
456 >                            # close job and touch new file
457 >                            fullString = parString[:-2]
458 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
459 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
460 >                            self.jobDestination.append(blockSites[block])
461 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
462 >                            # fill jobs of block dictionary
463 >                            jobsOfBlock[block].append(jobCount+1)
464 >                            # reset counter
465 >                            jobCount = jobCount + 1
466 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
467 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
468 >                            jobSkipEventCount = 0
469 >                            # reset file
470 >                            parString = ""
471 >                            filesEventCount = 0
472 >                            newFile = 1
473 >                            fileCount += 1
474 >                        else :
475 >                            # go to next file
476 >                            newFile = 1
477 >                            fileCount += 1
478 >                    # if events in file equal to eventsPerJobRequested
479 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
480 >                        # close job and touch new file
481 >                        fullString = parString[:-2]
482 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
483 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
484 >                        self.jobDestination.append(blockSites[block])
485 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
486 >                        jobsOfBlock[block].append(jobCount+1)
487 >                        # reset counter
488 >                        jobCount = jobCount + 1
489 >                        totalEventCount = totalEventCount + eventsPerJobRequested
490 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
491 >                        jobSkipEventCount = 0
492 >                        # reset file
493 >                        parString = ""
494 >                        filesEventCount = 0
495 >                        newFile = 1
496 >                        fileCount += 1
497 >
498 >                    # if more events in file remain than eventsPerJobRequested
499 >                    else :
500 >                        # close job but don't touch new file
501 >                        fullString = parString[:-2]
502 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
503 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
504 >                        self.jobDestination.append(blockSites[block])
505 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
506 >                        jobsOfBlock[block].append(jobCount+1)
507 >                        # increase counter
508 >                        jobCount = jobCount + 1
509 >                        totalEventCount = totalEventCount + eventsPerJobRequested
510 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
511 >                        # calculate skip events for last file
512 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
513 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
514 >                        # remove all but the last file
515 >                        filesEventCount = self.eventsbyfile[file]
516 >                        parString = '\\\"' + file + '\\\"\,'
517 >                    pass # END if
518 >                pass # END while (iterate over files in the block)
519 >        pass # END while (iterate over blocks in the dataset)
520 >        self.ncjobs = self.total_number_of_jobs = jobCount
521 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
522 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
523 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
524 >
525 >        # screen output
526 >        screenOutput = "List of jobs and available destination sites:\n\n"
527 >
528 >        # keep trace of block with no sites to print a warning at the end
529 >        noSiteBlock = []
530 >        bloskNoSite = []
531 >
532 >        blockCounter = 0
533 >        for block in blocks:
534 >            if block in jobsOfBlock.keys() :
535 >                blockCounter += 1
536 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
537 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
538 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
539 >                    bloskNoSite.append( blockCounter )
540 >
541 >        common.logger.message(screenOutput)
542 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
543 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
544 >            virgola = ""
545 >            if len(bloskNoSite) > 1:
546 >                virgola = ","
547 >            for block in bloskNoSite:
548 >                msg += ' ' + str(block) + virgola
549 >            msg += '\n               Related jobs:\n                 '
550 >            virgola = ""
551 >            if len(noSiteBlock) > 1:
552 >                virgola = ","
553 >            for range_jobs in noSiteBlock:
554 >                msg += str(range_jobs) + virgola
555 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
556 >            if self.cfg_params.has_key('EDG.se_white_list'):
557 >                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
558 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
559 >                msg += 'Please check if the dataset is available at this site!)\n'
560 >            if self.cfg_params.has_key('EDG.ce_white_list'):
561 >                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
562 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
563 >                msg += 'Please check if the dataset is available at this site!)\n'
564 >
565 >            common.logger.message(msg)
566 >
567 >        self.list_of_args = list_of_lists
568 >        return
569 >
570 >    def jobSplittingNoInput(self):
571 >        """
572 >        Perform job splitting based on number of event per job
573 >        """
574 >        common.logger.debug(5,'Splitting per events')
575 >
576 >        if (self.selectEventsPerJob):
577 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
578 >        if (self.selectNumberOfJobs):
579 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
580 >        if (self.selectTotalNumberEvents):
581 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
582 >
583 >        if (self.total_number_of_events < 0):
584 >            msg='Cannot split jobs per Events with "-1" as total number of events'
585 >            raise CrabException(msg)
586 >
587 >        if (self.selectEventsPerJob):
588 >            if (self.selectTotalNumberEvents):
589 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
590 >            elif(self.selectNumberOfJobs) :
591 >                self.total_number_of_jobs =self.theNumberOfJobs
592 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
593 >
594 >        elif (self.selectNumberOfJobs) :
595 >            self.total_number_of_jobs = self.theNumberOfJobs
596 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
597 >
598 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
599 >
600 >        # is there any remainder?
601 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
602 >
603 >        common.logger.debug(5,'Check  '+str(check))
604 >
605 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
606 >        if check > 0:
607 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
608 >
609 >        # argument is seed number.$i
610 >        self.list_of_args = []
611 >        for i in range(self.total_number_of_jobs):
612 >            ## Since there is no input, any site is good
613 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
614 >            args=[]
615 >            if (self.firstRun):
616 >                ## pythia first run
617 >                args.append(str(self.firstRun)+str(i))
618 >            self.list_of_args.append(args)
619 >
620 >        return
621 >
622 >
623 >    def jobSplittingForScript(self):#CarlosDaniele
624 >        """
625 >        Perform job splitting based on number of job
626 >        """
627 >        common.logger.debug(5,'Splitting per job')
628 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
629 >
630 >        self.total_number_of_jobs = self.theNumberOfJobs
631 >
632 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
633 >
634 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
635 >
636 >        # argument is seed number.$i
637 >        self.list_of_args = []
638 >        for i in range(self.total_number_of_jobs):
639 >            ## Since there is no input, any site is good
640 >           # self.jobDestination.append(["Any"])
641 >            self.jobDestination.append([""])
642 >            ## no random seed
643 >            self.list_of_args.append([str(i)])
644 >        return
645 >
646 >    def split(self, jobParams):
647 >
648 >        #### Fabio
649 >        njobs = self.total_number_of_jobs
650 >        arglist = self.list_of_args
651 >        # create the empty structure
652 >        for i in range(njobs):
653 >            jobParams.append("")
654 >
655 >        listID=[]
656 >        listField=[]
657 >        for job in range(njobs):
658 >            jobParams[job] = arglist[job]
659 >            listID.append(job+1)
660 >            job_ToSave ={}
661 >            concString = ' '
662 >            argu=''
663 >            if len(jobParams[job]):
664 >                argu +=   concString.join(jobParams[job] )
665 >            job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS
666 >            job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
667 >            #common._db.updateJob_(job,job_ToSave)## new BL--DS
668 >            listField.append(job_ToSave)
669 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
670 >            +"                     Destination: "+str(self.jobDestination[job])
671 >            common.logger.debug(5,msg)
672 >            #common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
673 >        common._db.updateJob_(listID,listField)## new BL--DS
674 >        ## Pay Attention Here....DS--BL
675 >        self.argsList = (len(jobParams[1])+1)
676 >
677 >        return
678 > #
679 > #    def getJobTypeArguments(self, nj, sched):
680 > #        result = ''
681 > #        jobs=[]
682 > #        jobs.append(nj)
683 > #        for i in common._db.queryJob('arguments',jobs):##  BL--DS
684 > #            result=result+str(i)+" "
685 > #        return result
686 >
687 >    def numberOfJobs(self):
688 >        # Fabio
689 >        return self.total_number_of_jobs
690  
691      def getTarBall(self, exe):
692          """
693          Return the TarBall with lib and exe
694          """
695 <        
695 >
696          # if it exist, just return it
697 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
697 >        #
698 >        # Marco. Let's start to use relative path for Boss XML files
699 >        #
700 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
701          if os.path.exists(self.tgzNameWithPath):
702              return self.tgzNameWithPath
703  
# Line 291 | Line 711 | class Cmssw(JobType):
711          # First of all declare the user Scram area
712          swArea = self.scram.getSWArea_()
713          #print "swArea = ", swArea
714 <        swVersion = self.scram.getSWVersion()
715 <        #print "swVersion = ", swVersion
714 >        # swVersion = self.scram.getSWVersion()
715 >        # print "swVersion = ", swVersion
716          swReleaseTop = self.scram.getReleaseTop_()
717          #print "swReleaseTop = ", swReleaseTop
718 <        
718 >
719          ## check if working area is release top
720          if swReleaseTop == '' or swArea == swReleaseTop:
721              return
722  
723 <        filesToBeTarred = []
724 <        ## First find the executable
725 <        if (self.executable != ''):
726 <            exeWithPath = self.scram.findFile_(executable)
727 < #           print exeWithPath
728 <            if ( not exeWithPath ):
729 <                raise CrabException('User executable '+executable+' not found')
730 <
731 <            ## then check if it's private or not
732 <            if exeWithPath.find(swReleaseTop) == -1:
733 <                # the exe is private, so we must ship
734 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
735 <                path = swArea+'/'
736 <                exe = string.replace(exeWithPath, path,'')
737 <                filesToBeTarred.append(exe)
738 <                pass
739 <            else:
740 <                # the exe is from release, we'll find it on WN
741 <                pass
742 <
743 <        ## Now get the libraries: only those in local working area
744 <        libDir = 'lib'
745 <        lib = swArea+'/' +libDir
746 <        common.logger.debug(5,"lib "+lib+" to be tarred")
747 <        if os.path.exists(lib):
748 <            filesToBeTarred.append(libDir)
749 <
750 <        ## Now check if the Data dir is present
751 <        dataDir = 'src/Data/'
752 <        if os.path.isdir(swArea+'/'+dataDir):
753 <            filesToBeTarred.append(dataDir)
754 <
755 <        ## Create the tar-ball
756 <        if len(filesToBeTarred)>0:
757 <            cwd = os.getcwd()
758 <            os.chdir(swArea)
759 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
760 <            for line in filesToBeTarred:
761 <                tarcmd = tarcmd + line + ' '
762 <            cout = runCommand(tarcmd)
763 <            if not cout:
764 <                raise CrabException('Could not create tar-ball')
765 <            os.chdir(cwd)
766 <        else:
767 <            common.logger.debug(5,"No files to be to be tarred")
768 <        
723 >        import tarfile
724 >        try: # create tar ball
725 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
726 >            ## First find the executable
727 >            if (self.executable != ''):
728 >                exeWithPath = self.scram.findFile_(executable)
729 >                if ( not exeWithPath ):
730 >                    raise CrabException('User executable '+executable+' not found')
731 >
732 >                ## then check if it's private or not
733 >                if exeWithPath.find(swReleaseTop) == -1:
734 >                    # the exe is private, so we must ship
735 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
736 >                    path = swArea+'/'
737 >                    # distinguish case when script is in user project area or given by full path somewhere else
738 >                    if exeWithPath.find(path) >= 0 :
739 >                        exe = string.replace(exeWithPath, path,'')
740 >                        tar.add(path+exe,exe)
741 >                    else :
742 >                        tar.add(exeWithPath,os.path.basename(executable))
743 >                    pass
744 >                else:
745 >                    # the exe is from release, we'll find it on WN
746 >                    pass
747 >
748 >            ## Now get the libraries: only those in local working area
749 >            libDir = 'lib'
750 >            lib = swArea+'/' +libDir
751 >            common.logger.debug(5,"lib "+lib+" to be tarred")
752 >            if os.path.exists(lib):
753 >                tar.add(lib,libDir)
754 >
755 >            ## Now check if module dir is present
756 >            moduleDir = 'module'
757 >            module = swArea + '/' + moduleDir
758 >            if os.path.isdir(module):
759 >                tar.add(module,moduleDir)
760 >
761 >            ## Now check if any data dir(s) is present
762 >            swAreaLen=len(swArea)
763 >            for root, dirs, files in os.walk(swArea):
764 >                if "data" in dirs:
765 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
766 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
767 >
768 >
769 >            ## Add ProdCommon dir to tar
770 >            prodcommonDir = 'ProdCommon'
771 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
772 >            if os.path.isdir(prodcommonPath):
773 >                tar.add(prodcommonPath,prodcommonDir)
774 >
775 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
776 >            tar.close()
777 >        except :
778 >            raise CrabException('Could not create tar-ball')
779 >
780 >        ## check for tarball size
781 >        tarballinfo = os.stat(self.tgzNameWithPath)
782 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
783 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
784 >
785 >        ## create tar-ball with ML stuff
786 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
787 >        try:
788 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
789 >            path=os.environ['CRABDIR'] + '/python/'
790 >            #for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py']:
791 >            ### FEDE ####
792 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']:
793 >            ###############
794 >                tar.add(path+file,file)
795 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
796 >            tar.close()
797 >        except :
798 >            raise CrabException('Could not create ML files tar-ball')
799 >
800          return
801 <        
802 <    def wsSetupEnvironment(self, nj):
801 >
802 >    def additionalInputFileTgz(self):
803 >        """
804 >        Put all additional files into a tar ball and return its name
805 >        """
806 >        import tarfile
807 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
808 >        tar = tarfile.open(tarName, "w:gz")
809 >        for file in self.additional_inbox_files:
810 >            tar.add(file,string.split(file,'/')[-1])
811 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
812 >        tar.close()
813 >        return tarName
814 >
815 >    def wsSetupEnvironment(self, nj=0):
816          """
817          Returns part of a job script which prepares
818          the execution environment for the job 'nj'.
819          """
820          # Prepare JobType-independent part
821 <        txt = self.wsSetupCMSEnvironment_()
821 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
822 >        txt += 'echo ">>> setup environment"\n'
823 >        txt += 'if [ $middleware == LCG ]; then \n'
824 >        txt += self.wsSetupCMSLCGEnvironment_()
825 >        txt += 'elif [ $middleware == OSG ]; then\n'
826 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
827 >        txt += '    if [ ! $? == 0 ] ;then\n'
828 >        #txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
829 >        #txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
830 >        #txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
831 >        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
832 >        #txt += '        exit 1\n'
833 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
834 >        txt += '        job_exit_code=10016\n'
835 >        txt += '        func_exit\n'
836 >        txt += '    fi\n'
837 >        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
838 >        txt += '\n'
839 >        txt += '    echo "Change to working directory: $WORKING_DIR"\n'
840 >        txt += '    cd $WORKING_DIR\n'
841 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
842 >        txt += self.wsSetupCMSOSGEnvironment_()
843 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
844 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
845 >        txt += 'fi\n'
846  
847          # Prepare JobType-specific part
848          scram = self.scram.commandName()
849          txt += '\n\n'
850 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
850 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
851 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
852          txt += scram+' project CMSSW '+self.version+'\n'
853          txt += 'status=$?\n'
854          txt += 'if [ $status != 0 ] ; then\n'
855 <        txt += '   echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
856 <        txt += '   echo "JOB_EXIT_STATUS = 5"\n'
857 <        txt += '   echo "SanityCheckCode = 5" | tee -a $RUNTIME_AREA/$repo\n'
858 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
859 <        txt += '   exit 5 \n'
855 >        #txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
856 >        #txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
857 >        #txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
858 >        #txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
859 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
860 >        txt += '    job_exit_code=10034\n'
861 >        #txt += '    if [ $middleware == OSG ]; then \n'
862 >        #txt += '        cd $RUNTIME_AREA\n'
863 >        #txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
864 >        #txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
865 >        #txt += '        /bin/rm -rf $WORKING_DIR\n'
866 >        #txt += '        if [ -d $WORKING_DIR ] ;then\n'
867 >        #txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
868 >        #txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
869 >        #txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
870 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
871 >        #txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
872 >        #txt += '            job_exit_code=10017\n'
873 >        #txt += '        fi\n'
874 >        #txt += '    fi \n'
875 >        #txt += '    exit 1 \n'
876 >        txt += '    func_exit\n'
877          txt += 'fi \n'
372        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
878          txt += 'cd '+self.version+'\n'
879 +        ########## FEDE FOR DBS2 ######################
880 +        txt += 'SOFTWARE_DIR=`pwd`\n'
881 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
882 +        ###############################################
883          ### needed grep for bug in scramv1 ###
884          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
376
885          # Handle the arguments:
886          txt += "\n"
887 <        txt += "## ARGUMNETS: $1 Job Number\n"
380 <        # txt += "## ARGUMNETS: $2 First Event for this job\n"
381 <        # txt += "## ARGUMNETS: $3 Max Event for this job\n"
887 >        txt += "## number of arguments (first argument always jobnumber)\n"
888          txt += "\n"
889 <        txt += "narg=$#\n"
890 <        txt += "if [ $narg -lt 1 ]\n"
889 >       # txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
890 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
891          txt += "then\n"
892 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
893 <        txt += '    echo "JOB_EXIT_STATUS = 1"\n'
894 <        txt += '    echo "SanityCheckCode = 1" | tee -a $RUNTIME_AREA/$repo\n'
895 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
896 <        txt += "    exit 1\n"
892 >        #txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
893 >        #txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
894 >        #txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
895 >        #txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
896 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
897 >        txt += '    job_exit_code=50113\n'
898 >        #txt += '    if [ $middleware == OSG ]; then \n'
899 >        #txt += '        cd $RUNTIME_AREA\n'
900 >        #txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
901 >        #txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
902 >        #txt += '        /bin/rm -rf $WORKING_DIR\n'
903 >        #txt += '        if [ -d $WORKING_DIR ] ;then\n'
904 >        #txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
905 >        #txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
906 >        #txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
907 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
908 >        #txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
909 >        #txt += '            job_exit_code=10017\n'
910 >        #txt += '        fi\n'
911 >        #txt += '    fi\n'
912 >        #txt += "    exit 1\n"
913 >        txt += "    func_exit\n"
914          txt += "fi\n"
915          txt += "\n"
393        txt += "NJob=$1\n"
394        # txt += "FirstEvent=$2\n"
395        # txt += "MaxEvents=$3\n"
916  
917          # Prepare job-specific part
918          job = common.job_list[nj]
919 <        pset = os.path.basename(job.configFilename())
920 <        txt += '\n'
921 <        txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
922 <        # txt += 'if [ -e $RUNTIME_AREA/orcarc_$CE ] ; then\n'
923 <        # txt += '  cat $RUNTIME_AREA/orcarc_$CE .orcarc >> .orcarc_tmp\n'
924 <        # txt += '  mv .orcarc_tmp .orcarc\n'
925 <        # txt += 'fi\n'
926 <        # txt += 'if [ -e $RUNTIME_AREA/init_$CE.sh ] ; then\n'
927 <        # txt += '  cp $RUNTIME_AREA/init_$CE.sh init.sh\n'
928 <        # txt += 'fi\n'
919 >        ### FEDE FOR DBS OUTPUT PUBLICATION
920 >        if (self.datasetPath):
921 >            txt += '\n'
922 >            txt += 'DatasetPath='+self.datasetPath+'\n'
923 >
924 >            datasetpath_split = self.datasetPath.split("/")
925 >
926 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
927 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
928 >            txt += 'ApplicationFamily=cmsRun\n'
929 >
930 >        else:
931 >            txt += 'DatasetPath=MCDataTier\n'
932 >            txt += 'PrimaryDataset=null\n'
933 >            txt += 'DataTier=null\n'
934 >            txt += 'ApplicationFamily=MCDataTier\n'
935 >        if self.pset != None:
936 >            pset = os.path.basename(job.configFilename())
937 >            txt += '\n'
938 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
939 >            if (self.datasetPath): # standard job
940 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
941 >                txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
942 >                txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
943 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
944 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
945 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
946 >            else:  # pythia like job
947 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
948 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
949 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
950 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
951 >                if (self.firstRun):
952 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
953 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
954 >
955 >            txt += 'mv -f '+pset+' pset.cfg\n'
956  
957          if len(self.additional_inbox_files) > 0:
958 <            for file in self.additional_inbox_files:
959 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
960 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
961 <                txt += '   chmod +x '+file+'\n'
962 <                txt += 'fi\n'
963 <            pass
964 <
965 <        # txt += '\n'
966 <        # txt += 'chmod +x ./init.sh\n'
967 <        # txt += './init.sh\n'
968 <        # txt += 'exitStatus=$?\n'
969 <        # txt += 'if [ $exitStatus != 0 ] ; then\n'
970 <        # txt += '  echo "SET_EXE_ENV 1 ==> ERROR StageIn init script failed"\n'
971 <        # txt += '  echo "JOB_EXIT_STATUS = $exitStatus" \n'
972 <        # txt += '  echo "SanityCheckCode = $exitStatus" | tee -a $RUNTIME_AREA/$repo\n'
973 <        # txt += '  dumpStatus $RUNTIME_AREA/$repo\n'
974 <        # txt += '  exit $exitStatus\n'
975 <        # txt += 'fi\n'
976 <        # txt += "echo 'SET_EXE_ENV 0 ==> job setup ok'\n"
977 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
978 <
979 <        # txt += 'echo "FirstEvent=$FirstEvent" >> .orcarc\n'
980 <        # txt += 'echo "MaxEvents=$MaxEvents" >> .orcarc\n'
981 <        # if self.ML:
982 <        #     txt += 'echo "MonalisaJobId=$NJob" >> .orcarc\n'
958 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
959 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
960 >            txt += 'fi\n'
961 >            pass
962 >
963 >        if self.pset != None:
964 >            txt += '\n'
965 >            txt += 'echo "***** cat pset.cfg *********"\n'
966 >            txt += 'cat pset.cfg\n'
967 >            txt += 'echo "****** end pset.cfg ********"\n'
968 >            txt += '\n'
969 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
970 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
971 >            txt += '\n'
972 >        return txt
973 >    #### FEDE #####
974 >    def wsUntarSoftware(self, nj=0):
975 >        """
976 >        Put in the script the commands to build an executable
977 >        or a library.
978 >        """
979 >
980 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
981 >
982 >        if os.path.isfile(self.tgzNameWithPath):
983 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
984 >            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
985 >            txt += 'untar_status=$? \n'
986 >            txt += 'if [ $untar_status -ne 0 ]; then \n'
987 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
988 >            txt += '   job_exit_code=$untar_status\n'
989 >            txt += '   func_exit\n'
990 >            txt += 'else \n'
991 >            txt += '   echo "Successful untar" \n'
992 >            txt += 'fi \n'
993 >            txt += '\n'
994 >            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
995 >            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
996 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
997 >            txt += 'else\n'
998 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
999 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1000 >            txt += 'fi\n'
1001 >            txt += '\n'
1002 >
1003 >            pass
1004 >
1005 >        return txt
1006 >        
1007 >    def wsBuildExe(self, nj=0):
1008 >        """
1009 >        Put in the script the commands to build an executable
1010 >        or a library.
1011 >        """
1012 >
1013 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
1014 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
1015 >
1016 >        txt += 'mv $RUNTIME_AREA/lib . \n'
1017 >        txt += 'mv $RUNTIME_AREA/module . \n'
1018 >        txt += 'mv $RUNTIME_AREA/ProdCommon . \n'
1019 >        
1020  
1021 +        #if os.path.isfile(self.tgzNameWithPath):
1022 +        #    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1023 +        #    txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1024 +        #    txt += 'untar_status=$? \n'
1025 +        #    txt += 'if [ $untar_status -ne 0 ]; then \n'
1026 +        #    txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
1027 +        #    txt += '   job_exit_code=$untar_status\n'
1028 +        #    txt += '   func_exit\n'
1029 +        #    txt += 'else \n'
1030 +        #    txt += '   echo "Successful untar" \n'
1031 +        #    txt += 'fi \n'
1032 +        #    txt += '\n'
1033 +        #    txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
1034 +        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1035 +        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
1036 +        txt += 'else\n'
1037 +        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1038 +        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1039 +        txt += 'fi\n'
1040          txt += '\n'
1041 <        txt += 'echo "***** cat pset.cfg *********"\n'
439 <        txt += 'cat pset.cfg\n'
440 <        txt += 'echo "****** end pset.cfg ********"\n'
1041 >
1042          return txt
1043 +    ############################################################################
1044  
1045      def modifySteeringCards(self, nj):
1046          """
1047 <        modify the card provided by the user,
1047 >        modify the card provided by the user,
1048          writing a new card into share dir
1049          """
1050 <        
1050 >
1051      def executableName(self):
1052 <        return self.executable
1052 >        if self.scriptExe: #CarlosDaniele
1053 >            return "sh "
1054 >        else:
1055 >            return self.executable
1056  
1057      def executableArgs(self):
1058 <        return "-p pset.cfg"
1058 >        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
1059 >        if self.scriptExe:#CarlosDaniele
1060 >            return   self.scriptExe + " $NJob"
1061 >        else:
1062 >            version_array = self.scram.getSWVersion().split('_')
1063 >            major = 0
1064 >            minor = 0
1065 >            try:
1066 >                major = int(version_array[1])
1067 >                minor = int(version_array[2])
1068 >            except:
1069 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1070 >                raise CrabException(msg)
1071 >
1072 >            ex_args = ""
1073 >
1074 >            # Framework job report
1075 >            if major >= 1 and minor >= 5 :
1076 >                #ex_args += " -j " + self.fjrFileName
1077 >            ### FEDE it could be improved!!! ####    
1078 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1079 >            #######################################
1080 >            # Type of cfg file
1081 >            if major >= 2 :
1082 >                ex_args += " -p pset.pycfg"
1083 >            else:
1084 >                ex_args += " -p pset.cfg"
1085 >            return ex_args
1086  
1087      def inputSandbox(self, nj):
1088          """
1089          Returns a list of filenames to be put in JDL input sandbox.
1090          """
1091          inp_box = []
1092 <        # dict added to delete duplicate from input sandbox file list
1093 <        seen = {}
1092 >        # # dict added to delete duplicate from input sandbox file list
1093 >        # seen = {}
1094          ## code
1095          if os.path.isfile(self.tgzNameWithPath):
1096              inp_box.append(self.tgzNameWithPath)
1097 +        if os.path.isfile(self.MLtgzfile):
1098 +            inp_box.append(self.MLtgzfile)
1099          ## config
1100 <        inp_box.append(common.job_list[nj].configFilename())
1100 >        if not self.pset is None:
1101 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1102          ## additional input files
1103 <        for file in self.additional_inbox_files:
1104 <            inp_box.append(common.work_space.cwdDir()+file)
1105 <        #print "sono inputSandbox, inp_box = ", inp_box
1103 >        tgz = self.additionalInputFileTgz()
1104 >        inp_box.append(tgz)
1105 >        ## executable
1106 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1107 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1108          return inp_box
1109  
1110      def outputSandbox(self, nj):
# Line 476 | Line 1113 | class Cmssw(JobType):
1113          """
1114          out_box = []
1115  
479        stdout=common.job_list[nj].stdout()
480        stderr=common.job_list[nj].stderr()
481
1116          ## User Declared output files
1117 <        for out in self.output_file:
1118 <            n_out = nj + 1
1117 >        for out in (self.output_file+self.output_file_sandbox):
1118 >            n_out = nj + 1
1119              out_box.append(self.numberFile_(out,str(n_out)))
1120          return out_box
487        return []
1121  
1122      def prepareSteeringCards(self):
1123          """
1124          Make initial modifications of the user's steering card file.
1125          """
493        infile = open(self.pset,'r')
494            
495        outfile = open(common.work_space.jobDir()+self.name()+'.cfg', 'w')
496          
497        outfile.write('\n\n##### The following cards have been created by CRAB: DO NOT TOUCH #####\n')
498
499        outfile.write('InputCollections=/System/'+self.owner+'/'+self.dataset+'/'+self.dataset+'\n')
500
501        infile.close()
502        outfile.close()
1126          return
1127  
1128      def wsRenameOutput(self, nj):
# Line 507 | Line 1130 | class Cmssw(JobType):
1130          Returns part of a job script which renames the produced files.
1131          """
1132  
1133 <        txt = '\n'
1134 <        file_list = ''
1135 <        for fileWithSuffix in self.output_file:
1133 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1134 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1135 >        txt += 'echo ">>> current directory content:"\n'
1136 >        txt += 'ls \n'
1137 >        txt += '\n'
1138 >
1139 >        #txt += 'output_exit_status=0\n'
1140 >
1141 >        ### FEDE #######
1142 >        #for fileWithSuffix in (self.output_file_sandbox):
1143 >        #    output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1144 >        #    txt += '\n'
1145 >        #    txt += '# check output file\n'
1146 >        #    txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1147 >        #    txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1148 >        #    txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1149 >        #    txt += 'else\n'
1150 >        #    txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1151 >        #    txt += '    job_exit_code=60302\n'
1152 >        #    if common.scheduler.name().upper() == 'CONDOR_G':
1153 >        #        txt += '    if [ $middleware == OSG ]; then \n'
1154 >        #        txt += '        echo "prepare dummy output file"\n'
1155 >        #        txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1156 >        #        txt += '    fi \n'
1157 >        #    txt += 'fi\n'
1158 >
1159 >        for fileWithSuffix in (self.output_file):
1160              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
514            file_list=file_list+output_file_num+' '
1161              txt += '\n'
1162 <            txt += 'ls \n'
1163 <            txt += '\n'
1164 <            txt += 'ls '+fileWithSuffix+'\n'
1165 <            txt += 'exe_result=$?\n'
1166 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1167 <            txt += '   echo "ERROR: No output file to manage"\n'
1168 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1169 <            txt += '   echo "SanityCheckCode = $exe_result" | tee -a $RUNTIME_AREA/$repo\n'
524 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
525 <            txt += '   exit $exe_result \n'
1162 >            txt += '# check output file\n'
1163 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1164 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1165 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1166 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1167 >            else:
1168 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1169 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1170              txt += 'else\n'
1171 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1171 >            #txt += '    exit_status=60302\n'
1172 >            #txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1173 >            #txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1174 >            #txt += '    output_exit_status=$exit_status\n'
1175 >            txt += '    job_exit_code=60302\n'
1176 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1177 >            if common.scheduler.name().upper() == 'CONDOR_G':
1178 >                txt += '    if [ $middleware == OSG ]; then \n'
1179 >                txt += '        echo "prepare dummy output file"\n'
1180 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1181 >                txt += '    fi \n'
1182              txt += 'fi\n'
1183 <            txt += 'cd $RUNTIME_AREA\n'
1184 <                      
1185 <            pass
1186 <      
1187 <        file_list=file_list[:-1]
1188 <        txt += 'file_list="'+file_list+'"\n'
1183 >        file_list = []
1184 >        for fileWithSuffix in (self.output_file):
1185 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1186 >
1187 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1188 >        txt += '\n'
1189 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1190 >        txt += 'echo ">>> current directory content:"\n'
1191 >        txt += 'ls \n'
1192 >        txt += '\n'
1193 >        txt += 'cd $RUNTIME_AREA\n'
1194 >        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1195          return txt
1196  
1197      def numberFile_(self, file, txt):
# Line 542 | Line 1202 | class Cmssw(JobType):
1202          # take away last extension
1203          name = p[0]
1204          for x in p[1:-1]:
1205 <           name=name+"."+x
1205 >            name=name+"."+x
1206          # add "_txt"
1207          if len(p)>1:
1208 <          ext = p[len(p)-1]
1209 <          #result = name + '_' + str(txt) + "." + ext
550 <          result = name + '_' + txt + "." + ext
1208 >            ext = p[len(p)-1]
1209 >            result = name + '_' + txt + "." + ext
1210          else:
1211 <          #result = name + '_' + str(txt)
1212 <          result = name + '_' + txt
554 <        
1211 >            result = name + '_' + txt
1212 >
1213          return result
1214  
1215 <    def getRequirements(self):
1215 >    def getRequirements(self, nj=[]):
1216          """
1217 <        return job requirements to add to jdl files
1217 >        return job requirements to add to jdl files
1218          """
1219          req = ''
1220 <        if common.analisys_common_info['sites']:
1221 <            if common.analisys_common_info['sw_version']:
1222 <                req='Member("VO-cms-' + \
1223 <                     common.analisys_common_info['sw_version'] + \
1224 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1225 <            if len(common.analisys_common_info['sites'])>0:
1226 <                req = req + ' && ('
1227 <                for i in range(len(common.analisys_common_info['sites'])):
1228 <                    req = req + 'other.GlueCEInfoHostName == "' \
1229 <                         + common.analisys_common_info['sites'][i] + '"'
1230 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1231 <                        req = req + ' || '
1232 <            req = req + ')'
1233 <        #print "req = ", req
1220 >        if self.version:
1221 >            req='Member("VO-cms-' + \
1222 >                 self.version + \
1223 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1224 >        ## SL add requirement for OS version only if SL4
1225 >        #reSL4 = re.compile( r'slc4' )
1226 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1227 >            req+=' && Member("VO-cms-' + \
1228 >                 self.executable_arch + \
1229 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1230 >
1231 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1232 >        if common.scheduler.name() == "glitecoll":
1233 >            req += ' && other.GlueCEStateStatus == "Production" '
1234 >
1235          return req
1236 +
1237 +    def configFilename(self):
1238 +        """ return the config filename """
1239 +        return self.name()+'.cfg'
1240 +
1241 +    def wsSetupCMSOSGEnvironment_(self):
1242 +        """
1243 +        Returns part of a job script which is prepares
1244 +        the execution environment and which is common for all CMS jobs.
1245 +        """
1246 +        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1247 +        txt += '    echo ">>> setup CMS OSG environment:"\n'
1248 +        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1249 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1250 +        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1251 +        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1252 +        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1253 +        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1254 +        txt += '    else\n'
1255 +        #txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1256 +        #txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1257 +        #txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1258 +        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1259 +        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1260 +        txt += '        job_exit_code=10020\n'
1261 +        #txt += '        cd $RUNTIME_AREA\n'
1262 +        #txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1263 +        #txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1264 +        #txt += '        /bin/rm -rf $WORKING_DIR\n'
1265 +        #txt += '        if [ -d $WORKING_DIR ] ;then\n'
1266 +        #txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1267 +        #txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1268 +        #txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1269 +        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1270 +        #txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1271 +        #txt += '            job_exit_code=10017\n'
1272 +        #txt += '        fi\n'
1273 +        txt += '\n'
1274 +        #txt += '        exit 1\n'
1275 +        txt += '        func_exit\n'
1276 +        txt += '    fi\n'
1277 +        txt += '\n'
1278 +        txt += '    echo "==> setup cms environment ok"\n'
1279 +        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1280 +
1281 +        return txt
1282 +
1283 +    ### OLI_DANIELE
1284 +    def wsSetupCMSLCGEnvironment_(self):
1285 +        """
1286 +        Returns part of a job script which is prepares
1287 +        the execution environment and which is common for all CMS jobs.
1288 +        """
1289 +        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1290 +        txt += '    echo ">>> setup CMS LCG environment:"\n'
1291 +        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1292 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1293 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1294 +        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1295 +        #txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1296 +        #txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1297 +        #txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1298 +        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1299 +        #txt += '        exit 1\n'
1300 +        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1301 +        txt += '        job_exit_code=10031\n'
1302 +        txt += '        func_exit\n'
1303 +        txt += '    else\n'
1304 +        txt += '        echo "Sourcing environment... "\n'
1305 +        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1306 +        #txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1307 +        #txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1308 +        #txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1309 +        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1310 +        #txt += '            exit 1\n'
1311 +        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1312 +        txt += '            job_exit_code=10020\n'
1313 +        txt += '            func_exit\n'
1314 +        txt += '        fi\n'
1315 +        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1316 +        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1317 +        txt += '        result=$?\n'
1318 +        txt += '        if [ $result -ne 0 ]; then\n'
1319 +        #txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1320 +        #txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1321 +        #txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1322 +        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1323 +        #txt += '            exit 1\n'
1324 +        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1325 +        txt += '            job_exit_code=10032\n'
1326 +        txt += '            func_exit\n'
1327 +        txt += '        fi\n'
1328 +        txt += '    fi\n'
1329 +        txt += '    \n'
1330 +        txt += '    echo "==> setup cms environment ok"\n'
1331 +        return txt
1332 +
1333 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1334 +    def modifyReport(self, nj):
1335 +        """
1336 +        insert the part of the script that modifies the FrameworkJob Report
1337 +        """
1338 +
1339 +        txt = '\n#Written by cms_cmssw::modifyReport\n'
1340 +        try:
1341 +            publish_data = int(self.cfg_params['USER.publish_data'])
1342 +        except KeyError:
1343 +            publish_data = 0
1344 +        if (publish_data == 1):
1345 +            
1346 +            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1347 +            txt += '    echo ">>> Modify Job Report:" \n'
1348 +            txt += '    chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1349 +            #txt += '    if [ -z "$SE" ]; then\n'
1350 +            #txt += '        SE="" \n'
1351 +            #txt += '    fi \n'
1352 +            #txt += '    if [ -z "$SE_PATH" ]; then\n'
1353 +            #txt += '        SE_PATH="" \n'
1354 +            #txt += '    fi \n'
1355 +            txt += '    echo "SE = $SE"\n'
1356 +            txt += '    echo "SE_PATH = $SE_PATH"\n'
1357 +
1358 +            processedDataset = self.cfg_params['USER.publish_data_name']
1359 +            txt += '    ProcessedDataset='+processedDataset+'\n'
1360 +            #txt += '    if [ "$SE_PATH" == "" ]; then\n'
1361 +            #txt += '        FOR_LFN=/copy_problems/ \n'
1362 +            #txt += '    else \n'
1363 +            #txt += '        tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1364 +            #txt += '        FOR_LFN=/store$tmp \n'
1365 +            #txt += '    fi \n'
1366 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1367 +            txt += '    FOR_LFN=/store$tmp \n'
1368 +            txt += '    echo "ProcessedDataset = $ProcessedDataset"\n'
1369 +            txt += '    echo "FOR_LFN = $FOR_LFN" \n'
1370 +            txt += '    echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1371 +            #txt += '    echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1372 +            #txt += '    $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1373 +            ### FEDE ####
1374 +            txt += '    echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1375 +            txt += '    $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1376 +            ####################################
1377 +            txt += '    modifyReport_result=$?\n'
1378 +            txt += '    if [ $modifyReport_result -ne 0 ]; then\n'
1379 +            txt += '        modifyReport_result=70500\n'
1380 +            txt += '        job_exit_code=$modifyReport_result\n'
1381 +            txt += '        echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1382 +            txt += '        echo "WARNING: Problem with ModifyJobReport"\n'
1383 +            txt += '    else\n'
1384 +            ### FEDE #####
1385 +            #txt += '        mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1386 +            #######################
1387 +            txt += '        mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1388 +            txt += '    fi\n'
1389 +            txt += 'fi\n'
1390 +        return txt
1391 +
1392 +    def cleanEnv(self):
1393 +        txt = '\n#Written by cms_cmssw::cleanEnv\n'
1394 +        txt += 'if [ $middleware == OSG ]; then\n'
1395 +        txt += '    cd $RUNTIME_AREA\n'
1396 +        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1397 +        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1398 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1399 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1400 +        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1401 +        txt += '        job_exit_code=10017\n'
1402 +        txt += '        func_exit\n'
1403 +        txt += '    fi\n'
1404 +        txt += 'fi\n'
1405 +        txt += '\n'
1406 +        return txt
1407 +
1408 +    def setParam_(self, param, value):
1409 +        self._params[param] = value
1410 +
1411 +    def getParams(self):
1412 +        return self._params
1413 +
1414 +    def uniquelist(self, old):
1415 +        """
1416 +        remove duplicates from a list
1417 +        """
1418 +        nd={}
1419 +        for e in old:
1420 +            nd[e]=0
1421 +        return nd.keys()
1422 +
1423 +    def outList(self):
1424 +        """
1425 +        check the dimension of the output files
1426 +        """
1427 +        txt = ''
1428 +        txt += 'echo ">>> list of expected files on output sandbox"\n'
1429 +        listOutFiles = []
1430 +        stdout = 'CMSSW_$NJob.stdout'
1431 +        stderr = 'CMSSW_$NJob.stderr'
1432 +        if (self.return_data == 1):
1433 +            for file in (self.output_file+self.output_file_sandbox):
1434 +                listOutFiles.append(self.numberFile_(file, '$NJob'))
1435 +            listOutFiles.append(stdout)
1436 +            listOutFiles.append(stderr)
1437 +        else:
1438 +            for file in (self.output_file_sandbox):
1439 +                listOutFiles.append(self.numberFile_(file, '$NJob'))
1440 +            listOutFiles.append(stdout)
1441 +            listOutFiles.append(stderr)
1442 +        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1443 +        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1444 +        txt += 'export filesToCheck\n'
1445 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines