ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.1 by slacapra, Thu Apr 6 16:18:17 2006 UTC vs.
Revision 1.155 by slacapra, Tue Feb 12 15:20:47 2008 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6
7 import DataDiscovery
8 import DataLocation
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
16 <        self.analisys_common_info = {}
16 >        self.argsList = []
17 >
18 >        self._params = {}
19 >        self.cfg_params = cfg_params
20 >        # init BlackWhiteListParser
21 >        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
22 >
23 >        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
24 >
25 >        # number of jobs requested to be created, limit obj splitting
26 >        self.ncjobs = ncjobs
27  
28          log = common.logger
29 <        
29 >
30          self.scram = Scram.Scram(cfg_params)
23        scramArea = ''
31          self.additional_inbox_files = []
32          self.scriptExe = ''
33          self.executable = ''
34 +        self.executable_arch = self.scram.getArch()
35          self.tgz_name = 'default.tgz'
36 +        self.additional_tgz_name = 'additional.tgz'
37 +        self.scriptName = 'CMSSW.sh'
38 +        self.pset = ''      #script use case Da
39 +        self.datasetPath = '' #script use case Da
40 +
41 +        # set FJR file name
42 +        self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 <        common.analisys_common_info['sw_version'] = self.version
45 >
46 >        #
47 >        # Try to block creation in case of arch/version mismatch
48 >        #
49 >
50 >        a = string.split(self.version, "_")
51 >
52 >        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 >            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
54 >            common.logger.message(msg)
55 >        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
56 >            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
57 >            raise CrabException(msg)
58 >
59 >        common.taskDB.setDict('codeVersion',self.version)
60 >        self.setParam_('application', self.version)
61  
62          ### collect Data cards
63 <        try:
64 <            self.owner = cfg_params['CMSSW.owner']
65 <            log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
36 <            self.dataset = cfg_params['CMSSW.dataset']
37 <            log.debug(6, "CMSSW::CMSSW(): dataset = "+self.dataset)
38 <        except KeyError:
39 <            msg = "Error: owner and/or dataset not defined "
63 >
64 >        if not cfg_params.has_key('CMSSW.datasetpath'):
65 >            msg = "Error: datasetpath not defined "
66              raise CrabException(msg)
67 +        tmp =  cfg_params['CMSSW.datasetpath']
68 +        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 +        if string.lower(tmp)=='none':
70 +            self.datasetPath = None
71 +            self.selectNoInput = 1
72 +        else:
73 +            self.datasetPath = tmp
74 +            self.selectNoInput = 0
75 +
76 +        # ML monitoring
77 +        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
78 +        if not self.datasetPath:
79 +            self.setParam_('dataset', 'None')
80 +            self.setParam_('owner', 'None')
81 +        else:
82 +            ## SL what is supposed to fail here?
83 +            try:
84 +                datasetpath_split = self.datasetPath.split("/")
85 +                # standard style
86 +                self.setParam_('datasetFull', self.datasetPath)
87 +                self.setParam_('dataset', datasetpath_split[1])
88 +                self.setParam_('owner', datasetpath_split[2])
89 +            except:
90 +                self.setParam_('dataset', self.datasetPath)
91 +                self.setParam_('owner', self.datasetPath)
92 +
93 +        self.setParam_('taskId', common.taskDB.dict('taskId'))
94  
95          self.dataTiers = []
43        try:
44            tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
45            for tmp in tmpDataTiers:
46                tmp=string.strip(tmp)
47                self.dataTiers.append(tmp)
48                pass
49            pass
50        except KeyError:
51            pass
52        log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
96  
97          ## now the application
98 <        try:
99 <            self.executable = cfg_params['CMSSW.executable']
100 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
58 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
59 <            log.debug(3,msg)
60 <        except KeyError:
61 <            self.executable = 'cmsRun'
62 <            msg = "User executable not defined. Use cmsRun"
63 <            log.debug(3,msg)
64 <            pass
98 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
99 >        self.setParam_('exe', self.executable)
100 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102 <        try:
103 <            self.pset = cfg_params['CMSSW.pset']
104 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
102 >        if not cfg_params.has_key('CMSSW.pset'):
103 >            raise CrabException("PSet file missing. Cannot run cmsRun ")
104 >        self.pset = cfg_params['CMSSW.pset']
105 >        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
106 >        if self.pset.lower() != 'none' :
107              if (not os.path.exists(self.pset)):
108                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
109 <        except KeyError:
110 <            raise CrabException("PSet file missing. Cannot run cmsRun ")
109 >        else:
110 >            self.pset = None
111  
112          # output files
113 <        try:
114 <            self.output_file = []
113 >        ## stuff which must be returned always via sandbox
114 >        self.output_file_sandbox = []
115  
116 <            tmp = cfg_params['CMSSW.output_file']
117 <            if tmp != '':
80 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
81 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
82 <                for tmp in tmpOutFiles:
83 <                    tmp=string.strip(tmp)
84 <                    self.output_file.append(tmp)
85 <                    pass
116 >        # add fjr report by default via sandbox
117 >        self.output_file_sandbox.append(self.fjrFileName)
118  
119 <            else:
120 <                log.message("No output file defined: only stdout/err will be available")
119 >        # other output files to be returned via sandbox or copied to SE
120 >        self.output_file = []
121 >        tmp = cfg_params.get('CMSSW.output_file',None)
122 >        if tmp :
123 >            tmpOutFiles = string.split(tmp,',')
124 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
125 >            for tmp in tmpOutFiles:
126 >                tmp=string.strip(tmp)
127 >                self.output_file.append(tmp)
128                  pass
129 <            pass
130 <        except KeyError:
131 <            log.message("No output file defined: only stdout/err will be available")
93 <            pass
129 >        else:
130 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
131 >        pass
132  
133          # script_exe file as additional file in inputSandbox
134 <        try:
135 <           self.scriptExe = cfg_params['CMSSW.script_exe']
136 <           self.additional_inbox_files.append(self.scriptExe)
137 <        except KeyError:
138 <           pass
139 <        if self.scriptExe != '':
140 <           if os.path.isfile(self.scriptExe):
141 <              pass
142 <           else:
143 <              log.message("WARNING. file "+self.scriptExe+" not found")
144 <              sys.exit()
145 <                  
134 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
135 >        if self.scriptExe :
136 >           if not os.path.isfile(self.scriptExe):
137 >              msg ="ERROR. file "+self.scriptExe+" not found"
138 >              raise CrabException(msg)
139 >           self.additional_inbox_files.append(string.strip(self.scriptExe))
140 >
141 >        #CarlosDaniele
142 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
143 >           msg ="Error. script_exe  not defined"
144 >           raise CrabException(msg)
145 >
146          ## additional input files
147 <        try:
148 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
147 >        if cfg_params.has_key('USER.additional_input_files'):
148 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
149              for tmp in tmpAddFiles:
150 <                tmp=string.strip(tmp)
151 <                self.additional_inbox_files.append(tmp)
150 >                tmp = string.strip(tmp)
151 >                dirname = ''
152 >                if not tmp[0]=="/": dirname = "."
153 >                files = []
154 >                if string.find(tmp,"*")>-1:
155 >                    files = glob.glob(os.path.join(dirname, tmp))
156 >                    if len(files)==0:
157 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
158 >                else:
159 >                    files.append(tmp)
160 >                for file in files:
161 >                    if not os.path.exists(file):
162 >                        raise CrabException("Additional input file not found: "+file)
163 >                    pass
164 >                    # fname = string.split(file, '/')[-1]
165 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
166 >                    # shutil.copyfile(file, storedFile)
167 >                    self.additional_inbox_files.append(string.strip(file))
168                  pass
169              pass
170 <        except KeyError:
171 <            pass
170 >            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
171 >        pass
172  
173 <        try:
173 >        ## Events per job
174 >        if cfg_params.has_key('CMSSW.events_per_job'):
175 >            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
176 >            self.selectEventsPerJob = 1
177 >        else:
178 >            self.eventsPerJob = -1
179 >            self.selectEventsPerJob = 0
180 >
181 >        ## number of jobs
182 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
183 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
184 >            self.selectNumberOfJobs = 1
185 >        else:
186 >            self.theNumberOfJobs = 0
187 >            self.selectNumberOfJobs = 0
188 >
189 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
190              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
191 <        except KeyError:
192 <            msg = 'Must define total_number_of_events and job_number_of_events'
193 <            raise CrabException(msg)
194 <            
125 < #Marco: FirstEvent is nolonger used inside PSet
126 < #        try:
127 < #            self.first = int(cfg_params['CMSSW.first_event'])
128 < #        except KeyError:
129 < #            self.first = 0
130 < #            pass
131 < #        log.debug(6, "Orca::Orca(): total number of events = "+`self.total_number_of_events`)
132 <        #log.debug(6, "Orca::Orca(): events per job = "+`self.job_number_of_events`)
133 < #        log.debug(6, "Orca::Orca(): first event = "+`self.first`)
134 <        
135 <        CEBlackList = []
136 <        try:
137 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
138 <            for tmp in tmpBad:
139 <                tmp=string.strip(tmp)
140 <                CEBlackList.append(tmp)
141 <        except KeyError:
142 <            pass
191 >            self.selectTotalNumberEvents = 1
192 >        else:
193 >            self.total_number_of_events = 0
194 >            self.selectTotalNumberEvents = 0
195  
196 <        self.reCEBlackList=[]
197 <        for bad in CEBlackList:
198 <            self.reCEBlackList.append(re.compile( bad ))
196 >        if self.pset != None: #CarlosDaniele
197 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
198 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
199 >                 raise CrabException(msg)
200 >        else:
201 >             if (self.selectNumberOfJobs == 0):
202 >                 msg = 'Must specify  number_of_jobs.'
203 >                 raise CrabException(msg)
204  
205 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
205 >        ## source seed for pythia
206 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
207  
208 <        CEWhiteList = []
209 <        try:
210 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
153 <            #tmpGood = ['cern']
154 <            for tmp in tmpGood:
155 <                tmp=string.strip(tmp)
156 <                #if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch
157 <                CEWhiteList.append(tmp)
158 <        except KeyError:
159 <            pass
208 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
209 >
210 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
211  
212 <        #print 'CEWhiteList: ',CEWhiteList
162 <        self.reCEWhiteList=[]
163 <        for Good in CEWhiteList:
164 <            self.reCEWhiteList.append(re.compile( Good ))
212 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
213  
214 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
214 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
215 >
216 >        if self.pset != None: #CarlosDaniele
217 >            import PsetManipulator as pp
218 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
219 >
220 >        # Copy/return
221 >
222 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
223 >        self.return_data = int(cfg_params.get('USER.return_data',0))
224  
225          #DBSDLS-start
226 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
226 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
227          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
228          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
229 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
230          ## Perform the data location and discovery (based on DBS/DLS)
231 <        self.DataDiscoveryAndLocation(cfg_params)
232 <        #DBSDLS-end          
231 >        ## SL: Don't if NONE is specified as input (pythia use case)
232 >        blockSites = {}
233 >        if self.datasetPath:
234 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
235 >        #DBSDLS-end
236  
237          self.tgzNameWithPath = self.getTarBall(self.executable)
238  
239 +        ## Select Splitting
240 +        if self.selectNoInput:
241 +            if self.pset == None: #CarlosDaniele
242 +                self.jobSplittingForScript()
243 +            else:
244 +                self.jobSplittingNoInput()
245 +        else:
246 +            self.jobSplittingByBlocks(blockSites)
247 +
248 +        # modify Pset
249 +        if self.pset != None: #CarlosDaniele
250 +            try:
251 +                if (self.datasetPath): # standard job
252 +                    # allow to processa a fraction of events in a file
253 +                    PsetEdit.inputModule("INPUTFILE")
254 +                    PsetEdit.maxEvent(0)
255 +                    PsetEdit.skipEvent(0)
256 +                else:  # pythia like job
257 +                    PsetEdit.maxEvent(self.eventsPerJob)
258 +                    if (self.firstRun):
259 +                        PsetEdit.pythiaFirstRun(0)  #First Run
260 +                    if (self.sourceSeed) :
261 +                        PsetEdit.pythiaSeed(0)
262 +                        if (self.sourceSeedVtx) :
263 +                            PsetEdit.vtxSeed(0)
264 +                        if (self.sourceSeedG4) :
265 +                            PsetEdit.g4Seed(0)
266 +                        if (self.sourceSeedMix) :
267 +                            PsetEdit.mixSeed(0)
268 +                # add FrameworkJobReport to parameter-set
269 +                PsetEdit.addCrabFJR(self.fjrFileName)
270 +                PsetEdit.psetWriter(self.configFilename())
271 +            except:
272 +                msg='Error while manipuliating ParameterSet: exiting...'
273 +                raise CrabException(msg)
274 +
275      def DataDiscoveryAndLocation(self, cfg_params):
276  
277 <        fun = "CMSSW::DataDiscoveryAndLocation()"
277 >        import DataDiscovery
278 >        import DataLocation
279 >        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
280 >
281 >        datasetPath=self.datasetPath
282  
283          ## Contact the DBS
284 +        common.logger.message("Contacting Data Discovery Services ...")
285          try:
286 <            self.pubdata=DataDiscovery.DataDiscovery(self.owner,
185 <                                                     self.dataset,
186 <                                                     self.dataTiers,
187 <                                                     cfg_params)
286 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
287              self.pubdata.fetchDBSInfo()
288  
289          except DataDiscovery.NotExistingDatasetError, ex :
290              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
291              raise CrabException(msg)
193
292          except DataDiscovery.NoDataTierinProvenanceError, ex :
293              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
294              raise CrabException(msg)
295          except DataDiscovery.DataDiscoveryError, ex:
296 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
296 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
297              raise CrabException(msg)
298  
299 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
300 <        self.DBSPaths=self.pubdata.getDBSPaths()
301 <        common.logger.message("Required data are : ")
204 <        for path in self.DBSPaths:
205 <            common.logger.message(" --> "+path )
299 >        self.filesbyblock=self.pubdata.getFiles()
300 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
301 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
302  
303          ## get max number of events
304 <        common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
209 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
210 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
211 <
212 <        ## get fileblocks corresponding to the required data
213 <        fb=self.pubdata.getFileBlocks()
214 <        common.logger.debug(5,"fileblocks are %s"%fb)
304 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
305  
306          ## Contact the DLS and build a list of sites hosting the fileblocks
307          try:
308 <            dataloc=DataLocation.DataLocation(self.pubdata.getFileBlocks(),cfg_params)
308 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
309              dataloc.fetchDLSInfo()
310          except DataLocation.DataLocationError , ex:
311              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
312              raise CrabException(msg)
223        
224        allsites=dataloc.getSites()
225        common.logger.debug(5,"sites are %s"%allsites)
226        sites=self.checkBlackList(allsites)
227        common.logger.debug(5,"sites are (after black list) %s"%sites)
228        sites=self.checkWhiteList(sites)
229        common.logger.debug(5,"sites are (after white list) %s"%sites)
313  
314 <        if len(sites)==0:
315 <            msg = 'No sites hosting all the needed data! Exiting... '
316 <            raise CrabException(msg)
317 <        common.logger.message("List of Sites hosting the data : "+str(sites))
318 <        common.logger.debug(6, "List of Sites: "+str(sites))
319 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
320 <        return
321 <        
322 <    def checkBlackList(self, allSites):
323 <        if len(self.reCEBlackList)==0: return allSites
324 <        sites = []
325 <        for site in allSites:
243 <            common.logger.debug(10,'Site '+site)
244 <            good=1
245 <            for re in self.reCEBlackList:
246 <                if re.search(site):
247 <                    common.logger.message('CE in black list, skipping site '+site)
248 <                    good=0
249 <                pass
250 <            if good: sites.append(site)
251 <        if len(sites) == 0:
252 <            common.logger.debug(3,"No sites found after BlackList")
314 >
315 >        sites = dataloc.getSites()
316 >        allSites = []
317 >        listSites = sites.values()
318 >        for listSite in listSites:
319 >            for oneSite in listSite:
320 >                allSites.append(oneSite)
321 >        allSites = self.uniquelist(allSites)
322 >
323 >        # screen output
324 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
325 >
326          return sites
327  
328 <    def checkWhiteList(self, allsites):
328 >    def setArgsList(self, argsList):
329 >        self.argsList = argsList
330 >
331 >    def jobSplittingByBlocks(self, blockSites):
332 >        """
333 >        Perform job splitting. Jobs run over an integer number of files
334 >        and no more than one block.
335 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
336 +        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
337 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
338 >                  self.maxEvents, self.filesbyblock
339 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
340 >              self.total_number_of_jobs - Total # of jobs
341 >              self.list_of_args - File(s) job will run on (a list of lists)
342 >        """
343  
344 <        if len(self.reCEWhiteList)==0: return pubDBUrls
345 <        sites = []
346 <        for site in allsites:
347 <            #print 'connecting to the URL ',url
348 <            good=0
349 <            for re in self.reCEWhiteList:
350 <                if re.search(site):
351 <                    common.logger.debug(5,'CE in white list, adding site '+site)
352 <                    good=1
353 <                if not good: continue
354 <                sites.append(site)
355 <        if len(sites) == 0:
356 <            common.logger.message("No sites found after WhiteList\n")
344 >        # ---- Handle the possible job splitting configurations ---- #
345 >        if (self.selectTotalNumberEvents):
346 >            totalEventsRequested = self.total_number_of_events
347 >        if (self.selectEventsPerJob):
348 >            eventsPerJobRequested = self.eventsPerJob
349 >            if (self.selectNumberOfJobs):
350 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
351 >
352 >        # If user requested all the events in the dataset
353 >        if (totalEventsRequested == -1):
354 >            eventsRemaining=self.maxEvents
355 >        # If user requested more events than are in the dataset
356 >        elif (totalEventsRequested > self.maxEvents):
357 >            eventsRemaining = self.maxEvents
358 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
359 >        # If user requested less events than are in the dataset
360          else:
361 <            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
362 <        return sites
361 >            eventsRemaining = totalEventsRequested
362 >
363 >        # If user requested more events per job than are in the dataset
364 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
365 >            eventsPerJobRequested = self.maxEvents
366 >
367 >        # For user info at end
368 >        totalEventCount = 0
369 >
370 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
371 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
372 >
373 >        if (self.selectNumberOfJobs):
374 >            common.logger.message("May not create the exact number_of_jobs requested.")
375 >
376 >        if ( self.ncjobs == 'all' ) :
377 >            totalNumberOfJobs = 999999999
378 >        else :
379 >            totalNumberOfJobs = self.ncjobs
380 >
381 >
382 >        blocks = blockSites.keys()
383 >        blockCount = 0
384 >        # Backup variable in case self.maxEvents counted events in a non-included block
385 >        numBlocksInDataset = len(blocks)
386 >
387 >        jobCount = 0
388 >        list_of_lists = []
389 >
390 +        # dictionary tracking which jobs belong to which block
391 >        jobsOfBlock = {}
392 >
393 >        # ---- Iterate over the blocks in the dataset until ---- #
394 >        # ---- we've met the requested total # of events    ---- #
395 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
396 >            block = blocks[blockCount]
397 >            blockCount += 1
398 >            if block not in jobsOfBlock.keys() :
399 >                jobsOfBlock[block] = []
400 >
401 >            if self.eventsbyblock.has_key(block) :
402 >                numEventsInBlock = self.eventsbyblock[block]
403 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
404 >
405 >                files = self.filesbyblock[block]
406 >                numFilesInBlock = len(files)
407 >                if (numFilesInBlock <= 0):
408 >                    continue
409 >                fileCount = 0
410 >
411 >                # ---- New block => New job ---- #
412 >                parString = ""
413 >                # counter for number of events in files currently worked on
414 >                filesEventCount = 0
415 >                # flag if next while loop should touch new file
416 >                newFile = 1
417 >                # job event counter
418 >                jobSkipEventCount = 0
419 >
420 >                # ---- Iterate over the files in the block until we've met the requested ---- #
421 >                # ---- total # of events or we've gone over all the files in this block  ---- #
422 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
423 >                    file = files[fileCount]
424 >                    if newFile :
425 >                        try:
426 >                            numEventsInFile = self.eventsbyfile[file]
427 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
428 >                            # increase filesEventCount
429 >                            filesEventCount += numEventsInFile
430 >                            # Add file to current job
431 >                            parString += '\\\"' + file + '\\\"\,'
432 >                            newFile = 0
433 >                        except KeyError:
434 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
435 >
436 >
437 >                    # if less events in file remain than eventsPerJobRequested
438 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
439 >                        # if last file in block
440 >                        if ( fileCount == numFilesInBlock-1 ) :
441 >                            # end job using last file, use remaining events in block
442 >                            # close job and touch new file
443 >                            fullString = parString[:-2]
444 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
445 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
446 >                            self.jobDestination.append(blockSites[block])
447 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
448 >                            # fill jobs of block dictionary
449 >                            jobsOfBlock[block].append(jobCount+1)
450 >                            # reset counter
451 >                            jobCount = jobCount + 1
452 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
453 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
454 >                            jobSkipEventCount = 0
455 >                            # reset file
456 >                            parString = ""
457 >                            filesEventCount = 0
458 >                            newFile = 1
459 >                            fileCount += 1
460 >                        else :
461 >                            # go to next file
462 >                            newFile = 1
463 >                            fileCount += 1
464 >                    # if events in file equal to eventsPerJobRequested
465 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
466 >                        # close job and touch new file
467 >                        fullString = parString[:-2]
468 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
469 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
470 >                        self.jobDestination.append(blockSites[block])
471 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
472 >                        jobsOfBlock[block].append(jobCount+1)
473 >                        # reset counter
474 >                        jobCount = jobCount + 1
475 >                        totalEventCount = totalEventCount + eventsPerJobRequested
476 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
477 >                        jobSkipEventCount = 0
478 >                        # reset file
479 >                        parString = ""
480 >                        filesEventCount = 0
481 >                        newFile = 1
482 >                        fileCount += 1
483 >
484 >                    # if more events in file remain than eventsPerJobRequested
485 >                    else :
486 >                        # close job but don't touch new file
487 >                        fullString = parString[:-2]
488 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
489 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
490 >                        self.jobDestination.append(blockSites[block])
491 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
492 >                        jobsOfBlock[block].append(jobCount+1)
493 >                        # increase counter
494 >                        jobCount = jobCount + 1
495 >                        totalEventCount = totalEventCount + eventsPerJobRequested
496 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
497 >                        # calculate skip events for last file
498 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
499 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
500 >                        # remove all but the last file
501 >                        filesEventCount = self.eventsbyfile[file]
502 >                        parString = ""
503 >                        parString += '\\\"' + file + '\\\"\,'
504 >                    pass # END if
505 >                pass # END while (iterate over files in the block)
506 >        pass # END while (iterate over blocks in the dataset)
507 >        self.ncjobs = self.total_number_of_jobs = jobCount
508 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
509 >            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
510 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
511 >
512 >        # screen output
513 >        screenOutput = "List of jobs and available destination sites:\n\n"
514 >
515 >        # keep trace of block with no sites to print a warning at the end
516 >        noSiteBlock = []
517 >        bloskNoSite = []
518 >
519 >        blockCounter = 0
520 >        for block in blocks:
521 >            if block in jobsOfBlock.keys() :
522 >                blockCounter += 1
523 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
524 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
525 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
526 >                    bloskNoSite.append( blockCounter )
527 >
528 >        common.logger.message(screenOutput)
529 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
530 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
531 >            virgola = ""
532 >            if len(bloskNoSite) > 1:
533 >                virgola = ","
534 >            for block in bloskNoSite:
535 >                msg += ' ' + str(block) + virgola
536 >            msg += '\n               Related jobs:\n                 '
537 >            virgola = ""
538 >            if len(noSiteBlock) > 1:
539 >                virgola = ","
540 >            for range_jobs in noSiteBlock:
541 >                msg += str(range_jobs) + virgola
542 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
543 >            if self.cfg_params.has_key('EDG.se_white_list'):
544 >                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
545 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
546 >                msg += 'Please check if the dataset is available at this site!)\n'
547 >            if self.cfg_params.has_key('EDG.ce_white_list'):
548 >                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
549 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
550 >                msg += 'Please check if the dataset is available at this site!)\n'
551 >
552 >            common.logger.message(msg)
553 >
554 >        self.list_of_args = list_of_lists
555 >        return
556 >
557 >    def jobSplittingNoInput(self):
558 >        """
559 >        Perform job splitting based on number of event per job
560 >        """
561 >        common.logger.debug(5,'Splitting per events')
562 >
563 >        if (self.selectEventsPerJob):
564 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
565 >        if (self.selectNumberOfJobs):
566 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
567 >        if (self.selectTotalNumberEvents):
568 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
569 >
570 >        if (self.total_number_of_events < 0):
571 >            msg='Cannot split jobs per Events with "-1" as total number of events'
572 >            raise CrabException(msg)
573 >
574 >        if (self.selectEventsPerJob):
575 >            if (self.selectTotalNumberEvents):
576 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
577 >            elif(self.selectNumberOfJobs) :
578 >                self.total_number_of_jobs =self.theNumberOfJobs
579 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
580 >
581 >        elif (self.selectNumberOfJobs) :
582 >            self.total_number_of_jobs = self.theNumberOfJobs
583 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
584 >
585 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
586 >
587 >        # is there any remainder?
588 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
589 >
590 >        common.logger.debug(5,'Check  '+str(check))
591 >
592 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
593 >        if check > 0:
594 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
595 >
596 >        # argument is seed number.$i
597 >        self.list_of_args = []
598 >        for i in range(self.total_number_of_jobs):
599 >            ## Since there is no input, any site is good
600 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
601 >            args=[]
602 >            if (self.firstRun):
603 >                ## pythia first run
604 >                args.append(str(self.firstRun)+str(i))
605 >            if (self.sourceSeed):
606 >                args.append(str(self.sourceSeed)+str(i))
607 >                if (self.sourceSeedVtx):
608 >                    ## + vtx random seed
609 >                    args.append(str(self.sourceSeedVtx)+str(i))
610 >                if (self.sourceSeedG4):
611 >                    ## + G4 random seed
612 >                    args.append(str(self.sourceSeedG4)+str(i))
613 >                if (self.sourceSeedMix):
614 >                    ## + Mix random seed
615 >                    args.append(str(self.sourceSeedMix)+str(i))
616 >                pass
617 >            pass
618 >            self.list_of_args.append(args)
619 >        pass
620 >
621 >        return
622 >
623 >
624 >    def jobSplittingForScript(self):#CarlosDaniele
625 >        """
626 >        Perform job splitting based on number of job
627 >        """
628 >        common.logger.debug(5,'Splitting per job')
629 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
630 >
631 >        self.total_number_of_jobs = self.theNumberOfJobs
632 >
633 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
634 >
635 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
636 >
637 >        # argument is seed number.$i
638 >        self.list_of_args = []
639 >        for i in range(self.total_number_of_jobs):
640 >            ## Since there is no input, any site is good
641 >           # self.jobDestination.append(["Any"])
642 >            self.jobDestination.append([""])
643 >            ## no random seed
644 >            self.list_of_args.append([str(i)])
645 >        return
646 >
647 >    def split(self, jobParams):
648 >
649 >        common.jobDB.load()
650 >        #### Fabio
651 >        njobs = self.total_number_of_jobs
652 >        arglist = self.list_of_args
653 >        # create the empty structure
654 >        for i in range(njobs):
655 >            jobParams.append("")
656 >
657 >        for job in range(njobs):
658 >            jobParams[job] = arglist[job]
659 >            # print str(arglist[job])
660 >            # print jobParams[job]
661 >            common.jobDB.setArguments(job, jobParams[job])
662 >            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
663 >            common.jobDB.setDestination(job, self.jobDestination[job])
664 >
665 >        common.jobDB.save()
666 >        return
667 >
668 >    def getJobTypeArguments(self, nj, sched):
669 >        result = ''
670 >        for i in common.jobDB.arguments(nj):
671 >            result=result+str(i)+" "
672 >        return result
673 >
674 >    def numberOfJobs(self):
675 >        # Fabio
676 >        return self.total_number_of_jobs
677  
678      def getTarBall(self, exe):
679          """
680          Return the TarBall with lib and exe
681          """
682 <        
682 >
683          # if it exist, just return it
684 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
684 >        #
685 >        # Marco. Let's start to use relative path for Boss XML files
686 >        #
687 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
688          if os.path.exists(self.tgzNameWithPath):
689              return self.tgzNameWithPath
690  
# Line 291 | Line 698 | class Cmssw(JobType):
698          # First of all declare the user Scram area
699          swArea = self.scram.getSWArea_()
700          #print "swArea = ", swArea
701 <        swVersion = self.scram.getSWVersion()
702 <        #print "swVersion = ", swVersion
701 >        # swVersion = self.scram.getSWVersion()
702 >        # print "swVersion = ", swVersion
703          swReleaseTop = self.scram.getReleaseTop_()
704          #print "swReleaseTop = ", swReleaseTop
705 <        
705 >
706          ## check if working area is release top
707          if swReleaseTop == '' or swArea == swReleaseTop:
708              return
709  
710 <        filesToBeTarred = []
711 <        ## First find the executable
712 <        if (self.executable != ''):
713 <            exeWithPath = self.scram.findFile_(executable)
714 < #           print exeWithPath
715 <            if ( not exeWithPath ):
716 <                raise CrabException('User executable '+executable+' not found')
717 <
718 <            ## then check if it's private or not
719 <            if exeWithPath.find(swReleaseTop) == -1:
720 <                # the exe is private, so we must ship
721 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
722 <                path = swArea+'/'
723 <                exe = string.replace(exeWithPath, path,'')
724 <                filesToBeTarred.append(exe)
725 <                pass
726 <            else:
727 <                # the exe is from release, we'll find it on WN
728 <                pass
729 <
730 <        ## Now get the libraries: only those in local working area
731 <        libDir = 'lib'
732 <        lib = swArea+'/' +libDir
733 <        common.logger.debug(5,"lib "+lib+" to be tarred")
734 <        if os.path.exists(lib):
735 <            filesToBeTarred.append(libDir)
736 <
737 <        ## Now check if the Data dir is present
738 <        dataDir = 'src/Data/'
739 <        if os.path.isdir(swArea+'/'+dataDir):
740 <            filesToBeTarred.append(dataDir)
741 <
742 <        ## Create the tar-ball
743 <        if len(filesToBeTarred)>0:
744 <            cwd = os.getcwd()
745 <            os.chdir(swArea)
746 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
747 <            for line in filesToBeTarred:
748 <                tarcmd = tarcmd + line + ' '
749 <            cout = runCommand(tarcmd)
750 <            if not cout:
751 <                raise CrabException('Could not create tar-ball')
752 <            os.chdir(cwd)
753 <        else:
754 <            common.logger.debug(5,"No files to be to be tarred")
755 <        
710 >        import tarfile
711 >        try: # create tar ball
712 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
713 >            ## First find the executable
714 >            if (self.executable != ''):
715 >                exeWithPath = self.scram.findFile_(executable)
716 >                if ( not exeWithPath ):
717 >                    raise CrabException('User executable '+executable+' not found')
718 >
719 >                ## then check if it's private or not
720 >                if exeWithPath.find(swReleaseTop) == -1:
721 >                    # the exe is private, so we must ship
722 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
723 >                    path = swArea+'/'
724 >                    # distinguish case when script is in user project area or given by full path somewhere else
725 >                    if exeWithPath.find(path) >= 0 :
726 >                        exe = string.replace(exeWithPath, path,'')
727 >                        tar.add(path+exe,exe)
728 >                    else :
729 >                        tar.add(exeWithPath,os.path.basename(executable))
730 >                    pass
731 >                else:
732 >                    # the exe is from release, we'll find it on WN
733 >                    pass
734 >
735 >            ## Now get the libraries: only those in local working area
736 >            libDir = 'lib'
737 >            lib = swArea+'/' +libDir
738 >            common.logger.debug(5,"lib "+lib+" to be tarred")
739 >            if os.path.exists(lib):
740 >                tar.add(lib,libDir)
741 >
742 >            ## Now check if module dir is present
743 >            moduleDir = 'module'
744 >            module = swArea + '/' + moduleDir
745 >            if os.path.isdir(module):
746 >                tar.add(module,moduleDir)
747 >
748 >            ## Now check if any data dir(s) is present
749 >            swAreaLen=len(swArea)
750 >            for root, dirs, files in os.walk(swArea):
751 >                if "data" in dirs:
752 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
753 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
754 >
755 >            ### Removed ProdAgent Api dependencies ###
756 >            ### Add ProdAgent dir to tar
757 >            #paDir = 'ProdAgentApi'
758 >            #pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
759 >            #if os.path.isdir(pa):
760 >            #    tar.add(pa,paDir)
761 >
762 >            ## Add ProdCommon dir to tar
763 >            prodcommonDir = 'ProdCommon'
764 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
765 >            if os.path.isdir(prodcommonPath):
766 >                tar.add(prodcommonPath,prodcommonDir)
767 >
768 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
769 >            tar.close()
770 >        except :
771 >            raise CrabException('Could not create tar-ball')
772 >
773 >        ## check for tarball size
774 >        tarballinfo = os.stat(self.tgzNameWithPath)
775 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
776 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
777 >
778 >        ## create tar-ball with ML stuff
779 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
780 >        try:
781 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
782 >            path=os.environ['CRABDIR'] + '/python/'
783 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
784 >                tar.add(path+file,file)
785 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
786 >            tar.close()
787 >        except :
788 >            raise CrabException('Could not create ML files tar-ball')
789 >
790          return
791 <        
791 >
792 >    def additionalInputFileTgz(self):
793 >        """
794 >        Put all additional files into a tar ball and return its name
795 >        """
796 >        import tarfile
797 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
798 >        tar = tarfile.open(tarName, "w:gz")
799 >        for file in self.additional_inbox_files:
800 >            tar.add(file,string.split(file,'/')[-1])
801 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
802 >        tar.close()
803 >        return tarName
804 >
805      def wsSetupEnvironment(self, nj):
806          """
807          Returns part of a job script which prepares
808          the execution environment for the job 'nj'.
809          """
810          # Prepare JobType-independent part
811 <        txt = self.wsSetupCMSEnvironment_()
811 >        txt = ''
812 >        txt += 'echo ">>> setup environment"\n'
813 >        txt += 'if [ $middleware == LCG ]; then \n'
814 >        txt += self.wsSetupCMSLCGEnvironment_()
815 >        txt += 'elif [ $middleware == OSG ]; then\n'
816 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
817 >        txt += '    if [ ! $? == 0 ] ;then\n'
818 >        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
819 >        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
820 >        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
821 >        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
822 >        txt += '        exit 1\n'
823 >        txt += '    fi\n'
824 >        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
825 >        txt += '\n'
826 >        txt += '    echo "Change to working directory: $WORKING_DIR"\n'
827 >        txt += '    cd $WORKING_DIR\n'
828 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
829 >        txt += self.wsSetupCMSOSGEnvironment_()
830 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
831 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
832 >        txt += 'fi\n'
833  
834          # Prepare JobType-specific part
835          scram = self.scram.commandName()
836          txt += '\n\n'
837 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
837 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
838 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
839          txt += scram+' project CMSSW '+self.version+'\n'
840          txt += 'status=$?\n'
841          txt += 'if [ $status != 0 ] ; then\n'
842 <        txt += '   echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
843 <        txt += '   echo "JOB_EXIT_STATUS = 5"\n'
844 <        txt += '   echo "SanityCheckCode = 5" | tee -a $RUNTIME_AREA/$repo\n'
845 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
846 <        txt += '   exit 5 \n'
842 >        txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
843 >        txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
844 >        txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
845 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
846 >        txt += '    if [ $middleware == OSG ]; then \n'
847 >        txt += '        cd $RUNTIME_AREA\n'
848 >        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
849 >        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
850 >        txt += '        /bin/rm -rf $WORKING_DIR\n'
851 >        txt += '        if [ -d $WORKING_DIR ] ;then\n'
852 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
853 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
854 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
855 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
856 >        txt += '        fi\n'
857 >        txt += '    fi \n'
858 >        txt += '    exit 1 \n'
859          txt += 'fi \n'
372        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
860          txt += 'cd '+self.version+'\n'
861 +        ########## FEDE FOR DBS2 ######################
862 +        txt += 'SOFTWARE_DIR=`pwd`\n'
863 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
864 +        ###############################################
865          ### needed grep for bug in scramv1 ###
866          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
376
867          # Handle the arguments:
868          txt += "\n"
869 <        txt += "## ARGUMNETS: $1 Job Number\n"
380 <        # txt += "## ARGUMNETS: $2 First Event for this job\n"
381 <        # txt += "## ARGUMNETS: $3 Max Event for this job\n"
869 >        txt += "## number of arguments (first argument always jobnumber)\n"
870          txt += "\n"
871 <        txt += "narg=$#\n"
384 <        txt += "if [ $narg -lt 1 ]\n"
871 >        txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
872          txt += "then\n"
873 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
874 <        txt += '    echo "JOB_EXIT_STATUS = 1"\n'
875 <        txt += '    echo "SanityCheckCode = 1" | tee -a $RUNTIME_AREA/$repo\n'
873 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
874 >        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
875 >        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
876          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
877 +        txt += '    if [ $middleware == OSG ]; then \n'
878 +        txt += '        cd $RUNTIME_AREA\n'
879 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
880 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
881 +        txt += '        /bin/rm -rf $WORKING_DIR\n'
882 +        txt += '        if [ -d $WORKING_DIR ] ;then\n'
883 +        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
884 +        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
885 +        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
886 +        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
887 +        txt += '        fi\n'
888 +        txt += '    fi \n'
889          txt += "    exit 1\n"
890          txt += "fi\n"
891          txt += "\n"
393        txt += "NJob=$1\n"
394        # txt += "FirstEvent=$2\n"
395        # txt += "MaxEvents=$3\n"
892  
893          # Prepare job-specific part
894          job = common.job_list[nj]
895 <        pset = os.path.basename(job.configFilename())
896 <        txt += '\n'
897 <        txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
898 <        # txt += 'if [ -e $RUNTIME_AREA/orcarc_$CE ] ; then\n'
899 <        # txt += '  cat $RUNTIME_AREA/orcarc_$CE .orcarc >> .orcarc_tmp\n'
900 <        # txt += '  mv .orcarc_tmp .orcarc\n'
901 <        # txt += 'fi\n'
902 <        # txt += 'if [ -e $RUNTIME_AREA/init_$CE.sh ] ; then\n'
903 <        # txt += '  cp $RUNTIME_AREA/init_$CE.sh init.sh\n'
904 <        # txt += 'fi\n'
895 >        ### FEDE FOR DBS OUTPUT PUBLICATION
896 >        if (self.datasetPath):
897 >            txt += '\n'
898 >            txt += 'DatasetPath='+self.datasetPath+'\n'
899 >
900 >            datasetpath_split = self.datasetPath.split("/")
901 >
902 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
903 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
904 >            txt += 'ApplicationFamily=cmsRun\n'
905 >
906 >        else:
907 >            txt += 'DatasetPath=MCDataTier\n'
908 >            txt += 'PrimaryDataset=null\n'
909 >            txt += 'DataTier=null\n'
910 >            txt += 'ApplicationFamily=MCDataTier\n'
911 >        if self.pset != None: #CarlosDaniele
912 >            pset = os.path.basename(job.configFilename())
913 >            txt += '\n'
914 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
915 >            if (self.datasetPath): # standard job
916 >                txt += 'InputFiles=${args[1]}\n'
917 >                txt += 'MaxEvents=${args[2]}\n'
918 >                txt += 'SkipEvents=${args[3]}\n'
919 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
920 >                txt += 'sed "s#\'INPUTFILE\'#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
921 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
922 >                txt += 'sed "s#int32 input = 0#int32 input = $MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
923 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
924 >                txt += 'sed "s#uint32 skipEvents = 0#uint32 skipEvents = $SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
925 >            else:  # pythia like job
926 >                seedIndex=1
927 >                if (self.firstRun):
928 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
929 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
930 >                    txt += 'sed "s#uint32 firstRun = 0#uint32 firstRun = $FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
931 >                    seedIndex=seedIndex+1
932 >
933 >                if (self.sourceSeed):
934 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
935 >                    txt += 'sed "s#uint32 sourceSeed = 0#uint32 sourceSeed = $Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
936 >                    seedIndex=seedIndex+1
937 >                    ## the following seeds are not always present
938 >                    if (self.sourceSeedVtx):
939 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
940 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
941 >                        txt += 'sed "s#uint32 VtxSmeared = 0#uint32 VtxSmeared = $VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
942 >                        seedIndex += 1
943 >                    if (self.sourceSeedG4):
944 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
945 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
946 >                        txt += 'sed "s#uint32 g4SimHits = 0#uint32 g4SimHits = $G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
947 >                        seedIndex += 1
948 >                    if (self.sourceSeedMix):
949 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
950 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
951 >                        txt += 'sed "s#uint32 mix = 0#uint32 mix = $mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
952 >                        seedIndex += 1
953 >                    pass
954 >                pass
955 >            txt += 'mv -f '+pset+' pset.cfg\n'
956  
957          if len(self.additional_inbox_files) > 0:
958 <            for file in self.additional_inbox_files:
959 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
960 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
961 <                txt += '   chmod +x '+file+'\n'
962 <                txt += 'fi\n'
963 <            pass
964 <
965 <        # txt += '\n'
966 <        # txt += 'chmod +x ./init.sh\n'
967 <        # txt += './init.sh\n'
968 <        # txt += 'exitStatus=$?\n'
969 <        # txt += 'if [ $exitStatus != 0 ] ; then\n'
970 <        # txt += '  echo "SET_EXE_ENV 1 ==> ERROR StageIn init script failed"\n'
971 <        # txt += '  echo "JOB_EXIT_STATUS = $exitStatus" \n'
972 <        # txt += '  echo "SanityCheckCode = $exitStatus" | tee -a $RUNTIME_AREA/$repo\n'
973 <        # txt += '  dumpStatus $RUNTIME_AREA/$repo\n'
974 <        # txt += '  exit $exitStatus\n'
975 <        # txt += 'fi\n'
976 <        # txt += "echo 'SET_EXE_ENV 0 ==> job setup ok'\n"
977 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
978 <
979 <        # txt += 'echo "FirstEvent=$FirstEvent" >> .orcarc\n'
980 <        # txt += 'echo "MaxEvents=$MaxEvents" >> .orcarc\n'
981 <        # if self.ML:
982 <        #     txt += 'echo "MonalisaJobId=$NJob" >> .orcarc\n'
958 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
959 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
960 >            txt += 'fi\n'
961 >            pass
962 >
963 >        if self.pset != None: #CarlosDaniele
964 >            txt += '\n'
965 >            txt += 'echo "***** cat pset.cfg *********"\n'
966 >            txt += 'cat pset.cfg\n'
967 >            txt += 'echo "****** end pset.cfg ********"\n'
968 >            txt += '\n'
969 >            ### FEDE FOR DBS OUTPUT PUBLICATION
970 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
971 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
972 >            ##############
973 >            txt += '\n'
974 >        return txt
975 >
976 >    def wsBuildExe(self, nj=0):
977 >        """
978 >        Put in the script the commands to build an executable
979 >        or a library.
980 >        """
981 >
982 >        txt = ""
983 >
984 >        if os.path.isfile(self.tgzNameWithPath):
985 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
986 >            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
987 >            txt += 'untar_status=$? \n'
988 >            txt += 'if [ $untar_status -ne 0 ]; then \n'
989 >            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
990 >            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
991 >            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
992 >            txt += '   if [ $middleware == OSG ]; then \n'
993 >            txt += '       cd $RUNTIME_AREA\n'
994 >            txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
995 >            txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
996 >            txt += '       /bin/rm -rf $WORKING_DIR\n'
997 >            txt += '       if [ -d $WORKING_DIR ] ;then\n'
998 >            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
999 >            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
1000 >            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
1001 >            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1002 >            txt += '       fi\n'
1003 >            txt += '   fi \n'
1004 >            txt += '   \n'
1005 >            txt += '   exit 1 \n'
1006 >            txt += 'else \n'
1007 >            txt += '   echo "Successful untar" \n'
1008 >            txt += 'fi \n'
1009 >            txt += '\n'
1010 >            #### Removed ProdAgent API dependencies
1011 >            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
1012 >            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1013 >            #### FEDE FOR DBS OUTPUT PUBLICATION
1014 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
1015 >            txt += 'else\n'
1016 >            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1017 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1018 >            ###################
1019 >            txt += 'fi\n'
1020 >            txt += '\n'
1021 >
1022 >            pass
1023  
437        txt += '\n'
438        txt += 'echo "***** cat pset.cfg *********"\n'
439        txt += 'cat pset.cfg\n'
440        txt += 'echo "****** end pset.cfg ********"\n'
1024          return txt
1025  
1026      def modifySteeringCards(self, nj):
1027          """
1028 <        modify the card provided by the user,
1028 >        modify the card provided by the user,
1029          writing a new card into share dir
1030          """
1031 <        
1031 >
1032      def executableName(self):
1033 <        return self.executable
1033 >        if self.scriptExe: #CarlosDaniele
1034 >            return "sh "
1035 >        else:
1036 >            return self.executable
1037  
1038      def executableArgs(self):
1039 <        return "-p pset.cfg"
1039 >        if self.scriptExe:#CarlosDaniele
1040 >            return   self.scriptExe + " $NJob"
1041 >        else:
1042 >            # if >= CMSSW_1_5_X, add -j crab_fjr.xml
1043 >            version_array = self.scram.getSWVersion().split('_')
1044 >            major = 0
1045 >            minor = 0
1046 >            try:
1047 >                major = int(version_array[1])
1048 >                minor = int(version_array[2])
1049 >            except:
1050 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1051 >                raise CrabException(msg)
1052 >            if major >= 1 and minor >= 5 :
1053 >                return " -j " + self.fjrFileName + " -p pset.cfg"
1054 >            else:
1055 >                return " -p pset.cfg"
1056  
1057      def inputSandbox(self, nj):
1058          """
1059          Returns a list of filenames to be put in JDL input sandbox.
1060          """
1061          inp_box = []
1062 <        # dict added to delete duplicate from input sandbox file list
1063 <        seen = {}
1062 >        # # dict added to delete duplicate from input sandbox file list
1063 >        # seen = {}
1064          ## code
1065          if os.path.isfile(self.tgzNameWithPath):
1066              inp_box.append(self.tgzNameWithPath)
1067 +        if os.path.isfile(self.MLtgzfile):
1068 +            inp_box.append(self.MLtgzfile)
1069          ## config
1070 <        inp_box.append(common.job_list[nj].configFilename())
1070 >        if not self.pset is None:
1071 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1072          ## additional input files
1073 <        for file in self.additional_inbox_files:
1074 <            inp_box.append(common.work_space.cwdDir()+file)
470 <        #print "sono inputSandbox, inp_box = ", inp_box
1073 >        tgz = self.additionalInputFileTgz()
1074 >        inp_box.append(tgz)
1075          return inp_box
1076  
1077      def outputSandbox(self, nj):
# Line 476 | Line 1080 | class Cmssw(JobType):
1080          """
1081          out_box = []
1082  
479        stdout=common.job_list[nj].stdout()
480        stderr=common.job_list[nj].stderr()
481
1083          ## User Declared output files
1084 <        for out in self.output_file:
1085 <            n_out = nj + 1
1084 >        for out in (self.output_file+self.output_file_sandbox):
1085 >            n_out = nj + 1
1086              out_box.append(self.numberFile_(out,str(n_out)))
1087          return out_box
487        return []
1088  
1089      def prepareSteeringCards(self):
1090          """
1091          Make initial modifications of the user's steering card file.
1092          """
493        infile = open(self.pset,'r')
494            
495        outfile = open(common.work_space.jobDir()+self.name()+'.cfg', 'w')
496          
497        outfile.write('\n\n##### The following cards have been created by CRAB: DO NOT TOUCH #####\n')
498
499        outfile.write('InputCollections=/System/'+self.owner+'/'+self.dataset+'/'+self.dataset+'\n')
500
501        infile.close()
502        outfile.close()
1093          return
1094  
1095      def wsRenameOutput(self, nj):
# Line 508 | Line 1098 | class Cmssw(JobType):
1098          """
1099  
1100          txt = '\n'
1101 <        file_list = ''
1102 <        for fileWithSuffix in self.output_file:
1101 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1102 >        txt += 'echo ">>> current directory content:"\n'
1103 >        txt += 'ls \n'
1104 >        txt += '\n'
1105 >
1106 >        txt += 'output_exit_status=0\n'
1107 >
1108 >        for fileWithSuffix in (self.output_file_sandbox):
1109              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
514            file_list=file_list+output_file_num+','
1110              txt += '\n'
1111 <            txt += 'ls \n'
1111 >            txt += '# check output file\n'
1112 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1113 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1114 >            txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1115 >            txt += 'else\n'
1116 >            txt += '    exit_status=60302\n'
1117 >            txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1118 >            if common.scheduler.name() == 'CONDOR_G':
1119 >                txt += '    if [ $middleware == OSG ]; then \n'
1120 >                txt += '        echo "prepare dummy output file"\n'
1121 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1122 >                txt += '    fi \n'
1123 >            txt += 'fi\n'
1124 >
1125 >        for fileWithSuffix in (self.output_file):
1126 >            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1127              txt += '\n'
1128 <            txt += 'ls '+fileWithSuffix+'\n'
1129 <            txt += 'exe_result=$?\n'
1130 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1131 <            txt += '   echo "ERROR: No output file to manage"\n'
1132 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1133 <            txt += '   echo "SanityCheckCode = $exe_result" | tee -a $RUNTIME_AREA/$repo\n'
1134 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
1135 <            txt += '   exit $exe_result \n'
1128 >            txt += '# check output file\n'
1129 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1130 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1131 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1132 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1133 >            else:
1134 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1135 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1136              txt += 'else\n'
1137 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1137 >            txt += '    exit_status=60302\n'
1138 >            txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1139 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1140 >            txt += '    output_exit_status=$exit_status\n'
1141 >            if common.scheduler.name() == 'CONDOR_G':
1142 >                txt += '    if [ $middleware == OSG ]; then \n'
1143 >                txt += '        echo "prepare dummy output file"\n'
1144 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1145 >                txt += '    fi \n'
1146              txt += 'fi\n'
1147 <            txt += 'cd $RUNTIME_AREA\n'
1148 <                      
1149 <            pass
1150 <      
1151 <        file_list=file_list[:-1]
1152 <        txt += 'file_list='+file_list+'\n'
1147 >        file_list = []
1148 >        for fileWithSuffix in (self.output_file):
1149 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1150 >
1151 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1152 >        txt += '\n'
1153 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1154 >        txt += 'echo ">>> current directory content:"\n'
1155 >        txt += 'ls \n'
1156 >        txt += '\n'
1157 >        txt += 'cd $RUNTIME_AREA\n'
1158 >        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1159          return txt
1160  
1161      def numberFile_(self, file, txt):
# Line 542 | Line 1166 | class Cmssw(JobType):
1166          # take away last extension
1167          name = p[0]
1168          for x in p[1:-1]:
1169 <           name=name+"."+x
1169 >            name=name+"."+x
1170          # add "_txt"
1171          if len(p)>1:
1172 <          ext = p[len(p)-1]
1173 <          #result = name + '_' + str(txt) + "." + ext
1174 <          result = name + '_' + txt + "." + ext
1175 <        else:
1176 <          #result = name + '_' + str(txt)
553 <          result = name + '_' + txt
554 <        
1172 >            ext = p[len(p)-1]
1173 >            result = name + '_' + txt + "." + ext
1174 >        else:
1175 >            result = name + '_' + txt
1176 >
1177          return result
1178  
1179 <    def getRequirements(self):
1179 >    def getRequirements(self, nj=[]):
1180          """
1181 <        return job requirements to add to jdl files
1181 >        return job requirements to add to jdl files
1182          """
1183          req = ''
1184 <        if common.analisys_common_info['sites']:
1185 <            if common.analisys_common_info['sw_version']:
1186 <                req='Member("VO-cms-' + \
1187 <                     common.analisys_common_info['sw_version'] + \
1188 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1189 <            if len(common.analisys_common_info['sites'])>0:
1190 <                req = req + ' && ('
1191 <                for i in range(len(common.analisys_common_info['sites'])):
1192 <                    req = req + 'other.GlueCEInfoHostName == "' \
1193 <                         + common.analisys_common_info['sites'][i] + '"'
1194 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1195 <                        req = req + ' || '
1196 <            req = req + ')'
575 <        #print "req = ", req
1184 >        if self.version:
1185 >            req='Member("VO-cms-' + \
1186 >                 self.version + \
1187 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1188 >        ## SL add requirement for OS version only if SL4
1189 >        #reSL4 = re.compile( r'slc4' )
1190 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1191 >            req+=' && Member("VO-cms-' + \
1192 >                 self.executable_arch + \
1193 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1194 >
1195 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1196 >
1197          return req
1198 +
1199 +    def configFilename(self):
1200 +        """ return the config filename """
1201 +        return self.name()+'.cfg'
1202 +
1203 +    def wsSetupCMSOSGEnvironment_(self):
1204 +        """
1205 +        Returns part of a job script which is prepares
1206 +        the execution environment and which is common for all CMS jobs.
1207 +        """
1208 +        txt = '    echo ">>> setup CMS OSG environment:"\n'
1209 +        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1210 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1211 +        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1212 +        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1213 +        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1214 +        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1215 +        txt += '    else\n'
1216 +        txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1217 +        txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1218 +        txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1219 +        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1220 +        txt += '\n'
1221 +        txt += '        cd $RUNTIME_AREA\n'
1222 +        txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1223 +        txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1224 +        txt += '        /bin/rm -rf $WORKING_DIR\n'
1225 +        txt += '        if [ -d $WORKING_DIR ] ;then\n'
1226 +        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1227 +        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1228 +        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1229 +        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1230 +        txt += '        fi\n'
1231 +        txt += '\n'
1232 +        txt += '        exit 1\n'
1233 +        txt += '    fi\n'
1234 +        txt += '\n'
1235 +        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1236 +        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1237 +
1238 +        return txt
1239 +
1240 +    ### OLI_DANIELE
1241 +    def wsSetupCMSLCGEnvironment_(self):
1242 +        """
1243 +        Returns part of a job script which is prepares
1244 +        the execution environment and which is common for all CMS jobs.
1245 +        """
1246 +        txt = '    echo ">>> setup CMS LCG environment:"\n'
1247 +        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1248 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1249 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1250 +        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1251 +        txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1252 +        txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1253 +        txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1254 +        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1255 +        txt += '        exit 1\n'
1256 +        txt += '    else\n'
1257 +        txt += '        echo "Sourcing environment... "\n'
1258 +        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1259 +        txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1260 +        txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1261 +        txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1262 +        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1263 +        txt += '            exit 1\n'
1264 +        txt += '        fi\n'
1265 +        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1266 +        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1267 +        txt += '        result=$?\n'
1268 +        txt += '        if [ $result -ne 0 ]; then\n'
1269 +        txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1270 +        txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1271 +        txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1272 +        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1273 +        txt += '            exit 1\n'
1274 +        txt += '        fi\n'
1275 +        txt += '    fi\n'
1276 +        txt += '    \n'
1277 +        txt += '    echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1278 +        return txt
1279 +
1280 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1281 +    def modifyReport(self, nj):
1282 +        """
1283 +        insert the part of the script that modifies the FrameworkJob Report
1284 +        """
1285 +
1286 +        txt = ''
1287 +        try:
1288 +            publish_data = int(self.cfg_params['USER.publish_data'])
1289 +        except KeyError:
1290 +            publish_data = 0
1291 +        if (publish_data == 1):
1292 +            txt += 'echo ">>> Modify Job Report:" \n'
1293 +            ################ FEDE FOR DBS2 #############################################
1294 +            #txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1295 +            txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1296 +            #############################################################################
1297 +
1298 +            txt += 'if [ -z "$SE" ]; then\n'
1299 +            txt += '    SE="" \n'
1300 +            txt += 'fi \n'
1301 +            txt += 'if [ -z "$SE_PATH" ]; then\n'
1302 +            txt += '    SE_PATH="" \n'
1303 +            txt += 'fi \n'
1304 +            txt += 'echo "SE = $SE"\n'
1305 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1306 +
1307 +            processedDataset = self.cfg_params['USER.publish_data_name']
1308 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1309 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1310 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1311 +            #### FEDE: added slash in LFN ##############
1312 +            txt += '    FOR_LFN=/copy_problems/ \n'
1313 +            txt += 'else \n'
1314 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1315 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1316 +            txt += '    FOR_LFN=/store$tmp \n'
1317 +            txt += 'fi \n'
1318 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1319 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1320 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1321 +            txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1322 +            txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1323 +
1324 +            txt += 'modifyReport_result=$?\n'
1325 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1326 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1327 +            txt += '    exit_status=1\n'
1328 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1329 +            txt += 'else\n'
1330 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1331 +            txt += 'fi\n'
1332 +        else:
1333 +            txt += 'echo "no data publication required"\n'
1334 +        return txt
1335 +
1336 +    def cleanEnv(self):
1337 +        txt = ''
1338 +        txt += 'if [ $middleware == OSG ]; then\n'
1339 +        txt += '    cd $RUNTIME_AREA\n'
1340 +        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1341 +        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1342 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1343 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1344 +        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1345 +        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1346 +        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1347 +        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1348 +        txt += '    fi\n'
1349 +        txt += 'fi\n'
1350 +        txt += '\n'
1351 +        return txt
1352 +
1353 +    def setParam_(self, param, value):
1354 +        self._params[param] = value
1355 +
1356 +    def getParams(self):
1357 +        return self._params
1358 +
1359 +    def uniquelist(self, old):
1360 +        """
1361 +        remove duplicates from a list
1362 +        """
1363 +        nd={}
1364 +        for e in old:
1365 +            nd[e]=0
1366 +        return nd.keys()
1367 +
1368 +
1369 +    def checkOut(self, limit):
1370 +        """
1371 +        check the dimension of the output files
1372 +        """
1373 +        txt = 'echo ">>> Starting output sandbox limit check :"\n'
1374 +        allOutFiles = ""
1375 +        listOutFiles = []
1376 +        txt += 'stdoutFile=`ls *stdout` \n'
1377 +        txt += 'stderrFile=`ls *stderr` \n'
1378 +        if (self.return_data == 1):
1379 +            for fileOut in (self.output_file+self.output_file_sandbox):
1380 +                allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob') + " $stdoutFile $stderrFile"
1381 +        else:            
1382 +            for fileOut in (self.output_file_sandbox):
1383 +                txt += 'echo " '+fileOut+'";\n'
1384 +                allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob') + " $stdoutFile $stderrFile"
1385 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1386 +        txt += 'ls -gGhrta;\n'
1387 +        txt += 'sum=0;\n'
1388 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1389 +        txt += '    if [ -e $file ]; then\n'
1390 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1391 +        txt += '        sum=`expr $sum + $tt`\n'
1392 +        txt += '    else\n'
1393 +        txt += '        echo "WARNING: output file $file not found!"\n'
1394 +        txt += '    fi\n'
1395 +        txt += 'done\n'
1396 +        txt += 'echo "Total Output dimension: $sum";\n'
1397 +        txt += 'limit='+str(limit)+';\n'
1398 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1399 +        txt += 'if [ $limit -lt $sum ]; then\n'
1400 +        txt += '    echo "WARNING: output files have to big size - something will be lost;"\n'
1401 +        txt += '    echo "         checking the output file sizes..."\n'
1402 +        txt += '    tot=0;\n'
1403 +        txt += '    for filefile in '+str(allOutFiles)+' ; do\n'
1404 +        txt += '        dimFile=`ls -gGrta $filefile | awk \'{ print $3 }\';`\n'
1405 +        txt += '        tot=`expr $tot + $tt`;\n'
1406 +        txt += '        if [ $limit -lt $dimFile ]; then\n'
1407 +        txt += '            echo "deleting file: $filefile";\n'
1408 +        txt += '            rm -f $filefile\n'
1409 +        txt += '        elif [ $limit -lt $tot ]; then\n'
1410 +        txt += '            echo "deleting file: $filefile";\n'
1411 +        txt += '            rm -f $filefile\n'
1412 +        txt += '        else\n'
1413 +        txt += '            echo "saving file: $filefile"\n'
1414 +        txt += '        fi\n'
1415 +        txt += '    done\n'
1416 +
1417 +        txt += '    ls -agGhrt;\n'
1418 +        txt += '    echo "WARNING: output files are too big in dimension: can not put in the output_sandbox.";\n'
1419 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1420 +        txt += '    exit_status=70000;\n'
1421 +        txt += 'else'
1422 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1423 +        txt += 'fi\n'
1424 +        txt += 'echo "Ending output sandbox limit check"\n'
1425 +        return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines