
Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.8 by gutsche, Wed Jun 14 20:52:32 2006 UTC vs.
Revision 1.169 by spiga, Tue Apr 1 14:53:36 2008 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DBSInfo_EDM
9 import DataDiscovery_EDM
10 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
16 <        self.analisys_common_info = {}
17 <        # Marco.
16 >        self.argsList = []
17 >
18          self._params = {}
19          self.cfg_params = cfg_params
20 +        # init BlackWhiteListParser
21 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
22 +
23 +        self.MaxTarBallSize = float(self.cfg_params.get('EDG.maxtarballsize',9.5))
24 +
25 +        # number of jobs requested to be created; caps the job splitting
26 +        self.ncjobs = ncjobs
27 +
28          log = common.logger
29 <        
29 >
30          self.scram = Scram.Scram(cfg_params)
27        scramArea = ''
31          self.additional_inbox_files = []
32          self.scriptExe = ''
33          self.executable = ''
34 +        self.executable_arch = self.scram.getArch()
35          self.tgz_name = 'default.tgz'
36 +        self.additional_tgz_name = 'additional.tgz'
37 +        self.scriptName = 'CMSSW.sh'
38 +        self.pset = ''      #script use case Da
39 +        self.datasetPath = '' #script use case Da
40  
41 +        # set FJR file name
42 +        self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 +
46 +        #
47 +        # Try to block creation in case of arch/version mismatch
48 +        #
49 +
50 + #        a = string.split(self.version, "_")
51 + #
52 + #        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 + #            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
54 + #            common.logger.message(msg)
55 + #        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
56 + #            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
57 + #            raise CrabException(msg)
58 + #
59 +
60          self.setParam_('application', self.version)
36        common.analisys_common_info['sw_version'] = self.version
37        ### FEDE
38        common.analisys_common_info['copy_input_data'] = 0
39        common.analisys_common_info['events_management'] = 1
61  
62          ### collect Data cards
63 <        try:
64 <         #   self.owner = cfg_params['CMSSW.owner']
65 <         #   log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
45 <         #   self.dataset = cfg_params['CMSSW.dataset']
46 <            self.datasetPath = cfg_params['CMSSW.datasetpath']
47 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+self.datasetPath)
48 <        except KeyError:
49 <        #    msg = "Error: owner and/or dataset not defined "
50 <            msg = "Error: datasetpath not defined "  
63 >
64 >        if not cfg_params.has_key('CMSSW.datasetpath'):
65 >            msg = "Error: datasetpath not defined "
66              raise CrabException(msg)
67 +        tmp =  cfg_params['CMSSW.datasetpath']
68 +        log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
69 +        if string.lower(tmp)=='none':
70 +            self.datasetPath = None
71 +            self.selectNoInput = 1
72 +        else:
73 +            self.datasetPath = tmp
74 +            self.selectNoInput = 0
75  
76          # ML monitoring
77          # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
78 <        datasetpath_split = self.datasetPath.split("/")
79 <        self.setParam_('dataset', datasetpath_split[1])
80 <        self.setParam_('owner', datasetpath_split[-1])
81 <        self.setTaskid_()
82 <        self.setParam_('taskId', self.cfg_params['taskId'])
83 <
84 <
78 >        if not self.datasetPath:
79 >            self.setParam_('dataset', 'None')
80 >            self.setParam_('owner', 'None')
81 >        else:
82 >            ## SL what is supposed to fail here?
83 >            try:
84 >                datasetpath_split = self.datasetPath.split("/")
85 >                # standard style
86 >                self.setParam_('datasetFull', self.datasetPath)
87 >                self.setParam_('dataset', datasetpath_split[1])
88 >                self.setParam_('owner', datasetpath_split[2])
89 >            except:
90 >                self.setParam_('dataset', self.datasetPath)
91 >                self.setParam_('owner', self.datasetPath)
92  
93 +        self.setParam_('taskId', common._db.queryTask('name')) ## new BL--DS
94  
95          self.dataTiers = []
65 #       try:
66 #           tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
67 #           for tmp in tmpDataTiers:
68 #               tmp=string.strip(tmp)
69 #               self.dataTiers.append(tmp)
70 #               pass
71 #           pass
72 #       except KeyError:
73 #           pass
74 #       log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
96  
97          ## now the application
98 <        try:
99 <            self.executable = cfg_params['CMSSW.executable']
100 <            self.setParam_('exe', self.executable)
80 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
81 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
82 <            log.debug(3,msg)
83 <        except KeyError:
84 <            self.executable = 'cmsRun'
85 <            self.setParam_('exe', self.executable)
86 <            msg = "User executable not defined. Use cmsRun"
87 <            log.debug(3,msg)
88 <            pass
98 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
99 >        self.setParam_('exe', self.executable)
100 >        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102 <        try:
103 <            self.pset = cfg_params['CMSSW.pset']
104 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
102 >        if not cfg_params.has_key('CMSSW.pset'):
103 >            raise CrabException("PSet file missing. Cannot run cmsRun ")
104 >        self.pset = cfg_params['CMSSW.pset']
105 >        log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
106 >        if self.pset.lower() != 'none' :
107              if (not os.path.exists(self.pset)):
108                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
109 <        except KeyError:
110 <            raise CrabException("PSet file missing. Cannot run cmsRun ")
109 >        else:
110 >            self.pset = None
111  
112          # output files
113 <        try:
114 <            self.output_file = []
113 >        ## files which must always be returned via the sandbox
114 >        self.output_file_sandbox = []
115  
116 <            tmp = cfg_params['CMSSW.output_file']
117 <            if tmp != '':
118 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
119 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
120 <                for tmp in tmpOutFiles:
121 <                    tmp=string.strip(tmp)
122 <                    self.output_file.append(tmp)
123 <                    pass
124 <            else:
125 <                log.message("No output file defined: only stdout/err will be available")
116 >        # add fjr report by default via sandbox
117 >        self.output_file_sandbox.append(self.fjrFileName)
118 >
119 >        # other output files to be returned via sandbox or copied to SE
120 >        self.output_file = []
121 >        tmp = cfg_params.get('CMSSW.output_file',None)
122 >        if tmp :
123 >            tmpOutFiles = string.split(tmp,',')
124 >            log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
125 >            for tmp in tmpOutFiles:
126 >                tmp=string.strip(tmp)
127 >                self.output_file.append(tmp)
128                  pass
129 <            pass
130 <        except KeyError:
131 <            log.message("No output file defined: only stdout/err will be available")
116 <            pass
129 >        else:
130 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
131 >        pass
132  
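As an illustration of the two lists above, assuming a hypothetical CMSSW.output_file = 'histo.root,ntuple.root': the framework job report is always shipped back through the sandbox, and the user files are appended on top of it.

    import string
    output_file_sandbox = ['crab_fjr.xml']                      # always returned via sandbox
    output_file = []
    for f in string.split('histo.root,ntuple.root', ','):       # hypothetical CMSSW.output_file value
        output_file.append(string.strip(f))
    # output_file -> ['histo.root', 'ntuple.root']
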
133          # script_exe file as additional file in inputSandbox
134 <        try:
135 <           self.scriptExe = cfg_params['USER.script_exe']
136 <           self.additional_inbox_files.append(self.scriptExe)
137 <        except KeyError:
138 <           pass
139 <        if self.scriptExe != '':
140 <           if os.path.isfile(self.scriptExe):
141 <              pass
142 <           else:
143 <              log.message("WARNING. file "+self.scriptExe+" not found")
144 <              sys.exit()
145 <                  
134 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
135 >        if self.scriptExe :
136 >           if not os.path.isfile(self.scriptExe):
137 >              msg ="ERROR. file "+self.scriptExe+" not found"
138 >              raise CrabException(msg)
139 >           self.additional_inbox_files.append(string.strip(self.scriptExe))
140 >
141 >        #CarlosDaniele
142 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
143 >           msg ="Error. script_exe  not defined"
144 >           raise CrabException(msg)
145 >
146          ## additional input files
147 <        try:
148 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
147 >        if cfg_params.has_key('USER.additional_input_files'):
148 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
149              for tmp in tmpAddFiles:
150 <                if not os.path.exists(tmp):
151 <                    raise CrabException("Additional input file not found: "+tmp)
152 <                tmp=string.strip(tmp)
153 <                self.additional_inbox_files.append(tmp)
150 >                tmp = string.strip(tmp)
151 >                dirname = ''
152 >                if not tmp[0]=="/": dirname = "."
153 >                files = []
154 >                if string.find(tmp,"*")>-1:
155 >                    files = glob.glob(os.path.join(dirname, tmp))
156 >                    if len(files)==0:
157 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
158 >                else:
159 >                    files.append(tmp)
160 >                for file in files:
161 >                    if not os.path.exists(file):
162 >                        raise CrabException("Additional input file not found: "+file)
163 >                    pass
164 >                    # fname = string.split(file, '/')[-1]
165 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
166 >                    # shutil.copyfile(file, storedFile)
167 >                    self.additional_inbox_files.append(string.strip(file))
168                  pass
169              pass
170 <        except KeyError:
171 <            pass
143 <
144 <        try:
145 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
146 <        except KeyError:
147 <            self.filesPerJob = 1
148 <
149 <        ## Max event   will be total_number_of_events ???  Daniele
150 <        try:
151 <            self.maxEv = cfg_params['CMSSW.event_per_job']
152 <        except KeyError:
153 <            self.maxEv = "-1"
154 <        ##  
155 <        try:
156 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
157 <        except KeyError:
158 <            msg = 'Must define total_number_of_events'
159 <            raise CrabException(msg)
160 <        
161 <        CEBlackList = []
162 <        try:
163 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
164 <            for tmp in tmpBad:
165 <                tmp=string.strip(tmp)
166 <                CEBlackList.append(tmp)
167 <        except KeyError:
168 <            pass
170 >            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
171 >        pass
172  
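A small sketch of the wildcard handling above, under the assumption that USER.additional_input_files = 'data/*.txt, extra.root' (hypothetical value) and that data/a.txt and data/b.txt exist: relative patterns are globbed against the working directory, every match must exist, and the matches are appended to the inbox list.

    import glob, os, string
    additional_inbox_files = []
    for tmp in string.split('data/*.txt, extra.root', ','):     # hypothetical parameter value
        tmp = string.strip(tmp)
        dirname = ''
        if not tmp[0] == "/": dirname = "."
        if string.find(tmp, "*") > -1:
            files = glob.glob(os.path.join(dirname, tmp))       # e.g. ['./data/a.txt', './data/b.txt']
        else:
            files = [tmp]
        for file in files:
            additional_inbox_files.append(string.strip(file))
    # additional_inbox_files -> ['./data/a.txt', './data/b.txt', 'extra.root']
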
173 <        self.reCEBlackList=[]
174 <        for bad in CEBlackList:
175 <            self.reCEBlackList.append(re.compile( bad ))
173 >        ## Events per job
174 >        if cfg_params.has_key('CMSSW.events_per_job'):
175 >            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
176 >            self.selectEventsPerJob = 1
177 >        else:
178 >            self.eventsPerJob = -1
179 >            self.selectEventsPerJob = 0
180  
181 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
181 >        ## number of jobs
182 >        if cfg_params.has_key('CMSSW.number_of_jobs'):
183 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
184 >            self.selectNumberOfJobs = 1
185 >        else:
186 >            self.theNumberOfJobs = 0
187 >            self.selectNumberOfJobs = 0
188  
189 <        CEWhiteList = []
190 <        try:
191 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
192 <            for tmp in tmpGood:
193 <                tmp=string.strip(tmp)
194 <                CEWhiteList.append(tmp)
182 <        except KeyError:
183 <            pass
189 >        if cfg_params.has_key('CMSSW.total_number_of_events'):
190 >            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
191 >            self.selectTotalNumberEvents = 1
192 >        else:
193 >            self.total_number_of_events = 0
194 >            self.selectTotalNumberEvents = 0
195  
196 <        #print 'CEWhiteList: ',CEWhiteList
197 <        self.reCEWhiteList=[]
198 <        for Good in CEWhiteList:
199 <            self.reCEWhiteList.append(re.compile( Good ))
196 >        if self.pset != None: #CarlosDaniele
197 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
198 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
199 >                 raise CrabException(msg)
200 >        else:
201 >             if (self.selectNumberOfJobs == 0):
202 >                 msg = 'Must specify  number_of_jobs.'
203 >                 raise CrabException(msg)
204 >
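A worked example of the check above, with hypothetical values: each of the three splitting parameters sets a selector flag, and for a pset-based task exactly two of them must be given.

    # hypothetical crab.cfg fragment satisfying the rule (two of the three parameters set):
    #   [CMSSW]
    #   total_number_of_events = 1000
    #   events_per_job         = 100
    selectTotalNumberEvents, selectEventsPerJob, selectNumberOfJobs = 1, 1, 0
    if (selectTotalNumberEvents + selectEventsPerJob + selectNumberOfJobs) != 2:
        raise Exception('Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.')
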
205 >        ## New method of dealing with seeds
206 >        self.incrementSeeds = []
207 >        self.preserveSeeds = []
208 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
209 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
210 >            for tmp in tmpList:
211 >                tmp = tmp.strip()
212 >                self.preserveSeeds.append(tmp)
213 >        if cfg_params.has_key('CMSSW.increment_seeds'):
214 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
215 >            for tmp in tmpList:
216 >                tmp = tmp.strip()
217 >                self.incrementSeeds.append(tmp)
218 >
219 >        ## Old method of dealing with seeds
220 >        ## FUTURE: This is for old CMSSW and old CRAB. Can throw exceptions after a couple of CRAB releases and then
221 >        ## remove
222 >        self.sourceSeed = cfg_params.get('CMSSW.pythia_seed',None)
223 >        if self.sourceSeed:
224 >          print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
225 >          self.incrementSeeds.append('sourceSeed')
226 >
227 >        self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
228 >        if self.sourceSeedVtx:
229 >          print "vtx_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
230 >          self.incrementSeeds.append('VtxSmeared')
231 >
232 >        self.sourceSeedG4 = cfg_params.get('CMSSW.g4_seed',None)
233 >        if self.sourceSeedG4:
234 >          print "g4_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
235 >          self.incrementSeeds.append('g4SimHits')
236 >
237 >        self.sourceSeedMix = cfg_params.get('CMSSW.mix_seed',None)
238 >        if self.sourceSeedMix:
239 >          print "mix_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
240 >          self.incrementSeeds.append('mix')
241 >
242 >        self.firstRun = cfg_params.get('CMSSW.first_run',None)
243 >
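To illustrate the seed handling above with hypothetical parameter values: preserve_seeds and increment_seeds are simple comma-separated lists of engine labels, while the deprecated per-module seed parameters are mapped onto fixed labels and folded into increment_seeds.

    cfg = {'CMSSW.preserve_seeds': 'VtxSmeared',       # hypothetical values
           'CMSSW.increment_seeds': 'g4SimHits',
           'CMSSW.pythia_seed': '12345'}               # deprecated style
    preserveSeeds  = [s.strip() for s in cfg['CMSSW.preserve_seeds'].split(',')]    # ['VtxSmeared']
    incrementSeeds = [s.strip() for s in cfg['CMSSW.increment_seeds'].split(',')]   # ['g4SimHits']
    if cfg.get('CMSSW.pythia_seed'):
        incrementSeeds.append('sourceSeed')            # old parameter becomes an incremented seed
    # incrementSeeds -> ['g4SimHits', 'sourceSeed']
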
244 >        if self.pset != None: #CarlosDaniele
245 >            import PsetManipulator as pp
246 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
247  
248 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
248 >        # Copy/return
249  
250 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
250 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
251 >        self.return_data = int(cfg_params.get('USER.return_data',0))
252  
253          #DBSDLS-start
254 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
254 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
255          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
256          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
257 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
258          ## Perform the data location and discovery (based on DBS/DLS)
259 <        self.DataDiscoveryAndLocation(cfg_params)
260 <        #DBSDLS-end          
259 >        ## SL: Don't if NONE is specified as input (pythia use case)
260 >        blockSites = {}
261 >        if self.datasetPath:
262 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
263 >        #DBSDLS-end
264  
265          self.tgzNameWithPath = self.getTarBall(self.executable)
266  
267 <        self.jobSplitting()  #Daniele job Splitting
268 <        self.PsetEdit.maxEvent(self.maxEv) #Daniele  
269 <        self.PsetEdit.inputModule("INPUT") #Daniele  
270 <        self.PsetEdit.psetWriter(self.configFilename())
271 <        
267 >        ## Select Splitting
268 >        if self.selectNoInput:
269 >            if self.pset == None: #CarlosDaniele
270 >                self.jobSplittingForScript()
271 >            else:
272 >                self.jobSplittingNoInput()
273 >        else:
274 >            self.jobSplittingByBlocks(blockSites)
275  
276 +        # modify Pset
277 +        if self.pset != None: #CarlosDaniele
278 +            try:
279 +                # Add FrameworkJobReport to parameter-set, set max events.
280 +                # Reset later for data jobs by writeCFG which does all modifications
281 +                PsetEdit.addCrabFJR(self.fjrFileName)
282 +                PsetEdit.maxEvent(self.eventsPerJob)
283 +                PsetEdit.psetWriter(self.configFilename())
284 +            except:
285 +                msg='Error while manipulating ParameterSet: exiting...'
286 +                raise CrabException(msg)
287  
288      def DataDiscoveryAndLocation(self, cfg_params):
289  
290 +        import DataDiscovery
291 +        import DataLocation
292          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
293  
215        #datasetPath = "/"+self.owner+"/"+self.dataTiers[0]+"/"+self.dataset
216        
294          datasetPath=self.datasetPath
295  
219        ## TODO
220        dataTiersList = ""
221        dataTiers = dataTiersList.split(',')
222
296          ## Contact the DBS
297 +        common.logger.message("Contacting Data Discovery Services ...")
298          try:
299 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
299 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
300              self.pubdata.fetchDBSInfo()
301  
302 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
302 >        except DataDiscovery.NotExistingDatasetError, ex :
303              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
304              raise CrabException(msg)
305 <
232 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
305 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
306              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
307              raise CrabException(msg)
308 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
309 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
308 >        except DataDiscovery.DataDiscoveryError, ex:
309 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
310              raise CrabException(msg)
311  
312 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
313 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
314 <        common.logger.message("Required data are :"+self.datasetPath)
242 <
243 <        filesbyblock=self.pubdata.getFiles()
244 <        self.AllInputFiles=filesbyblock.values()
245 <        self.files = self.AllInputFiles        
246 <
247 <        ## TEMP
248 <    #    self.filesTmp = filesbyblock.values()
249 <    #    self.files = []
250 <    #    locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
251 <    #    locPath=''
252 <    #    tmp = []
253 <    #    for file in self.filesTmp[0]:
254 <    #        tmp.append(locPath+file)
255 <    #    self.files.append(tmp)
256 <        ## END TEMP
312 >        self.filesbyblock=self.pubdata.getFiles()
313 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
314 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
315  
316          ## get max number of events
317 <        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
260 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
261 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
317 >        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
318  
319          ## Contact the DLS and build a list of sites hosting the fileblocks
320          try:
321 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
321 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
322              dataloc.fetchDLSInfo()
323 <        except DataLocation_EDM.DataLocationError , ex:
323 >        except DataLocation.DataLocationError , ex:
324              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
325              raise CrabException(msg)
270        
271        allsites=dataloc.getSites()
272        common.logger.debug(5,"sites are %s"%allsites)
273        sites=self.checkBlackList(allsites)
274        common.logger.debug(5,"sites are (after black list) %s"%sites)
275        sites=self.checkWhiteList(sites)
276        common.logger.debug(5,"sites are (after white list) %s"%sites)
326  
278        if len(sites)==0:
279            msg = 'No sites hosting all the needed data! Exiting... '
280            raise CrabException(msg)
327  
328 <        common.logger.message("List of Sites hosting the data : "+str(sites))
329 <        common.logger.debug(6, "List of Sites: "+str(sites))
330 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
331 <        self.setParam_('TargetCE', ','.join(sites))
332 <        return
333 <    
334 <    def jobSplitting(self):
289 <        """
290 <        first implemntation for job splitting  
291 <        """    
292 <      #  print 'eventi totali '+str(self.maxEvents)
293 <      #  print 'eventi totali richiesti dallo user '+str(self.total_number_of_events)
294 <        #print 'files per job '+str(self.filesPerJob)
295 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
296 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
297 <
298 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
299 <        n_tot_files = (len(self.files[0]))
300 <        ## SL: this is wrong if the files have different number of events
301 <        evPerFile = int(self.maxEvents)/n_tot_files
302 <        
303 <        common.logger.debug(5,'Events per File '+str(evPerFile))
328 >        sites = dataloc.getSites()
329 >        allSites = []
330 >        listSites = sites.values()
331 >        for listSite in listSites:
332 >            for oneSite in listSite:
333 >                allSites.append(oneSite)
334 >        allSites = self.uniquelist(allSites)
335  
336 <        ## if asked to process all events, do it
337 <        if self.total_number_of_events == -1:
338 <            self.total_number_of_events=self.maxEvents
339 <            self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
340 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
341 <        
336 >        # screen output
337 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
338 >
339 >        return sites
340 >
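The value returned above (and consumed by jobSplittingByBlocks) is a dictionary keyed by fileblock name with the list of hosting sites as value; a sketch of its shape, with hypothetical block and site names:

    blockSites = {
        '/PreProdR3Minbias/SIM/GEN-SIM#block1': ['se1.example.org', 'se2.example.org'],
        '/PreProdR3Minbias/SIM/GEN-SIM#block2': ['se1.example.org'],
    }
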
341 >  # to Be Removed  DS -- BL
342 >  #  def setArgsList(self, argsList):
343 >  #      self.argsList = argsList
344 >
345 >    def jobSplittingByBlocks(self, blockSites):
346 >        """
347 >        Perform job splitting. Jobs run over an integer number of files
348 >        and no more than one block.
349 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
350 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
351 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
352 >                  self.maxEvents, self.filesbyblock
353 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
354 >              self.total_number_of_jobs - Total # of jobs
355 >              self.list_of_args - File(s) job will run on (a list of lists)
356 >        """
357 >
358 >        # ---- Handle the possible job splitting configurations ---- #
359 >        if (self.selectTotalNumberEvents):
360 >            totalEventsRequested = self.total_number_of_events
361 >        if (self.selectEventsPerJob):
362 >            eventsPerJobRequested = self.eventsPerJob
363 >            if (self.selectNumberOfJobs):
364 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
365 >
366 >        # If user requested all the events in the dataset
367 >        if (totalEventsRequested == -1):
368 >            eventsRemaining=self.maxEvents
369 >        # If user requested more events than are in the dataset
370 >        elif (totalEventsRequested > self.maxEvents):
371 >            eventsRemaining = self.maxEvents
372 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
373 >        # If user requested less events than are in the dataset
374          else:
375 <            self.total_number_of_files = int(self.total_number_of_events/evPerFile)
313 <            ## SL: if ask for less event than what is computed to be available on a
314 <            ##     file, process the first file anyhow.
315 <            if self.total_number_of_files == 0:
316 <                self.total_number_of_files = self.total_number_of_files + 1
375 >            eventsRemaining = totalEventsRequested
376  
377 <            common.logger.debug(5,'N files  '+str(self.total_number_of_files))
377 >        # If user requested more events per job than are in the dataset
378 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
379 >            eventsPerJobRequested = self.maxEvents
380  
381 <            check = 0
382 <            
322 <            ## Compute the number of jobs
323 <            #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
324 <            self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
325 <            common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
381 >        # For user info at end
382 >        totalEventCount = 0
383  
384 <            ## is there any remainder?
385 <            check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
384 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
385 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
386  
387 <            common.logger.debug(5,'Check  '+str(check))
387 >        if (self.selectNumberOfJobs):
388 >            common.logger.message("May not create the exact number_of_jobs requested.")
389  
390 <            if check > 0:
391 <                self.total_number_of_jobs =  self.total_number_of_jobs + 1
392 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
390 >        if ( self.ncjobs == 'all' ) :
391 >            totalNumberOfJobs = 999999999
392 >        else :
393 >            totalNumberOfJobs = self.ncjobs
394  
336            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
337            pass
395  
396 +        blocks = blockSites.keys()
397 +        blockCount = 0
398 +        # Backup variable in case self.maxEvents counted events in a non-included block
399 +        numBlocksInDataset = len(blocks)
400 +
401 +        jobCount = 0
402          list_of_lists = []
340        for i in xrange(0, int(n_tot_files), self.filesPerJob):
341            list_of_lists.append(self.files[0][i: i+self.filesPerJob])
403  
404 <        self.list_of_files = list_of_lists
405 <      
404 >        # list tracking which jobs belong to which block
405 >        jobsOfBlock = {}
406 >
407 >        # ---- Iterate over the blocks in the dataset until ---- #
408 >        # ---- we've met the requested total # of events    ---- #
409 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
410 >            block = blocks[blockCount]
411 >            blockCount += 1
412 >            if block not in jobsOfBlock.keys() :
413 >                jobsOfBlock[block] = []
414 >
415 >            if self.eventsbyblock.has_key(block) :
416 >                numEventsInBlock = self.eventsbyblock[block]
417 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
418 >
419 >                files = self.filesbyblock[block]
420 >                numFilesInBlock = len(files)
421 >                if (numFilesInBlock <= 0):
422 >                    continue
423 >                fileCount = 0
424 >
425 >                # ---- New block => New job ---- #
426 >                parString = ""
427 >                # counter for number of events in files currently worked on
428 >                filesEventCount = 0
429 >                # flag if next while loop should touch new file
430 >                newFile = 1
431 >                # job event counter
432 >                # events already consumed from the current file by a previous job (to be skipped)
433 >
434 >                # ---- Iterate over the files in the block until we've met the requested ---- #
435 >                # ---- total # of events or we've gone over all the files in this block  ---- #
436 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
437 >                    file = files[fileCount]
438 >                    if newFile :
439 >                        try:
440 >                            numEventsInFile = self.eventsbyfile[file]
441 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
442 >                            # increase filesEventCount
443 >                            filesEventCount += numEventsInFile
444 >                            # Add file to current job
445 >                            parString += '\\\"' + file + '\\\"\,'
446 >                            newFile = 0
447 >                        except KeyError:
448 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
449 >
450 >
451 >                    # if less events in file remain than eventsPerJobRequested
452 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
453 >                        # if last file in block
454 >                        if ( fileCount == numFilesInBlock-1 ) :
455 >                            # end job using last file, use remaining events in block
456 >                            # close job and touch new file
457 >                            fullString = parString[:-2]
458 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
459 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
460 >                            self.jobDestination.append(blockSites[block])
461 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
462 >                            # fill jobs of block dictionary
463 >                            jobsOfBlock[block].append(jobCount+1)
464 >                            # reset counter
465 >                            jobCount = jobCount + 1
466 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
467 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
468 >                            jobSkipEventCount = 0
469 >                            # reset file
470 >                            parString = ""
471 >                            filesEventCount = 0
472 >                            newFile = 1
473 >                            fileCount += 1
474 >                        else :
475 >                            # go to next file
476 >                            newFile = 1
477 >                            fileCount += 1
478 >                    # if events in file equal to eventsPerJobRequested
479 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
480 >                        # close job and touch new file
481 >                        fullString = parString[:-2]
482 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
483 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
484 >                        self.jobDestination.append(blockSites[block])
485 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
486 >                        jobsOfBlock[block].append(jobCount+1)
487 >                        # reset counter
488 >                        jobCount = jobCount + 1
489 >                        totalEventCount = totalEventCount + eventsPerJobRequested
490 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
491 >                        jobSkipEventCount = 0
492 >                        # reset file
493 >                        parString = ""
494 >                        filesEventCount = 0
495 >                        newFile = 1
496 >                        fileCount += 1
497 >
498 >                    # if more events in file remain than eventsPerJobRequested
499 >                    else :
500 >                        # close job but don't touch new file
501 >                        fullString = parString[:-2]
502 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
503 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
504 >                        self.jobDestination.append(blockSites[block])
505 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
506 >                        jobsOfBlock[block].append(jobCount+1)
507 >                        # increase counter
508 >                        jobCount = jobCount + 1
509 >                        totalEventCount = totalEventCount + eventsPerJobRequested
510 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
511 >                        # calculate skip events for last file
512 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
513 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
514 >                        # remove all but the last file
515 >                        filesEventCount = self.eventsbyfile[file]
516 >                        parString = '\\\"' + file + '\\\"\,'
517 >                    pass # END if
518 >                pass # END while (iterate over files in the block)
519 >        pass # END while (iterate over blocks in the dataset)
520 >        self.ncjobs = self.total_number_of_jobs = jobCount
521 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
522 >            common.logger.message("Could not run on all requested events because some blocks are not hosted at any allowed site.")
523 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
524 >
525 >        # screen output
526 >        screenOutput = "List of jobs and available destination sites:\n\n"
527 >
528 >        # keep track of blocks with no sites, to print a warning at the end
529 >        noSiteBlock = []
530 >        bloskNoSite = []
531 >
532 >        blockCounter = 0
533 >        for block in blocks:
534 >            if block in jobsOfBlock.keys() :
535 >                blockCounter += 1
536 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
537 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
538 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
539 >                    bloskNoSite.append( blockCounter )
540 >
541 >        common.logger.message(screenOutput)
542 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
543 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
544 >            virgola = ""
545 >            if len(bloskNoSite) > 1:
546 >                virgola = ","
547 >            for block in bloskNoSite:
548 >                msg += ' ' + str(block) + virgola
549 >            msg += '\n               Related jobs:\n                 '
550 >            virgola = ""
551 >            if len(noSiteBlock) > 1:
552 >                virgola = ","
553 >            for range_jobs in noSiteBlock:
554 >                msg += str(range_jobs) + virgola
555 >            msg += '\n               will not be submitted and this block of data cannot be analyzed!\n'
556 >            if self.cfg_params.has_key('EDG.se_white_list'):
557 >                msg += 'WARNING: SE White List: '+self.cfg_params['EDG.se_white_list']+'\n'
558 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
559 >                msg += 'Please check if the dataset is available at this site!)\n'
560 >            if self.cfg_params.has_key('EDG.ce_white_list'):
561 >                msg += 'WARNING: CE White List: '+self.cfg_params['EDG.ce_white_list']+'\n'
562 >                msg += '(Hint: By whitelisting you force the job to run at this particular site(s).\n'
563 >                msg += 'Please check if the dataset is available at this site!)\n'
564 >
565 >            common.logger.message(msg)
566 >
567 >        self.list_of_args = list_of_lists
568 >        return
569 >
570 >    def jobSplittingNoInput(self):
571 >        """
572 >        Perform job splitting based on the number of events per job
573 >        """
574 >        common.logger.debug(5,'Splitting per events')
575 >
576 >        if (self.selectEventsPerJob):
577 >            common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
578 >        if (self.selectNumberOfJobs):
579 >            common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
580 >        if (self.selectTotalNumberEvents):
581 >            common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
582 >
583 >        if (self.total_number_of_events < 0):
584 >            msg='Cannot split jobs per Events with "-1" as total number of events'
585 >            raise CrabException(msg)
586 >
587 >        if (self.selectEventsPerJob):
588 >            if (self.selectTotalNumberEvents):
589 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
590 >            elif(self.selectNumberOfJobs) :
591 >                self.total_number_of_jobs =self.theNumberOfJobs
592 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
593 >
594 >        elif (self.selectNumberOfJobs) :
595 >            self.total_number_of_jobs = self.theNumberOfJobs
596 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
597 >
598 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
599 >
600 >        # is there any remainder?
601 >        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
602 >
603 >        common.logger.debug(5,'Check  '+str(check))
604 >
605 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' events, for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
606 >        if check > 0:
607 >            common.logger.message('Warning: asked for '+str(self.total_number_of_events)+' events, but only '+str(int(self.total_number_of_jobs)*self.eventsPerJob)+' can be processed')
608 >
609 >        # argument is seed number.$i
610 >        self.list_of_args = []
611 >        for i in range(self.total_number_of_jobs):
612 >            ## Since there is no input, any site is good
613 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
614 >            args=[]
615 >            if (self.firstRun):
616 >                ## pythia first run
617 >                args.append(str(self.firstRun)+str(i))
618 >            self.list_of_args.append(args)
619 >
620 >        return
621 >
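The arithmetic above with hypothetical numbers: total_number_of_events = 1000 and events_per_job = 300 yield 3 jobs covering 900 events, and the remainder of 100 events triggers the warning.

    total_number_of_events = 1000                                           # hypothetical
    eventsPerJob = 300                                                      # hypothetical
    total_number_of_jobs = int(total_number_of_events / eventsPerJob)       # 3
    check = total_number_of_events - total_number_of_jobs * eventsPerJob    # 100 events cannot be covered
    # -> 3 jobs, each for 300 events, 900 events in total; warning about the remaining 100
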
622 >
623 >    def jobSplittingForScript(self):#CarlosDaniele
624 >        """
625 >        Perform job splitting based on the number of jobs
626 >        """
627 >        common.logger.debug(5,'Splitting per job')
628 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
629 >
630 >        self.total_number_of_jobs = self.theNumberOfJobs
631 >
632 >        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
633 >
634 >        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
635 >
636 >        # argument is seed number.$i
637 >        self.list_of_args = []
638 >        for i in range(self.total_number_of_jobs):
639 >            ## Since there is no input, any site is good
640 >           # self.jobDestination.append(["Any"])
641 >            self.jobDestination.append([""])
642 >            ## no random seed
643 >            self.list_of_args.append([str(i)])
644          return
645  
646      def split(self, jobParams):
647 <
349 <        common.jobDB.load()
647 >
648          #### Fabio
649          njobs = self.total_number_of_jobs
650 <        filelist = self.list_of_files
650 >        arglist = self.list_of_args
651          # create the empty structure
652          for i in range(njobs):
653              jobParams.append("")
654 <        
654 >
655 >        listID=[]
656 >        listField=[]
657          for job in range(njobs):
658 <            jobParams[job] = filelist[job]
659 <            common.jobDB.setArguments(job, jobParams[job])
658 >            jobParams[job] = arglist[job]
659 >            listID.append(job+1)
660 >            job_ToSave ={}
661 >            concString = ' '
662 >            argu=''
663 >            if len(jobParams[job]):
664 >                argu +=   concString.join(jobParams[job] )
665 >            job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS
666 >            job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
667 >            #common._db.updateJob_(job,job_ToSave)## new BL--DS
668 >            listField.append(job_ToSave)
669 >            msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
670 >            +"                     Destination: "+str(self.jobDestination[job])
671 >            common.logger.debug(5,msg)
672 >            #common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
673 >        common._db.updateJob_(listID,listField)## new BL--DS
674 >        ## Pay Attention Here....DS--BL
675 >        self.argsList = (len(jobParams[1])+1)
676  
361        common.jobDB.save()
677          return
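A sketch of the per-job record built above and handed to the task DB, for a hypothetical first job whose argument list is ['1000'] and whose destination came from jobSplittingByBlocks:

    job_ToSave = {
        'arguments': '1 1000',                  # job number followed by the job-type arguments
        'dlsDestination': ['se1.example.org'],  # hypothetical site list from self.jobDestination
    }
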
678 <    
679 <    def getJobTypeArguments(self, nj, sched):
680 <        params = common.jobDB.arguments(nj)
681 <        #print params
682 <        parString = "\\{"
683 <        
684 <        for i in range(len(params) - 1):
685 <            parString += '\\\"' + params[i] + '\\\"\,'
686 <        
372 <        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
373 <        return parString
374 <  
678 > #
679 > #    def getJobTypeArguments(self, nj, sched):
680 > #        result = ''
681 > #        jobs=[]
682 > #        jobs.append(nj)
683 > #        for i in common._db.queryJob('arguments',jobs):##  BL--DS
684 > #            result=result+str(i)+" "
685 > #        return result
686 >
687      def numberOfJobs(self):
688          # Fabio
377
689          return self.total_number_of_jobs
379
380
381
382    def checkBlackList(self, allSites):
383        if len(self.reCEBlackList)==0: return allSites
384        sites = []
385        for site in allSites:
386            common.logger.debug(10,'Site '+site)
387            good=1
388            for re in self.reCEBlackList:
389                if re.search(site):
390                    common.logger.message('CE in black list, skipping site '+site)
391                    good=0
392                pass
393            if good: sites.append(site)
394        if len(sites) == 0:
395            common.logger.debug(3,"No sites found after BlackList")
396        return sites
397
398    def checkWhiteList(self, allSites):
399
400        if len(self.reCEWhiteList)==0: return allSites
401        sites = []
402        for site in allSites:
403            good=0
404            for re in self.reCEWhiteList:
405                if re.search(site):
406                    common.logger.debug(5,'CE in white list, adding site '+site)
407                    good=1
408                if not good: continue
409                sites.append(site)
410        if len(sites) == 0:
411            common.logger.message("No sites found after WhiteList\n")
412        else:
413            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
414        return sites
690  
691      def getTarBall(self, exe):
692          """
693          Return the TarBall with lib and exe
694          """
695 <        
695 >
696          # if it exist, just return it
697 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
697 >        #
698 >        # Marco. Let's start to use relative path for Boss XML files
699 >        #
700 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
701          if os.path.exists(self.tgzNameWithPath):
702              return self.tgzNameWithPath
703  
# Line 433 | Line 711 | class Cmssw(JobType):
711          # First of all declare the user Scram area
712          swArea = self.scram.getSWArea_()
713          #print "swArea = ", swArea
714 <        swVersion = self.scram.getSWVersion()
715 <        #print "swVersion = ", swVersion
714 >        # swVersion = self.scram.getSWVersion()
715 >        # print "swVersion = ", swVersion
716          swReleaseTop = self.scram.getReleaseTop_()
717          #print "swReleaseTop = ", swReleaseTop
718 <        
718 >
719          ## check if working area is release top
720          if swReleaseTop == '' or swArea == swReleaseTop:
721              return
722  
723 <        filesToBeTarred = []
724 <        ## First find the executable
725 <        if (self.executable != ''):
726 <            exeWithPath = self.scram.findFile_(executable)
727 < #           print exeWithPath
728 <            if ( not exeWithPath ):
729 <                raise CrabException('User executable '+executable+' not found')
730 <
731 <            ## then check if it's private or not
732 <            if exeWithPath.find(swReleaseTop) == -1:
733 <                # the exe is private, so we must ship
734 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
735 <                path = swArea+'/'
736 <                exe = string.replace(exeWithPath, path,'')
737 <                filesToBeTarred.append(exe)
738 <                pass
739 <            else:
740 <                # the exe is from release, we'll find it on WN
741 <                pass
742 <
743 <        ## Now get the libraries: only those in local working area
744 <        libDir = 'lib'
745 <        lib = swArea+'/' +libDir
746 <        common.logger.debug(5,"lib "+lib+" to be tarred")
747 <        if os.path.exists(lib):
748 <            filesToBeTarred.append(libDir)
749 <
750 <        ## Now check if module dir is present
751 <        moduleDir = 'module'
752 <        if os.path.isdir(swArea+'/'+moduleDir):
753 <            filesToBeTarred.append(moduleDir)
754 <
755 <        ## Now check if the Data dir is present
756 <        dataDir = 'src/Data/'
757 <        if os.path.isdir(swArea+'/'+dataDir):
758 <            filesToBeTarred.append(dataDir)
759 <
760 <        ## Create the tar-ball
761 <        if len(filesToBeTarred)>0:
762 <            cwd = os.getcwd()
763 <            os.chdir(swArea)
764 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
765 <            for line in filesToBeTarred:
766 <                tarcmd = tarcmd + line + ' '
767 <            cout = runCommand(tarcmd)
768 <            if not cout:
769 <                raise CrabException('Could not create tar-ball')
770 <            os.chdir(cwd)
771 <        else:
772 <            common.logger.debug(5,"No files to be to be tarred")
773 <        
723 >        import tarfile
724 >        try: # create tar ball
725 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
726 >            ## First find the executable
727 >            if (self.executable != ''):
728 >                exeWithPath = self.scram.findFile_(executable)
729 >                if ( not exeWithPath ):
730 >                    raise CrabException('User executable '+executable+' not found')
731 >
732 >                ## then check if it's private or not
733 >                if exeWithPath.find(swReleaseTop) == -1:
734 >                    # the exe is private, so we must ship
735 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
736 >                    path = swArea+'/'
737 >                    # distinguish case when script is in user project area or given by full path somewhere else
738 >                    if exeWithPath.find(path) >= 0 :
739 >                        exe = string.replace(exeWithPath, path,'')
740 >                        tar.add(path+exe,exe)
741 >                    else :
742 >                        tar.add(exeWithPath,os.path.basename(executable))
743 >                    pass
744 >                else:
745 >                    # the exe is from release, we'll find it on WN
746 >                    pass
747 >
748 >            ## Now get the libraries: only those in local working area
749 >            libDir = 'lib'
750 >            lib = swArea+'/' +libDir
751 >            common.logger.debug(5,"lib "+lib+" to be tarred")
752 >            if os.path.exists(lib):
753 >                tar.add(lib,libDir)
754 >
755 >            ## Now check if module dir is present
756 >            moduleDir = 'module'
757 >            module = swArea + '/' + moduleDir
758 >            if os.path.isdir(module):
759 >                tar.add(module,moduleDir)
760 >
761 >            ## Now check if any data dir(s) is present
762 >            swAreaLen=len(swArea)
763 >            for root, dirs, files in os.walk(swArea):
764 >                if "data" in dirs:
765 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
766 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
767 >
768 >
769 >            ## Add ProdCommon dir to tar
770 >            prodcommonDir = 'ProdCommon'
771 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
772 >            if os.path.isdir(prodcommonPath):
773 >                tar.add(prodcommonPath,prodcommonDir)
774 >
775 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
776 >            tar.close()
777 >        except :
778 >            raise CrabException('Could not create tar-ball')
779 >
780 >        ## check for tarball size
781 >        tarballinfo = os.stat(self.tgzNameWithPath)
782 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
783 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
784 >
785 >        ## create tar-ball with ML stuff
786 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
787 >        try:
788 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
789 >            path=os.environ['CRABDIR'] + '/python/'
790 >            #for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py']:
791 >            ### FEDE ####
792 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']:
793 >            ###############
794 >                tar.add(path+file,file)
795 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
796 >            tar.close()
797 >        except :
798 >            raise CrabException('Could not create ML files tar-ball')
799 >
800          return
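The sandbox guard above in isolation, with the default limit: EDG.maxtarballsize defaults to 9.5 (MB), and the freshly written tgz is rejected if os.stat reports more bytes than that (a minimal sketch, assuming a file named default.tgz in the current directory).

    import os
    MaxTarBallSize = 9.5                              # MB, default of EDG.maxtarballsize
    tarballinfo = os.stat('default.tgz')              # hypothetical path
    if tarballinfo.st_size > MaxTarBallSize * 1024 * 1024:
        raise RuntimeError('Input sandbox of %.1f MB exceeds the %.1f MB limit'
                           % (tarballinfo.st_size / 1024.0 / 1024.0, MaxTarBallSize))
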
801 <        
802 <    def wsSetupEnvironment(self, nj):
801 >
802 >    def additionalInputFileTgz(self):
803 >        """
804 >        Put all additional files into a tar ball and return its name
805 >        """
806 >        import tarfile
807 >        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
808 >        tar = tarfile.open(tarName, "w:gz")
809 >        for file in self.additional_inbox_files:
810 >            tar.add(file,string.split(file,'/')[-1])
811 >        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
812 >        tar.close()
813 >        return tarName
814 >
815 >    def wsSetupEnvironment(self, nj=0):
816          """
817          Returns part of a job script which prepares
818          the execution environment for the job 'nj'.
819          """
820          # Prepare JobType-independent part
821 <        txt = ''
822 <  
823 <        ## OLI_Daniele at this level  middleware already known
507 <
508 <        txt += 'if [ $middleware == LCG ]; then \n'
821 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
822 >        txt += 'echo ">>> setup environment"\n'
823 >        txt += 'if [ $middleware == LCG ]; then \n'
824          txt += self.wsSetupCMSLCGEnvironment_()
825          txt += 'elif [ $middleware == OSG ]; then\n'
826 <        txt += '    time=`date -u +"%s"`\n'
827 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
828 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
829 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
830 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
831 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
832 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
833 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
834 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
835 <        txt += '        exit 1\n'
826 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
827 >        txt += '    if [ ! $? == 0 ] ;then\n'
828 >        #txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
829 >        #txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
830 >        #txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
831 >        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
832 >        #txt += '        exit 1\n'
833 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
834 >        txt += '        job_exit_code=10016\n'
835 >        txt += '        func_exit\n'
836          txt += '    fi\n'
837 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
838          txt += '\n'
839          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
840          txt += '    cd $WORKING_DIR\n'
841 <        txt += self.wsSetupCMSOSGEnvironment_()
841 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
842 >        txt += self.wsSetupCMSOSGEnvironment_()
843 >        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
844 >        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
845          txt += 'fi\n'
846  
847          # Prepare JobType-specific part
848          scram = self.scram.commandName()
849          txt += '\n\n'
850 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
850 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
851 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
852          txt += scram+' project CMSSW '+self.version+'\n'
853          txt += 'status=$?\n'
854          txt += 'if [ $status != 0 ] ; then\n'
855 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
856 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
857 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
858 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
859 <        ## OLI_Daniele
860 <        txt += '    if [ $middleware == OSG ]; then \n'
861 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
862 <        txt += '        cd $RUNTIME_AREA\n'
863 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
864 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
865 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
866 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
867 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
868 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
869 <        txt += '        fi\n'
870 <        txt += '    fi \n'
871 <        txt += '   exit 1 \n'
855 >        #txt += '    echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
856 >        #txt += '    echo "JOB_EXIT_STATUS = 10034"\n'
857 >        #txt += '    echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
858 >        #txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
859 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
860 >        txt += '    job_exit_code=10034\n'
861 >        #txt += '    if [ $middleware == OSG ]; then \n'
862 >        #txt += '        cd $RUNTIME_AREA\n'
863 >        #txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
864 >        #txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
865 >        #txt += '        /bin/rm -rf $WORKING_DIR\n'
866 >        #txt += '        if [ -d $WORKING_DIR ] ;then\n'
867 >        #txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
868 >        #txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
869 >        #txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
870 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
871 >        #txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
872 >        #txt += '            job_exit_code=10017\n'
873 >        #txt += '        fi\n'
874 >        #txt += '    fi \n'
875 >        #txt += '    exit 1 \n'
876 >        txt += '    func_exit\n'
877          txt += 'fi \n'
553        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
878          txt += 'cd '+self.version+'\n'
879 +        ########## FEDE FOR DBS2 ######################
880 +        txt += 'SOFTWARE_DIR=`pwd`\n'
881 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
882 +        ###############################################
883          ### needed grep for bug in scramv1 ###
884          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
557
885          # Handle the arguments:
886          txt += "\n"
887          txt += "## number of arguments (first argument always jobnumber)\n"
888          txt += "\n"
889 <        txt += "narg=$#\n"
890 <        txt += "if [ $narg -lt 2 ]\n"
889 >       # txt += "if [ $nargs -lt "+str(len(self.argsList[nj].split()))+" ]\n"
890 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
891          txt += "then\n"
892 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
893 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
894 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
895 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
896 <        ## OLI_Daniele
897 <        txt += '    if [ $middleware == OSG ]; then \n'
898 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
899 <        txt += '        cd $RUNTIME_AREA\n'
900 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
901 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
902 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
903 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
904 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
905 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
906 <        txt += '        fi\n'
907 <        txt += '    fi \n'
908 <        txt += "    exit 1\n"
892 >        #txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
893 >        #txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
894 >        #txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
895 >        #txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
896 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
897 >        txt += '    job_exit_code=50113\n'
898 >        #txt += '    if [ $middleware == OSG ]; then \n'
899 >        #txt += '        cd $RUNTIME_AREA\n'
900 >        #txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
901 >        #txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
902 >        #txt += '        /bin/rm -rf $WORKING_DIR\n'
903 >        #txt += '        if [ -d $WORKING_DIR ] ;then\n'
904 >        #txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
905 >        #txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
906 >        #txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
907 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
908 >        #txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
909 >        #txt += '            job_exit_code=10017\n'
910 >        #txt += '        fi\n'
911 >        #txt += '    fi\n'
912 >        #txt += "    exit 1\n"
913 >        txt += "    func_exit\n"
914          txt += "fi\n"
915          txt += "\n"
916  
917          # Prepare job-specific part
918          job = common.job_list[nj]
919 <        pset = os.path.basename(job.configFilename())
920 <        txt += '\n'
921 <        txt += 'InputFiles=$2\n'
922 <        txt += 'echo "<$InputFiles>"\n'
591 <        #txt += 'echo sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' \n'
592 <        txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
593 <        #txt += 'sed "s#{\'INPUT\'}#${InputFiles}#" $RUNTIME_AREA/'+pset+' > pset1.cfg\n'
919 >        ### FEDE FOR DBS OUTPUT PUBLICATION
920 >        if (self.datasetPath):
921 >            txt += '\n'
922 >            txt += 'DatasetPath='+self.datasetPath+'\n'
923  
924 <        if len(self.additional_inbox_files) > 0:
596 <            for file in self.additional_inbox_files:
597 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
598 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
599 <                txt += '   chmod +x '+file+'\n'
600 <                txt += 'fi\n'
601 <            pass
924 >            datasetpath_split = self.datasetPath.split("/")
925  
926 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
926 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
927 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
928 >            txt += 'ApplicationFamily=cmsRun\n'
929  
930 <        txt += '\n'
931 <        txt += 'echo "***** cat pset.cfg *********"\n'
932 <        txt += 'cat pset.cfg\n'
933 <        txt += 'echo "****** end pset.cfg ********"\n'
934 <        txt += '\n'
935 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
936 <        # txt += 'cat pset1.cfg\n'
937 <        # txt += 'echo "****** end pset1.cfg ********"\n'
938 <        return txt
930 >        else:
931 >            txt += 'DatasetPath=MCDataTier\n'
932 >            txt += 'PrimaryDataset=null\n'
933 >            txt += 'DataTier=null\n'
934 >            txt += 'ApplicationFamily=MCDataTier\n'
935 >        if self.pset != None:
936 >            pset = os.path.basename(job.configFilename())
937 >            txt += '\n'
938 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
939 >            if (self.datasetPath): # standard job
940 >                txt += 'InputFiles=${args[1]}; export InputFiles\n'
941 >                txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
942 >                txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
943 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
944 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
945 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
946 >            else:  # pythia like job
947 >                txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
948 >                txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
949 >                txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
950 >                txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
951 >                if (self.firstRun):
952 >                    txt += 'FirstRun=${args[1]}; export FirstRun\n'
953 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
954  
955 <    def wsBuildExe(self, nj):
955 >            txt += 'mv -f '+pset+' pset.cfg\n'
956 >
957 >        if len(self.additional_inbox_files) > 0:
958 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
959 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
960 >            txt += 'fi\n'
961 >            pass
962 >
963 >        if self.pset != None:
964 >            txt += '\n'
965 >            txt += 'echo "***** cat pset.cfg *********"\n'
966 >            txt += 'cat pset.cfg\n'
967 >            txt += 'echo "****** end pset.cfg ********"\n'
968 >            txt += '\n'
969 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
970 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
971 >            txt += '\n'
972 >        return txt
973 >    #### FEDE #####
974 >    def wsUntarSoftware(self, nj=0):
975          """
976          Put in the script the commands to build an executable
977          or a library.
978          """
979  
980 <        txt = ""
980 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
981  
982          if os.path.isfile(self.tgzNameWithPath):
983 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
983 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
984              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
985              txt += 'untar_status=$? \n'
986              txt += 'if [ $untar_status -ne 0 ]; then \n'
987 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
988 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
989 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
631 <            txt += '   if [ $middleware == OSG ]; then \n'
632 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
633 <            txt += '       cd $RUNTIME_AREA\n'
634 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
635 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
636 <            txt += '        echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
637 <            txt += '        echo "JOB_EXIT_STATUS = 50999"\n'
638 <            txt += '        echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
639 <            txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
640 <            txt += '       fi\n'
641 <            txt += '   fi \n'
642 <            txt += '   \n'
643 <            txt += '   exit 1 \n'
987 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
988 >            txt += '   job_exit_code=$untar_status\n'
989 >            txt += '   func_exit\n'
990              txt += 'else \n'
991              txt += '   echo "Successful untar" \n'
992              txt += 'fi \n'
993 +            txt += '\n'
994 +            txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
995 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
996 +            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
997 +            txt += 'else\n'
998 +            txt += '   export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
999 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1000 +            txt += 'fi\n'
1001 +            txt += '\n'
1002 +
1003              pass
1004 +
1005 +        return txt
1006 +        
1007 +    def wsBuildExe(self, nj=0):
1008 +        """
1009 +        Put in the script the commands to build an executable
1010 +        or a library.
1011 +        """
1012 +
1013 +        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
1014 +        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
1015 +
1016 +        txt += 'mv $RUNTIME_AREA/lib . \n'
1017 +        txt += 'mv $RUNTIME_AREA/module . \n'
1018 +        txt += 'mv $RUNTIME_AREA/ProdCommon . \n'
1019          
1020 +
1021 +        #if os.path.isfile(self.tgzNameWithPath):
1022 +        #    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
1023 +        #    txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
1024 +        #    txt += 'untar_status=$? \n'
1025 +        #    txt += 'if [ $untar_status -ne 0 ]; then \n'
1026 +        #    txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
1027 +        #    txt += '   job_exit_code=$untar_status\n'
1028 +        #    txt += '   func_exit\n'
1029 +        #    txt += 'else \n'
1030 +        #    txt += '   echo "Successful untar" \n'
1031 +        #    txt += 'fi \n'
1032 +        #    txt += '\n'
1033 +        #    txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
1034 +        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1035 +        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
1036 +        txt += 'else\n'
1037 +        txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1038 +        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1039 +        txt += 'fi\n'
1040 +        txt += '\n'
1041 +
1042          return txt
1043 +    ############################################################################
1044  
1045      def modifySteeringCards(self, nj):
1046          """
1047 <        modify the card provided by the user,
1047 >        modify the card provided by the user,
1048          writing a new card into share dir
1049          """
1050 <        
1050 >
1051      def executableName(self):
1052 <        return self.executable
1052 >        if self.scriptExe: #CarlosDaniele
1053 >            return "sh "
1054 >        else:
1055 >            return self.executable
1056  
1057      def executableArgs(self):
1058 <        return " -p pset.cfg"
1058 >        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
1059 >        if self.scriptExe:#CarlosDaniele
1060 >            return   self.scriptExe + " $NJob"
1061 >        else:
1062 >            version_array = self.scram.getSWVersion().split('_')
1063 >            major = 0
1064 >            minor = 0
1065 >            try:
1066 >                major = int(version_array[1])
1067 >                minor = int(version_array[2])
1068 >            except:
1069 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
1070 >                raise CrabException(msg)
1071 >
1072 >            ex_args = ""
1073 >
1074 >            # Framework job report
1075 >            if major >= 1 and minor >= 5 :
1076 >                #ex_args += " -j " + self.fjrFileName
1077 >            ### FEDE it could be improved!!! ####    
1078 >                ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1079 >            #######################################
1080 >            # Type of cfg file
1081 >            if major >= 2 :
1082 >                ex_args += " -p pset.pycfg"
1083 >            else:
1084 >                ex_args += " -p pset.cfg"
1085 >            return ex_args
1086  
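For illustration, the argument building above splits the SCRAM version string on '_' and reads the second and third fields as the major and minor release numbers; the short sketch below mirrors that logic with a hypothetical version string (the value "CMSSW_1_6_7" is an example only).

    # Minimal sketch mirroring the version parsing in executableArgs;
    # the version string here is hypothetical.
    version = "CMSSW_1_6_7"
    version_array = version.split("_")
    major = int(version_array[1])   # -> 1
    minor = int(version_array[2])   # -> 6
    ex_args = ""
    if major >= 1 and minor >= 5:
        ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
    if major >= 2:
        ex_args += " -p pset.pycfg"
    else:
        ex_args += " -p pset.cfg"
    print(ex_args)   # -> " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.cfg"
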
1087      def inputSandbox(self, nj):
1088          """
1089          Returns a list of filenames to be put in JDL input sandbox.
1090          """
1091          inp_box = []
1092 <        # dict added to delete duplicate from input sandbox file list
1093 <        seen = {}
1092 >        # # dict added to delete duplicate from input sandbox file list
1093 >        # seen = {}
1094          ## code
1095          if os.path.isfile(self.tgzNameWithPath):
1096              inp_box.append(self.tgzNameWithPath)
1097 +        if os.path.isfile(self.MLtgzfile):
1098 +            inp_box.append(self.MLtgzfile)
1099          ## config
1100 <        inp_box.append(common.job_list[nj].configFilename())
1100 >        if not self.pset is None:
1101 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1102          ## additional input files
1103 <        #for file in self.additional_inbox_files:
1104 <        #    inp_box.append(common.work_space.cwdDir()+file)
1103 >        tgz = self.additionalInputFileTgz()
1104 >        inp_box.append(tgz)
1105 >        ## executable
1106 >        wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
1107 >        inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
1108          return inp_box
1109  
1110      def outputSandbox(self, nj):
# Line 683 | Line 1113 | class Cmssw(JobType):
1113          """
1114          out_box = []
1115  
686        stdout=common.job_list[nj].stdout()
687        stderr=common.job_list[nj].stderr()
688
1116          ## User Declared output files
1117 <        for out in self.output_file:
1118 <            n_out = nj + 1
1117 >        for out in (self.output_file+self.output_file_sandbox):
1118 >            n_out = nj + 1
1119              out_box.append(self.numberFile_(out,str(n_out)))
1120          return out_box
694        return []
1121  
1122      def prepareSteeringCards(self):
1123          """
# Line 704 | Line 1130 | class Cmssw(JobType):
1130          Returns part of a job script which renames the produced files.
1131          """
1132  
1133 <        txt = '\n'
1134 <        txt += '# directory content\n'
1133 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1134 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1135 >        txt += 'echo ">>> current directory content:"\n'
1136          txt += 'ls \n'
1137 <        file_list = ''
1138 <        for fileWithSuffix in self.output_file:
1137 >        txt += '\n'
1138 >
1139 >        #txt += 'output_exit_status=0\n'
1140 >
1141 >        ### FEDE #######
1142 >        #for fileWithSuffix in (self.output_file_sandbox):
1143 >        #    output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1144 >        #    txt += '\n'
1145 >        #    txt += '# check output file\n'
1146 >        #    txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1147 >        #    txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1148 >        #    txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1149 >        #    txt += 'else\n'
1150 >        #    txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1151 >        #    txt += '    job_exit_code=60302\n'
1152 >        #    if common.scheduler.name().upper() == 'CONDOR_G':
1153 >        #        txt += '    if [ $middleware == OSG ]; then \n'
1154 >        #        txt += '        echo "prepare dummy output file"\n'
1155 >        #        txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1156 >        #        txt += '    fi \n'
1157 >        #    txt += 'fi\n'
1158 >
1159 >        for fileWithSuffix in (self.output_file):
1160              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
713            file_list=file_list+output_file_num+' '
1161              txt += '\n'
1162              txt += '# check output file\n'
1163 <            txt += 'ls '+fileWithSuffix+'\n'
1164 <            txt += 'exe_result=$?\n'
1165 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1166 <            txt += '   echo "ERROR: No output file to manage"\n'
1167 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1168 <            txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
1169 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
1170 <            ### OLI_DANIELE
1171 <            if common.scheduler.boss_scheduler_name == 'condor_g':
1163 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1164 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1165 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
1166 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1167 >            else:
1168 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1169 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1170 >            txt += 'else\n'
1171 >            #txt += '    exit_status=60302\n'
1172 >            #txt += '    echo "ERROR: Output file '+fileWithSuffix+' not found"\n'
1173 >            #txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1174 >            #txt += '    output_exit_status=$exit_status\n'
1175 >            txt += '    job_exit_code=60302\n'
1176 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
1177 >            if common.scheduler.name().upper() == 'CONDOR_G':
1178                  txt += '    if [ $middleware == OSG ]; then \n'
1179                  txt += '        echo "prepare dummy output file"\n'
1180                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1181                  txt += '    fi \n'
729            txt += 'else\n'
730            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1182              txt += 'fi\n'
1183 <      
1184 <        txt += 'cd $RUNTIME_AREA\n'
1185 <        file_list=file_list[:-1]
1186 <        txt += 'file_list="'+file_list+'"\n'
1187 <        ### OLI_DANIELE
1188 <        txt += 'if [ $middleware == OSG ]; then\n'  
1189 <        txt += '    cd $RUNTIME_AREA\n'
1190 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1191 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
741 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
742 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
743 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
744 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
745 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
746 <        txt += '    fi\n'
747 <        txt += 'fi\n'
1183 >        file_list = []
1184 >        for fileWithSuffix in (self.output_file):
1185 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1186 >
1187 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1188 >        txt += '\n'
1189 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1190 >        txt += 'echo ">>> current directory content:"\n'
1191 >        txt += 'ls \n'
1192          txt += '\n'
1193 +        txt += 'cd $RUNTIME_AREA\n'
1194 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
1195          return txt
1196  
1197      def numberFile_(self, file, txt):
# Line 756 | Line 1202 | class Cmssw(JobType):
1202          # take away last extension
1203          name = p[0]
1204          for x in p[1:-1]:
1205 <           name=name+"."+x
1205 >            name=name+"."+x
1206          # add "_txt"
1207          if len(p)>1:
1208 <          ext = p[len(p)-1]
1209 <          #result = name + '_' + str(txt) + "." + ext
764 <          result = name + '_' + txt + "." + ext
1208 >            ext = p[len(p)-1]
1209 >            result = name + '_' + txt + "." + ext
1210          else:
1211 <          #result = name + '_' + str(txt)
1212 <          result = name + '_' + txt
768 <        
1211 >            result = name + '_' + txt
1212 >
1213          return result
1214  
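For illustration, numberFile_ splits a file name on '.', keeps everything up to the last extension, and inserts the job tag before that extension (or simply appends the tag when there is no extension); a minimal standalone sketch with hypothetical file names:

    # Standalone sketch of the numberFile_ renaming rule; file names are hypothetical.
    def number_file(name, tag):
        p = name.split(".")
        if len(p) > 1:
            return ".".join(p[:-1]) + "_" + tag + "." + p[-1]
        return name + "_" + tag

    print(number_file("histo.root", "3"))    # -> histo_3.root
    print(number_file("my.out.log", "3"))    # -> my.out_3.log
    print(number_file("summary", "3"))       # -> summary_3
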
1215 <    def getRequirements(self):
1215 >    def getRequirements(self, nj=[]):
1216          """
1217 <        return job requirements to add to jdl files
1217 >        return job requirements to add to jdl files
1218          """
1219          req = ''
1220 <        if common.analisys_common_info['sites']:
1221 <            if common.analisys_common_info['sw_version']:
1222 <                req='Member("VO-cms-' + \
1223 <                     common.analisys_common_info['sw_version'] + \
1224 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1225 <            if len(common.analisys_common_info['sites'])>0:
1226 <                req = req + ' && ('
1227 <                for i in range(len(common.analisys_common_info['sites'])):
1228 <                    req = req + 'other.GlueCEInfoHostName == "' \
1229 <                         + common.analisys_common_info['sites'][i] + '"'
1230 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
1231 <                        req = req + ' || '
1232 <            req = req + ')'
1233 <        #print "req = ", req
1220 >        if self.version:
1221 >            req='Member("VO-cms-' + \
1222 >                 self.version + \
1223 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1224 >        ## SL add requirement for OS version only if SL4
1225 >        #reSL4 = re.compile( r'slc4' )
1226 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1227 >            req+=' && Member("VO-cms-' + \
1228 >                 self.executable_arch + \
1229 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1230 >
1231 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1232 >        if common.scheduler.name() == "glitecoll":
1233 >            req += ' && other.GlueCEStateStatus == "Production" '
1234 >
1235          return req
1236  
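For illustration, the requirements string assembled above concatenates a software tag, an architecture tag and an outbound-IP clause (plus a CE-status clause only for the glitecoll scheduler); with hypothetical version and architecture values it would read roughly as follows:

    # Hypothetical values; mirrors the string built in getRequirements.
    version = "CMSSW_1_6_12"
    arch = "slc4_ia32_gcc345"
    req = 'Member("VO-cms-%s", other.GlueHostApplicationSoftwareRunTimeEnvironment)' % version
    req += ' && Member("VO-cms-%s", other.GlueHostApplicationSoftwareRunTimeEnvironment)' % arch
    req += ' && (other.GlueHostNetworkAdapterOutboundIP)'
    print(req)
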
1237      def configFilename(self):
1238          """ return the config filename """
1239          return self.name()+'.cfg'
1240  
796    ### OLI_DANIELE
1241      def wsSetupCMSOSGEnvironment_(self):
1242          """
1243          Returns part of a job script which prepares
1244          the execution environment and which is common for all CMS jobs.
1245          """
1246 <        txt = '\n'
1247 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1248 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1249 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1250 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1251 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1252 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1253 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1254 <        txt += '   else\n'
1255 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1256 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1257 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1258 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1259 <        txt += '       exit 1\n'
1246 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
1247 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
1248 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1249 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1250 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1251 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1252 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1253 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1254 >        txt += '    else\n'
1255 >        #txt += '        echo "SET_CMS_ENV 10020 ==> ERROR $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1256 >        #txt += '        echo "JOB_EXIT_STATUS = 10020"\n'
1257 >        #txt += '        echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1258 >        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1259 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1260 >        txt += '        job_exit_code=10020\n'
1261 >        #txt += '        cd $RUNTIME_AREA\n'
1262 >        #txt += '        echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1263 >        #txt += '        echo ">>> Remove working directory: $WORKING_DIR"\n'
1264 >        #txt += '        /bin/rm -rf $WORKING_DIR\n'
1265 >        #txt += '        if [ -d $WORKING_DIR ] ;then\n'
1266 >        #txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1267 >        #txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1268 >        #txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1269 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1270 >        #txt += '            echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1271 >        #txt += '            job_exit_code=10017\n'
1272 >        #txt += '        fi\n'
1273          txt += '\n'
1274 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1275 <        txt += '       cd $RUNTIME_AREA\n'
1276 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
820 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
821 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
822 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
823 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
824 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
825 <        txt += '       fi\n'
826 <        txt += '\n'
827 <        txt += '       exit 1\n'
828 <        txt += '   fi\n'
1274 >        #txt += '        exit 1\n'
1275 >        txt += '        func_exit\n'
1276 >        txt += '    fi\n'
1277          txt += '\n'
1278 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1279 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1278 >        txt += '    echo "==> setup cms environment ok"\n'
1279 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1280  
1281          return txt
1282 <
1282 >
1283      ### OLI_DANIELE
1284      def wsSetupCMSLCGEnvironment_(self):
1285          """
1286          Returns part of a job script which prepares
1287          the execution environment and which is common for all CMS jobs.
1288          """
1289 <        txt  = '   \n'
1290 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1291 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1292 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1293 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1294 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1295 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1296 <        txt += '       exit 1\n'
1297 <        txt += '   else\n'
1298 <        txt += '       echo "Sourcing environment... "\n'
1299 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1300 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1301 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1302 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1303 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1304 <        txt += '           exit 1\n'
1305 <        txt += '       fi\n'
1306 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1307 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1308 <        txt += '       result=$?\n'
1309 <        txt += '       if [ $result -ne 0 ]; then\n'
1310 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1311 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1312 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1313 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1314 <        txt += '           exit 1\n'
1315 <        txt += '       fi\n'
1316 <        txt += '   fi\n'
1317 <        txt += '   \n'
1318 <        txt += '   string=`cat /etc/redhat-release`\n'
1319 <        txt += '   echo $string\n'
1320 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1321 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1322 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1323 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1324 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1325 <        txt += '   else\n'
1326 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1327 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1328 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1329 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1330 <        txt += '       exit 1\n'
1331 <        txt += '   fi\n'
1332 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1333 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1289 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1290 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1291 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1292 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1293 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1294 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1295 >        #txt += '        echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1296 >        #txt += '        echo "JOB_EXIT_STATUS = 10031" \n'
1297 >        #txt += '        echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1298 >        #txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1299 >        #txt += '        exit 1\n'
1300 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1301 >        txt += '        job_exit_code=10031\n'
1302 >        txt += '        func_exit\n'
1303 >        txt += '    else\n'
1304 >        txt += '        echo "Sourcing environment... "\n'
1305 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1306 >        #txt += '            echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1307 >        #txt += '            echo "JOB_EXIT_STATUS = 10020"\n'
1308 >        #txt += '            echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1309 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1310 >        #txt += '            exit 1\n'
1311 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1312 >        txt += '            job_exit_code=10020\n'
1313 >        txt += '            func_exit\n'
1314 >        txt += '        fi\n'
1315 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1316 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1317 >        txt += '        result=$?\n'
1318 >        txt += '        if [ $result -ne 0 ]; then\n'
1319 >        #txt += '            echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1320 >        #txt += '            echo "JOB_EXIT_STATUS = 10032"\n'
1321 >        #txt += '            echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1322 >        #txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1323 >        #txt += '            exit 1\n'
1324 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1325 >        txt += '            job_exit_code=10032\n'
1326 >        txt += '            func_exit\n'
1327 >        txt += '        fi\n'
1328 >        txt += '    fi\n'
1329 >        txt += '    \n'
1330 >        txt += '    echo "==> setup cms environment ok"\n'
1331 >        return txt
1332 >
1333 >    ### FEDE FOR DBS OUTPUT PUBLICATION
1334 >    def modifyReport(self, nj):
1335 >        """
1336 >        insert the part of the script that modifies the FrameworkJob Report
1337 >        """
1338 >
1339 >        txt = '\n#Written by cms_cmssw::modifyReport\n'
1340 >        try:
1341 >            publish_data = int(self.cfg_params['USER.publish_data'])
1342 >        except KeyError:
1343 >            publish_data = 0
1344 >        if (publish_data == 1):
1345 >            
1346 >            txt += 'if [ $copy_exit_status -eq 0 ]; then\n'
1347 >            txt += '    echo ">>> Modify Job Report:" \n'
1348 >            txt += '    chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1349 >            #txt += '    if [ -z "$SE" ]; then\n'
1350 >            #txt += '        SE="" \n'
1351 >            #txt += '    fi \n'
1352 >            #txt += '    if [ -z "$SE_PATH" ]; then\n'
1353 >            #txt += '        SE_PATH="" \n'
1354 >            #txt += '    fi \n'
1355 >            txt += '    echo "SE = $SE"\n'
1356 >            txt += '    echo "SE_PATH = $SE_PATH"\n'
1357 >
1358 >            processedDataset = self.cfg_params['USER.publish_data_name']
1359 >            txt += '    ProcessedDataset='+processedDataset+'\n'
1360 >            #txt += '    if [ "$SE_PATH" == "" ]; then\n'
1361 >            #txt += '        FOR_LFN=/copy_problems/ \n'
1362 >            #txt += '    else \n'
1363 >            #txt += '        tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1364 >            #txt += '        FOR_LFN=/store$tmp \n'
1365 >            #txt += '    fi \n'
1366 >            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1367 >            txt += '    FOR_LFN=/store$tmp \n'
1368 >            txt += '    echo "ProcessedDataset = $ProcessedDataset"\n'
1369 >            txt += '    echo "FOR_LFN = $FOR_LFN" \n'
1370 >            txt += '    echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1371 >            #txt += '    echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1372 >            #txt += '    $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1373 >            ### FEDE ####
1374 >            txt += '    echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1375 >            txt += '    $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1376 >            ####################################
1377 >            txt += '    modifyReport_result=$?\n'
1378 >            txt += '    if [ $modifyReport_result -ne 0 ]; then\n'
1379 >            txt += '        modifyReport_result=70500\n'
1380 >            txt += '        job_exit_code=$modifyReport_result\n'
1381 >            txt += '        echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1382 >            txt += '        echo "WARNING: Problem with ModifyJobReport"\n'
1383 >            txt += '    else\n'
1384 >            ### FEDE #####
1385 >            #txt += '        mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1386 >            #######################
1387 >            txt += '        mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1388 >            txt += '    fi\n'
1389 >            txt += 'fi\n'
1390 >        return txt
1391 >
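For illustration, the awk expression used above for FOR_LFN splits $SE_PATH on the literal string 'store' and keeps what follows, so the logical file name is re-rooted at /store; an equivalent sketch with a hypothetical storage path:

    # Hypothetical SE path; mirrors: tmp=`echo $SE_PATH | awk -F 'store' '{print$2}'`
    se_path = "/pnfs/example.site/data/cms/store/user/someone/mydataset/"
    tmp = se_path.split("store")[1]
    for_lfn = "/store" + tmp
    print(for_lfn)   # -> /store/user/someone/mydataset/
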
1392 >    def cleanEnv(self):
1393 >        txt = '\n#Written by cms_cmssw::cleanEnv\n'
1394 >        txt += 'if [ $middleware == OSG ]; then\n'
1395 >        txt += '    cd $RUNTIME_AREA\n'
1396 >        txt += '    echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1397 >        txt += '    echo ">>> Remove working directory: $WORKING_DIR"\n'
1398 >        txt += '    /bin/rm -rf $WORKING_DIR\n'
1399 >        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1400 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1401 >        txt += '        job_exit_code=10017\n'
1402 >        txt += '        func_exit\n'
1403 >        txt += '    fi\n'
1404 >        txt += 'fi\n'
1405 >        txt += '\n'
1406          return txt
1407  
1408      def setParam_(self, param, value):
# Line 891 | Line 1411 | class Cmssw(JobType):
1411      def getParams(self):
1412          return self._params
1413  
1414 <    def setTaskid_(self):
1415 <        self._taskId = self.cfg_params['taskId']
1416 <        
1417 <    def getTaskid(self):
1418 <        return self._taskId
1414 >    def uniquelist(self, old):
1415 >        """
1416 >        remove duplicates from a list
1417 >        """
1418 >        nd={}
1419 >        for e in old:
1420 >            nd[e]=0
1421 >        return nd.keys()
1422 >
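For illustration, uniquelist drops duplicates by loading the entries into a dictionary and returning its keys, so the original order is not necessarily preserved; a tiny sketch with hypothetical entries:

    # Hypothetical input; mirrors uniquelist's dict-based de-duplication.
    old = ["fileA.root", "fileB.root", "fileA.root"]
    nd = {}
    for e in old:
        nd[e] = 0
    print(list(nd.keys()))   # -> ['fileA.root', 'fileB.root'] (order not guaranteed)
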
1423 >    def outList(self):
1424 >        """
1425 >        check the dimension of the output files
1426 >        """
1427 >        txt = ''
1428 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1429 >        listOutFiles = []
1430 >        stdout = 'CMSSW_$NJob.stdout'
1431 >        stderr = 'CMSSW_$NJob.stderr'
1432 >        if (self.return_data == 1):
1433 >            for file in (self.output_file+self.output_file_sandbox):
1434 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1435 >            listOutFiles.append(stdout)
1436 >            listOutFiles.append(stderr)
1437 >        else:
1438 >            for file in (self.output_file_sandbox):
1439 >                listOutFiles.append(self.numberFile_(file, '$NJob'))
1440 >            listOutFiles.append(stdout)
1441 >            listOutFiles.append(stderr)
1442 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1443 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1444 >        txt += 'export filesToCheck\n'
1445 >        return txt
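
For illustration, with return_data enabled and a single user output file, the expected-files list assembled above would expand roughly as follows for a hypothetical job ("$NJob" stays literal until the wrapper script runs):

    # Hypothetical expected-output list for one user file plus stdout/stderr,
    # as assembled in outList.
    list_out = ["ntuple_$NJob.root", "CMSSW_$NJob.stdout", "CMSSW_$NJob.stderr"]
    print('filesToCheck="' + " ".join(list_out) + '"')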
