ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.3 by gutsche, Sun May 28 02:27:52 2006 UTC vs.
Revision 1.314 by spiga, Thu Jun 18 17:23:02 2009 UTC

# Line 1 | Line 1
1   from JobType import JobType
2 from crab_logger import Logger
2   from crab_exceptions import *
3   from crab_util import *
4   import common
6 import PsetManipulator  
7
8 import DBSInfo_EDM
9 #from DataDiscovery_EDM import DataDiscovery_EDM
10 import DataDiscovery_EDM
11 #from DataLocation_EDM import DataLocation_EDM
12 import DataLocation_EDM
5   import Scram
6 + from Splitter import JobSplitter
7  
8 < import os, string, re
8 > from IMProv.IMProvNode import IMProvNode
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
13          JobType.__init__(self, 'CMSSW')
14 <        common.logger.debug(3,'CMSSW::__init__')
14 >        common.logger.debug('CMSSW::__init__')
15 >        self.skip_blocks = skip_blocks
16 >        self.argsList = 1
17  
22        self.analisys_common_info = {}
23        # Marco.
18          self._params = {}
19          self.cfg_params = cfg_params
20  
21 <        log = common.logger
22 <        
21 >        ### Temporary patch to automatically skip the ISB size check:
22 >        server=self.cfg_params.get('CRAB.server_name',None)
23 >        size = 9.5
24 >        if server or common.scheduler.name().upper() in ['LSF','CAF']: size = 99999
25 >        ### D.S.
26 >        self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))
27 >
28 >        # number of jobs requested to be created, limit obj splitting
29 >        self.ncjobs = ncjobs
30 >
31          self.scram = Scram.Scram(cfg_params)
30        scramArea = ''
32          self.additional_inbox_files = []
33          self.scriptExe = ''
34          self.executable = ''
35 <        self.tgz_name = 'default.tgz'
36 <
35 >        self.executable_arch = self.scram.getArch()
36 >        self.tgz_name = 'default.tar.gz'
37 >        self.tar_name = 'default.tar'
38 >        self.scriptName = 'CMSSW.sh'
39 >        self.pset = ''
40 >        self.datasetPath = ''
41 >
42 >        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
43 >        # set FJR file name
44 >        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 <        common.analisys_common_info['sw_version'] = self.version
39 <        ### FEDE
40 <        common.analisys_common_info['copy_input_data'] = 0
41 <        common.analisys_common_info['events_management'] = 1
47 >        common.logger.log(10-1,"CMSSW version is: "+str(self.version))
48  
49 <        ### collect Data cards
49 >        version_array = self.version.split('_')
50 >        self.CMSSW_major = 0
51 >        self.CMSSW_minor = 0
52 >        self.CMSSW_patch = 0
53          try:
54 <         #   self.owner = cfg_params['CMSSW.owner']
55 <         #   log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner)
56 <         #   self.dataset = cfg_params['CMSSW.dataset']
57 <            self.datasetPath = cfg_params['CMSSW.datasetpath']
58 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+self.datasetPath)
59 <        except KeyError:
60 <        #    msg = "Error: owner and/or dataset not defined "
61 <            msg = "Error: datasetpath not defined "  
54 >            self.CMSSW_major = int(version_array[1])
55 >            self.CMSSW_minor = int(version_array[2])
56 >            self.CMSSW_patch = int(version_array[3])
57 >        except:
58 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
59 >            raise CrabException(msg)
60 >
61 >        if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5):
62 >            msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version."
63              raise CrabException(msg)
64 +            """
65 +            As CMSSW versions are dropped we can drop more code:
66 +            1.X dropped: drop support for running .cfg on WN
67 +            2.0 dropped: drop all support for cfg here and in writeCfg
68 +            2.0 dropped: Recheck the random number seed support
69 +            """
70 +
71 +        ### collect Data cards
72 +
73 +
74 +        ### Temporary: added to remove input file control in the case of PU
75 +        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
76 +
77 +        tmp =  cfg_params['CMSSW.datasetpath']
78 +        common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)
79 +
80 +        if tmp =='':
81 +            msg = "Error: datasetpath not defined "
82 +            raise CrabException(msg)
83 +        elif string.lower(tmp)=='none':
84 +            self.datasetPath = None
85 +            self.selectNoInput = 1
86 +        else:
87 +            self.datasetPath = tmp
88 +            self.selectNoInput = 0
89 +
90          self.dataTiers = []
91 < #       try:
92 < #           tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',')
93 < #           for tmp in tmpDataTiers:
94 < #               tmp=string.strip(tmp)
59 < #               self.dataTiers.append(tmp)
60 < #               pass
61 < #           pass
62 < #       except KeyError:
63 < #           pass
64 < #       log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers))
91 >
92 >        self.debugWrap=''
93 >        self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
94 >        if self.debug_wrapper == 1: self.debugWrap='--debug'
95  
96          ## now the application
97 <        try:
98 <            self.executable = cfg_params['CMSSW.executable']
99 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
100 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
71 <            log.debug(3,msg)
72 <        except KeyError:
73 <            self.executable = 'cmsRun'
74 <            msg = "User executable not defined. Use cmsRun"
75 <            log.debug(3,msg)
76 <            pass
97 >        self.managedGenerators = ['madgraph', 'comphep', 'lhe']
98 >        self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
99 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
100 >        common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)
101  
102 <        try:
103 <            self.pset = cfg_params['CMSSW.pset']
104 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
102 >        if not cfg_params.has_key('CMSSW.pset'):
103 >            raise CrabException("PSet file missing. Cannot run cmsRun ")
104 >        self.pset = cfg_params['CMSSW.pset']
105 >        common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
106 >        if self.pset.lower() != 'none' :
107              if (not os.path.exists(self.pset)):
108                  raise CrabException("User defined PSet file "+self.pset+" does not exist")
109 <        except KeyError:
110 <            raise CrabException("PSet file missing. Cannot run cmsRun ")
109 >        else:
110 >            self.pset = None
111  
112          # output files
113 <        try:
114 <            self.output_file = []
113 >        ## stuff which must be returned always via sandbox
114 >        self.output_file_sandbox = []
115  
116 <            tmp = cfg_params['CMSSW.output_file']
117 <            if tmp != '':
118 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
119 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
120 <                for tmp in tmpOutFiles:
121 <                    tmp=string.strip(tmp)
122 <                    self.output_file.append(tmp)
123 <                    pass
124 <            else:
125 <                log.message("No output file defined: only stdout/err will be available")
126 <                pass
127 <            pass
102 <        except KeyError:
103 <            log.message("No output file defined: only stdout/err will be available")
104 <            pass
116 >        # add fjr report by default via sandbox
117 >        self.output_file_sandbox.append(self.fjrFileName)
118 >
119 >        # other output files to be returned via sandbox or copied to SE
120 >        outfileflag = False
121 >        self.output_file = []
122 >        tmp = cfg_params.get('CMSSW.output_file',None)
123 >        if tmp :
124 >            self.output_file = [x.strip() for x in tmp.split(',')]
125 >            outfileflag = True #output found
126 >        #else:
127 >        #    log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
128  
129          # script_exe file as additional file in inputSandbox
130 <        try:
131 <           self.scriptExe = cfg_params['USER.script_exe']
132 <           self.additional_inbox_files.append(self.scriptExe)
133 <        except KeyError:
134 <           pass
135 <        if self.scriptExe != '':
113 <           if os.path.isfile(self.scriptExe):
114 <              pass
115 <           else:
116 <              log.message("WARNING. file "+self.scriptExe+" not found")
117 <              sys.exit()
118 <                  
119 <        ## additional input files
120 <        try:
121 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
122 <            for tmp in tmpAddFiles:
123 <                if not os.path.exists(tmp):
124 <                    raise CrabException("Additional input file not found: "+tmp)
125 <                tmp=string.strip(tmp)
126 <                self.additional_inbox_files.append(tmp)
127 <                pass
128 <            pass
129 <        except KeyError:
130 <            pass
130 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
131 >        if self.scriptExe :
132 >            if not os.path.isfile(self.scriptExe):
133 >                msg ="ERROR. file "+self.scriptExe+" not found"
134 >                raise CrabException(msg)
135 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
136  
137 <        try:
138 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
134 <        except KeyError:
135 <            self.filesPerJob = 1
137 >        self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
138 >        if self.AdditionalArgs : self.AdditionalArgs = string.replace(self.AdditionalArgs,',',' ')
139  
140 <        ## Max event   will be total_number_of_events ???  Daniele
141 <        try:
139 <            self.maxEv = cfg_params['CMSSW.event_per_job']
140 <        except KeyError:
141 <            self.maxEv = "-1"
142 <        ##  
143 <        try:
144 <            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
145 <        except KeyError:
146 <            msg = 'Must define total_number_of_events'
140 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
141 >            msg ="Error. script_exe  not defined"
142              raise CrabException(msg)
148        
149        CEBlackList = []
150        try:
151            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
152            for tmp in tmpBad:
153                tmp=string.strip(tmp)
154                CEBlackList.append(tmp)
155        except KeyError:
156            pass
157
158        self.reCEBlackList=[]
159        for bad in CEBlackList:
160            self.reCEBlackList.append(re.compile( bad ))
143  
144 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
144 >        # use parent files...
145 >        self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
146  
147 <        CEWhiteList = []
148 <        try:
149 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
150 <            for tmp in tmpGood:
151 <                tmp=string.strip(tmp)
152 <                CEWhiteList.append(tmp)
153 <        except KeyError:
147 >        ## additional input files
148 >        if cfg_params.has_key('USER.additional_input_files'):
149 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
150 >            for tmp in tmpAddFiles:
151 >                tmp = string.strip(tmp)
152 >                dirname = ''
153 >                if not tmp[0]=="/": dirname = "."
154 >                files = []
155 >                if string.find(tmp,"*")>-1:
156 >                    files = glob.glob(os.path.join(dirname, tmp))
157 >                    if len(files)==0:
158 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
159 >                else:
160 >                    files.append(tmp)
161 >                for file in files:
162 >                    if not os.path.exists(file):
163 >                        raise CrabException("Additional input file not found: "+file)
164 >                    pass
165 >                    self.additional_inbox_files.append(string.strip(file))
166 >                pass
167              pass
168 +            common.logger.debug("Additional input files: "+str(self.additional_inbox_files))
169 +        pass
170  
173        #print 'CEWhiteList: ',CEWhiteList
174        self.reCEWhiteList=[]
175        for Good in CEWhiteList:
176            self.reCEWhiteList.append(re.compile( Good ))
177
178        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
179
180        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
171  
172 +        ## New method of dealing with seeds
173 +        self.incrementSeeds = []
174 +        self.preserveSeeds = []
175 +        if cfg_params.has_key('CMSSW.preserve_seeds'):
176 +            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
177 +            for tmp in tmpList:
178 +                tmp.strip()
179 +                self.preserveSeeds.append(tmp)
180 +        if cfg_params.has_key('CMSSW.increment_seeds'):
181 +            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
182 +            for tmp in tmpList:
183 +                tmp.strip()
184 +                self.incrementSeeds.append(tmp)
185 +
186 +        self.firstRun = cfg_params.get('CMSSW.first_run',None)
187 +
188 +        # Copy/return
189 +        self.copy_data = int(cfg_params.get('USER.copy_data',0))
190 +        self.return_data = int(cfg_params.get('USER.return_data',0))
191 +
192 +        self.conf = {}
193 +        self.conf['pubdata'] = None
194 +        # number of jobs requested to be created, limit obj splitting DD
195          #DBSDLS-start
196 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
196 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
197          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
198          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
199 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
200          ## Perform the data location and discovery (based on DBS/DLS)
201 <        self.DataDiscoveryAndLocation(cfg_params)
202 <        #DBSDLS-end          
201 >        ## SL: Don't if NONE is specified as input (pythia use case)
202 >        blockSites = {}
203 >        if self.datasetPath:
204 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
205 >        #DBSDLS-end
206 >        self.conf['blockSites']=blockSites
207 >
208 >        ## Select Splitting
209 >        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))
210 >
211 >        if self.selectNoInput:
212 >            if self.pset == None:
213 >                self.algo = 'ForScript'
214 >            else:
215 >                self.algo = 'NoInput'
216 >                self.conf['managedGenerators']=self.managedGenerators
217 >                self.conf['generator']=self.generator
218 >        elif splitByRun ==1:
219 >            self.algo = 'RunBased'
220 >        else:
221 >            self.algo = 'EventBased'
222 >
223 > #        self.algo = 'LumiBased'
224 >        splitter = JobSplitter(self.cfg_params,self.conf)
225 >        self.dict = splitter.Algos()[self.algo]()
226 >
227 >        self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
228 >        self.rootArgsFilename= 'arguments'
229 >        # modify Pset only the first time
230 >        if (isNew and self.pset != None): self.ModifyPset()
231  
232 <        self.tgzNameWithPath = self.getTarBall(self.executable)
232 >        ## Prepare inputSandbox TarBall (only the first time)
233 >        self.tarNameWithPath = self.getTarBall(self.executable)
234 >
235 >
236 >    def ModifyPset(self):
237 >        import PsetManipulator as pp
238 >        PsetEdit = pp.PsetManipulator(self.pset)
239 >        try:
240 >            # Add FrameworkJobReport to parameter-set, set max events.
241 >            # Reset later for data jobs by writeCFG which does all modifications
242 >            PsetEdit.maxEvent(1)
243 >            PsetEdit.skipEvent(0)
244 >            PsetEdit.psetWriter(self.configFilename())
245 >            ## If present, add TFileService to output files
246 >            if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
247 >                tfsOutput = PsetEdit.getTFileService()
248 >                if tfsOutput:
249 >                    if tfsOutput in self.output_file:
250 >                        common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
251 >                    else:
252 >                        outfileflag = True #output found
253 >                        self.output_file.append(tfsOutput)
254 >                        common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
255 >                    pass
256 >                pass
257 >            ## If present and requested, add PoolOutputModule to output files
258 >            edmOutput = PsetEdit.getPoolOutputModule()
259 >            if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
260 >                if edmOutput:
261 >                    if edmOutput in self.output_file:
262 >                        common.logger.debug("Output from PoolOutputModule "+edmOutput+" already in output files")
263 >                    else:
264 >                        self.output_file.append(edmOutput)
265 >                        common.logger.info("Adding "+edmOutput+" (from PoolOutputModule) to list of output files")
266 >                    pass
267 >                pass
268 >            # not required: check anyhow if present, to avoid accidental T2 overload
269 >            else:
270 >                if edmOutput and (edmOutput not in self.output_file):
271 >                    msg = "ERROR: a PoolOutputModule is present in your ParameteSet %s \n"%self.pset
272 >                    msg +="         but the file produced ( %s ) is not in the list of output files\n"%edmOutput
273 >                    msg += "WARNING: please remove it. If you want to keep it, add the file to output_files or use CMSSW.get_edm_output\n"
274 >                    if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
275 >                        msg +=" CMSSW.ignore_edm_output==True : Hope you know what you are doing...\n"
276 >                        common.logger.info(msg)
277 >                    else:
278 >                        raise CrabException(msg)
279 >                pass
280 >            pass
281 >
282 >            if (PsetEdit.getBadFilesSetting()):
283 >                msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
284 >                common.logger.info(msg)
285 >
286 >        except CrabException, msg:
287 >            common.logger.info(str(msg))
288 >            msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
289 >            raise CrabException(msg)
290  
192        self.jobSplitting()  #Daniele job Splitting
193        self.PsetEdit.maxEvent(self.maxEv) #Daniele  
194        self.PsetEdit.inputModule("INPUT") #Daniele  
195        self.PsetEdit.psetWriter(self.configFilename())
291  
292      def DataDiscoveryAndLocation(self, cfg_params):
293  
294 <        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
294 >        import DataDiscovery
295 >        import DataLocation
296 >        common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
297  
201        #datasetPath = "/"+self.owner+"/"+self.dataTiers[0]+"/"+self.dataset
202        
298          datasetPath=self.datasetPath
299  
205        ## TODO
206        dataTiersList = ""
207        dataTiers = dataTiersList.split(',')
208
300          ## Contact the DBS
301 +        common.logger.info("Contacting Data Discovery Services ...")
302          try:
303 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, dataTiers)
303 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
304              self.pubdata.fetchDBSInfo()
305  
306 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
306 >        except DataDiscovery.NotExistingDatasetError, ex :
307              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
308              raise CrabException(msg)
309 <
218 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
309 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
310              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
311              raise CrabException(msg)
312 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
313 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
312 >        except DataDiscovery.DataDiscoveryError, ex:
313 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
314              raise CrabException(msg)
315  
316 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
317 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
318 <        common.logger.message("Required data are :"+self.datasetPath)
228 <
229 <        filesbyblock=self.pubdata.getFiles()
230 <        self.AllInputFiles=filesbyblock.values()
231 <        self.files = self.AllInputFiles        
232 <
233 <        ## TEMP
234 <    #    self.filesTmp = filesbyblock.values()
235 <    #    self.files = []
236 <    #    locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
237 <    #    locPath=''
238 <    #    tmp = []
239 <    #    for file in self.filesTmp[0]:
240 <    #        tmp.append(locPath+file)
241 <    #    self.files.append(tmp)
242 <        ## END TEMP
316 >        self.filesbyblock=self.pubdata.getFiles()
317 >        #print self.filesbyblock
318 >        self.conf['pubdata']=self.pubdata
319  
320          ## get max number of events
321 <        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
246 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
247 <        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
321 >        self.maxEvents=self.pubdata.getMaxEvents()
322  
323          ## Contact the DLS and build a list of sites hosting the fileblocks
324          try:
325 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
325 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
326              dataloc.fetchDLSInfo()
327 <        except DataLocation_EDM.DataLocationError , ex:
327 >
328 >        except DataLocation.DataLocationError , ex:
329              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
330              raise CrabException(msg)
331 <        
332 <        allsites=dataloc.getSites()
333 <        common.logger.debug(5,"sites are %s"%allsites)
334 <        sites=self.checkBlackList(allsites)
335 <        common.logger.debug(5,"sites are (after black list) %s"%sites)
336 <        sites=self.checkWhiteList(sites)
337 <        common.logger.debug(5,"sites are (after white list) %s"%sites)
331 >
332 >
333 >        unsorted_sites = dataloc.getSites()
334 >        sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
335 >        for lfn in self.filesbyblock.keys():
336 >            if unsorted_sites.has_key(lfn):
337 >                sites[lfn]=unsorted_sites[lfn]
338 >            else:
339 >                sites[lfn]=[]
340  
341          if len(sites)==0:
342 <            msg = 'No sites hosting all the needed data! Exiting... '
342 >            msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
343 >            msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
344 >            msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
345              raise CrabException(msg)
346  
347 <        common.logger.message("List of Sites hosting the data : "+str(sites))
348 <        common.logger.debug(6, "List of Sites: "+str(sites))
349 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
350 <        return
351 <    
352 <    def jobSplitting(self):
274 <        """
275 <        first implemntation for job splitting  
276 <        """    
277 <      #  print 'eventi totali '+str(self.maxEvents)
278 <      #  print 'eventi totali richiesti dallo user '+str(self.total_number_of_events)
279 <        #print 'files per job '+str(self.filesPerJob)
280 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
281 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
282 <
283 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
284 <        n_tot_files = (len(self.files[0]))
285 <        ## SL: this is wrong if the files have different number of events
286 <        evPerFile = int(self.maxEvents)/n_tot_files
287 <        
288 <        common.logger.debug(5,'Events per File '+str(evPerFile))
289 <
290 <        ## if asked to process all events, do it
291 <        if self.total_number_of_events == -1:
292 <            self.total_number_of_events=self.maxEvents
293 <            self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
294 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
295 <        
296 <        else:
297 <            self.total_number_of_files = int(self.total_number_of_events/evPerFile)
298 <            ## SL: if ask for less event than what is computed to be available on a
299 <            ##     file, process the first file anyhow.
300 <            if self.total_number_of_files == 0:
301 <                self.total_number_of_files = self.total_number_of_files + 1
302 <
303 <            common.logger.debug(5,'N files  '+str(self.total_number_of_files))
304 <
305 <            check = 0
306 <            
307 <            ## Compute the number of jobs
308 <            #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
309 <            self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
310 <            common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
311 <
312 <            ## is there any remainder?
313 <            check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
314 <
315 <            common.logger.debug(5,'Check  '+str(check))
316 <
317 <            if check > 0:
318 <                self.total_number_of_jobs =  self.total_number_of_jobs + 1
319 <                common.logger.message('Warning: last job will be created with '+str(check)+' files')
347 >        allSites = []
348 >        listSites = sites.values()
349 >        for listSite in listSites:
350 >            for oneSite in listSite:
351 >                allSites.append(oneSite)
352 >        [allSites.append(it) for it in allSites if not allSites.count(it)]
353  
321            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
322            pass
354  
355 <        list_of_lists = []
356 <        for i in xrange(0, int(n_tot_files), self.filesPerJob):
326 <            list_of_lists.append(self.files[0][i: i+self.filesPerJob])
355 >        # screen output
356 >        common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
357  
358 <        self.list_of_files = list_of_lists
359 <      
360 <        return
358 >        return sites
359 >
360 >
361 >    def split(self, jobParams,firstJobID):
362 >
363 >        jobParams = self.dict['args']
364 >        njobs = self.dict['njobs']
365 >        self.jobDestination = self.dict['jobDestination']
366 >
367 >        if njobs==0:
368 >            raise CrabException("Ask to split "+str(njobs)+" jobs: aborting")
369  
332    def split(self, jobParams):
333
334        common.jobDB.load()
335        #### Fabio
336        njobs = self.total_number_of_jobs
337        filelist = self.list_of_files
370          # create the empty structure
371          for i in range(njobs):
372              jobParams.append("")
341        
342        for job in range(njobs):
343            jobParams[job] = filelist[job]
344            common.jobDB.setArguments(job, jobParams[job])
373  
374 <        common.jobDB.save()
374 >        listID=[]
375 >        listField=[]
376 >        listDictions=[]
377 >        exist= os.path.exists(self.argsFile)
378 >        for id in range(njobs):
379 >            job = id + int(firstJobID)
380 >            listID.append(job+1)
381 >            job_ToSave ={}
382 >            concString = ' '
383 >            argu=''
384 >            str_argu = str(job+1)
385 >            if len(jobParams[id]):
386 >                argu = {'JobID': job+1}
387 >                for i in range(len(jobParams[id])):
388 >                    argu[self.dict['params'][i]]=jobParams[id][i]
389 >                # just for debug
390 >                str_argu += concString.join(jobParams[id])
391 >            if argu != '': listDictions.append(argu)
392 >            job_ToSave['arguments']= str(job+1)
393 >            job_ToSave['dlsDestination']= self.jobDestination[id]
394 >            listField.append(job_ToSave)
395 >            from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
396 >            cms_se = CmsSEMap()
397 >            msg="Job  %s  Arguments:  %s\n"%(str(job+1),str_argu)
398 >            msg+="\t  Destination: %s "%(str(self.jobDestination[id]))
399 >            SEDestination = [cms_se[dest] for dest in self.jobDestination[id]]
400 >            msg+="\t  CMSDestination: %s "%(str(SEDestination))
401 >            common.logger.log(10-1,msg)
402 >        # write xml
403 >        if len(listDictions):
404 >            if exist==False: self.CreateXML()
405 >            self.addEntry(listDictions)
406 >            self.addXMLfile()
407 >        common._db.updateJob_(listID,listField)
408 >        self.zipTarFile()
409          return
348    
349    def getJobTypeArguments(self, nj, sched):
350        params = common.jobDB.arguments(nj)
351        #print params
352        parString = "\\{"
353        
354        for i in range(len(params) - 1):
355            parString += '\\\"' + params[i] + '\\\"\,'
356        
357        parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
358        return parString
359  
360    def numberOfJobs(self):
361        # Fabio
410  
411 <        return self.total_number_of_jobs
412 <
411 >    def addXMLfile(self):
412 >
413 >        import tarfile
414 >        try:
415 >            tar = tarfile.open(self.tarNameWithPath, "a")
416 >            tar.add(self.argsFile, os.path.basename(self.argsFile))
417 >            tar.close()
418 >        except IOError, exc:
419 >            msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
420 >            msg += str(exc)
421 >            raise CrabException(msg)
422 >        except tarfile.TarError, exc:
423 >            msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
424 >            msg += str(exc)
425 >            raise CrabException(msg)
426  
427 +    def CreateXML(self):
428 +        """
429 +        """
430 +        result = IMProvNode( self.rootArgsFilename )
431 +        outfile = file( self.argsFile, 'w').write(str(result))
432 +        return
433  
434 <    def checkBlackList(self, allSites):
435 <        if len(self.reCEBlackList)==0: return allSites
436 <        sites = []
370 <        for site in allSites:
371 <            common.logger.debug(10,'Site '+site)
372 <            good=1
373 <            for re in self.reCEBlackList:
374 <                if re.search(site):
375 <                    common.logger.message('CE in black list, skipping site '+site)
376 <                    good=0
377 <                pass
378 <            if good: sites.append(site)
379 <        if len(sites) == 0:
380 <            common.logger.debug(3,"No sites found after BlackList")
381 <        return sites
434 >    def addEntry(self, listDictions):
435 >        """
436 >        _addEntry_
437  
438 <    def checkWhiteList(self, allSites):
438 >        add an entry to the xml file
439 >        """
440 >        from IMProv.IMProvLoader import loadIMProvFile
441 >        ## load xml
442 >        improvDoc = loadIMProvFile(self.argsFile)
443 >        entrname= 'Job'
444 >        for dictions in listDictions:
445 >           report = IMProvNode(entrname , None, **dictions)
446 >           improvDoc.addNode(report)
447 >        outfile = file( self.argsFile, 'w').write(str(improvDoc))
448 >        return
449  
450 <        if len(self.reCEWhiteList)==0: return allSites
451 <        sites = []
387 <        for site in allSites:
388 <            good=0
389 <            for re in self.reCEWhiteList:
390 <                if re.search(site):
391 <                    common.logger.debug(5,'CE in white list, adding site '+site)
392 <                    good=1
393 <                if not good: continue
394 <                sites.append(site)
395 <        if len(sites) == 0:
396 <            common.logger.message("No sites found after WhiteList\n")
397 <        else:
398 <            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
399 <        return sites
450 >    def numberOfJobs(self):
451 >        return self.dict['njobs']
452  
453      def getTarBall(self, exe):
454          """
455          Return the TarBall with lib and exe
456          """
457 <        
458 <        # if it exist, just return it
459 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
408 <        if os.path.exists(self.tgzNameWithPath):
409 <            return self.tgzNameWithPath
457 >        self.tarNameWithPath = common.work_space.pathForTgz()+self.tar_name
458 >        if os.path.exists(self.tarNameWithPath):
459 >            return self.tarNameWithPath
460  
461          # Prepare a tar gzipped file with user binaries.
462          self.buildTar_(exe)
463  
464 <        return string.strip(self.tgzNameWithPath)
464 >        return string.strip(self.tarNameWithPath)
465  
466      def buildTar_(self, executable):
467  
468          # First of all declare the user Scram area
469          swArea = self.scram.getSWArea_()
420        #print "swArea = ", swArea
421        swVersion = self.scram.getSWVersion()
422        #print "swVersion = ", swVersion
470          swReleaseTop = self.scram.getReleaseTop_()
471 <        #print "swReleaseTop = ", swReleaseTop
425 <        
471 >
472          ## check if working area is release top
473          if swReleaseTop == '' or swArea == swReleaseTop:
474 +            common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
475              return
476  
477 <        filesToBeTarred = []
478 <        ## First find the executable
479 <        if (self.executable != ''):
480 <            exeWithPath = self.scram.findFile_(executable)
481 < #           print exeWithPath
482 <            if ( not exeWithPath ):
483 <                raise CrabException('User executable '+executable+' not found')
484 <
485 <            ## then check if it's private or not
486 <            if exeWithPath.find(swReleaseTop) == -1:
487 <                # the exe is private, so we must ship
488 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
489 <                path = swArea+'/'
490 <                exe = string.replace(exeWithPath, path,'')
491 <                filesToBeTarred.append(exe)
477 >        import tarfile
478 >        try: # create tar ball
479 >            #tar = tarfile.open(self.tgzNameWithPath, "w:gz")
480 >            tar = tarfile.open(self.tarNameWithPath, "w")
481 >            ## First find the executable
482 >            if (self.executable != ''):
483 >                exeWithPath = self.scram.findFile_(executable)
484 >                if ( not exeWithPath ):
485 >                    raise CrabException('User executable '+executable+' not found')
486 >
487 >                ## then check if it's private or not
488 >                if exeWithPath.find(swReleaseTop) == -1:
489 >                    # the exe is private, so we must ship
490 >                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
491 >                    path = swArea+'/'
492 >                    # distinguish case when script is in user project area or given by full path somewhere else
493 >                    if exeWithPath.find(path) >= 0 :
494 >                        exe = string.replace(exeWithPath, path,'')
495 >                        tar.add(path+exe,exe)
496 >                    else :
497 >                        tar.add(exeWithPath,os.path.basename(executable))
498 >                    pass
499 >                else:
500 >                    # the exe is from release, we'll find it on WN
501 >                    pass
502 >
503 >            ## Now get the libraries: only those in local working area
504 >            tar.dereference=True
505 >            libDir = 'lib'
506 >            lib = swArea+'/' +libDir
507 >            common.logger.debug("lib "+lib+" to be tarred")
508 >            if os.path.exists(lib):
509 >                tar.add(lib,libDir)
510 >
511 >            ## Now check if module dir is present
512 >            moduleDir = 'module'
513 >            module = swArea + '/' + moduleDir
514 >            if os.path.isdir(module):
515 >                tar.add(module,moduleDir)
516 >            tar.dereference=False
517 >
518 >            ## Now check if any data dir(s) is present
519 >            self.dataExist = False
520 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
521 >            while len(todo_list):
522 >                entry, name = todo_list.pop()
523 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
524 >                    continue
525 >                if os.path.isdir(swArea+"/src/"+entry):
526 >                    entryPath = entry + '/'
527 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
528 >                    if name == 'data':
529 >                        self.dataExist=True
530 >                        common.logger.debug("data "+entry+" to be tarred")
531 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
532 >                    pass
533                  pass
534 <            else:
535 <                # the exe is from release, we'll find it on WN
534 >
535 >            ### CMSSW ParameterSet
536 >            if not self.pset is None:
537 >                cfg_file = common.work_space.jobDir()+self.configFilename()
538 >                tar.add(cfg_file,self.configFilename())
539 >
540 >            try:
541 >                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
542 >                tar.add(crab_cfg_file,'crab.cfg')
543 >            except:
544                  pass
545 <
546 <        ## Now get the libraries: only those in local working area
547 <        libDir = 'lib'
548 <        lib = swArea+'/' +libDir
549 <        common.logger.debug(5,"lib "+lib+" to be tarred")
550 <        if os.path.exists(lib):
551 <            filesToBeTarred.append(libDir)
552 <
553 <        ## Now check if module dir is present
554 <        moduleDir = 'module'
555 <        if os.path.isdir(swArea+'/'+moduleDir):
556 <            filesToBeTarred.append(moduleDir)
557 <
558 <        ## Now check if the Data dir is present
559 <        dataDir = 'src/Data/'
560 <        if os.path.isdir(swArea+'/'+dataDir):
561 <            filesToBeTarred.append(dataDir)
562 <
563 <        ## Create the tar-ball
564 <        if len(filesToBeTarred)>0:
565 <            cwd = os.getcwd()
566 <            os.chdir(swArea)
567 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
568 <            for line in filesToBeTarred:
569 <                tarcmd = tarcmd + line + ' '
570 <            cout = runCommand(tarcmd)
571 <            if not cout:
572 <                raise CrabException('Could not create tar-ball')
573 <            os.chdir(cwd)
574 <        else:
575 <            common.logger.debug(5,"No files to be to be tarred")
576 <        
577 <        return
578 <        
579 <    def wsSetupEnvironment(self, nj):
545 >
546 >            ## Add ProdCommon dir to tar
547 >            prodcommonDir = './'
548 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
549 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
550 >                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
551 >                           'WMCore/__init__.py','WMCore/Algorithms']
552 >            for file in neededStuff:
553 >                tar.add(prodcommonPath+file,prodcommonDir+file)
554 >
555 >            ##### ML stuff
556 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
557 >            path=os.environ['CRABDIR'] + '/python/'
558 >            for file in ML_file_list:
559 >                tar.add(path+file,file)
560 >
561 >            ##### Utils
562 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
563 >            for file in Utils_file_list:
564 >                tar.add(path+file,file)
565 >
566 >            ##### AdditionalFiles
567 >            tar.dereference=True
568 >            for file in self.additional_inbox_files:
569 >                tar.add(file,string.split(file,'/')[-1])
570 >            tar.dereference=False
571 >            common.logger.log(10-1,"Files in "+self.tarNameWithPath+" : "+str(tar.getnames()))
572 >
573 >            tar.close()
574 >        except IOError, exc:
575 >            msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
576 >            msg += str(exc)
577 >            raise CrabException(msg)
578 >        except tarfile.TarError, exc:
579 >            msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
580 >            msg += str(exc)
581 >            raise CrabException(msg)
582 >
583 >    def zipTarFile(self):
584 >
585 >        cmd = "gzip -c %s > %s "%(self.tarNameWithPath,self.tgzNameWithPath)
586 >        res=runCommand(cmd)
587 >
588 >        tarballinfo = os.stat(self.tgzNameWithPath)
589 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
590 >            msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
591 >               +'MB input sandbox limit \n'
592 >            msg += '      and not supported by the direct GRID submission system.\n'
593 >            msg += '      Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
594 >            msg += '      For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
595 >            raise CrabException(msg)
596 >
597 >        ## create tar-ball with ML stuff
598 >
599 >    def wsSetupEnvironment(self, nj=0):
600          """
601          Returns part of a job script which prepares
602          the execution environment for the job 'nj'.
603          """
604 +        # FUTURE: Drop support for .cfg when possible
605 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
606 +            psetName = 'pset.py'
607 +        else:
608 +            psetName = 'pset.cfg'
609          # Prepare JobType-independent part
610 <        txt = ''
611 <  
612 <        ## OLI_Daniele at this level  middleware already known
492 <
493 <        txt += 'if [ $middleware == LCG ]; then \n'
610 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
611 >        txt += 'echo ">>> setup environment"\n'
612 >        txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
613          txt += self.wsSetupCMSLCGEnvironment_()
614          txt += 'elif [ $middleware == OSG ]; then\n'
615 <        txt += '    time=`date -u +"%s"`\n'
616 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
617 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
618 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
619 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
501 <        txt += '        echo "OSG WORKING DIR ==> $WORKING_DIR could not be created on on WN `hostname`"\n'
502 <    
503 <        txt += '        echo "JOB_EXIT_STATUS = 1"\n'
504 <        txt += '        exit 1\n'
615 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
616 >        txt += '    if [ ! $? == 0 ] ;then\n'
617 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
618 >        txt += '        job_exit_code=10016\n'
619 >        txt += '        func_exit\n'
620          txt += '    fi\n'
621 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
622          txt += '\n'
623          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
624          txt += '    cd $WORKING_DIR\n'
625 <        txt += self.wsSetupCMSOSGEnvironment_()
625 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
626 >        txt += self.wsSetupCMSOSGEnvironment_()
627 >        #Setup SGE Environment
628 >        txt += 'elif [ $middleware == SGE ]; then\n'
629 >        txt += self.wsSetupCMSLCGEnvironment_()
630 >
631 >        txt += 'elif [ $middleware == ARC ]; then\n'
632 >        txt += self.wsSetupCMSLCGEnvironment_()
633 >
634          txt += 'fi\n'
635  
636          # Prepare JobType-specific part
637          scram = self.scram.commandName()
638          txt += '\n\n'
639 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
639 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
640 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
641          txt += scram+' project CMSSW '+self.version+'\n'
642          txt += 'status=$?\n'
643          txt += 'if [ $status != 0 ] ; then\n'
644 <        txt += '   echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
645 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
646 <        txt += '   echo "SanityCheckCode = 10034" | tee -a $RUNTIME_AREA/$repo\n'
522 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
523 <        ## OLI_Daniele
524 <        txt += '    if [ $middleware == OSG ]; then \n'
525 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
526 <        txt += '        cd $RUNTIME_AREA\n'
527 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
528 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
529 <        txt += '            echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
530 <        txt += '        fi\n'
531 <        txt += '    fi \n'
532 <        txt += '   exit 1 \n'
644 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
645 >        txt += '    job_exit_code=10034\n'
646 >        txt += '    func_exit\n'
647          txt += 'fi \n'
534        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
648          txt += 'cd '+self.version+'\n'
649 <        ### needed grep for bug in scramv1 ###
649 >        txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
650 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
651          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
652 <
652 >        txt += 'if [ $? != 0 ] ; then\n'
653 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
654 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
655 >        txt += '    job_exit_code=10034\n'
656 >        txt += '    func_exit\n'
657 >        txt += 'fi \n'
658          # Handle the arguments:
659          txt += "\n"
660 <        txt += "## ARGUMNETS: $1 Job Number\n"
542 <        # txt += "## ARGUMNETS: $2 First Event for this job\n"
543 <        # txt += "## ARGUMNETS: $3 Max Event for this job\n"
660 >        txt += "## number of arguments (first argument always jobnumber)\n"
661          txt += "\n"
662 <        txt += "narg=$#\n"
546 <        txt += "if [ $narg -lt 2 ]\n"
662 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
663          txt += "then\n"
664 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
665 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
666 <        txt += '    echo "SanityCheckCode = 50113" | tee -a $RUNTIME_AREA/$repo\n'
551 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
552 <        ## OLI_Daniele
553 <        txt += '    if [ $middleware == OSG ]; then \n'
554 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
555 <        txt += '        cd $RUNTIME_AREA\n'
556 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
557 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
558 <        txt += '            echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
559 <        txt += '        fi\n'
560 <        txt += '    fi \n'
561 <        txt += "    exit 1\n"
664 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
665 >        txt += '    job_exit_code=50113\n'
666 >        txt += "    func_exit\n"
667          txt += "fi\n"
668          txt += "\n"
564        txt += "NJob=$1\n"
565        txt += "InputFiles=$2\n"
566        txt += "echo \"<$InputFiles>\"\n"
567        # txt += "Args = ` cat $2 |  sed -e \'s/\\\\//g\' -e \'s/\"/\\x27/g\' `"
568
569        ### OLI_DANIELE
570        txt += 'if [ $middleware == LCG ]; then \n'
571        txt += '    echo "MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
572        txt += '    echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
573        txt += '    echo "SyncCE=`edg-brokerinfo getCE`" | tee -a $RUNTIME_AREA/$repo\n'
574        txt += 'elif [ $middleware == OSG ]; then\n'
575
576        # OLI: added monitoring for dashbord, use hash of crab.cfg
577        if common.scheduler.boss_scheduler_name == 'condor_g':
578            # create hash of cfg file
579            hash = makeCksum(common.work_space.cfgFileName())
580            txt += '    echo "MonitorJobID=`echo ${NJob}_'+hash+'_$GLOBUS_GRAM_JOB_CONTACT`" | tee -a $RUNTIME_AREA/$repo\n'
581            txt += '    echo "SyncGridJobId=`echo $GLOBUS_GRAM_JOB_CONTACT`" | tee -a $RUNTIME_AREA/$repo\n'
582            txt += '    echo "SyncCE=`echo $hostname`" | tee -a $RUNTIME_AREA/$repo\n'
583        else :
584            txt += '    echo "MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
585            txt += '    echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
586            txt += '    echo "SyncCE=`$EDG_WL_LOG_DESTINATION`" | tee -a $RUNTIME_AREA/$repo\n'
587
588        txt += 'fi\n'
589        txt += 'dumpStatus $RUNTIME_AREA/$repo\n'
669  
670          # Prepare job-specific part
671          job = common.job_list[nj]
672 <        pset = os.path.basename(job.configFilename())
673 <        txt += '\n'
674 <        #txt += 'echo sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' \n'
675 <        txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
676 <        #txt += 'sed "s#{\'INPUT\'}#${InputFiles}#" $RUNTIME_AREA/'+pset+' > pset1.cfg\n'
672 >        if (self.datasetPath):
673 >            self.primaryDataset = self.datasetPath.split("/")[1]
674 >            DataTier = self.datasetPath.split("/")[2]
675 >            txt += '\n'
676 >            txt += 'DatasetPath='+self.datasetPath+'\n'
677  
678 <        if len(self.additional_inbox_files) > 0:
679 <            for file in self.additional_inbox_files:
680 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
602 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
603 <                txt += '   chmod +x '+file+'\n'
604 <                txt += 'fi\n'
605 <            pass
678 >            txt += 'PrimaryDataset='+self.primaryDataset +'\n'
679 >            txt += 'DataTier='+DataTier+'\n'
680 >            txt += 'ApplicationFamily=cmsRun\n'
681  
682 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
682 >        else:
683 >            self.primaryDataset = 'null'
684 >            txt += 'DatasetPath=MCDataTier\n'
685 >            txt += 'PrimaryDataset=null\n'
686 >            txt += 'DataTier=null\n'
687 >            txt += 'ApplicationFamily=MCDataTier\n'
688 >        if self.pset != None:
689 >            pset = os.path.basename(job.configFilename())
690 >            txt += '\n'
691 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
692 >
693 >            txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
694 >            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
695 >            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
696 >            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
697 >
698 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
699 >        else:
700 >            txt += '\n'
701 >            txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
702  
609        txt += '\n'
610        txt += 'echo "***** cat pset.cfg *********"\n'
611        txt += 'cat pset.cfg\n'
612        txt += 'echo "****** end pset.cfg ********"\n'
613        txt += '\n'
614        # txt += 'echo "***** cat pset1.cfg *********"\n'
615        # txt += 'cat pset1.cfg\n'
616        # txt += 'echo "****** end pset1.cfg ********"\n'
703          return txt
704  
705 <    def wsBuildExe(self, nj):
705 >    def wsUntarSoftware(self, nj=0):
706          """
707          Put in the script the commands to build an executable
708          or a library.
709          """
710  
711 <        txt = ""
711 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
712  
713          if os.path.isfile(self.tgzNameWithPath):
714 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
715 <            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
714 >            txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
715 >            txt += 'tar zxvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
716 >            if  self.debug_wrapper==1 :
717 >                txt += 'ls -Al \n'
718              txt += 'untar_status=$? \n'
719              txt += 'if [ $untar_status -ne 0 ]; then \n'
720 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
721 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
722 <            txt += '   echo "SanityCheckCode = $untar_status" | tee -a $repo\n'
635 <            txt += '   if [ $middleware == OSG ]; then \n'
636 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
637 <            txt += '       cd $RUNTIME_AREA\n'
638 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
639 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
640 <            txt += '           echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
641 <            txt += '       fi\n'
642 <            txt += '   fi \n'
643 <            txt += '   \n'
644 <            txt += '   exit $untar_status \n'
720 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
721 >            txt += '   job_exit_code=$untar_status\n'
722 >            txt += '   func_exit\n'
723              txt += 'else \n'
724              txt += '   echo "Successful untar" \n'
725              txt += 'fi \n'
726 +            txt += '\n'
727 +            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
728 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
729 +            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
730 +            txt += 'else\n'
731 +            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
732 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
733 +            txt += 'fi\n'
734 +            txt += '\n'
735 +
736              pass
737 <        
737 >
738          return txt
739  
740 <    def modifySteeringCards(self, nj):
740 >    def wsBuildExe(self, nj=0):
741          """
742 <        modify the card provided by the user,
743 <        writing a new card into share dir
742 >        Put in the script the commands to build an executable
743 >        or a library.
744          """
745 <        
745 >
746 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
747 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
748 >
749 >        txt += 'rm -r lib/ module/ \n'
750 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
751 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
752 >        if self.dataExist == True:
753 >            txt += 'rm -r src/ \n'
754 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
755 >        if len(self.additional_inbox_files)>0:
756 >            for file in self.additional_inbox_files:
757 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
758 >        # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
759 >        # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
760 >
761 >        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
762 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
763 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
764 >        txt += 'else\n'
765 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
766 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
767 >        txt += 'fi\n'
768 >        txt += '\n'
769 >
770 >        if self.pset != None:
771 >            # FUTURE: Drop support for .cfg when possible
772 >            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
773 >                psetName = 'pset.py'
774 >            else:
775 >                psetName = 'pset.cfg'
776 >            # FUTURE: Can simply for 2_1_x and higher
777 >            txt += '\n'
778 >            if self.debug_wrapper == 1:
779 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
780 >                txt += 'cat ' + psetName + '\n'
781 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
782 >                txt += '\n'
783 >                txt += 'echo "***********************" \n'
784 >                txt += 'which edmConfigHash \n'
785 >                txt += 'echo "***********************" \n'
786 >            if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
787 >                txt += 'edmConfigHash ' + psetName + ' \n'
788 >                txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
789 >            else:
790 >                txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
791 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
792 >            #### FEDE temporary fix for noEdm files #####
793 >            txt += 'if [ -z "$PSETHASH" ]; then \n'
794 >            txt += '   export PSETHASH=null\n'
795 >            txt += 'fi \n'
796 >            #############################################
797 >            txt += '\n'
798 >        return txt
799 >
800 >
801      def executableName(self):
802 <        return self.executable
802 >        if self.scriptExe:
803 >            return "sh "
804 >        else:
805 >            return self.executable
806  
807      def executableArgs(self):
808 <        return " -p pset.cfg"
808 >        # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
809 >        if self.scriptExe:
810 >            return self.scriptExe + " $NJob $AdditionalArgs"
811 >        else:
812 >            ex_args = ""
813 >            ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
814 >            # Type of config file depends on CMSSW version
815 >            if self.CMSSW_major >= 2 :
816 >                ex_args += " -p pset.py"
817 >            else:
818 >                ex_args += " -p pset.cfg"
819 >            return ex_args
820  
821      def inputSandbox(self, nj):
822          """
823          Returns a list of filenames to be put in JDL input sandbox.
824          """
825          inp_box = []
669        # dict added to delete duplicate from input sandbox file list
670        seen = {}
671        ## code
826          if os.path.isfile(self.tgzNameWithPath):
827              inp_box.append(self.tgzNameWithPath)
828 <        ## config
675 <        inp_box.append(common.job_list[nj].configFilename())
676 <        ## additional input files
677 <        #for file in self.additional_inbox_files:
678 <        #    inp_box.append(common.work_space.cwdDir()+file)
828 >        inp_box.append(common.work_space.jobDir() + self.scriptName)
829          return inp_box
830  
831      def outputSandbox(self, nj):
# Line 684 | Line 834 | class Cmssw(JobType):
834          """
835          out_box = []
836  
687        stdout=common.job_list[nj].stdout()
688        stderr=common.job_list[nj].stderr()
689
837          ## User Declared output files
838 <        for out in self.output_file:
839 <            n_out = nj + 1
840 <            out_box.append(self.numberFile_(out,str(n_out)))
838 >        for out in (self.output_file+self.output_file_sandbox):
839 >            n_out = nj + 1
840 >            out_box.append(numberFile(out,str(n_out)))
841          return out_box
695        return []
842  
697    def prepareSteeringCards(self):
698        """
699        Make initial modifications of the user's steering card file.
700        """
701        return
843  
844      def wsRenameOutput(self, nj):
845          """
846          Returns part of a job script which renames the produced files.
847          """
848  
849 <        txt = '\n'
850 <        file_list = ''
851 <        check = len(self.output_file)
852 <        i = 0
853 <        for fileWithSuffix in self.output_file:
854 <            i= i + 1
855 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
856 <            file_list=file_list+output_file_num+''
857 <            txt += '\n'
717 <            txt += 'ls \n'
849 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
850 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
851 >        txt += 'echo ">>> current directory content:"\n'
852 >        if self.debug_wrapper==1:
853 >            txt += 'ls -Al\n'
854 >        txt += '\n'
855 >
856 >        for fileWithSuffix in (self.output_file):
857 >            output_file_num = numberFile(fileWithSuffix, '$NJob')
858              txt += '\n'
859 <            txt += 'ls '+fileWithSuffix+'\n'
860 <            txt += 'exe_result=$?\n'
861 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
862 <            txt += '   echo "ERROR: No output file to manage"\n'
863 <            ### OLI_DANIELE
864 <            txt += '    if [ $middleware == OSG ]; then \n'
865 <            txt += '        echo "prepare dummy output file"\n'
866 <            txt += '        cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
727 <            txt += '    fi \n'
859 >            txt += '# check output file\n'
860 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
861 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
862 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
863 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
864 >            else:
865 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
866 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
867              txt += 'else\n'
868 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
868 >            txt += '    job_exit_code=60302\n'
869 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
870 >            if common.scheduler.name().upper() == 'CONDOR_G':
871 >                txt += '    if [ $middleware == OSG ]; then \n'
872 >                txt += '        echo "prepare dummy output file"\n'
873 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
874 >                txt += '    fi \n'
875              txt += 'fi\n'
876 <            if i == check:
877 <                txt += 'cd $RUNTIME_AREA\n'
878 <                pass      
879 <            pass
880 <      
881 <        file_list=file_list[:-1]
882 <        txt += 'file_list="'+file_list+'"\n'
883 <        ### OLI_DANIELE
884 <        txt += 'if [ $middleware == OSG ]; then\n'  
885 <        txt += '    cd $RUNTIME_AREA\n'
741 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
742 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
743 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
744 <        txt += '        echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
745 <        txt += '    fi\n'
746 <        txt += 'fi\n'
876 >        file_list = []
877 >        for fileWithSuffix in (self.output_file):
878 >             file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$NJob'))
879 >
880 >        txt += 'file_list="'+string.join(file_list,',')+'"\n'
881 >        txt += '\n'
882 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
883 >        txt += 'echo ">>> current directory content:"\n'
884 >        if self.debug_wrapper==1:
885 >            txt += 'ls -Al\n'
886          txt += '\n'
887 +        txt += 'cd $RUNTIME_AREA\n'
888 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
889          return txt
890  
891 <    def numberFile_(self, file, txt):
751 <        """
752 <        append _'txt' before last extension of a file
753 <        """
754 <        p = string.split(file,".")
755 <        # take away last extension
756 <        name = p[0]
757 <        for x in p[1:-1]:
758 <           name=name+"."+x
759 <        # add "_txt"
760 <        if len(p)>1:
761 <          ext = p[len(p)-1]
762 <          #result = name + '_' + str(txt) + "." + ext
763 <          result = name + '_' + txt + "." + ext
764 <        else:
765 <          #result = name + '_' + str(txt)
766 <          result = name + '_' + txt
767 <        
768 <        return result
769 <
770 <    def getRequirements(self):
891 >    def getRequirements(self, nj=[]):
892          """
893 <        return job requirements to add to jdl files
893 >        return job requirements to add to jdl files
894          """
895          req = ''
896 <        if common.analisys_common_info['sites']:
897 <            if common.analisys_common_info['sw_version']:
898 <                req='Member("VO-cms-' + \
899 <                     common.analisys_common_info['sw_version'] + \
900 <                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
901 <            if len(common.analisys_common_info['sites'])>0:
902 <                req = req + ' && ('
903 <                for i in range(len(common.analisys_common_info['sites'])):
904 <                    req = req + 'other.GlueCEInfoHostName == "' \
905 <                         + common.analisys_common_info['sites'][i] + '"'
906 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
907 <                        req = req + ' || '
908 <            req = req + ')'
788 <        #print "req = ", req
896 >        if self.version:
897 >            req='Member("VO-cms-' + \
898 >                 self.version + \
899 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
900 >        if self.executable_arch:
901 >            req+=' && Member("VO-cms-' + \
902 >                 self.executable_arch + \
903 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
904 >
905 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
906 >        if ( common.scheduler.name() == "glitecoll" ) or ( common.scheduler.name() == "glite"):
907 >            req += ' && other.GlueCEStateStatus == "Production" '
908 >
909          return req
910  
911      def configFilename(self):
912          """ return the config filename """
913 <        return self.name()+'.cfg'
913 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
914 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
915 >          return self.name()+'.py'
916 >        else:
917 >          return self.name()+'.cfg'
918  
795    ### OLI_DANIELE
919      def wsSetupCMSOSGEnvironment_(self):
920          """
921          Returns part of a job script which is prepares
922          the execution environment and which is common for all CMS jobs.
923          """
924 <        txt = '\n'
925 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
926 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
927 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
928 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
929 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
930 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
931 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
932 <        txt += '   else\n'
933 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
934 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
935 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
936 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
814 <        txt += '       exit\n'
815 <        txt += '\n'
816 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
817 <        txt += '       cd $RUNTIME_AREA\n'
818 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
819 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
820 <        txt += '           echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n'
821 <        txt += '       fi\n'
822 <        txt += '\n'
823 <        txt += '       exit\n'
824 <        txt += '   fi\n'
924 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
925 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
926 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
927 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
928 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
929 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
930 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
931 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
932 >        txt += '    else\n'
933 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
934 >        txt += '        job_exit_code=10020\n'
935 >        txt += '        func_exit\n'
936 >        txt += '    fi\n'
937          txt += '\n'
938 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
939 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
938 >        txt += '    echo "==> setup cms environment ok"\n'
939 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
940  
941          return txt
942 <
831 <    ### OLI_DANIELE
942 >
943      def wsSetupCMSLCGEnvironment_(self):
944          """
945          Returns part of a job script which is prepares
946          the execution environment and which is common for all CMS jobs.
947          """
948 <        txt  = '   \n'
949 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
950 <        txt += '      echo "JOB_EXIT_STATUS = 0"\n'
951 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
952 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
953 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
954 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
955 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
956 <        txt += '       exit\n'
957 <        txt += '   else\n'
958 <        txt += '       echo "Sourcing environment... "\n'
959 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
960 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
961 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
962 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
963 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
964 <        txt += '           exit\n'
965 <        txt += '       fi\n'
966 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
967 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
968 <        txt += '       result=$?\n'
969 <        txt += '       if [ $result -ne 0 ]; then\n'
970 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
971 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
972 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
973 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
974 <        txt += '           exit\n'
975 <        txt += '       fi\n'
976 <        txt += '   fi\n'
977 <        txt += '   \n'
978 <        txt += '   string=`cat /etc/redhat-release`\n'
979 <        txt += '   echo $string\n'
980 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
981 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
982 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
983 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
984 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
985 <        txt += '   else\n'
986 <        txt += '       echo "SET_CMS_ENV 1 ==> ERROR OS unknown, LCG environment not initialized"\n'
987 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
988 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
989 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
990 <        txt += '       exit\n'
991 <        txt += '   fi\n'
992 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
993 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
948 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
949 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
950 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
951 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
952 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
953 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
954 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
955 >        txt += '        job_exit_code=10031\n'
956 >        txt += '        func_exit\n'
957 >        txt += '    else\n'
958 >        txt += '        echo "Sourcing environment... "\n'
959 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
960 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
961 >        txt += '            job_exit_code=10020\n'
962 >        txt += '            func_exit\n'
963 >        txt += '        fi\n'
964 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
965 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
966 >        txt += '        result=$?\n'
967 >        txt += '        if [ $result -ne 0 ]; then\n'
968 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
969 >        txt += '            job_exit_code=10032\n'
970 >        txt += '            func_exit\n'
971 >        txt += '        fi\n'
972 >        txt += '    fi\n'
973 >        txt += '    \n'
974 >        txt += '    echo "==> setup cms environment ok"\n'
975 >        return txt
976 >
977 >    def wsModifyReport(self, nj):
978 >        """
979 >        insert the part of the script that modifies the FrameworkJob Report
980 >        """
981 >
982 >        txt = ''
983 >        publish_data = int(self.cfg_params.get('USER.publish_data',0))
984 >        #if (publish_data == 1):
985 >        if (self.copy_data == 1):
986 >            txt = '\n#Written by cms_cmssw::wsModifyReport\n'
987 >            publish_data = int(self.cfg_params.get('USER.publish_data',0))
988 >
989 >
990 >            txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n'
991 >            txt += '    FOR_LFN=$LFNBaseName\n'
992 >            txt += 'else\n'
993 >            txt += '    FOR_LFN=/copy_problems/ \n'
994 >            txt += 'fi\n'
995 >
996 >            txt += 'echo ">>> Modify Job Report:" \n'
997 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
998 >            txt += 'echo "SE = $SE"\n'
999 >            txt += 'echo "SE_PATH = $SE_PATH"\n'
1000 >            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1001 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1002 >
1003 >
1004 >            args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset  ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH'
1005 >            if (publish_data == 1):
1006 >                processedDataset = self.cfg_params['USER.publish_data_name']
1007 >                txt += 'ProcessedDataset='+processedDataset+'\n'
1008 >                txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1009 >                args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'
1010 >
1011 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n'
1012 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n'
1013 >            txt += 'modifyReport_result=$?\n'
1014 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1015 >            txt += '    modifyReport_result=70500\n'
1016 >            txt += '    job_exit_code=$modifyReport_result\n'
1017 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1018 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1019 >            txt += 'else\n'
1020 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1021 >            txt += 'fi\n'
1022 >        return txt
1023 >
1024 >    def wsParseFJR(self):
1025 >        """
1026 >        Parse the FrameworkJobReport to obtain useful infos
1027 >        """
1028 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1029 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1030 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1031 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1032 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1033 >        if self.debug_wrapper==1 :
1034 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1035 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1036 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1037 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1038 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1039 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1040 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1041 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1042 >        txt += '        else\n'
1043 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1044 >        txt += '        fi\n'
1045 >        txt += '    else\n'
1046 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1047 >        txt += '    fi\n'
1048 >          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1049 >        txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1050 >        txt += '        echo ">>> Executable succeded  $executable_exit_status"\n'
1051 >        ## This cannot more work given the changes on the Job argumentsJob
1052 >        """
1053 >        if (self.datasetPath and not (self.dataset_pu or self.useParent==1)) :
1054 >          # VERIFY PROCESSED DATA
1055 >            txt += '        echo ">>> Verify list of processed files:"\n'
1056 >            txt += '        echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1057 >            txt += '        python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1058 >            txt += '        cat input-files.txt  | sort | uniq > tmp.txt\n'
1059 >            txt += '        mv tmp.txt input-files.txt\n'
1060 >            txt += '        echo "cat input-files.txt"\n'
1061 >            txt += '        echo "----------------------"\n'
1062 >            txt += '        cat input-files.txt\n'
1063 >            txt += '        cat processed-files.txt | sort | uniq > tmp.txt\n'
1064 >            txt += '        mv tmp.txt processed-files.txt\n'
1065 >            txt += '        echo "----------------------"\n'
1066 >            txt += '        echo "cat processed-files.txt"\n'
1067 >            txt += '        echo "----------------------"\n'
1068 >            txt += '        cat processed-files.txt\n'
1069 >            txt += '        echo "----------------------"\n'
1070 >            txt += '        diff -qbB input-files.txt processed-files.txt\n'
1071 >            txt += '        fileverify_status=$?\n'
1072 >            txt += '        if [ $fileverify_status -ne 0 ]; then\n'
1073 >            txt += '            executable_exit_status=30001\n'
1074 >            txt += '            echo "ERROR ==> not all input files processed"\n'
1075 >            txt += '            echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1076 >            txt += '            echo "      ==> diff input-files.txt processed-files.txt"\n'
1077 >            txt += '        fi\n'
1078 >        """
1079 >        txt += '    fi\n'
1080 >        txt += 'else\n'
1081 >        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1082 >        txt += 'fi\n'
1083 >        txt += '\n'
1084 >        txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
1085 >        txt += '    echo ">>> Executable failed  $executable_exit_status"\n'
1086 >        txt += '    echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1087 >        txt += '    echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1088 >        txt += '    job_exit_code=$executable_exit_status\n'
1089 >        txt += '    func_exit\n'
1090 >        txt += 'fi\n\n'
1091 >        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1092 >        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1093 >        txt += 'job_exit_code=$executable_exit_status\n'
1094 >
1095 >        return txt
1096 >
1097 >    def setParam_(self, param, value):
1098 >        self._params[param] = value
1099 >
1100 >    def getParams(self):
1101 >        return self._params
1102 >
1103 >    def outList(self,list=False):
1104 >        """
1105 >        check the dimension of the output files
1106 >        """
1107 >        txt = ''
1108 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1109 >        listOutFiles = []
1110 >        stdout = 'CMSSW_$NJob.stdout'
1111 >        stderr = 'CMSSW_$NJob.stderr'
1112 >        if len(self.output_file) <= 0:
1113 >            msg ="WARNING: no output files name have been defined!!\n"
1114 >            msg+="\tno output files will be reported back/staged\n"
1115 >            common.logger.info(msg)
1116 >        if (self.return_data == 1):
1117 >            for file in (self.output_file+self.output_file_sandbox):
1118 >                listOutFiles.append(numberFile(file, '$NJob'))
1119 >            listOutFiles.append(stdout)
1120 >            listOutFiles.append(stderr)
1121 >        else:
1122 >            for file in (self.output_file_sandbox):
1123 >                listOutFiles.append(numberFile(file, '$NJob'))
1124 >            listOutFiles.append(stdout)
1125 >            listOutFiles.append(stderr)
1126 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1127 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1128 >        txt += 'export filesToCheck\n'
1129 >
1130 >        if list : return self.output_file
1131          return txt

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines