ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.390
Committed: Thu Nov 8 15:29:51 2012 UTC (12 years, 5 months ago) by belforte
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_8_4_pre1
Changes since 1.389: +10 -2 lines
Log Message:
Allow setting the RSS limit via crab.cfg for remoteGlidein; see https://savannah.cern.ch/bugs/index.php?97656

File Contents

# User Rev Content
1 spiga 1.358
2 belforte 1.390 __revision__ = "$Id: cms_cmssw.py,v 1.389 2012/11/05 23:44:26 belforte Exp $"
3     __version__ = "$Revision: 1.389 $"
4 spiga 1.358
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9 ewv 1.363 import re
10 slacapra 1.1 import Scram
11 spiga 1.269 from Splitter import JobSplitter
12 ewv 1.355 from Downloader import Downloader
13 ewv 1.363 try:
14 fanzago 1.360 import json
15     except:
16     import simplejson as json
17 slacapra 1.1
18 spiga 1.293 from IMProv.IMProvNode import IMProvNode
19 ewv 1.355 from IMProv.IMProvLoader import loadIMProvFile
20 belforte 1.388 from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
21    
22 slacapra 1.105 import os, string, glob
23 ewv 1.355 from xml.dom import pulldom
24 slacapra 1.1
25     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure a CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' -> value read from crab.cfg
    ncjobs      -- number of jobs requested to be created (stored as-is)
    skip_blocks -- forwarded to DataDiscovery when a dataset is given
    isNew       -- when true, the parameter set is modified and the input
                   sandbox tarball is prepared (done only the first time)

    Raises CrabException on an unparsable/unsupported CMSSW version, on
    missing or malformed mandatory parameters, and on missing input files.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params
    ### FOR MULTI ###
    self.var_filter = ''

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    self.local = common.scheduler.name().upper() in ['LSF', 'CAF', 'CONDOR', 'SGE', 'PBS']
    # Default sandbox limit (MB): 9.5 for direct submission, 100 through
    # the server, effectively unlimited for local schedulers.
    size = 9.5
    if self.server:
        size = 100
    elif self.local:
        size = 9999999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting
    self.checkCMSSWVersion()

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    if 'CMSSW.datasetpath' not in cfg_params:
        msg = "Error: datasetpath not defined in the section [CMSSW] of crab.cfg file "
        raise CrabException(msg)
    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + tmp)
    if tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        ll = len(self.datasetPath.split("/"))
        if (ll != 4) or self.datasetPath[0] != '/' or self.datasetPath[-1] == '/':
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER'
            raise CrabException(msg)
        self.primaryDataset = self.datasetPath.split("/")[1]
        # NOTE(review): index 2 is the PROCESSED field of
        # /PRIMARY/PROCESSED/TIER, not the tier -- confirm this is intended.
        self.dataTier = self.datasetPath.split("/")[2]

    # Analysis dataset is primary/processed/tier/definition
    # NOTE(review): the format check above only accepts three fields, so
    # this flag looks like it can no longer become True -- confirm.
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask', None)
    self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis', None) or \
                      self.cfg_params.get('CMSSW.lumis_per_job', None)

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        # the literal 'none' means "no parameter set"
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # scriptExe is None (not '') when USER.script_exe is unset, so test
    # for "no script" by truthiness; comparing against '' never matched.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry (e.g. a trailing comma): nothing to add
                continue
            dirname = ''
            if pattern[0] != "/":
                dirname = "."
            files = []
            if pattern.find("*") > -1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + pattern)
            else:
                files.append(pattern)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: " + fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        for seed in cfg_params['CMSSW.preserve_seeds'].split(','):
            # str.strip() returns a new string; keep the stripped value
            self.preserveSeeds.append(seed.strip())
    if 'CMSSW.increment_seeds' in cfg_params:
        for seed in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seed.strip())

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if 'USER.publish_data_name' not in cfg_params:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0       # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}       # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = [] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation', 0))
    if self.automation == 0:
        self.seListParser = SEBlackWhiteListParser(logger=common.logger())
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        # insert here site override from crab.cfg
        # note that b/w lists will still be applied
        if 'GRID.data_location_override' in cfg_params:
            sitesOverride = cfg_params['GRID.data_location_override'].split(',')
            common.logger.info("DataSite overridden by user to: %s" % sitesOverride)
            seOverride = self.seListParser.expandList(sitesOverride)
            common.logger.info("DataLocation overridden by user to: %s\n" % seOverride)
            for block in list(blockSites):
                blockSites[block] = seOverride

        self.conf['blockSites'] = blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators'] = self.managedGenerators
                self.conf['generator'] = self.generator
        elif self.ads or self.lumiMask or self.lumiParams:
            self.algo = 'LumiBased'
            if splitByRun:
                msg = "Cannot combine split by run with lumi_mask, ADS, " \
                      "or lumis_per_job. Use split by lumi mode instead."
                raise CrabException(msg)
        elif splitByRun == 1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params, self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
291 spiga 1.293
292    
def ModifyPset(self):
    """
    Write the job parameter set and reconcile the list of output files.

    Loads self.pset through PsetManipulator, writes it out with
    maxEvent(1)/skipEvent(0), then extends self.output_file with the
    TFileService and (if requested) PoolOutputModule files declared in the
    parameter set. Also stores the JSON-encoded output-module -> dataset
    map in self.var_filter.

    Raises CrabException when the parameter set produces EDM output that
    is not listed (unless CMSSW.ignore_edm_output is set), or when an
    output file name contains characters other than word characters,
    '.' and '-'.
    """
    import sys
    import PsetManipulator as pp

    # If pycfg_params set, fake out the config script
    # to make it think it was called with those args
    pycfg_params = self.cfg_params.get('CMSSW.pycfg_params', None)
    if pycfg_params:
        trueArgv = sys.argv
        sys.argv = [self.pset] + pycfg_params.split(' ')
        try:
            PsetEdit = pp.PsetManipulator(self.pset)
        finally:
            # Restore original sys.argv even if loading the pset fails
            sys.argv = trueArgv
    else:
        PsetEdit = pp.PsetManipulator(self.pset)

    try:
        # Add FrameworkJobReport to parameter-set, set max events.
        # Reset later for data jobs by writeCFG which does all modifications
        PsetEdit.maxEvent(1)
        PsetEdit.skipEvent(0)
        PsetEdit.psetWriter(self.configFilename())
        ## If present, add TFileService to output files
        if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output', 0)):
            tfsOutput = PsetEdit.getTFileService()
            if tfsOutput:
                if tfsOutput in self.output_file:
                    common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
                else:
                    self.output_file.append(tfsOutput)
                    common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")

        # If requested, add PoolOutputModule to output files
        ### FOR MULTI ###
        edmOutputDict = PsetEdit.getPoolOutputModule()
        common.logger.debug("(test) edmOutputDict = "+str(edmOutputDict))
        filter_dict = dict((key, edmOutputDict[key]['dataset'])
                           for key in edmOutputDict.keys())
        common.logger.debug("(test) filter_dict for multi = "+str(filter_dict))

        #### in CMSSW.sh: export var_filter
        self.var_filter = json.dumps(filter_dict)
        common.logger.debug("(test) var_filter for multi = "+self.var_filter)

        edmOutput = list(edmOutputDict.keys())
        if int(self.cfg_params.get('CMSSW.get_edm_output', 0)):
            for outputFile in edmOutput:
                if outputFile in self.output_file:
                    common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
                else:
                    self.output_file.append(outputFile)
                    common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
        else:
            # not requested, check anyhow to avoid accidental T2 overload
            missedFiles = [outputFile for outputFile in edmOutput
                           if outputFile not in self.output_file]
            if missedFiles:
                msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
                msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
                msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
                if int(self.cfg_params.get('CMSSW.ignore_edm_output', 0)):
                    msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
                    common.logger.info(msg)
                else:
                    raise CrabException(msg)

        if PsetEdit.getBadFilesSetting():
            msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
            common.logger.info(msg)

    except CrabException as exc:
        # log the specific problem, then abort with a generic message
        common.logger.info(str(exc))
        msg = 'Error while manipulating ParameterSet (see previous message, if any): exiting...'
        raise CrabException(msg)

    valid = re.compile(r'^[\w\.\-]+$')
    for fileName in self.output_file:
        if not valid.match(fileName):
            msg = "The file %s may only contain alphanumeric characters and -, _, ." % fileName
            raise CrabException(msg)
381    
382 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query DBS for the files of self.datasetPath and DLS for their location.

    Side effects: sets self.pubdata, self.filesbyblock, self.maxEvents and
    self.conf['pubdata'].

    Returns a dict keyed like self.filesbyblock whose values are the site
    lists reported by DataLocation (an empty list when none is reported).

    Raises CrabException when discovery or location fails, or when the
    resulting mapping is empty.
    """
    import DataDiscovery
    import DataLocation
    common.logger.log(10-1, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.info("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError) as ex:
        # all three failures are reported to the user the same way
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.conf['pubdata'] = self.pubdata

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError as ex:
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    # One entry per block; blocks without a reported location get [].
    unsorted_sites = dataloc.getSites()
    sites = {}
    for block in self.filesbyblock.keys():
        sites[block] = unsorted_sites.get(block, [])

    if len(sites) == 0:
        msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
        msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
        msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
        raise CrabException(msg)

    # screen output
    if self.ads or self.lumiMask:
        common.logger.info("Requested (A)DS %s has %s block(s)." %
                           (datasetPath, len(self.filesbyblock.keys())))
    else:
        common.logger.info("Requested dataset: " + datasetPath + \
                           " has " + str(self.maxEvents) + " events in " + \
                           str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
455 ewv 1.131
456 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Register the jobs produced by the splitter in the task database.

    jobParams  -- ignored: it is immediately replaced by self.dict['args']
                  (kept in the signature for existing callers)
    firstJobID -- offset added to the 0-based job index; job numbers
                  stored are index + firstJobID + 1

    For every job the 'arguments' and 'dlsDestination' fields are saved
    through common._db.updateJob_; jobs that have splitter arguments are
    also written to the arguments XML file (created if it does not exist).

    Raises CrabException when there are zero jobs or more jobs than the
    selected submission mode accepts.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local:
        if common.scheduler.name().upper() == 'REMOTEGLIDEIN':
            if njobs > 5000:
                raise CrabException("too many jobs. remoteGlidein has a limit at 5000")
        elif njobs > 500:
            msg = "Error: this task contains more than 500 jobs. \n"
            msg += " The CRAB SA does not submit more than 500 jobs.\n"
            msg += " Use the server mode. \n"
            raise CrabException(msg)
    if self.server and njobs > 5000:
        raise CrabException("too many jobs. CrabServer has a limit at 5000")

    # Pad with empty entries so jobParams[index] exists for every job even
    # when the splitter produced no arguments. This extends the list held
    # in self.dict['args'] in place, as before.
    jobParams.extend([""] * njobs)

    # The SE -> CMS name mapper does not depend on the job: build it once
    # instead of once per job.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        listID.append(jobNumber)
        params = jobParams[index]
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for pos in range(len(params)):
                argu[self.dict['params'][pos]] = params[pos]
            if len(params) == 1:
                self.NumEvents = params[0]
            # just for debug
            str_argu += ' '.join(params)
            listDictions.append(argu)
        job_ToSave = {}
        job_ToSave['arguments'] = '%d %d'%(jobNumber, 0)
        job_ToSave['dlsDestination'] = self.jobDestination[index]
        listField.append(job_ToSave)
        msg = "Job %s Arguments: %s\n"%(str(jobNumber), str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)
    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
516 ewv 1.313
def CreateXML(self):
    """
    Create the arguments file self.argsFile containing only the root node
    named self.rootArgsFilename. An existing file is overwritten.
    """
    result = IMProvNode(self.rootArgsFilename)
    # context manager guarantees the handle is flushed and closed
    with open(self.argsFile, 'w') as outfile:
        outfile.write(str(result))
    return
523 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary in listDictions to the XML document
    stored in self.argsFile, then rewrite the file. Each dictionary is
    passed to IMProvNode as keyword arguments.
    """
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    # context manager guarantees the handle is flushed and closed
    with open(self.argsFile, 'w') as outfile:
        outfile.write(str(improvDoc))
    return
538 ewv 1.131
def numberOfJobs(self):
    """
    Return the splitter's job count (self.dict['njobs']) when
    self.automation is 0, otherwise None.
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
545 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball (libs and executable).

    The path is recomputed and stored in self.tgzNameWithPath. If no file
    exists there yet, buildTar_(exe) is invoked first and the path is
    returned with surrounding whitespace stripped.
    """
    tarball = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
558 slacapra 1.1
def buildTar_(self, executable):
    """
    Build the gzipped input sandbox at self.tgzNameWithPath.

    Nothing is built when the SCRAM working area is the release top (or
    the release top is empty). Otherwise the tarball receives, in order:
    the user executable if it is not part of the release, the local
    'lib' and 'module' directories, every 'data' directory found under
    src/, the parameter set and its pickle, crab.cfg (best effort), the
    needed ProdCommon/WMCore/IMProv pieces, the monitoring and utility
    scripts, and the additional input files.

    Sets self.dataExist. Raises CrabException if the executable cannot be
    found, if the tarball cannot be written, or if it exceeds
    self.MaxTarBallSize (MB).
    """
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
                    areaPrefix = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(areaPrefix) >= 0:
                        exe = exeWithPath.replace(areaPrefix, '')
                        tar.add(areaPrefix+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            tar.dereference = True
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug("lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)
            tar.dereference = False

            ## Now check if any data dir(s) is present
            # Depth-first walk of src/; each entry is (path relative to
            # src/, last path component).
            self.dataExist = False
            todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
            while len(todo_list):
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(swArea+"/src/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
                    if name == 'data':
                        self.dataExist = True
                        common.logger.debug("data "+entry+" to be tarred")
                        tar.add(swArea+"/src/"+entry, "src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                pickleFile = common.work_space.jobDir()+self.configFilename() + '.pkl'
                tar.add(cfg_file, self.configFilename())
                tar.add(pickleFile, self.configFilename() + '.pkl')

            # crab.cfg is shipped on a best-effort basis: a failure here
            # must not abort the sandbox creation.
            try:
                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
                tar.add(crab_cfg_file, 'crab.cfg')
            except Exception:
                pass

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py', 'ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools',
                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage',
                           'WMCore/__init__.py', 'WMCore/Algorithms']
            for item in neededStuff:
                tar.add(prodcommonPath+item, prodcommonDir+item)

            ##### ML stuff
            ML_file_list = ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            pythonPath = os.environ['CRABDIR'] + '/python/'
            for item in ML_file_list:
                tar.add(pythonPath+item, item)

            ##### Utils
            Utils_file_list = ['parseCrabFjr.py', 'writeCfg.py', 'fillCrabFjr.py', 'cmscp.py', 'crabWatchdog.sh']
            for item in Utils_file_list:
                tar.add(pythonPath+item, item)

            ##### AdditionalFiles
            tar.dereference = True
            for item in self.additional_inbox_files:
                # stored flat, under the file's base name
                tar.add(item, item.split('/')[-1])
            tar.dereference = False
            common.logger.log(10-1, "Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # release the archive handle on success and on every failure
            tar.close()
    except (IOError, tarfile.TarError) as exc:
        msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
        msg += str(exc)
        raise CrabException(msg)

    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        #### FEDE FOR SAVANNAH BUG 94491 ########
        cmdOut = runCommand('tar -ztvf %s|sort -n -k3'%self.tgzNameWithPath)
        sizeMB = str(float(tarballinfo.st_size)/1024.0/1024.0)
        if not self.server:
            msg = 'Input sandbox size of ' + sizeMB + ' MB is larger than the allowed ' + \
                  str(self.MaxTarBallSize) +'MB input sandbox limit \n'
            msg += ' and not supported by the direct GRID submission system.\n'
            msg += ' Please use the CRAB server mode by setting use_server=1 in section [CRAB] of your crab.cfg.\n'
            msg += ' Content of your default.tgz archive is \n'
        else:
            msg = 'Input sandbox size of ' + sizeMB + ' MB is larger than the allowed ' + \
                  str(self.MaxTarBallSize) +'MB input sandbox limit in the server.'
            msg += 'Content of your default.tgz archive is \n'
        # presumably runCommand can yield nothing when the listing fails;
        # still report the size error in that case -- verify against crab_util
        if cmdOut:
            msg += cmdOut
        raise CrabException(msg)
694 gutsche 1.72
695 slacapra 1.61 ## create tar-ball with ML stuff
696 slacapra 1.97
# wsSetupEnvironment: build and return, as a single string, the shell fragment
# that prepares the execution environment for job index `nj`. In order it emits:
# the SCRAM_ARCH export, the per-middleware CMS setup, `scram project` /
# `scram runtime`, the check on the number of script arguments, and finally the
# dataset and pset related variables.
# NOTE(review): the Python indentation is not visible in this annotated listing,
# so the nesting of the trailing if/else statements cannot be confirmed here.
697 spiga 1.165 def wsSetupEnvironment(self, nj=0):
698 slacapra 1.1 """
699     Returns part of a job script which prepares
700     the execution environment for the job 'nj'.
701     """
702 ewv 1.334 psetName = 'pset.py'
703    
704 slacapra 1.1 # Prepare JobType-independent part
705 ewv 1.160 txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
706 fanzago 1.133 txt += 'echo ">>> setup environment"\n'
707 ewv 1.347 txt += 'echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
708     txt += 'export SCRAM_ARCH=' + self.executable_arch + '\n'
709     txt += 'echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
# LCG, CAF, LSF, SGE, ARC and PBS all reuse wsSetupCMSLCGEnvironment_();
# only OSG differs: it first creates a scratch directory under $OSG_WN_TMP,
# changes into it, and then uses wsSetupCMSOSGEnvironment_().
710 spiga 1.290 txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
711 gutsche 1.3 txt += self.wsSetupCMSLCGEnvironment_()
712 ewv 1.283 txt += 'elif [ $middleware == OSG ]; then\n'
713 gutsche 1.43 txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
714 ewv 1.132 txt += ' if [ ! $? == 0 ] ;then\n'
715 fanzago 1.161 txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
716     txt += ' job_exit_code=10016\n'
717     txt += ' func_exit\n'
718 gutsche 1.3 txt += ' fi\n'
719 fanzago 1.133 txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
720 gutsche 1.3 txt += '\n'
721     txt += ' echo "Change to working directory: $WORKING_DIR"\n'
722     txt += ' cd $WORKING_DIR\n'
723 fanzago 1.133 txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
724 ewv 1.131 txt += self.wsSetupCMSOSGEnvironment_()
725 spiga 1.282 #Setup SGE Environment
726 ewv 1.283 txt += 'elif [ $middleware == SGE ]; then\n'
727 spiga 1.282 txt += self.wsSetupCMSLCGEnvironment_()
728    
729 edelmann 1.289 txt += 'elif [ $middleware == ARC ]; then\n'
730     txt += self.wsSetupCMSLCGEnvironment_()
731    
732 mcinquil 1.340 #Setup PBS Environment
733 mcinquil 1.343 txt += 'elif [ $middleware == PBS ]; then\n'
734 mcinquil 1.340 txt += self.wsSetupCMSLCGEnvironment_()
735    
736 gutsche 1.3 txt += 'fi\n'
737 slacapra 1.1
738     # Prepare JobType-specific part
# The scram command name comes from the Scram helper object; a failing
# `scram project` or `scram runtime` makes the job exit with code 10034.
739     scram = self.scram.commandName()
740     txt += '\n\n'
741 fanzago 1.133 txt += 'echo ">>> specific cmssw setup environment:"\n'
742     txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
743 slacapra 1.1 txt += scram+' project CMSSW '+self.version+'\n'
744     txt += 'status=$?\n'
745     txt += 'if [ $status != 0 ] ; then\n'
746 fanzago 1.161 txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
747     txt += ' job_exit_code=10034\n'
748 fanzago 1.163 txt += ' func_exit\n'
749 slacapra 1.1 txt += 'fi \n'
750     txt += 'cd '+self.version+'\n'
751 spiga 1.277 txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
752 fanzago 1.133 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
753 slacapra 1.1 txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
754 fanzago 1.180 txt += 'if [ $? != 0 ] ; then\n'
755     txt += ' echo "ERROR ==> Problem with the command: "\n'
756     txt += ' echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
757     txt += ' job_exit_code=10034\n'
758     txt += ' func_exit\n'
759     txt += 'fi \n'
760 slacapra 1.1 # Handle the arguments:
# The generated script aborts with exit code 50113 when $nargs is smaller
# than self.argsList.
761     txt += "\n"
762 farinafa 1.346 txt += "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n"
763 slacapra 1.1 txt += "\n"
764 spiga 1.165 txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
765 slacapra 1.1 txt += "then\n"
766 fanzago 1.161 txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
767     txt += ' job_exit_code=50113\n'
768     txt += " func_exit\n"
769 slacapra 1.1 txt += "fi\n"
770     txt += "\n"
771    
772     # Prepare job-specific part
# `job` is used below only to obtain the per-job configuration file name.
773     job = common.job_list[nj]
774 ewv 1.131 if (self.datasetPath):
775 fanzago 1.93 txt += '\n'
776     txt += 'DatasetPath='+self.datasetPath+'\n'
777    
778 spiga 1.238 txt += 'PrimaryDataset='+self.primaryDataset +'\n'
779 fanzago 1.318 txt += 'DataTier='+self.dataTier+'\n'
780 fanzago 1.96 txt += 'ApplicationFamily=cmsRun\n'
781 fanzago 1.93
782     else:
783     txt += 'DatasetPath=MCDataTier\n'
784     txt += 'PrimaryDataset=null\n'
785     txt += 'DataTier=null\n'
786     txt += 'ApplicationFamily=MCDataTier\n'
# With a pset configured, the job's config file and its '.pkl' companion are
# copied from $RUNTIME_AREA and the config is renamed to psetName ('pset.py').
787 ewv 1.170 if self.pset != None:
788 spiga 1.42 pset = os.path.basename(job.configFilename())
789 ewv 1.357 pkl = os.path.basename(job.configFilename()) + '.pkl'
790 spiga 1.42 txt += '\n'
791 spiga 1.95 txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
792 ewv 1.357 txt += 'cp $RUNTIME_AREA/'+pkl+' .\n'
793 spiga 1.296
794 ewv 1.295 txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
795     txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
796     txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
797     txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
798 slacapra 1.90
799 ewv 1.184 txt += 'mv -f ' + pset + ' ' + psetName + '\n'
800 fanzago 1.360 if self.var_filter:
801     #print "self.var_filter = ",self.var_filter
802     txt += "export var_filter="+"'"+self.var_filter+"'\n"
# NOTE(review): unlike every other emitted line, this echo has no trailing '\n'.
803     txt += 'echo $var_filter'
# NOTE(review): presumably this `else` pairs with `if self.pset != None:`
# (i.e. the no-pset case) - confirm against the indented file.
804 ewv 1.319 else:
805 spiga 1.314 txt += '\n'
806 spiga 1.331 if self.AdditionalArgs: txt += 'export AdditionalArgs=\"%s\"\n'%(self.AdditionalArgs)
807 spiga 1.315 if int(self.NumEvents) != 0: txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
808 gutsche 1.3 return txt
809 slacapra 1.176
# wsUntarSoftware: return the shell fragment that unpacks the input sandbox
# tar-ball (only if self.tgzNameWithPath exists when the script is generated),
# makes $RUNTIME_AREA writable, restricts the proxy file to mode 600 and
# prepends $RUNTIME_AREA to PYTHONPATH. For the REMOTEGLIDEIN scheduler it can
# also write an `rssLimit` file.
# NOTE(review): the docstring below talks about building an executable or a
# library, which does not match the commands emitted here.
# NOTE(review): Python indentation is not visible in this annotated listing, so
# the exact extent of the `debug_wrapper` and `max_rss` conditionals is unconfirmed.
810 fanzago 1.166 def wsUntarSoftware(self, nj=0):
811 gutsche 1.3 """
812     Put in the script the commands to build an executable
813     or a library.
814     """
815    
816 fanzago 1.166 txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
817 gutsche 1.3
818     if os.path.isfile(self.tgzNameWithPath):
819 spiga 1.358 txt += 'echo ">>> tar xzf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
820 belforte 1.382 txt += 'tar xzf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
# The tar exit status is saved immediately so the optional debug listing
# below cannot overwrite $?.
821 fanzago 1.375 txt += 'untar_status=$? \n'
822 fanzago 1.285 if self.debug_wrapper==1 :
823 fanzago 1.375 txt += 'echo "----------------" \n'
824     txt += 'ls -AlR $RUNTIME_AREA \n'
825     txt += 'echo "----------------" \n'
# A failed untar ends the job, reusing tar's status as job_exit_code.
826 gutsche 1.3 txt += 'if [ $untar_status -ne 0 ]; then \n'
827 fanzago 1.161 txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
828     txt += ' job_exit_code=$untar_status\n'
829     txt += ' func_exit\n'
830 gutsche 1.3 txt += 'else \n'
831     txt += ' echo "Successful untar" \n'
832 fanzago 1.375 txt += ' chmod a+w -R $RUNTIME_AREA \n'
833 belforte 1.379 txt += ' chmod 600 $X509_USER_PROXY \n'
834 fanzago 1.375 if self.debug_wrapper==1 :
835     txt += ' echo "changed in a+w the permission of $RUNTIME_AREA "\n'
836     txt += ' ls -AlR $RUNTIME_AREA \n'
837 gutsche 1.3 txt += 'fi \n'
838 gutsche 1.50 txt += '\n'
839 slacapra 1.211 txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
840 gutsche 1.50 txt += 'if [ -z "$PYTHONPATH" ]; then\n'
841 slacapra 1.211 txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
842 gutsche 1.50 txt += 'else\n'
843 slacapra 1.211 txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
# The PYTHONPATH echo is emitted before 'fi', so in the generated script it
# belongs to the shell else-branch only.
844 fanzago 1.93 txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
845 gutsche 1.50 txt += 'fi\n'
846     txt += '\n'
847    
# `pass` is a no-op here.
848 gutsche 1.3 pass
849 ewv 1.131
850 belforte 1.390 # add the rssLimit file for the watchdog
# GRID.max_rss is multiplied by 1000 and reported as "KBytes", so the
# configured value is presumably expressed in MB - TODO confirm.
851     if common.scheduler.name().upper() == 'REMOTEGLIDEIN' :
852     if self.cfg_params.get('GRID.max_rss'):
853     max_rss = int(self.cfg_params.get('GRID.max_rss')) * 1000
854     txt += 'echo "%d" > rssLimit\n' % max_rss
855     txt += 'maxrss=`cat rssLimit`\n'
856     txt += 'echo "RSS limit set to: ${maxrss} KBytes"\n'
857    
858 slacapra 1.1 return txt
859 ewv 1.170
# wsBuildExe: return the shell fragment that replaces lib/ and module/ in the
# current directory with the copies found in $RUNTIME_AREA (src/ as well,
# following the `self.dataExist` test), moves every additional input file in by
# its base name, prepends $RUNTIME_AREA to PYTHONPATH and, when a pset is
# configured, computes PSETHASH with edmConfigHash.
# NOTE(review): Python indentation is not visible in this annotated listing, so
# the exact extent of the `dataExist`, `pset` and `debug_wrapper` bodies is
# unconfirmed.
860 fanzago 1.166 def wsBuildExe(self, nj=0):
861     """
862     Put in the script the commands to build an executable
863     or a library.
864     """
865    
866     txt = '\n#Written by cms_cmssw::wsBuildExe\n'
867     txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
868 fanzago 1.375
# Existing directories are removed first so that the following `mv` installs
# the $RUNTIME_AREA copy under the same name instead of nesting it.
869     txt += 'rm -rf lib/ module/ \n'
870 ewv 1.170 txt += 'mv $RUNTIME_AREA/lib/ . \n'
871     txt += 'mv $RUNTIME_AREA/module/ . \n'
872 spiga 1.374 if self.dataExist == True:
873 fanzago 1.375 txt += 'rm -rf src/ \n'
874 spiga 1.374 txt += 'mv $RUNTIME_AREA/src/ . \n'
875 ewv 1.182 if len(self.additional_inbox_files)>0:
876 spiga 1.179 for file in self.additional_inbox_files:
877 spiga 1.191 txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
878 ewv 1.170
879 slacapra 1.211 txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
880 fanzago 1.166 txt += 'if [ -z "$PYTHONPATH" ]; then\n'
881 slacapra 1.211 txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
882 fanzago 1.166 txt += 'else\n'
883 slacapra 1.211 txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
# The PYTHONPATH echo is emitted before 'fi', so in the generated script it
# belongs to the shell else-branch only.
884 fanzago 1.166 txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
885     txt += 'fi\n'
886     txt += '\n'
887    
888 slacapra 1.302 if self.pset != None:
889 ewv 1.334 psetName = 'pset.py'
890    
891 slacapra 1.302 txt += '\n'
892     if self.debug_wrapper == 1:
893     txt += 'echo "***** cat ' + psetName + ' *********"\n'
894     txt += 'cat ' + psetName + '\n'
895     txt += 'echo "****** end ' + psetName + ' ********"\n'
896     txt += '\n'
897     txt += 'echo "***********************" \n'
898     txt += 'which edmConfigHash \n'
899     txt += 'echo "***********************" \n'
# edmConfigHash is run twice in the generated script: once with its output
# going to the job log and once captured into PSETHASH.
900 ewv 1.334 txt += 'edmConfigHash ' + psetName + ' \n'
901     txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
902 slacapra 1.302 txt += 'echo "PSETHASH = $PSETHASH" \n'
903 fanzago 1.368 #### temporary fix for noEdm files #####
# An empty hash is replaced by the literal string "null".
904 slacapra 1.302 txt += 'if [ -z "$PSETHASH" ]; then \n'
905     txt += ' export PSETHASH=null\n'
906     txt += 'fi \n'
907     #############################################
908     txt += '\n'
909 fanzago 1.166 return txt
910 slacapra 1.1
911 ewv 1.131
def executableName(self):
    """
    Return the command used to launch the job payload.

    When a user script (self.scriptExe) is configured the payload is run
    through the shell, so the literal "sh " (trailing blank included) is
    returned; otherwise the configured executable is returned.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
917 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string that follows executableName().

    With a user script: the script's base name followed by the job number
    and the additional arguments placeholders. Without one: the options
    selecting the framework job report file and the pset.py configuration.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    script = os.path.basename(self.scriptExe)
    return script + " $NJob $AdditionalArgs"
923 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The software tar-ball and the arguments file are listed only if they
    exist on disk; the job script (looked up in the job directory of the
    work space) is always listed last.
    """
    optional = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
935    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (regular and sandbox ones) is tagged,
    through numberFile, with the 1-based job number nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
947    
948    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file the generated script checks that
    the file exists and gives it its numbered name (numberFile with the
    '$OutUniqueID' placeholder), leaving a symbolic link with the original
    name in $RUNTIME_AREA. A missing file sets job_exit_code=60302. The
    script finally exports nothing but defines `file_list` (comma separated
    numbered names under $SOFTWARE_DIR) and changes to $RUNTIME_AREA.

    The argument nj is not used.
    """
    # Diagnostic listing of the working area; emitted before and after the
    # renaming loop.
    listing = 'echo ">>> current directory $PWD" \n'
    listing += 'echo ">>> (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    listing += 'echo ">>> (WORKING_DIR): $WORKING_DIR" \n'
    listing += 'echo ">>> current directory content:"\n'
    listing += 'ls -Al\n'
    listing += '\n'

    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += listing

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$OutUniqueID')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./' + fileWithSuffix + ' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv ' + fileWithSuffix + ' ' + output_file_num + '\n'
            txt += ' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        else:
            txt += ' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            # Only for CONDOR_G: on OSG a placeholder file is written so that
            # the expected output name still exists.
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [numberFile('$SOFTWARE_DIR/' + fileWithSuffix, '$OutUniqueID')
                 for fileWithSuffix in self.output_file]

    # str.join instead of the deprecated string.join module function, as
    # already done elsewhere in this file.
    txt += 'file_list="' + ','.join(file_list) + '"\n'
    txt += '\n'
    txt += listing
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
999    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result is a single expression whose clauses are joined with ' && ':
    one Member(...) clause for the CMSSW version and one for the
    architecture (each only when set), the outbound-IP clause (always) and,
    for the "glite" scheduler only, a clause on the CE state which also
    accepts "Special" when GRID.use_cream is set in the configuration.

    The argument nj is not used; its default is None rather than a shared
    mutable list.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env)
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env)

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ["glite"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept on purpose: it is part of the historical output
            clauses.append('other.GlueCEStateStatus == "Production" ')

    # Joining the clauses (instead of blindly prefixing each one with ' && ')
    # avoids an expression that starts with a dangling operator when the
    # version clause is absent.
    return ' && '.join(clauses)
1023 gutsche 1.3
def configFilename(self):
    """ Return the config filename: the job type name plus the '.py' suffix. """
    base = self.name()
    return base + '.py'
1027 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on OSG and which is common for all CMS jobs.

    The generated script exports SCRAM_ARCH, looks for cmsset_default.sh
    under $OSG_APP, then $CVMFS, then /cvmfs (exit code 10020, after some
    diagnostic listings, if none is found) and sources it (exit code 10032
    on failure).
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' echo "OSG_APP is $OSG_APP"\n',
        # locate the CMS setup script
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' cmsSetupFile=$OSG_APP/cmssoft/cms/cmsset_default.sh\n',
        ' elif [ -f $CVMFS/cms.cern.ch/cmsset_default.sh ] ; then \n',
        ' cmsSetupFile=$CVMFS/cms.cern.ch/cmsset_default.sh\n',
        ' elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ] ; then \n',
        ' cmsSetupFile=/cvmfs/cms.cern.ch/cmsset_default.sh\n',
        ' else\n',
        # nothing found: dump some diagnostics before giving up
        ' echo "CVMSF = $CVMFS"\n',
        ' echo "/cvmfs/ is"\n',
        ' echo "ls /"\n',
        ' ls /\n',
        ' echo "ls /cvmfs"\n',
        ' ls /cvmfs\n',
        ' echo "ls /cvmfs/cms.cern.ch"\n',
        ' ls /cvmfs/cms.cern.ch\n',
        ' ls /cvmfs/cms.cern.ch/cmsset*\n',
        ' ls /cvmfs/cms.cern.ch/cmsset_default.sh\n',
        ' echo "ERROR ==> cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        # source the setup script and check the outcome
        ' echo "sourcing $cmsSetupFile ..."\n',
        ' source $cmsSetupFile\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $cmsSetupFile"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' else\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' fi\n',
    ]
    return ''.join(pieces)
1074 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on LCG-like middleware and which is common for all CMS jobs.

    The generated script exports SCRAM_ARCH and BUILD_ARCH, requires
    $VO_CMS_SW_DIR to be set (exit code 10031), requires a non-empty
    cmsset_default.sh in it (exit code 10020) and sources that file
    (exit code 10032 on failure).
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        # the software area must be advertised by the site
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(pieces)
1108 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Insert the part of the script that modifies the FrameworkJob Report.

    Nothing is emitted (empty string) unless self.copy_data is 1. Otherwise
    the generated script runs ModifyJobReport.py on the job report; with
    self.publish_data == 1 the processed dataset name is passed too. A
    failure of the tool sets job_exit_code to 70500, a success replaces the
    job report with the rewritten one. The argument nj is not used.
    """
    if self.copy_data != 1:
        return ''

    tool = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    lines = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + tool + '\n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml json $RUNTIME_AREA/resultCopyFile n_job $OutUniqueID PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH'

    if self.publish_data == 1:
        lines.append('ProcessedDataset=' + self.processedDataset + '\n')
        lines.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args = args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    command = tool + ' ' + args
    # the command line is echoed to the log before being executed
    lines.append('echo "' + command + '"\n')
    lines.append(command + '\n')
    lines.extend([
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(lines)
1141 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job script that parses the FrameworkJobReport.

    The generated script runs parseCrabFjr.py on crab_fjr_$NJob.xml three
    times (dashboard report, popularity report, exit code extraction),
    stores the extracted code in executable_exit_status and, when that
    code is not zero, records it and leaves through func_exit. The output
    of the dashboard call is echoed only when self.debug_wrapper is 1.
    """
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml'
    result_echo = ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: '

    out = []
    out.append('\n#Written by cms_cmssw::wsParseFJR\n')
    out.append('echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n')
    out.append('if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n')
    out.append(' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n')
    out.append(' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n')
    if self.debug_wrapper == 1:
        out.append(result_echo + '$cmd_out"\n')
    out.append(' cmd_out_1=`' + parser + ' --popularity $MonitorID,$MonitorJobID,$RUNTIME_AREA/inputsReport.txt ' + self.debugWrap + '`\n')
    # the popularity result is always echoed, whatever the debug setting
    out.append(result_echo + '$cmd_out_1"\n')
    out.append(' executable_exit_status=`' + parser + ' --exitcode`\n')
    out.append(' if [ $executable_exit_status -eq 50115 ];then\n')
    out.append(' echo ">>> crab_fjr.xml contents: "\n')
    out.append(' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n')
    out.append(' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n')
    out.append(' elif [ $executable_exit_status -eq -999 ];then\n')
    out.append(' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n')
    out.append(' else\n')
    out.append(' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n')
    out.append(' fi\n')
    out.append(' else\n')
    out.append(' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n')
    out.append(' fi\n')
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
    out.append(' if [ $executable_exit_status -eq 0 ];then\n')
    out.append(' echo ">>> Executable succeded $executable_exit_status"\n')
    out.append(' fi\n')
    out.append('else\n')
    out.append(' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n')
    out.append('fi\n')
    out.append('\n')

    # the same three lines report the exit status on failure and on success
    report = [
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ]
    out.append('if [ $executable_exit_status -ne 0 ];then\n')
    out.append(' echo ">>> Executable failed $executable_exit_status"\n')
    for line in report:
        out.append(' ' + line)
    out.append(' func_exit\n')
    out.append('fi\n\n')
    out.extend(report)

    return ''.join(out)
1189    
def setParam_(self, param, value):
    """ Store `value` under the key `param` in the job type parameters. """
    params = self._params
    params[param] = value
1192    
def getParams(self):
    """ Return the dictionary of job type parameters itself (not a copy). """
    params = self._params
    return params
1195 gutsche 1.8
# outList: build the shell lines that echo and export `filesToCheck` (the names
# expected in the output sandbox), store self.output_file as 'outfileBasename'
# in the task record through common._db.updateTask_, then return either
# self.output_file (when the `list` argument is true) or the shell text.
# Note that the parameter name `list` shadows the builtin inside this method.
# NOTE(review): the docstring mentions checking the "dimension" of the output
# files, but no size check is visible in this method.
# NOTE(review): Python indentation is not visible in this annotated listing, so
# which of the appends below depend on `self.return_data == 1` is unconfirmed.
1196 spiga 1.257 def outList(self,list=False):
1197 mcinquil 1.121 """
1198     check the dimension of the output files
1199     """
1200 spiga 1.169 txt = ''
1201     txt += 'echo ">>> list of expected files on output sandbox"\n'
1202 mcinquil 1.121 listOutFiles = []
1203 ewv 1.170 stdout = 'CMSSW_$NJob.stdout'
1204 spiga 1.169 stderr = 'CMSSW_$NJob.stderr'
# An empty output file list is only reported through the logger; it is not
# treated as an error.
1205 spiga 1.268 if len(self.output_file) <= 0:
1206     msg ="WARNING: no output files name have been defined!!\n"
1207     msg+="\tno output files will be reported back/staged\n"
1208 spiga 1.304 common.logger.info(msg)
1209 ewv 1.350
# Regular output files are numbered with the '$OutUniqueID' placeholder,
# sandbox output files with '$NJob'.
1210 fanzago 1.148 if (self.return_data == 1):
1211 farinafa 1.348 for file in (self.output_file):
1212     listOutFiles.append(numberFile(file, '$OutUniqueID'))
1213     for file in (self.output_file_sandbox):
1214     listOutFiles.append(numberFile(file, '$NJob'))
1215     listOutFiles.append(stdout)
1216     listOutFiles.append(stderr)
1217 belforte 1.384 listOutFiles.append('Watchdog_$NJob.log.gz')
1218 farinafa 1.348
1219 fanzago 1.161 txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1220 spiga 1.157 txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1221 spiga 1.169 txt += 'export filesToCheck\n'
# The task record is updated on every call, whatever the value of `list`.
1222 spiga 1.341 taskinfo={}
1223     taskinfo['outfileBasename'] = self.output_file
1224     common._db.updateTask_(taskinfo)
1225 ewv 1.276
1226 spiga 1.257 if list : return self.output_file
1227 ewv 1.170 return txt
1228 ewv 1.355
def checkCMSSWVersion(self, url = "https://cmstags.cern.ch/tc/", fileName = "ReleasesXML"):
    """
    compare current CMSSW release and arch with allowed releases

    Downloads `fileName` from `url` and scans it for an <architecture>
    element named like self.executable_arch that contains a <project>
    element in state 'Announced' whose label equals self.version.

    Returns True when such an entry is found, False otherwise. A missing
    entry is reported with a warning; download or parsing problems are
    only logged, never raised.
    """
    downloader = Downloader(url)
    tagCollectorUrl = url + fileName

    try:
        result = downloader.config(fileName)
    except Exception:
        # Without the release list there is nothing to parse: stop here
        # instead of failing later on an undefined `result`.
        common.logger.info("ERROR: Problem reading file of allowed CMSSW releases.")
        return False

    try:
        arch = None
        for (event, node) in pulldom.parseString(result):
            if event != pulldom.START_ELEMENT:
                continue
            if node.tagName == 'architecture':
                arch = node.attributes.getNamedItem('name').nodeValue
            elif node.tagName == 'project':
                # Compare only while looking at an announced project, so a
                # release seen under a previous architecture can never be
                # matched against the current one.
                relState = node.attributes.getNamedItem('state').nodeValue
                if relState != 'Announced':
                    continue
                release = node.attributes.getNamedItem('label').nodeValue
                if self.executable_arch == arch and self.version == release:
                    return True
    except Exception:
        common.logger.info("Problems parsing file of allowed CMSSW releases.")
        return False

    msg = "WARNING: %s on %s is not among supported releases listed at %s ." % \
          (self.version, self.executable_arch, tagCollectorUrl)
    msg += "Submission may fail."
    common.logger.info(msg)
    return False
1270