ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.400
Committed: Wed Sep 11 12:59:51 2013 UTC (11 years, 7 months ago) by belforte
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_9_1, CRAB_2_9_1_pre2, HEAD
Changes since 1.399: +4 -3 lines
Error occurred while calculating annotation data.
Log Message:
beware possible double spaces in pycfg_params: https://savannah.cern.ch/bugs/?102552

File Contents

# Content
1
2 __revision__ = "$Id: cms_cmssw.py,v 1.399 2013/09/09 12:45:09 belforte Exp $"
3 __version__ = "$Revision: 1.399 $"
4
5 from JobType import JobType
6 from crab_exceptions import *
7 from crab_util import *
8 import common
9 import re
10 import Scram
11 from Splitter import JobSplitter
12 from Downloader import Downloader
13 try:
14 import json
15 except:
16 import simplejson as json
17
18 from IMProv.IMProvNode import IMProvNode
19 from IMProv.IMProvLoader import loadIMProvFile
20 from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
21
22 import os, string, glob
23 from xml.dom import pulldom
24
25 class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure the CMSSW job type from the parsed crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' names to their configured values
    ncjobs      -- number of jobs requested to be created
    skip_blocks -- handed unchanged to DataDiscovery
    isNew       -- when true, the parameter set is modified and the input
                   sandbox tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is
    older than 2_1, when datasetpath/pset/script_exe are missing or
    invalid, when an additional input file cannot be found, when
    publication is requested without a name, or when split-by-run is
    combined with lumi based splitting.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params
    ### FOR MULTI ###
    self.var_filter = ''

    ### Temporary patch to automatically skip the ISB size check:
    # NOTE(review): if cfg_params holds raw strings, use_server = 0 would be
    # the truthy string '0' here -- confirm how the values are parsed.
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    self.local = common.scheduler.name().upper() in ['LSF', 'CAF', 'CONDOR', 'SGE', 'PBS']
    size = 9.5
    if self.server or \
       common.scheduler.name().upper() == 'REMOTEGLIDEIN':
        size = 100
    elif self.local:
        size = 9999999

    # Maximum input sandbox size, in MB (compared against bytes in buildTar_)
    self.MaxTarBallSize = size

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_' separated fields, or a non numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting
    self.checkCMSSWVersion()
    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    if not cfg_params.has_key('CMSSW.datasetpath'):
        msg = "Error: datasetpath not defined in the section [CMSSW] of crab.cfg file "
        raise CrabException(msg)
    else:
        tmp = cfg_params['CMSSW.datasetpath']
        common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + tmp)
        if tmp.lower() == 'none':
            self.datasetPath = None
            self.selectNoInput = 1
            self.primaryDataset = 'null'
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
            ll = len(self.datasetPath.split("/"))
            if (ll != 4) or self.datasetPath[0] != '/' or self.datasetPath[-1] == '/':
                msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
                msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER'
                raise CrabException(msg)
            self.primaryDataset = self.datasetPath.split("/")[1]
            self.dataTier = self.datasetPath.split("/")[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask', None)
    self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis', None) or \
                      self.cfg_params.get('CMSSW.lumis_per_job', None)

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(string.strip(self.scriptExe))

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = string.replace(self.AdditionalArgs, ',', ' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test its
    # truth value: with no dataset and no pset a script is mandatory.
    if self.datasetPath == None and self.pset == None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'], ',')
        for tmp in tmpAddFiles:
            tmp = string.strip(tmp)
            if not tmp:
                # empty entry, e.g. from a trailing comma: nothing to add
                continue
            dirname = ''
            if not tmp[0] == "/":
                dirname = "."
            files = []
            if string.find(tmp, "*") > -1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + tmp)
            else:
                files.append(tmp)
            for addFile in files:
                if not os.path.exists(addFile):
                    raise CrabException("Additional input file not found: " + addFile)
                self.additional_inbox_files.append(string.strip(addFile))
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string, so the stripped value is what must
    # be stored.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for tmp in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(tmp.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for tmp in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(tmp.strip())

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if (self.publish_data == 1):
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation', 0))
    # NOTE(review): everything down to the JobSplitter call is taken to
    # belong to the automation == 0 branch: seListParser is only created
    # there and numberOfJobs() only reads self.dict in that mode.
    if self.automation == 0:
        self.seListParser = SEBlackWhiteListParser(logger=common.logger())
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        # insert here site override from crab.cfg
        # note that b/w lists will still be applied
        if cfg_params.has_key('GRID.data_location_override'):
            sitesOverride = cfg_params['GRID.data_location_override'].split(',')
            common.logger.info("DataSite overridden by user to: %s" % sitesOverride)
            seOverride = self.seListParser.expandList(sitesOverride)
            # beware SiteDB V2 API, cast unicode to string
            seOverride = [str(se) for se in seOverride]
            common.logger.info("DataLocation overridden by user to: %s\n" % seOverride)
            for block in blockSites.keys():
                blockSites[block] = seOverride

        self.conf['blockSites'] = blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

        if self.selectNoInput:
            if self.pset == None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators'] = self.managedGenerators
                self.conf['generator'] = self.generator
        elif self.ads or self.lumiMask or self.lumiParams:
            self.algo = 'LumiBased'
            if splitByRun:
                msg = "Cannot combine split by run with lumi_mask, ADS, " \
                      "or lumis_per_job. Use split by lumi mode instead."
                raise CrabException(msg)
        elif splitByRun == 1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params, self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset != None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
295
296
def ModifyPset(self):
    """
    Load the user parameter set, write the CRAB copy of it and extend
    self.output_file with the files the configuration will produce.

    TFileService output is added unless CMSSW.skip_tfileservice_output is
    set. PoolOutputModule output is added when CMSSW.get_edm_output is
    set; otherwise any such file missing from the output list is an error
    unless CMSSW.ignore_edm_output is set.

    Raises CrabException if the parameter set manipulation fails or if an
    output file name contains characters other than letters, digits,
    '_', '-' and '.'.
    """
    import PsetManipulator as pp

    # If pycfg_params set, fake out the config script
    # to make it think it was called with those args
    pycfg_params = self.cfg_params.get('CMSSW.pycfg_params', None)
    trueArgv = sys.argv
    if pycfg_params:
        sys.argv = [self.pset]
        # beware double space: https://savannah.cern.ch/bugs/?102552
        sys.argv.extend([x for x in pycfg_params.split(' ') if x])
    try:
        PsetEdit = pp.PsetManipulator(self.pset)
    finally:
        # Restore the original sys.argv even when loading the config fails
        sys.argv = trueArgv

    try:
        # Add FrameworkJobReport to parameter-set, set max events.
        # Reset later for data jobs by writeCFG which does all modifications
        PsetEdit.maxEvent(1)
        PsetEdit.skipEvent(0)
        PsetEdit.psetWriter(self.configFilename())
        ## If present, add TFileService to output files
        if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output', 0)):
            tfsOutput = PsetEdit.getTFileService()
            if tfsOutput:
                if tfsOutput in self.output_file:
                    common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
                else:
                    self.output_file.append(tfsOutput)
                    common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")

        # If requested, add PoolOutputModule to output files
        ### FOR MULTI ###
        edmOutputDict = PsetEdit.getPoolOutputModule()
        common.logger.debug("(test) edmOutputDict = "+str(edmOutputDict))
        filter_dict = {}
        for key in edmOutputDict.keys():
            filter_dict[key] = edmOutputDict[key]['dataset']
        common.logger.debug("(test) filter_dict for multi = "+str(filter_dict))

        #### in CMSSW.sh: export var_filter
        self.var_filter = json.dumps(filter_dict)
        common.logger.debug("(test) var_filter for multi = "+self.var_filter)

        edmOutput = edmOutputDict.keys()
        if int(self.cfg_params.get('CMSSW.get_edm_output', 0)):
            for outputFile in edmOutput:
                if outputFile in self.output_file:
                    common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
                else:
                    self.output_file.append(outputFile)
                    common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
        else:
            # not requested, check anyhow to avoid accidental T2 overload
            missedFiles = [f for f in edmOutput if f not in self.output_file]
            if missedFiles:
                msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
                msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
                msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
                if int(self.cfg_params.get('CMSSW.ignore_edm_output', 0)):
                    msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
                    common.logger.info(msg)
                else:
                    raise CrabException(msg)

        if (PsetEdit.getBadFilesSetting()):
            msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
            common.logger.info(msg)

    except CrabException:
        # sys.exc_info() gives the active exception without relying on a
        # version specific "except ... as/," binding syntax
        common.logger.info(str(sys.exc_info()[1]))
        msg = 'Error while manipulating ParameterSet (see previous message, if any): exiting...'
        raise CrabException(msg)

    valid = re.compile(r'^[\w\.\-]+$')
    for fileName in self.output_file:
        if not valid.match(fileName):
            msg = "The file %s may only contain alphanumeric characters and -, _, ." % fileName
            raise CrabException(msg)
386
387
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery and data location services for
    self.datasetPath.

    Stores the discovery object in self.pubdata and self.conf['pubdata'],
    the per block file lists in self.filesbyblock and the number of
    available events in self.maxEvents.

    Returns a mapping with one entry per block of self.filesbyblock; the
    value is what the location service reported for that block, or an
    empty list when it reported nothing.

    Raises CrabException when either service reports an error or when the
    dataset has no blocks.
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.log(10-1, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.info("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # all three failures are reported to the user in the same way
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.conf['pubdata'] = self.pubdata

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    unsorted_sites = dataloc.getSites()
    sites = self.filesbyblock.fromkeys(self.filesbyblock, '')
    for block in self.filesbyblock.keys():
        if unsorted_sites.has_key(block):
            sites[block] = unsorted_sites[block]
        else:
            sites[block] = []

    # sites has exactly one entry per block, so this only triggers when the
    # dataset has no blocks at all
    if len(sites) == 0:
        msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
        msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
        msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
        raise CrabException(msg)

    # screen output
    nBlocks = len(self.filesbyblock.keys())
    if self.ads or self.lumiMask:
        common.logger.info("Requested (A)DS %s has %s block(s)." %
                           (datasetPath, nBlocks))
    else:
        common.logger.info("Requested dataset: " + datasetPath + \
                           " has " + str(self.maxEvents) + " events in " + \
                           str(nBlocks) + " blocks.\n")

    return sites
460
461
def split(self, jobParams, firstJobID):
    """
    Register the jobs computed by the splitter.

    The incoming jobParams argument is ignored: the per job arguments are
    taken from self.dict['args']. For each job the arguments are written
    to the arguments XML file (when there are any) and the job's
    'arguments' and 'dlsDestination' fields are stored through
    common._db.updateJob_. Job numbers start at int(firstJobID) + 1.

    Raises CrabException when there are no jobs or when the number of jobs
    exceeds the limit of the submission mode.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local:
        if common.scheduler.name().upper() == 'REMOTEGLIDEIN':
            if njobs > 5000:
                raise CrabException("too many jobs. remoteGlidein has a limit at 5000")
        else:
            if njobs > 500:
                msg = "Error: this task contains more than 500 jobs. \n"
                msg += " The CRAB SA does not submit more than 500 jobs.\n"
                msg += " Use the server mode. \n"
                raise CrabException(msg)
    if self.server and njobs > 5000:
        raise CrabException("too many jobs. CrabServer has a limit at 5000")

    # Appends njobs empty placeholders to self.dict['args']; kept as in the
    # historical code, the loop below only reads the first njobs entries.
    for i in range(njobs):
        jobParams.append("")

    # Loop invariant: import and build the SE -> CMS site mapper once
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    concString = ' '
    for idx in range(njobs):
        job = idx + int(firstJobID)
        listID.append(job+1)
        job_ToSave = {}
        str_argu = str(job+1)
        params = jobParams[idx]
        if len(params):
            argu = {'JobID': job+1}
            for pos in range(len(params)):
                argu[self.dict['params'][pos]] = params[pos]
                if len(params) == 1:
                    self.NumEvents = params[pos]
            # just for debug
            str_argu += concString.join(params)
            listDictions.append(argu)
        job_ToSave['arguments'] = '%d %d'%((job+1), 0)
        job_ToSave['dlsDestination'] = self.jobDestination[idx]
        listField.append(job_ToSave)
        msg = "Job %s Arguments: %s\n"%(str(job+1), str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[idx]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[idx]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)
    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
521
def CreateXML(self):
    """
    Create (or overwrite) self.argsFile with an XML document that holds
    only the root node named self.rootArgsFilename.
    """
    result = IMProvNode(self.rootArgsFilename)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly instead of relying on garbage collection
        outfile.close()
    return
528
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary of listDictions to the XML file
    self.argsFile and rewrite the file. Each dictionary is passed to
    IMProvNode as keyword arguments.
    """
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # close explicitly instead of relying on garbage collection
        outfile.close()
    return
543
def numberOfJobs(self):
    """
    Return the number of jobs stored in self.dict['njobs'], or None when
    self.automation is not 0 (wmbs).
    """
    if self.automation != 0:
        return None
    return self.dict['njobs']
550
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe, building it through
    buildTar_ only when no file exists at that path yet.
    """
    tarballPath = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)

    return self.tgzNameWithPath
563
def buildTar_(self, executable):
    """
    Build the gzipped input sandbox at self.tgzNameWithPath.

    Nothing is built when the working area is the release top. Otherwise
    the tarball receives: the user executable when it is not part of the
    release, the lib/ and module/ directories of the working area, every
    'data' directory below src/, the CRAB parameter set and its pickle,
    crab.cfg (best effort), the needed ProdCommon/IMProv/WMCore pieces,
    the monitoring and utility scripts and the additional input files.

    Sets self.dataExist to tell whether a 'data' directory was shipped.

    Raises CrabException when the executable is not found, when the
    tarball cannot be written, or when it exceeds self.MaxTarBallSize MB.
    """
    import sys
    import tarfile

    # wsBuildExe() reads self.dataExist; define it before the early return
    # below so the attribute exists on every path through this method.
    self.dataExist = False

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if (not exeWithPath):
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = string.replace(exeWithPath, path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # else: the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            tar.dereference = True
            libDir = 'lib'
            lib = swArea+'/' + libDir
            common.logger.debug("lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)
            tar.dereference = False

            ## Now check if any data dir(s) is present
            # Walk src/ with an explicit stack of (relative path, base name)
            todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
            while len(todo_list):
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(swArea+"/src/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
                    if name == 'data':
                        self.dataExist = True
                        common.logger.debug("data "+entry+" to be tarred")
                        tar.add(swArea+"/src/"+entry, "src/"+entry)

            ### CMSSW ParameterSet
            if not self.pset is None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                pickleFile = common.work_space.jobDir()+self.configFilename() + '.pkl'
                tar.add(cfg_file, self.configFilename())
                tar.add(pickleFile, self.configFilename() + '.pkl')

            # Shipping crab.cfg is best effort: a missing or unreadable file
            # must not stop the sandbox creation.
            try:
                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
                tar.add(crab_cfg_file, 'crab.cfg')
            except (IOError, OSError, tarfile.TarError):
                common.logger.debug("crab.cfg not added to the tarball: "+str(sys.exc_info()[1]))

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
                           'WMCore/__init__.py','WMCore/Algorithms']
            for item in neededStuff:
                tar.add(prodcommonPath+item, prodcommonDir+item)

            ##### ML stuff
            ML_file_list = ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            path = os.environ['CRABDIR'] + '/python/'
            for item in ML_file_list:
                tar.add(path+item, item)

            ##### Utils
            Utils_file_list = ['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py','crabWatchdog.sh']
            for item in Utils_file_list:
                tar.add(path+item, item)

            ##### AdditionalFiles
            tar.dereference = True
            for item in self.additional_inbox_files:
                tar.add(item, string.split(item, '/')[-1])
            tar.dereference = False
            common.logger.log(10-1, "Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the archive handle, also on errors
            tar.close()
    except (IOError, tarfile.TarError):
        exc = sys.exc_info()[1]
        msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
        msg += str(exc)
        raise CrabException(msg)

    tarballinfo = os.stat(self.tgzNameWithPath)
    if (tarballinfo.st_size > self.MaxTarBallSize*1024*1024):
        #### FEDE FOR SAVANNAH BUG 94491 ########
        # list the archive content sorted by size to help the user trim it
        cmdOut = runCommand('tar -ztvf %s|sort -n -k3'%self.tgzNameWithPath)
        if not self.server:
            msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + \
                  str(self.MaxTarBallSize) +'MB input sandbox limit \n'
            msg += ' and not supported by the direct GRID submission system.\n'
            msg += ' Please use the CRAB server mode by setting use_server=1 in section [CRAB] of your crab.cfg.\n'
            msg += ' Content of your default.tgz archive is \n'
            msg += cmdOut
        else:
            msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + \
                  str(self.MaxTarBallSize) +'MB input sandbox limit in the server.'
            msg += 'Content of your default.tgz archive is \n'
            msg += cmdOut
        raise CrabException(msg)
699
700 ## create tar-ball with ML stuff
701
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The text sets SCRAM_ARCH, runs the middleware specific setup, creates
    the CMSSW project area and its runtime environment, checks the number
    of wrapper arguments and defines the dataset and parameter set
    variables used later in the script.
    """
    psetName = 'pset.py'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
    txt += 'export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += 'echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    #Setup SGE Environment
    txt += 'elif [ $middleware == SGE ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'elif [ $middleware == ARC ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    #Setup PBS Environment
    txt += 'elif [ $middleware == PBS ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    txt += ' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        txt += 'PrimaryDataset='+self.primaryDataset +'\n'
        txt += 'DataTier='+self.dataTier+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset != None:
        pset = os.path.basename(job.configFilename())
        pkl = os.path.basename(job.configFilename()) + '.pkl'
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        txt += 'cp $RUNTIME_AREA/'+pkl+' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
        if self.var_filter:
            txt += "export var_filter="+"'"+self.var_filter+"'\n"
            # The newline matters: without it the export lines appended
            # below end up glued to this echo command.
            txt += 'echo $var_filter\n'
    else:
        txt += '\n'
    if self.AdditionalArgs:
        txt += 'export AdditionalArgs=\"%s\"\n'%(self.AdditionalArgs)
    if int(self.NumEvents) != 0:
        txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
    return txt
814
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job script that unpacks the input sandbox.

    When the tarball self.tgzNameWithPath exists, the text untars it from
    $RUNTIME_AREA, aborts the job on failure and prepends $RUNTIME_AREA to
    PYTHONPATH. For each of GRID.max_rss, GRID.max_cpu_time and
    GRID.max_wall_clock_time that is configured, the text also writes the
    corresponding limit file (rssLimit, cpuLimit, wallLimit) used by the
    watchdog. The 'nj' argument is not used.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarName = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar --no-same-permissions -xf $RUNTIME_AREA/'+tarName+' :" \n'
        txt += 'tar --no-same-permissions -xf $RUNTIME_AREA/'+tarName+'\n'
        txt += 'untar_status=$? \n'
        if self.debug_wrapper == 1:
            txt += 'echo "----------------" \n'
            txt += 'ls -AlR $RUNTIME_AREA \n'
            txt += 'echo "----------------" \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'fi\n'
        # Report the final value after the if/else so it is printed in both
        # cases, not only when PYTHONPATH was already set.
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    # add the cpuLimit, wallLimit and rssLimit file for the watchdog
    if self.cfg_params.get('GRID.max_rss'):
        # configured value times 1000, reported by the script as KBytes
        max_rss = int(self.cfg_params.get('GRID.max_rss')) * 1000
        txt += 'echo "%d" > rssLimit\n' % max_rss
        txt += 'maxrss=`cat rssLimit`\n'
        txt += 'echo "RSS limit set to: ${maxrss} KBytes"\n'
    if self.cfg_params.get('GRID.max_cpu_time'):
        # configured value times 60, reported by the script as seconds
        cpu_sec = int(self.cfg_params.get('GRID.max_cpu_time')) * 60
        txt += 'echo "%d" > cpuLimit\n' % cpu_sec
        txt += 'maxcpus=`cat cpuLimit`\n'
        txt += 'maxcpuhms=`printf "%dh:%dm:%ds" $(($maxcpus/3600)) $(($maxcpus%3600/60)) $(($maxcpus%60))`\n'
        txt += 'echo "Cpu Time limit set to: ${maxcpus} seconds '
        txt += 'i.e. ${maxcpuhms}"\n'
    if self.cfg_params.get('GRID.max_wall_clock_time'):
        # configured value times 60, reported by the script as seconds
        max_wall_sec = int(self.cfg_params.get('GRID.max_wall_clock_time')) * 60
        txt += 'echo "%d" > wallLimit\n' % max_wall_sec
        txt += 'maxwalls=`cat wallLimit`\n'
        txt += 'maxwallhms=`printf "%dh:%dm:%ds" $(($maxwalls/3600)) $(($maxwalls%3600/60)) $(($maxwalls%60))`\n'
        txt += 'echo "Wall Time limit set to: ${maxwalls} seconds '
        txt += 'i.e. ${maxwallhms}"\n'

    return txt
872
def wsBuildExe(self, nj=0):
    """
    Return the job-script fragment that moves the unpacked software and
    the additional input files from $RUNTIME_AREA into the current
    directory.

    The fragment always moves lib/ and module/, moves src/ when
    self.dataExist is True, and moves every entry of
    self.additional_inbox_files by its base name, except the limit files
    used by the Watchdog.
    It then prepends $RUNTIME_AREA to PYTHONPATH and, when self.pset is
    not None, runs edmConfigHash on pset.py to set PSETHASH.
    The nj argument is accepted but not used.
    """
    # files used by Watchdog must not be moved
    watchdog_files = ('rssLimit', 'vszLimit', 'diskLimit', 'cpuLimit', 'wallLimit')

    parts = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -rf lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]
    if self.dataExist == True:
        parts.append('rm -rf src/ \n')
        parts.append('mv $RUNTIME_AREA/src/ . \n')

    for inbox_file in self.additional_inbox_files:
        # Compare by base name: the mv below works on the base name, so an
        # entry given with a directory part (e.g. './cpuLimit') must be
        # recognised as a Watchdog file too.
        base_name = os.path.basename(inbox_file)
        if base_name in watchdog_files:
            continue
        parts.append('mv $RUNTIME_AREA/' + base_name + ' . \n')

    parts.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset is not None:
        psetName = 'pset.py'

        parts.append('\n')
        if self.debug_wrapper == 1:
            # debugging aid: dump the configuration and show which
            # edmConfigHash is picked up
            parts.extend([
                'echo "***** cat ' + psetName + ' *********"\n',
                'cat ' + psetName + '\n',
                'echo "****** end ' + psetName + ' ********"\n',
                '\n',
                'echo "***********************" \n',
                'which edmConfigHash \n',
                'echo "***********************" \n',
            ])
        parts.extend([
            'edmConfigHash ' + psetName + ' \n',
            'PSETHASH=`edmConfigHash ' + psetName + '` \n',
            'echo "PSETHASH = $PSETHASH" \n',
            # temporary fix for noEdm files: never leave PSETHASH empty
            'if [ -z "$PSETHASH" ]; then \n',
            ' export PSETHASH=null\n',
            'fi \n',
            '\n',
        ])
    return ''.join(parts)
928
929
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is set, otherwise self.executable.
    """
    return "sh " if self.scriptExe else self.executable
935
def executableArgs(self):
    """
    Return the argument string that follows executableName().

    With a user script the arguments are the script base name followed by
    $NJob and $AdditionalArgs; otherwise they are the job-report and
    pset options.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    script_name = os.path.basename(self.scriptExe)
    return script_name + " $NJob $AdditionalArgs"
941
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The tarball and the arguments file are included only if they exist
    on disk; the job script path is always appended last.
    The nj argument is accepted but not used.
    """
    sandbox = []
    for candidate in (self.tgzNameWithPath, self.argsFile):
        if os.path.isfile(candidate):
            sandbox.append(candidate)
    job_script = common.work_space.jobDir() + self.scriptName
    sandbox.append(job_script)
    return sandbox
953
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile with the
    string form of nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
965
966
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every name in self.output_file the fragment checks that the file
    exists, renames it to its numbered form (numberFile with
    $OutUniqueID) and leaves a symlink with the original name in
    $RUNTIME_AREA; a missing file sets job_exit_code=60302.
    The fragment also defines the shell variable file_list and finally
    changes directory to $RUNTIME_AREA.
    The nj argument is accepted but not used.
    """
    # directory report printed both before and after the renaming
    dir_report = (
        'echo ">>> current directory $PWD" \n'
        'echo ">>> (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
        'echo ">>> (WORKING_DIR): $WORKING_DIR" \n'
        'echo ">>> current directory content:"\n'
        'ls -Al\n'
        '\n'
    )
    pieces = ['\n#Written by cms_cmssw::wsRenameOutput\n', dir_report]

    for produced in self.output_file:
        numbered = numberFile(produced, '$OutUniqueID')
        pieces.append('\n')
        pieces.append('# check output file\n')
        pieces.append('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            pieces.append(' mv ' + produced + ' ' + numbered + '\n')
            pieces.append(' ln -s `pwd`/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            pieces.append(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
            pieces.append(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        pieces.append('else\n')
        pieces.append(' job_exit_code=60302\n')
        pieces.append(' echo "WARNING: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            pieces.append(' if [ $middleware == OSG ]; then \n')
            pieces.append(' echo "prepare dummy output file"\n')
            pieces.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            pieces.append(' fi \n')
        pieces.append('fi\n')

    numbered_paths = [numberFile('$SOFTWARE_DIR/' + produced, '$OutUniqueID')
                      for produced in self.output_file]

    pieces.append('file_list="' + ','.join(numbered_paths) + '"\n')
    pieces.append('\n')
    pieces.append(dir_report)
    pieces.append('cd $RUNTIME_AREA\n')
    pieces.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(pieces)
1017
def getRequirements(self, nj=[]):
    """
    return job requirements to add to jdl files

    The expression is built from the CMSSW version tag, the architecture
    tag and the outbound-IP requirement, joined with ' && '. For the
    "glite" scheduler a GlueCEStateStatus clause is appended, which also
    accepts "Special" when GRID.use_cream is set in self.cfg_params.
    The nj argument is accepted but not used.
    """
    # NOTE(review): when self.version is empty the expression starts
    # with ' && ' -- confirm that callers never hit that case.
    runtime_env = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'

    requirements = ''
    if self.version:
        requirements = 'Member("VO-cms-' + self.version + runtime_env
    if self.executable_arch:
        requirements += ' && Member("VO-cms-' + self.executable_arch + runtime_env

    requirements += ' && (other.GlueHostNetworkAdapterOutboundIP)'

    if common.scheduler.name() == "glite":
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            requirements += ' && (other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")'
        else:
            requirements += ' && other.GlueCEStateStatus == "Production" '

    return requirements
1041
def configFilename(self):
    """ return the config filename: the value of self.name() plus '.py' """
    base_name = self.name()
    return base_name + '.py'
1045
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns part of a job script which prepares the execution
    environment on OSG and which is common for all CMS jobs.

    The fragment exports SCRAM_ARCH from self.executable_arch, looks for
    cmsset_default.sh under $OSG_APP, $CVMFS and /cvmfs in that order and
    sources it. If the file is not found it sets job_exit_code=10020, and
    if sourcing fails it sets job_exit_code=10032; both cases call
    func_exit.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' echo "OSG_APP is $OSG_APP"\n',
        # locate the CMS setup script
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' cmsSetupFile=$OSG_APP/cmssoft/cms/cmsset_default.sh\n',
        ' elif [ -f $CVMFS/cms.cern.ch/cmsset_default.sh ] ; then \n',
        ' cmsSetupFile=$CVMFS/cms.cern.ch/cmsset_default.sh\n',
        ' elif [ -f /cvmfs/cms.cern.ch/cmsset_default.sh ] ; then \n',
        ' cmsSetupFile=/cvmfs/cms.cern.ch/cmsset_default.sh\n',
        ' else\n',
        # nothing found: print diagnostics before giving up
        ' echo "CVMSF = $CVMFS"\n',
        ' echo "/cvmfs/ is"\n',
        ' echo "ls /"\n',
        ' ls /\n',
        ' echo "ls /cvmfs"\n',
        ' ls /cvmfs\n',
        ' echo "ls /cvmfs/cms.cern.ch"\n',
        ' ls /cvmfs/cms.cern.ch\n',
        ' ls /cvmfs/cms.cern.ch/cmsset*\n',
        ' ls /cvmfs/cms.cern.ch/cmsset_default.sh\n',
        ' echo "ERROR ==> cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "sourcing $cmsSetupFile ..."\n',
        ' source $cmsSetupFile\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $cmsSetupFile"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' else\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' fi\n',
    ]
    return ''.join(lines)
1092
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns part of a job script which prepares the execution
    environment on LCG and which is common for all CMS jobs.

    The fragment exports SCRAM_ARCH and BUILD_ARCH from
    self.executable_arch and sources $VO_CMS_SW_DIR/cmsset_default.sh.
    It calls func_exit with job_exit_code 10031 when $VO_CMS_SW_DIR is
    not set, 10020 when the setup file is missing or empty, and 10032
    when sourcing fails.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(lines)
1126
def wsModifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    Returns an empty string unless self.copy_data is 1. Otherwise the
    fragment runs ModifyJobReport.py on crab_fjr_$NJob.xml and, when the
    command succeeds, replaces the report with
    NewFrameworkJobReport.xml; on failure job_exit_code is set to 70500.
    When self.publish_data is 1 the processed dataset name is passed to
    the command as well.
    The nj argument is accepted but not used.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    cmd_args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml json $RUNTIME_AREA/resultCopyFile n_job $OutUniqueID PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH'

    fragment = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        fragment.append('ProcessedDataset=' + self.processedDataset + '\n')
        fragment.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        cmd_args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    command = modifier + ' ' + cmd_args
    fragment.extend([
        # echo the command line before running it
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(fragment)
1159
def wsParseFJR(self):
    """
    Parse the FrameworkJobReport to obtain useful infos

    Returns the job-script fragment that runs parseCrabFjr.py on
    crab_fjr_$NJob.xml three times (--dashboard, --popularity and
    --exitcode), stores the last result in executable_exit_status and
    calls func_exit when that status is not zero. self.debugWrap is
    appended to the first two command lines; the --dashboard output is
    echoed only when self.debug_wrapper is 1.
    """
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml'
    parsed_msg = ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: '

    fragment = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        fragment.append(parsed_msg + '$cmd_out"\n')
    fragment.extend([
        ' cmd_out_1=`' + parser + ' --popularity $MonitorID,$MonitorJobID,$RUNTIME_AREA/inputsReport.txt ' + self.debugWrap + '`\n',
        # the popularity result is echoed regardless of the debug setting
        parsed_msg + '$cmd_out_1"\n',
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])
    return ''.join(fragment)
1207
def setParam_(self, param, value):
    """
    Store value under the key param in the self._params mapping,
    replacing any value already stored for that key.
    """
    self._params.update({param: value})
1210
def getParams(self):
    """
    Return the self._params mapping itself (not a copy).
    """
    params = self._params
    return params
1213
def outList(self,list=False):
    """
    check the dimension of the output files

    Builds the list of files expected in the output sandbox, stores
    self.output_file as 'outfileBasename' through common._db.updateTask_
    and returns the job-script fragment that exports filesToCheck.
    When the list argument is true, self.output_file is returned instead
    of the script fragment. A warning is logged when self.output_file is
    empty.
    """
    if len(self.output_file) <= 0:
        warning = "WARNING: no output files name have been defined!!\n"
        warning += "\tno output files will be reported back/staged\n"
        common.logger.info(warning)

    expected = []
    if self.return_data == 1:
        expected.extend([numberFile(name, '$OutUniqueID') for name in self.output_file])
    # NOTE(review): sandbox files are added regardless of return_data;
    # the original nesting could not be recovered with certainty -- confirm.
    expected.extend([numberFile(name, '$NJob') for name in self.output_file_sandbox])
    expected.extend(['CMSSW_$NJob.stdout', 'CMSSW_$NJob.stderr', 'Watchdog_$NJob.log.gz'])

    names = ' '.join(expected)
    script = ''.join([
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: ' + names + '"\n',
        'filesToCheck="' + names + '"\n',
        'export filesToCheck\n',
    ])

    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return script
1246
def checkCMSSWVersion(self, url = "https://cmstags.cern.ch/tc/", fileName = "ReleasesXML"):
    """
    compare current CMSSW release and arch with allowed releases

    Downloads fileName through a Downloader built on url and scans the
    XML for a <project> element in state 'Announced' whose label equals
    self.version, listed after an <architecture> element whose name
    equals self.executable_arch.

    Returns True when such an entry is found, False otherwise. Download
    and parsing problems are only logged; in those cases False is
    returned as well. A warning is logged when the file was parsed but
    the release/architecture pair is not listed.
    """
    downloader = Downloader(url)
    goodRelease = False
    tagCollectorUrl = url + fileName

    try:
        result = downloader.config(fileName)
    except Exception:
        common.logger.info("ERROR: Problem reading file of allowed CMSSW releases.")
        # nothing to parse: stop here instead of failing on an unbound
        # 'result' and logging a misleading parsing error
        return goodRelease

    try:
        arch = None
        for (event, node) in pulldom.parseString(result):
            if event != pulldom.START_ELEMENT:
                continue
            if node.tagName == 'architecture':
                arch = node.attributes.getNamedItem('name').nodeValue
            elif node.tagName == 'project':
                attributes = node.attributes
                if attributes.getNamedItem('state').nodeValue != 'Announced':
                    continue
                # compare only at the announced project itself, so a
                # release seen under a previous architecture can never
                # match the current one
                release = attributes.getNamedItem('label').nodeValue
                if self.executable_arch == arch and self.version == release:
                    return True

        msg = "WARNING: %s on %s is not among supported releases listed at %s ." % \
              (self.version, self.executable_arch, tagCollectorUrl)
        msg += "Submission may fail."
        common.logger.info(msg)
    except Exception:
        common.logger.info("Problems parsing file of allowed CMSSW releases.")

    return goodRelease
1288