ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.368
Committed: Mon Feb 7 17:21:36 2011 UTC (14 years, 2 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_7_8_dash, CRAB_2_7_8_pre1
Changes since 1.367: +25 -24 lines
Log Message:
modify datasetpath check, savannah bug 77899

File Contents

# User Rev Content
1 spiga 1.358
2 fanzago 1.368 __revision__ = "$Id: cms_cmssw.py,v 1.367 2010/12/29 21:16:05 ewv Exp $"
3     __version__ = "$Revision: 1.367 $"
4 spiga 1.358
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9 ewv 1.363 import re
10 slacapra 1.1 import Scram
11 spiga 1.269 from Splitter import JobSplitter
12 ewv 1.355 from Downloader import Downloader
13 ewv 1.363 try:
14 fanzago 1.360 import json
15     except:
16     import simplejson as json
17 slacapra 1.1
18 spiga 1.293 from IMProv.IMProvNode import IMProvNode
19 ewv 1.355 from IMProv.IMProvLoader import loadIMProvFile
20 slacapra 1.105 import os, string, glob
21 ewv 1.355 from xml.dom import pulldom
22 slacapra 1.1
23     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Build the CMSSW job type from the crab.cfg parameters.

    cfg_params  : mapping of 'SECTION.key' -> value (crab.cfg content)
    ncjobs      : number of jobs requested to be created (kept in self.ncjobs)
    skip_blocks : kept in self.skip_blocks and handed to DataDiscovery
    isNew       : when true the PSet is modified and the input sandbox
                  tarball is prepared (both are done only the first time)

    Raises CrabException when the CMSSW version cannot be parsed or is older
    than 2_1, when datasetpath/pset are missing or malformed, when the PSet,
    the script_exe or an additional input file does not exist, when nothing
    runnable is configured, or when publication is requested without
    USER.publish_data_name.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents=0
    self._params = {}
    self.cfg_params = cfg_params
    ### FOR MULTI ###
    self.var_filter=''

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  self.cfg_params.get('CRAB.use_server',0)
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
    # default sandbox limit (MB): 9.5 direct, 100 via server, unlimited locally
    size = 9.5
    if self.server :
        size = 100
    elif self.local:
        size = 9999999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_' separated fields, or a non numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting
    self.checkCMSSWVersion()
    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    #### FEDE ADDED CHECK FOR DATASETPATH ##############################################
    if not cfg_params.has_key('CMSSW.datasetpath'):
        msg = "Error: datasetpath not defined in the section [CMSSW] of crab.cfg file "
        raise CrabException(msg)
    else:
        tmp = cfg_params['CMSSW.datasetpath']
        common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)
        if tmp.lower()=='none':
            # no input dataset
            self.datasetPath = None
            self.selectNoInput = 1
            self.primaryDataset = 'null'
        else:
            self.datasetPath = tmp
            self.selectNoInput = 0
            pathFields = self.datasetPath.split("/")
            # a leading '/' gives an empty first field, hence at least 4 fields
            if len(pathFields) < 4:
                msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
                msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
                raise CrabException(msg)
            self.primaryDataset = pathFields[1]
            self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
    self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
                      self.cfg_params.get('CMSSW.lumis_per_job',None)

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap=''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1: self.debugWrap='--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs : self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test its
    # truth value: otherwise this sanity check can never fire.
    if self.datasetPath is None and self.pset is None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for tmp in tmpAddFiles:
            tmp = tmp.strip()
            if not tmp:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not tmp.startswith("/"): dirname = "."
            files = []
            if tmp.find("*")>-1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
        for tmp in tmpList:
            # str.strip() returns a new string: store the stripped name
            self.preserveSeeds.append(tmp.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
        for tmp in tmpList:
            self.incrementSeeds.append(tmp.strip())

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if (self.publish_data == 1):
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']

    # configuration handed to the JobSplitter
    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation',0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites']=blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators']=self.managedGenerators
                self.conf['generator']=self.generator
        elif self.ads or self.lumiMask or self.lumiParams:
            self.algo = 'LumiBased'
            if splitByRun:
                msg = "Cannot combine split by run with lumi_mask, ADS, " \
                      "or lumis_per_job. Use split by lumi mode instead."
                raise CrabException(msg)

        elif splitByRun ==1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params,self.conf)
        # result of the selected splitting algorithm (read by split()/numberOfJobs())
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename= 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None: self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
279 spiga 1.293
280    
281     def ModifyPset(self):
282     import PsetManipulator as pp
283 ewv 1.335
284     # If pycfg_params set, fake out the config script
285     # to make it think it was called with those args
286     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
287     if pycfg_params:
288     trueArgv = sys.argv
289     sys.argv = [self.pset]
290     sys.argv.extend(pycfg_params.split(' '))
291 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
292 ewv 1.335 if pycfg_params: # Restore original sys.argv
293     sys.argv = trueArgv
294    
295 spiga 1.293 try:
296     # Add FrameworkJobReport to parameter-set, set max events.
297     # Reset later for data jobs by writeCFG which does all modifications
298 ewv 1.295 PsetEdit.maxEvent(1)
299 spiga 1.293 PsetEdit.skipEvent(0)
300     PsetEdit.psetWriter(self.configFilename())
301     ## If present, add TFileService to output files
302 slacapra 1.349 if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output',0)):
303 spiga 1.293 tfsOutput = PsetEdit.getTFileService()
304     if tfsOutput:
305     if tfsOutput in self.output_file:
306 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
307 spiga 1.293 else:
308     outfileflag = True #output found
309     self.output_file.append(tfsOutput)
310 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
311 spiga 1.293 pass
312     pass
313 fanzago 1.360
314     # If requested, add PoolOutputModule to output files
315 fanzago 1.368 ### FOR MULTI ###
316 fanzago 1.360 #edmOutput = PsetEdit.getPoolOutputModule()
317     edmOutputDict = PsetEdit.getPoolOutputModule()
318     common.logger.debug("(test) edmOutputDict = "+str(edmOutputDict))
319     filter_dict = {}
320     for key in edmOutputDict.keys():
321     filter_dict[key]=edmOutputDict[key]['dataset']
322     common.logger.debug("(test) filter_dict for multi = "+str(filter_dict))
323    
324     #### in CMSSW.sh: export var_filter
325    
326     self.var_filter = json.dumps(filter_dict)
327     common.logger.debug("(test) var_filter for multi = "+self.var_filter)
328 ewv 1.363
329 fanzago 1.360 edmOutput = edmOutputDict.keys()
330 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
331     if edmOutput:
332 ewv 1.321 for outputFile in edmOutput:
333     if outputFile in self.output_file:
334 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
335 ewv 1.321 else:
336     self.output_file.append(outputFile)
337     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
338     # not requested, check anyhow to avoid accidental T2 overload
339 slacapra 1.297 else:
340 ewv 1.321 if edmOutput:
341     missedFiles = []
342     for outputFile in edmOutput:
343     if outputFile not in self.output_file:
344     missedFiles.append(outputFile)
345     if missedFiles:
346     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
347     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
348     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
349     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
350     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
351     common.logger.info(msg)
352 spiga 1.322 else :
353 ewv 1.321 raise CrabException(msg)
354 ewv 1.301
355     if (PsetEdit.getBadFilesSetting()):
356     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
357 spiga 1.304 common.logger.info(msg)
358 ewv 1.301
359 slacapra 1.297 except CrabException, msg:
360 spiga 1.304 common.logger.info(str(msg))
361 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
362 spiga 1.293 raise CrabException(msg)
363    
364 ewv 1.363 valid = re.compile('^[\w\.\-]+$')
365     for fileName in self.output_file:
366     if not valid.match(fileName):
367     msg = "The file %s may only contain alphanumeric characters and -, _, ." % fileName
368     raise CrabException(msg)
369    
370 gutsche 1.3
371 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
372    
373 slacapra 1.86 import DataDiscovery
374     import DataLocation
375 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
376 gutsche 1.3
377     datasetPath=self.datasetPath
378    
379 slacapra 1.1 ## Contact the DBS
380 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
381 slacapra 1.1 try:
382 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
383 slacapra 1.1 self.pubdata.fetchDBSInfo()
384    
385 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
386 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
387     raise CrabException(msg)
388 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
389 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
390     raise CrabException(msg)
391 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
392 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
393 slacapra 1.1 raise CrabException(msg)
394    
395 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
396 spiga 1.269 self.conf['pubdata']=self.pubdata
397 gutsche 1.3
398 slacapra 1.1 ## get max number of events
399 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
400 slacapra 1.1
401     ## Contact the DLS and build a list of sites hosting the fileblocks
402     try:
403 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
404 gutsche 1.6 dataloc.fetchDLSInfo()
405 slacapra 1.263
406 slacapra 1.41 except DataLocation.DataLocationError , ex:
407 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
408     raise CrabException(msg)
409 ewv 1.131
410 slacapra 1.1
411 slacapra 1.270 unsorted_sites = dataloc.getSites()
412     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
413     for lfn in self.filesbyblock.keys():
414     if unsorted_sites.has_key(lfn):
415     sites[lfn]=unsorted_sites[lfn]
416     else:
417     sites[lfn]=[]
418    
419 slacapra 1.264 if len(sites)==0:
420 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
421     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
422     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
423 slacapra 1.264 raise CrabException(msg)
424    
425 gutsche 1.35 allSites = []
426     listSites = sites.values()
427 slacapra 1.63 for listSite in listSites:
428     for oneSite in listSite:
429 gutsche 1.35 allSites.append(oneSite)
430 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
431 ewv 1.295
432 gutsche 1.3
433 gutsche 1.92 # screen output
434 spiga 1.354 if self.ads or self.lumiMask:
435     common.logger.info("Requested (A)DS %s has %s block(s)." %
436 ewv 1.350 (datasetPath, len(self.filesbyblock.keys())))
437     else:
438     common.logger.info("Requested dataset: " + datasetPath + \
439     " has " + str(self.maxEvents) + " events in " + \
440     str(len(self.filesbyblock.keys())) + " blocks.\n")
441 gutsche 1.92
442 gutsche 1.35 return sites
443 ewv 1.131
444 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Register the jobs produced by the splitting algorithm.

    jobParams  : ignored, it is replaced by self.dict['args']
    firstJobID : offset added to the 0-based job index; job numbers
                 written out are index + firstJobID + 1

    For every job the 'arguments' and 'dlsDestination' fields are stored via
    common._db.updateJob_; jobs with a non empty argument list also get a
    'Job' entry in the arguments XML file (created when missing).

    Raises CrabException for zero jobs, or for more than 500 jobs when
    neither a server nor a local scheduler is used.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # make sure every job index has an entry; only the missing ones are
    # added so that the splitter list does not grow on each call
    for i in range(len(jobParams), njobs):
        jobParams.append("")

    # the site mapper does not depend on the job: build it once
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID=[]
    listField=[]
    listDictions=[]
    exist= os.path.exists(self.argsFile)
    paramNames = self.dict['params']
    for jobIndex in range(njobs):
        job = jobIndex + int(firstJobID)
        params = jobParams[jobIndex]
        listID.append(job+1)
        str_argu = str(job+1)
        if len(params):
            argu = {'JobID': job+1}
            for i in range(len(params)):
                argu[paramNames[i]]=params[i]
            if len(params)==1: self.NumEvents = params[0]
            # just for debug
            str_argu += ' '.join(params)
            listDictions.append(argu)
        job_ToSave ={}
        job_ToSave['arguments']= '%d %d'%( (job+1), 0)
        job_ToSave['dlsDestination']= self.jobDestination[jobIndex]
        listField.append(job_ToSave)
        msg="Job  %s  Arguments:  %s\n"%(str(job+1),str_argu)
        msg+="\t  Destination: %s "%(str(self.jobDestination[jobIndex]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[jobIndex]]
        msg+="\t  CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)
    # write xml
    if len(listDictions):
        if exist==False: self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
495 ewv 1.313
def CreateXML(self):
    """
    Write a new arguments file (self.argsFile) that contains only the root
    node named self.rootArgsFilename. An existing file is overwritten.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly: do not rely on garbage collection to flush
        outfile.close()
    return
502 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add an entry to the xml file

    listDictions : list of dictionaries; each one becomes the attributes of
                   a new 'Job' node appended to the document loaded from
                   self.argsFile, which is then written back in place.
    """
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname= 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # close explicitly: do not rely on garbage collection to flush
        outfile.close()
    return
517 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitting result, or None when
    self.automation is not 0.
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
524 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball (lib and exe).

    The path is recomputed and stored in self.tgzNameWithPath; the tarball
    is built through buildTar_ only when no file exists at that path yet.
    """
    tarballPath = common.work_space.pathForTgz()+self.tgz_name
    self.tgzNameWithPath = tarballPath
    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)
    return tarballPath
537 slacapra 1.1
538     def buildTar_(self, executable):
539    
540     # First of all declare the user Scram area
541     swArea = self.scram.getSWArea_()
542     swReleaseTop = self.scram.getReleaseTop_()
543 ewv 1.131
544 slacapra 1.1 ## check if working area is release top
545     if swReleaseTop == '' or swArea == swReleaseTop:
546 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
547 slacapra 1.1 return
548    
549 slacapra 1.61 import tarfile
550     try: # create tar ball
551 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
552 slacapra 1.61 ## First find the executable
553 slacapra 1.86 if (self.executable != ''):
554 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
555     if ( not exeWithPath ):
556     raise CrabException('User executable '+executable+' not found')
557 ewv 1.131
558 slacapra 1.61 ## then check if it's private or not
559     if exeWithPath.find(swReleaseTop) == -1:
560     # the exe is private, so we must ship
561 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
562 slacapra 1.61 path = swArea+'/'
563 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
564     if exeWithPath.find(path) >= 0 :
565     exe = string.replace(exeWithPath, path,'')
566 slacapra 1.129 tar.add(path+exe,exe)
567 corvo 1.85 else :
568     tar.add(exeWithPath,os.path.basename(executable))
569 slacapra 1.61 pass
570     else:
571     # the exe is from release, we'll find it on WN
572     pass
573 ewv 1.131
574 slacapra 1.61 ## Now get the libraries: only those in local working area
575 slacapra 1.256 tar.dereference=True
576 slacapra 1.61 libDir = 'lib'
577     lib = swArea+'/' +libDir
578 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
579 slacapra 1.61 if os.path.exists(lib):
580     tar.add(lib,libDir)
581 ewv 1.131
582 slacapra 1.61 ## Now check if module dir is present
583     moduleDir = 'module'
584     module = swArea + '/' + moduleDir
585     if os.path.isdir(module):
586     tar.add(module,moduleDir)
587 slacapra 1.256 tar.dereference=False
588 slacapra 1.61
589     ## Now check if any data dir(s) is present
590 spiga 1.179 self.dataExist = False
591 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
592 slacapra 1.206 while len(todo_list):
593     entry, name = todo_list.pop()
594 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
595 slacapra 1.206 continue
596 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
597 slacapra 1.206 entryPath = entry + '/'
598 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
599 slacapra 1.206 if name == 'data':
600     self.dataExist=True
601 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
602 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
603 slacapra 1.206 pass
604     pass
605 ewv 1.182
606 spiga 1.179 ### CMSSW ParameterSet
607     if not self.pset is None:
608     cfg_file = common.work_space.jobDir()+self.configFilename()
609 ewv 1.357 pickleFile = common.work_space.jobDir()+self.configFilename() + '.pkl'
610 ewv 1.182 tar.add(cfg_file,self.configFilename())
611 ewv 1.357 tar.add(pickleFile,self.configFilename() + '.pkl')
612 ewv 1.313
613 spiga 1.309 try:
614     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
615     tar.add(crab_cfg_file,'crab.cfg')
616     except:
617     pass
618 fanzago 1.93
619 fanzago 1.152 ## Add ProdCommon dir to tar
620 slacapra 1.211 prodcommonDir = './'
621     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
622 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
623 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
624     'WMCore/__init__.py','WMCore/Algorithms']
625 slacapra 1.214 for file in neededStuff:
626     tar.add(prodcommonPath+file,prodcommonDir+file)
627 spiga 1.179
628     ##### ML stuff
629     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
630     path=os.environ['CRABDIR'] + '/python/'
631     for file in ML_file_list:
632     tar.add(path+file,file)
633    
634     ##### Utils
635 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
636 spiga 1.179 for file in Utils_file_list:
637     tar.add(path+file,file)
638 ewv 1.131
639 ewv 1.182 ##### AdditionalFiles
640 slacapra 1.253 tar.dereference=True
641 spiga 1.179 for file in self.additional_inbox_files:
642     tar.add(file,string.split(file,'/')[-1])
643 slacapra 1.253 tar.dereference=False
644 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
645 ewv 1.182
646 slacapra 1.61 tar.close()
647 mcinquil 1.241 except IOError, exc:
648 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
649 spiga 1.304 msg += str(exc)
650     raise CrabException(msg)
651 mcinquil 1.241 except tarfile.TarError, exc:
652 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
653 spiga 1.304 msg += str(exc)
654     raise CrabException(msg)
655 spiga 1.300
656 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
657     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
658 spiga 1.365 if not self.server:
659     msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + \
660     str(self.MaxTarBallSize) +'MB input sandbox limit \n'
661     msg += ' and not supported by the direct GRID submission system.\n'
662     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
663     msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabServerForUsers#Server_available_for_users'
664 ewv 1.367 else:
665 spiga 1.365 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + \
666     str(self.MaxTarBallSize) +'MB input sandbox limit in the server.'
667 spiga 1.238 raise CrabException(msg)
668 gutsche 1.72
669 slacapra 1.61 ## create tar-ball with ML stuff
670 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script which prepares the
    execution environment for the job 'nj'.

    The returned shell text is built in three stages:
      * a JobType-independent part that exports SCRAM_ARCH and sources
        the middleware-specific CMS setup (LCG/CAF/LSF, OSG, SGE, ARC, PBS);
      * a CMSSW-specific part that creates the scram project area and
        loads the scram runtime environment;
      * a job-specific part that defines the dataset variables and, when
        a parameter set is configured, copies it into place as 'pset.py'.

    nj -- index into common.job_list of the job the script is built for.
    """
    psetName = 'pset.py'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
    txt += 'export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += 'echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    # Setup SGE Environment
    txt += 'elif [ $middleware == SGE ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'elif [ $middleware == ARC ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    # Setup PBS Environment
    txt += 'elif [ $middleware == PBS ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    # The backslash is doubled so Python emits a literal "\`" for the shell
    # without relying on an unrecognised escape sequence.
    txt += ' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        txt += 'PrimaryDataset='+self.primaryDataset +'\n'
        txt += 'DataTier='+self.dataTier+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    # NOTE(review): the nesting of the branches below was reconstructed from
    # a listing without indentation -- confirm against the repository copy.
    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        pkl = os.path.basename(job.configFilename()) + '.pkl'
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
        txt += 'cp $RUNTIME_AREA/'+pkl+' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
        if self.var_filter:
            txt += "export var_filter="+"'"+self.var_filter+"'\n"
            # Terminate the line: without the newline whatever script
            # fragment is appended next would be glued onto this echo.
            txt += 'echo $var_filter\n'
    else:
        txt += '\n'
        if self.AdditionalArgs:
            txt += 'export AdditionalArgs="%s"\n' % (self.AdditionalArgs)
        if int(self.NumEvents) != 0:
            txt += 'export MaxEvents=%s\n' % str(self.NumEvents)
    return txt
783 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job wrapper script which unpacks the user
    software tarball and adds $RUNTIME_AREA to PYTHONPATH.

    Only the header comment is returned when self.tgzNameWithPath is
    not an existing file.  When self.debug_wrapper is 1 the tarball is
    unpacked verbosely and the directory content is listed afterwards.

    nj -- job index (not used by this method).
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    # NOTE(review): everything below is assumed to belong to this branch;
    # the nesting was reconstructed from a listing without indentation.
    if os.path.isfile(self.tgzNameWithPath):
        tarball = '$RUNTIME_AREA/' + os.path.basename(self.tgzNameWithPath)
        verbose = (self.debug_wrapper == 1)

        txt += 'echo ">>> tar xzf ' + tarball + ' :" \n'
        if verbose:
            txt += 'tar zxvf ' + tarball + '\n'
        else:
            txt += 'tar zxf ' + tarball + '\n'
        # Record tar's exit status immediately.  The debug listing used to
        # be emitted before this line, so in debug mode $? was the status
        # of "ls" and a failed untar went unnoticed.
        txt += 'untar_status=$? \n'
        if verbose:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += 'fi\n'
        txt += '\n'

    return txt
820 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job wrapper script which moves the shipped
    CMSSW directories (lib, module and, if self.dataExist, src) and the
    additional input files from $RUNTIME_AREA into the current
    directory, extends PYTHONPATH and, when a parameter set is
    configured, computes its hash with edmConfigHash.

    nj -- job index (not used by this method).
    """
    parts = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]
    if self.dataExist == True:
        parts.append('rm -r src/ \n')
        parts.append('mv $RUNTIME_AREA/src/ . \n')
    if len(self.additional_inbox_files) > 0:
        for extra in self.additional_inbox_files:
            parts.append('mv $RUNTIME_AREA/' + os.path.basename(extra) + ' . \n')

    parts.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset is None:
        return ''.join(parts)

    psetName = 'pset.py'
    parts.append('\n')
    # NOTE(review): the extent of this debug-only branch was reconstructed
    # from a listing without indentation -- confirm.
    if self.debug_wrapper == 1:
        parts.extend([
            'echo "***** cat ' + psetName + ' *********"\n',
            'cat ' + psetName + '\n',
            'echo "****** end ' + psetName + ' ********"\n',
            '\n',
            'echo "***********************" \n',
            'which edmConfigHash \n',
            'echo "***********************" \n',
        ])
    parts.extend([
        'edmConfigHash ' + psetName + ' \n',
        'PSETHASH=`edmConfigHash ' + psetName + '` \n',
        'echo "PSETHASH = $PSETHASH" \n',
        # temporary fix for noEdm files: fall back to a literal "null" hash
        'if [ -z "$PSETHASH" ]; then \n',
        ' export PSETHASH=null\n',
        'fi \n',
        '\n',
    ])
    return ''.join(parts)
871 slacapra 1.1
872 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is configured, otherwise self.executable.
    """
    return "sh " if self.scriptExe else self.executable
878 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable: the user
    script followed by the job number and additional arguments when
    self.scriptExe is set, otherwise the job-report and pset options.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
884 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of filenames to be put in the JDL input sandbox:
    the software tarball and the arguments file (each only if it
    exists on disk) followed by the job script in the job directory.

    nj -- job index (not used by this method).
    """
    optional = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
896    
def outputSandbox(self, nj):
    """
    Return the list of filenames to be put in the JDL output sandbox.

    Every user-declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile with the
    1-based job number nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
908    
909    
def wsRenameOutput(self, nj):
    """
    Return the part of the job wrapper script which renames the
    produced files.

    For each name in self.output_file the script checks that the file
    exists, moves it to its numbered name (built by numberFile with
    '$OutUniqueID') and links it back under its plain name in
    $RUNTIME_AREA; a missing file sets job_exit_code=60302.  The script
    finally exports the comma separated 'file_list' and changes to
    $RUNTIME_AREA.

    nj -- job index (not used by this method).
    """
    # Directory banner emitted before and after the renaming loop; the
    # actual listing is only produced in debug mode.
    banner = [
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
    ]
    if self.debug_wrapper == 1:
        banner.append('ls -Al\n')
    banner.append('\n')

    parts = ['\n#Written by cms_cmssw::wsRenameOutput\n']
    parts.extend(banner)

    for produced in self.output_file:
        numbered = numberFile(produced, '$OutUniqueID')
        parts.append('\n')
        parts.append('# check output file\n')
        parts.append('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            parts.append(' mv ' + produced + ' ' + numbered + '\n')
            parts.append(' ln -s `pwd`/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            parts.append(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
            parts.append(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        parts.append('else\n')
        parts.append(' job_exit_code=60302\n')
        parts.append(' echo "WARNING: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            parts.append(' if [ $middleware == OSG ]; then \n')
            parts.append(' echo "prepare dummy output file"\n')
            parts.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            parts.append(' fi \n')
        parts.append('fi\n')

    numbered_paths = [numberFile('$SOFTWARE_DIR/' + produced, '$OutUniqueID')
                      for produced in self.output_file]
    parts.append('file_list="' + ','.join(numbered_paths) + '"\n')
    parts.append('\n')
    parts.extend(banner)
    parts.append('cd $RUNTIME_AREA\n')
    parts.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(parts)
956    
def getRequirements(self, nj=[]):
    """
    Return the job requirements expression to add to the JDL files.

    The expression requires the CMSSW version and the architecture tag
    (each only when set) to be published in the CE runtime environment
    and the worker node to have outbound IP connectivity.  For the
    "glite" scheduler a constraint on GlueCEStateStatus is appended;
    'GRID.use_cream' in cfg_params additionally allows "Special" CEs.

    nj -- not used; the default is kept for interface compatibility
          and is never modified.
    """
    # Collect the clauses first and join them afterwards, so that an empty
    # self.version cannot leave a dangling leading ' && ' in the expression.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    req = ' && '.join(clauses)

    if common.scheduler.name() in ["glite"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            req += ' && (other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")'
        else:
            req += ' && other.GlueCEStateStatus == "Production" '

    return req
980 gutsche 1.3
def configFilename(self):
    """Return the config filename: the value of self.name() with a '.py' suffix."""
    base = self.name()
    return base + '.py'
984 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job wrapper script which prepares the
    execution environment on OSG and is common to all CMS jobs: it
    exports SCRAM_ARCH and sources $OSG_APP/cmssoft/cms/cmsset_default.sh
    for self.version, exiting with code 10020 if that file is missing.
    """
    arch = self.executable_arch
    setup_script = '$OSG_APP/cmssoft/cms/cmsset_default.sh'
    fragments = (
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f ' + setup_script + ' ] ;then\n',
        ' # Use ' + setup_script + ' to setup cms software\n',
        ' source ' + setup_script + ' ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> ' + setup_script + ' file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    )
    return ''.join(fragments)
1008 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job wrapper script which prepares the
    execution environment on LCG-like middleware and is common to all
    CMS jobs: it exports SCRAM_ARCH and BUILD_ARCH and sources
    $VO_CMS_SW_DIR/cmsset_default.sh, setting a dedicated exit code
    (10031, 10020, 10032) for each failure it detects.
    """
    arch = self.executable_arch
    fragments = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        # software area not defined at all
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        # setup script missing or empty
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        # sourcing the setup script failed
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(fragments)
1042 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job wrapper script that modifies the
    FrameworkJobReport by running ModifyJobReport.py.

    An empty string is returned unless self.copy_data is 1.  When
    self.publish_data is 1 the processed dataset name is also defined
    in the script and passed on as UserProcessedDataset.

    nj -- job index (not used by this method).
    """
    if self.copy_data != 1:
        return ''

    tool = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml json $RUNTIME_AREA/resultCopyFile n_job $OutUniqueID PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH'

    parts = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + tool + '\n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        parts.append('ProcessedDataset=' + self.processedDataset + '\n')
        parts.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    parts.extend([
        'echo "' + tool + ' ' + str(args) + '"\n',
        tool + ' ' + str(args) + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(parts)
1075 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job wrapper script which parses the
    FrameworkJobReport with parseCrabFjr.py to obtain the exit status
    of the executable, reports it (ExeExitCode) and stops the job via
    func_exit when that status is not zero.

    self.debugWrap is appended to the dashboard parsing command; the
    parser output is echoed only when self.debug_wrapper is 1.
    """
    fjr = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    parser = '$RUNTIME_AREA/parseCrabFjr.py'

    parts = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s ' + fjr + ' ]; then\n',
        ' if [ -s ' + parser + ' ]; then\n',
        ' cmd_out=`python ' + parser + ' --input ' + fjr + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        parts.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    parts.extend([
        ' executable_exit_status=`python ' + parser + ' --input ' + fjr + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat ' + fjr + '\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        # a failing executable ends the job right here
        'if [ $executable_exit_status -ne 0 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])
    return ''.join(parts)
1120    
def setParam_(self, param, value):
    """Store 'value' under the key 'param' in the self._params mapping."""
    self._params.update({param: value})
1123    
def getParams(self):
    """Return the self._params mapping itself (not a copy)."""
    params = self._params
    return params
1126 gutsche 1.8
def outList(self,list=False):
    """
    Build the script fragment that exports 'filesToCheck', the list of
    files expected in the output sandbox, and record self.output_file
    as 'outfileBasename' of the task through common._db.updateTask_.

    list -- when true, return self.output_file instead of the script
            text (the task record is updated in both cases).
    """
    if len(self.output_file) <= 0:
        warning = "WARNING: no output files name have been defined!!\n"
        warning += "\tno output files will be reported back/staged\n"
        common.logger.info(warning)

    expected = []
    # NOTE(review): only the first loop is assumed to depend on return_data;
    # the nesting was reconstructed from a listing without indentation.
    if self.return_data == 1:
        expected.extend([numberFile(name, '$OutUniqueID') for name in self.output_file])
    expected.extend([numberFile(name, '$NJob') for name in self.output_file_sandbox])
    expected.append('CMSSW_$NJob.stdout')
    expected.append('CMSSW_$NJob.stderr')
    joined = ' '.join(expected)

    txt = ''.join([
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: ' + joined + '"\n',
        'filesToCheck="' + joined + '"\n',
        'export filesToCheck\n',
    ])

    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return txt
1158 ewv 1.355
def checkCMSSWVersion(self, url = "https://cmstags.cern.ch/cgi-bin/CmsTC/", fileName = "ReleasesXML"):
    """
    Compare the current CMSSW release and architecture with the allowed
    releases listed in the XML file 'fileName' downloaded from 'url'.

    Return True when a <project> element in state 'Announced' whose
    label equals self.version is found under the <architecture>
    element named self.executable_arch; otherwise log a message and
    return False.  Download and parsing problems are logged, never
    raised.
    """
    downloader = Downloader(url)
    goodRelease = False
    tagCollectorUrl = url + fileName

    try:
        result = downloader.config(fileName)
    except Exception:
        common.logger.info("ERROR: Problem reading file of allowed CMSSW releases.")
        # Nothing to parse: stop here instead of falling through to the
        # parser with an undefined 'result' (which only produced a second,
        # misleading "parsing" message).
        return goodRelease

    try:
        arch = None
        for (event, node) in pulldom.parseString(result):
            if event != pulldom.START_ELEMENT:
                continue
            if node.tagName == 'architecture':
                arch = node.attributes.getNamedItem('name').nodeValue
            elif node.tagName == 'project':
                # Decide per project element.  Carrying the last announced
                # label across elements let a release announced for one
                # architecture match when the next architecture started.
                if arch != self.executable_arch:
                    continue
                relState = node.attributes.getNamedItem('state').nodeValue
                if relState != 'Announced':
                    continue
                release = node.attributes.getNamedItem('label').nodeValue
                if release == self.version:
                    goodRelease = True
                    return goodRelease

        msg = "WARNING: %s on %s is not among supported releases listed at %s ." % \
              (self.version, self.executable_arch, tagCollectorUrl)
        msg += "Submission may fail."
        common.logger.info(msg)
    except Exception:
        common.logger.info("Problems parsing file of allowed CMSSW releases.")

    return goodRelease
1200