ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.361
Committed: Tue Jul 6 16:31:55 2010 UTC (14 years, 9 months ago) by ewv
Content type: text/x-python
Branch: MAIN
Changes since 1.360: +8 -5 lines
Log Message:
Cleaner logic on retriveValue and throw exception for split by run with no runselection

File Contents

# User Rev Content
1 spiga 1.358
2 ewv 1.361 __revision__ = "$Id: cms_cmssw.py,v 1.360 2010/05/28 09:46:00 fanzago Exp $"
3     __version__ = "$Revision: 1.360 $"
4 spiga 1.358
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 ewv 1.355 from Downloader import Downloader
12 ewv 1.361 try:
13 fanzago 1.360 import json
14     except:
15     import simplejson as json
16 slacapra 1.1
17 spiga 1.293 from IMProv.IMProvNode import IMProvNode
18 ewv 1.355 from IMProv.IMProvLoader import loadIMProvFile
19 slacapra 1.105 import os, string, glob
20 ewv 1.355 from xml.dom import pulldom
21 slacapra 1.1
22     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure the CMSSW job type from the crab.cfg parameters.

    Reads the CMSSW/USER/CRAB/GRID/WMBS keys of ``cfg_params``, validates
    them, runs data discovery and job splitting (unless WMBS automation is
    enabled) and, when ``isNew`` is true, rewrites the parameter set and
    builds the input-sandbox tarball.

    cfg_params  -- mapping of 'SECTION.key' -> value taken from crab.cfg
    ncjobs      -- number of jobs requested to be created
    skip_blocks -- forwarded to DataDiscovery
    isNew       -- true the first time the task is created

    Raises CrabException on any invalid or missing configuration.

    NOTE(review): the block nesting below was reconstructed from a listing
    that had lost its indentation -- confirm against the repository copy.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params
    ### FEDE FOR MULTI ###
    self.var_filter = ''

    ### Temporary patch to automatically skip the ISB size check:
    # use_server is converted like every other numeric flag in this method;
    # the raw value '0' would otherwise be a non-empty, hence true, string.
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  int(self.cfg_params.get('CRAB.use_server',0))
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
    # Default sandbox limit in MB (buildTar_ compares against size*1024*1024)
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting
    self.checkCMSSWVersion()

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
    self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
                      self.cfg_params.get('CMSSW.lumis_per_job',None)

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs :
        self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # its truth value; comparing against '' could never trigger the error.
    if self.datasetPath is None and self.pset is None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries such as a trailing comma
                continue
            dirname = ''
            if not pattern[0] == "/":
                dirname = "."
            matches = []
            if pattern.find("*") > -1:
                matches = glob.glob(os.path.join(dirname, pattern))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                matches.append(pattern)
            for inputFile in matches:
                if not os.path.exists(inputFile):
                    raise CrabException("Additional input file not found: "+inputFile)
                self.additional_inbox_files.append(inputFile.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string; store the stripped name so that
    # "a, b" yields 'b' and not ' b'.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for seedName in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seedName.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for seedName in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seedName.strip())

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if self.publish_data == 1:
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0       # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}       # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = [] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation',0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites'] = blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))
        if splitByRun and not self.cfg_params.has_key('CMSSW.runselection'):
            msg = "split_by_run must be combined with a runselection"
            raise CrabException(msg)

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators'] = self.managedGenerators
                self.conf['generator'] = self.generator
        elif self.ads or self.lumiMask or self.lumiParams:
            self.algo = 'LumiBased'
            if splitByRun:
                msg = "Cannot combine split by run with lumi_mask, ADS, " \
                      "or lumis_per_job. Use split by lumi mode instead."
                raise CrabException(msg)
        elif splitByRun == 1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params,self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
278 spiga 1.293
279    
280     def ModifyPset(self):
281     import PsetManipulator as pp
282 ewv 1.335
283     # If pycfg_params set, fake out the config script
284     # to make it think it was called with those args
285     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
286     if pycfg_params:
287     trueArgv = sys.argv
288     sys.argv = [self.pset]
289     sys.argv.extend(pycfg_params.split(' '))
290 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
291 ewv 1.335 if pycfg_params: # Restore original sys.argv
292     sys.argv = trueArgv
293    
294 spiga 1.293 try:
295     # Add FrameworkJobReport to parameter-set, set max events.
296     # Reset later for data jobs by writeCFG which does all modifications
297 ewv 1.295 PsetEdit.maxEvent(1)
298 spiga 1.293 PsetEdit.skipEvent(0)
299     PsetEdit.psetWriter(self.configFilename())
300     ## If present, add TFileService to output files
301 slacapra 1.349 if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output',0)):
302 spiga 1.293 tfsOutput = PsetEdit.getTFileService()
303     if tfsOutput:
304     if tfsOutput in self.output_file:
305 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
306 spiga 1.293 else:
307     outfileflag = True #output found
308     self.output_file.append(tfsOutput)
309 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
310 spiga 1.293 pass
311     pass
312 fanzago 1.360
313     # If requested, add PoolOutputModule to output files
314     ### FEDE FOR MULTI ###
315     #edmOutput = PsetEdit.getPoolOutputModule()
316     edmOutputDict = PsetEdit.getPoolOutputModule()
317     common.logger.debug("(test) edmOutputDict = "+str(edmOutputDict))
318     filter_dict = {}
319     for key in edmOutputDict.keys():
320     filter_dict[key]=edmOutputDict[key]['dataset']
321     common.logger.debug("(test) filter_dict for multi = "+str(filter_dict))
322    
323     #### in CMSSW.sh: export var_filter
324    
325     self.var_filter = json.dumps(filter_dict)
326     common.logger.debug("(test) var_filter for multi = "+self.var_filter)
327 ewv 1.361
328 fanzago 1.360 edmOutput = edmOutputDict.keys()
329 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
330     if edmOutput:
331 ewv 1.321 for outputFile in edmOutput:
332     if outputFile in self.output_file:
333 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
334 ewv 1.321 else:
335     self.output_file.append(outputFile)
336     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
337     # not requested, check anyhow to avoid accidental T2 overload
338 slacapra 1.297 else:
339 ewv 1.321 if edmOutput:
340     missedFiles = []
341     for outputFile in edmOutput:
342     if outputFile not in self.output_file:
343     missedFiles.append(outputFile)
344     if missedFiles:
345     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
346     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
347     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
348     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
349     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
350     common.logger.info(msg)
351 spiga 1.322 else :
352 ewv 1.321 raise CrabException(msg)
353 ewv 1.301
354     if (PsetEdit.getBadFilesSetting()):
355     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
356 spiga 1.304 common.logger.info(msg)
357 ewv 1.301
358 slacapra 1.297 except CrabException, msg:
359 spiga 1.304 common.logger.info(str(msg))
360 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
361 spiga 1.293 raise CrabException(msg)
362    
363 gutsche 1.3
364 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
365    
366 slacapra 1.86 import DataDiscovery
367     import DataLocation
368 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
369 gutsche 1.3
370     datasetPath=self.datasetPath
371    
372 slacapra 1.1 ## Contact the DBS
373 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
374 slacapra 1.1 try:
375 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
376 slacapra 1.1 self.pubdata.fetchDBSInfo()
377    
378 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
379 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
380     raise CrabException(msg)
381 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
382 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
383     raise CrabException(msg)
384 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
385 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
386 slacapra 1.1 raise CrabException(msg)
387    
388 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
389 spiga 1.269 self.conf['pubdata']=self.pubdata
390 gutsche 1.3
391 slacapra 1.1 ## get max number of events
392 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
393 slacapra 1.1
394     ## Contact the DLS and build a list of sites hosting the fileblocks
395     try:
396 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
397 gutsche 1.6 dataloc.fetchDLSInfo()
398 slacapra 1.263
399 slacapra 1.41 except DataLocation.DataLocationError , ex:
400 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
401     raise CrabException(msg)
402 ewv 1.131
403 slacapra 1.1
404 slacapra 1.270 unsorted_sites = dataloc.getSites()
405     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
406     for lfn in self.filesbyblock.keys():
407     if unsorted_sites.has_key(lfn):
408     sites[lfn]=unsorted_sites[lfn]
409     else:
410     sites[lfn]=[]
411    
412 slacapra 1.264 if len(sites)==0:
413 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
414     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
415     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
416 slacapra 1.264 raise CrabException(msg)
417    
418 gutsche 1.35 allSites = []
419     listSites = sites.values()
420 slacapra 1.63 for listSite in listSites:
421     for oneSite in listSite:
422 gutsche 1.35 allSites.append(oneSite)
423 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
424 ewv 1.295
425 gutsche 1.3
426 gutsche 1.92 # screen output
427 spiga 1.354 if self.ads or self.lumiMask:
428     common.logger.info("Requested (A)DS %s has %s block(s)." %
429 ewv 1.350 (datasetPath, len(self.filesbyblock.keys())))
430     else:
431     common.logger.info("Requested dataset: " + datasetPath + \
432     " has " + str(self.maxEvents) + " events in " + \
433     str(len(self.filesbyblock.keys())) + " blocks.\n")
434 gutsche 1.92
435 gutsche 1.35 return sites
436 ewv 1.131
437 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Register the jobs computed by the splitter.

    For each of the self.dict['njobs'] jobs this stores the job arguments
    and destination through common._db.updateJob_(), and writes the
    per-job argument dictionaries to the arguments XML file (created on
    first use).

    jobParams  -- ignored: immediately replaced by self.dict['args']
    firstJobID -- offset added to the 0-based job index; job numbers
                  start at firstJobID + 1

    Raises CrabException when there is nothing to split, or when more
    than 500 jobs are requested without server or local scheduler mode.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    # NOTE(review): this pads the list held in self.dict['args'] in place
    for i in range(njobs):
        jobParams.append("")

    # Loop-invariant: import and build the SE mapper once, not per job
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    concString = ' '
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        listID.append(jobNumber)
        params = jobParams[index]
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for i in range(len(params)):
                argu[self.dict['params'][i]] = params[i]
                if len(params) == 1:
                    self.NumEvents = params[i]
            # just for debug
            str_argu += concString.join(params)
            listDictions.append(argu)
        job_ToSave = {}
        # second argument is fixed to 0 here
        job_ToSave['arguments'] = '%d %d'%(jobNumber, 0)
        job_ToSave['dlsDestination'] = self.jobDestination[index]
        listField.append(job_ToSave)
        msg = "Job %s Arguments: %s\n"%(str(jobNumber),str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)
    # write xml
    if len(listDictions):
        if exist == False:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
488 ewv 1.313
def CreateXML(self):
    """
    Create the arguments XML file (self.argsFile) holding only an empty
    root node named self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    # Close the handle explicitly instead of relying on garbage collection
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        outfile.close()
    return
495 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary of listDictions to the arguments
    XML file (self.argsFile) and rewrite the file. Each dictionary is
    passed to IMProvNode as keyword arguments.
    """
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    # Close the handle explicitly instead of relying on garbage collection
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        outfile.close()
    return
510 ewv 1.131
def numberOfJobs(self):
    """
    Return the number of jobs produced by the splitter, or None when
    WMBS automation is enabled (self.automation != 0).
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
517 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the input-sandbox tarball (lib and exe).

    The path is recomputed and stored in self.tgzNameWithPath; when no
    file exists there yet, buildTar_(exe) is called first.
    """
    tarballPath = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarballPath
    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)
    return self.tgzNameWithPath
530 slacapra 1.1
531     def buildTar_(self, executable):
532    
533     # First of all declare the user Scram area
534     swArea = self.scram.getSWArea_()
535     swReleaseTop = self.scram.getReleaseTop_()
536 ewv 1.131
537 slacapra 1.1 ## check if working area is release top
538     if swReleaseTop == '' or swArea == swReleaseTop:
539 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
540 slacapra 1.1 return
541    
542 slacapra 1.61 import tarfile
543     try: # create tar ball
544 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
545 slacapra 1.61 ## First find the executable
546 slacapra 1.86 if (self.executable != ''):
547 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
548     if ( not exeWithPath ):
549     raise CrabException('User executable '+executable+' not found')
550 ewv 1.131
551 slacapra 1.61 ## then check if it's private or not
552     if exeWithPath.find(swReleaseTop) == -1:
553     # the exe is private, so we must ship
554 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
555 slacapra 1.61 path = swArea+'/'
556 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
557     if exeWithPath.find(path) >= 0 :
558     exe = string.replace(exeWithPath, path,'')
559 slacapra 1.129 tar.add(path+exe,exe)
560 corvo 1.85 else :
561     tar.add(exeWithPath,os.path.basename(executable))
562 slacapra 1.61 pass
563     else:
564     # the exe is from release, we'll find it on WN
565     pass
566 ewv 1.131
567 slacapra 1.61 ## Now get the libraries: only those in local working area
568 slacapra 1.256 tar.dereference=True
569 slacapra 1.61 libDir = 'lib'
570     lib = swArea+'/' +libDir
571 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
572 slacapra 1.61 if os.path.exists(lib):
573     tar.add(lib,libDir)
574 ewv 1.131
575 slacapra 1.61 ## Now check if module dir is present
576     moduleDir = 'module'
577     module = swArea + '/' + moduleDir
578     if os.path.isdir(module):
579     tar.add(module,moduleDir)
580 slacapra 1.256 tar.dereference=False
581 slacapra 1.61
582     ## Now check if any data dir(s) is present
583 spiga 1.179 self.dataExist = False
584 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
585 slacapra 1.206 while len(todo_list):
586     entry, name = todo_list.pop()
587 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
588 slacapra 1.206 continue
589 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
590 slacapra 1.206 entryPath = entry + '/'
591 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
592 slacapra 1.206 if name == 'data':
593     self.dataExist=True
594 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
595 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
596 slacapra 1.206 pass
597     pass
598 ewv 1.182
599 spiga 1.179 ### CMSSW ParameterSet
600     if not self.pset is None:
601     cfg_file = common.work_space.jobDir()+self.configFilename()
602 ewv 1.357 pickleFile = common.work_space.jobDir()+self.configFilename() + '.pkl'
603 ewv 1.182 tar.add(cfg_file,self.configFilename())
604 ewv 1.357 tar.add(pickleFile,self.configFilename() + '.pkl')
605 ewv 1.313
606 spiga 1.309 try:
607     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
608     tar.add(crab_cfg_file,'crab.cfg')
609     except:
610     pass
611 fanzago 1.93
612 fanzago 1.152 ## Add ProdCommon dir to tar
613 slacapra 1.211 prodcommonDir = './'
614     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
615 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
616 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
617     'WMCore/__init__.py','WMCore/Algorithms']
618 slacapra 1.214 for file in neededStuff:
619     tar.add(prodcommonPath+file,prodcommonDir+file)
620 spiga 1.179
621     ##### ML stuff
622     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
623     path=os.environ['CRABDIR'] + '/python/'
624     for file in ML_file_list:
625     tar.add(path+file,file)
626    
627     ##### Utils
628 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
629 spiga 1.179 for file in Utils_file_list:
630     tar.add(path+file,file)
631 ewv 1.131
632 ewv 1.182 ##### AdditionalFiles
633 slacapra 1.253 tar.dereference=True
634 spiga 1.179 for file in self.additional_inbox_files:
635     tar.add(file,string.split(file,'/')[-1])
636 slacapra 1.253 tar.dereference=False
637 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
638 ewv 1.182
639 slacapra 1.61 tar.close()
640 mcinquil 1.241 except IOError, exc:
641 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
642 spiga 1.304 msg += str(exc)
643     raise CrabException(msg)
644 mcinquil 1.241 except tarfile.TarError, exc:
645 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
646 spiga 1.304 msg += str(exc)
647     raise CrabException(msg)
648 spiga 1.300
649 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
650     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
651 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
652 ewv 1.250 +'MB input sandbox limit \n'
653 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
654     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
655 spiga 1.358 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabServerForUsers#Server_available_for_users'
656 spiga 1.238 raise CrabException(msg)
657 gutsche 1.72
658 slacapra 1.61 ## create tar-ball with ML stuff
659 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job script which prepares the execution
    environment for the job 'nj'.

    The fragment is made of three parts:
      * a JobType-independent part selecting the CMS software set-up
        according to the $middleware shell variable,
      * a JobType-specific part creating the CMSSW project area with scram
        and checking the number of wrapper arguments,
      * a job-specific part exporting dataset information and, when a
        parameter set is configured, copying it into place as 'pset.py'.

    nj -- index of the job in common.job_list.
    Returns the shell fragment as a string.
    """
    psetName = 'pset.py'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
    txt += 'export SCRAM_ARCH=' + self.executable_arch + '\n'
    txt += 'echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    # SGE, ARC and PBS all reuse the LCG set-up
    txt += 'elif [ $middleware == SGE ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'elif [ $middleware == ARC ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'elif [ $middleware == PBS ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = ' + self.version + '"\n'
    txt += scram + ' project CMSSW ' + self.version + '\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW ' + self.version + ' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd ' + self.version + '\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    # The backslash is doubled so that Python emits a literal '\`' without
    # relying on an unrecognised escape sequence.
    txt += ' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n"
    txt += "\n"
    txt += "if [ $nargs -lt " + str(self.argsList) + " ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath=' + self.datasetPath + '\n'

        txt += 'PrimaryDataset=' + self.primaryDataset + '\n'
        txt += 'DataTier=' + self.dataTier + '\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        pkl = pset + '.pkl'
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/' + pset + ' .\n'
        txt += 'cp $RUNTIME_AREA/' + pkl + ' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
        if self.var_filter:
            txt += "export var_filter=" + "'" + self.var_filter + "'\n"
            # Terminate the line: without the newline whatever fragment is
            # appended next would end up on the same shell line as the echo.
            txt += 'echo $var_filter\n'
    else:
        # No parameter set: only export the optional extra arguments and
        # event limit.
        txt += '\n'
        if self.AdditionalArgs:
            txt += 'export AdditionalArgs="%s"\n' % (self.AdditionalArgs)
        if int(self.NumEvents) != 0:
            txt += 'export MaxEvents=%s\n' % str(self.NumEvents)
    return txt
772 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job script which unpacks the user tar-ball.

    When self.tgzNameWithPath is an existing file the fragment untars its
    copy in $RUNTIME_AREA, aborts the job through func_exit if tar fails and
    prepends $RUNTIME_AREA to PYTHONPATH. Otherwise only the header comment
    is returned.

    nj -- unused, kept for interface compatibility.
    Returns the shell fragment as a string.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = '$RUNTIME_AREA/' + os.path.basename(self.tgzNameWithPath)
    debug = (self.debug_wrapper == 1)

    txt += 'echo ">>> tar xzf ' + tarball + ' :" \n'
    if debug:
        txt += 'tar zxvf ' + tarball + '\n'
    else:
        txt += 'tar zxf ' + tarball + '\n'
    # Capture the status right after tar: any command placed in between
    # (such as the debug listing below) would overwrite $?.
    txt += 'untar_status=$? \n'
    if debug:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
809 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job script which moves the shipped software
    into the CMSSW area.

    The fragment replaces lib/ and module/ (and src/ when self.dataExist is
    True) with the copies found in $RUNTIME_AREA, moves every additional
    input file into the current directory, prepends $RUNTIME_AREA to
    PYTHONPATH and, when a parameter set is configured, computes PSETHASH
    with edmConfigHash.

    nj -- unused, kept for interface compatibility.
    Returns the shell fragment as a string.
    """
    txt = '\n#Written by cms_cmssw::wsBuildExe\n'
    txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'

    txt += 'rm -r lib/ module/ \n'
    txt += 'mv $RUNTIME_AREA/lib/ . \n'
    txt += 'mv $RUNTIME_AREA/module/ . \n'
    if self.dataExist == True:
        txt += 'rm -r src/ \n'
        txt += 'mv $RUNTIME_AREA/src/ . \n'
    # Iterating an empty sequence is a no-op, so no length guard is needed.
    for inbox_file in self.additional_inbox_files:
        txt += 'mv $RUNTIME_AREA/' + os.path.basename(inbox_file) + ' . \n'

    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    if self.pset is not None:
        psetName = 'pset.py'

        txt += '\n'
        if self.debug_wrapper == 1:
            # Debug only: dump the parameter set and show which
            # edmConfigHash is picked up.
            txt += 'echo "***** cat ' + psetName + ' *********"\n'
            txt += 'cat ' + psetName + '\n'
            txt += 'echo "****** end ' + psetName + ' ********"\n'
            txt += '\n'
            txt += 'echo "***********************" \n'
            txt += 'which edmConfigHash \n'
            txt += 'echo "***********************" \n'
        txt += 'edmConfigHash ' + psetName + ' \n'
        txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        # Temporary fix for noEdm files: fall back to a literal "null" hash.
        txt += 'if [ -z "$PSETHASH" ]; then \n'
        txt += ' export PSETHASH=null\n'
        txt += 'fi \n'
        txt += '\n'
    return txt
860 slacapra 1.1
861 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is configured, otherwise self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
867 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script the arguments are the script itself followed by
    $NJob and $AdditionalArgs; otherwise they select the framework job
    report file and the 'pset.py' parameter set.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
873 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL input sandbox.

    The tar-ball and the arguments file are included only if they exist on
    disk; the job script (job directory + self.scriptName) is always added.
    'nj' is not used.
    """
    optional_files = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional_files if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
885    
def outputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile() with the
    1-based job number nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
897    
898    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    For every entry of self.output_file the fragment checks that the file
    exists, renames it to its numbered name (numberFile with '$OutUniqueID')
    and links the original name in $RUNTIME_AREA; a missing file sets
    job_exit_code=60302. The shell variable file_list is set to the comma
    separated numbered names under $SOFTWARE_DIR, and the script finally
    changes directory to $RUNTIME_AREA.

    nj -- unused, kept for interface compatibility.
    Returns the shell fragment as a string.
    """
    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$OutUniqueID')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./' + fileWithSuffix + ' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv ' + fileWithSuffix + ' ' + output_file_num + '\n'
            txt += ' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        else:
            txt += ' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [numberFile('$SOFTWARE_DIR/' + fileWithSuffix, '$OutUniqueID')
                 for fileWithSuffix in self.output_file]

    # str.join replaces the deprecated string.join module function.
    txt += 'file_list="' + ','.join(file_list) + '"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
945    
def getRequirements(self, nj=None):
    """
    Return the job requirements expression to add to the jdl files.

    The expression is the ' && ' conjunction of: a software tag clause for
    self.version and one for self.executable_arch (each only when set), the
    outbound connectivity clause and, for the "glite" scheduler, a clause on
    the CE state (which also accepts "Special" when GRID.use_cream is set
    in the configuration).

    nj -- unused, kept for interface compatibility.
    Returns the requirements as a string.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the clauses and join them at the end, so that the expression
    # never starts with a dangling ' && ' when self.version is empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() in ["glite"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # Trailing blank kept so the generated text is unchanged.
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
969 gutsche 1.3
def configFilename(self):
    """Return the config file name: the value of self.name() followed by '.py'."""
    base = self.name()
    return base + '.py'
973 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS software
    environment on OSG.

    The fragment exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh with the CMSSW version as
    argument; if that file is missing it sets job_exit_code=10020 and calls
    func_exit.
    """
    arch = self.executable_arch
    setup_script = '$OSG_APP/cmssoft/cms/cmsset_default.sh'

    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f ' + setup_script + ' ] ;then\n',
        ' # Use ' + setup_script + ' to setup cms software\n',
        ' source ' + setup_script + ' ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> ' + setup_script + ' file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
997 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS software
    environment on LCG-like middleware.

    The fragment exports SCRAM_ARCH and BUILD_ARCH and sources
    $VO_CMS_SW_DIR/cmsset_default.sh. It sets job_exit_code and calls
    func_exit when $VO_CMS_SW_DIR is not set (10031), the set-up file is
    missing or empty (10020) or sourcing it fails (10032).
    """
    arch = self.executable_arch

    header = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
    ]
    # The remainder of the fragment does not depend on the instance.
    body = [
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(header + body)
1031 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job script which modifies the FrameworkJobReport.

    An empty string is returned unless self.copy_data is 1. Otherwise the
    fragment runs ModifyJobReport.py on crab_fjr_$NJob.xml and, on success,
    moves NewFrameworkJobReport.xml over it; on failure it sets
    job_exit_code to 70500. When self.publish_data is 1 the processed
    dataset name is also passed to the script. 'nj' is not used.
    """
    if self.copy_data != 1:
        return ''

    modify_cmd = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'

    # The LFN / SE related arguments formerly passed on the command line
    # are no longer needed: the script reads them from the json file.
    cmd_args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml json $RUNTIME_AREA/resultCopyFile n_job $OutUniqueID PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH'

    pieces = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modify_cmd + '\n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        pieces.append('ProcessedDataset=' + self.processedDataset + '\n')
        pieces.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        cmd_args = cmd_args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    pieces.extend([
        'echo "' + modify_cmd + ' ' + cmd_args + '"\n',
        modify_cmd + ' ' + cmd_args + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(pieces)
1078 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job script which parses the FrameworkJobReport.

    The fragment runs parseCrabFjr.py on crab_fjr_$NJob.xml (when both
    exist) to report to the dashboard and to extract the executable exit
    status, then stores that status in job_exit_code. It calls func_exit
    for any non-zero status other than 50115, 50117 and 30001.
    """
    fjr = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input ' + fjr

    pieces = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s ' + fjr + ' ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        pieces.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    pieces.extend([
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat ' + fjr + '\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # A verification of the processed files against the input files
        # (setting exit status 30001 on mismatch) used to be emitted here.
        # It is disabled because it cannot work any more given the changes
        # on the job arguments.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])

    return ''.join(pieces)
1151    
def setParam_(self, param, value):
    """Store 'value' under the key 'param' in the job type parameters (self._params)."""
    params = self._params
    params[param] = value
1154    
def getParams(self):
    """Return the job type parameters (the self._params object itself, not a copy)."""
    params = self._params
    return params
1157 gutsche 1.8
def outList(self, list=False):
    """
    Return the part of the job script which declares the files expected in
    the output sandbox.

    The fragment echoes the expected file names and exports them in the
    shell variable filesToCheck. The names are the numbered output files
    (only when self.return_data is 1), the numbered sandbox files and the
    job stdout/stderr. As a side effect the output file base names are
    recorded in the task through common._db.updateTask_.

    list -- when true, return self.output_file instead of the script text
            (the name shadows the builtin; it is kept because callers may
            pass it by keyword).
    """
    txt = ''
    txt += 'echo ">>> list of expected files on output sandbox"\n'
    listOutFiles = []
    stdout = 'CMSSW_$NJob.stdout'
    stderr = 'CMSSW_$NJob.stderr'
    if len(self.output_file) == 0:
        msg = "WARNING: no output files name have been defined!!\n"
        msg += "\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    if self.return_data == 1:
        for out_name in self.output_file:
            listOutFiles.append(numberFile(out_name, '$OutUniqueID'))
    for out_name in self.output_file_sandbox:
        listOutFiles.append(numberFile(out_name, '$NJob'))
    listOutFiles.append(stdout)
    listOutFiles.append(stderr)

    # str.join replaces the deprecated string.join module function.
    expected = ' '.join(listOutFiles)
    txt += 'echo "output files: ' + expected + '"\n'
    txt += 'filesToCheck="' + expected + '"\n'
    txt += 'export filesToCheck\n'
    taskinfo = {}
    taskinfo['outfileBasename'] = self.output_file
    common._db.updateTask_(taskinfo)

    if list:
        return self.output_file
    return txt
1189 ewv 1.355
def checkCMSSWVersion(self, url = "https://cmstags.cern.ch/cgi-bin/CmsTC/", fileName = "ReleasesXML"):
    """
    Compare the current CMSSW release and architecture with the allowed
    releases.

    The list of releases is obtained through Downloader(url).config(fileName)
    and scanned as XML: an 'architecture' element sets the current
    architecture name, a 'project' element of type 'Production' and state
    'Announced' sets the current release label.

    url      -- location handed to the Downloader.
    fileName -- name of the releases file requested from the Downloader.

    Returns True as soon as self.executable_arch and self.version match an
    architecture/release pair, False otherwise. Download and parsing
    problems are logged and reported as False; they are never raised.
    """
    downloader = Downloader(url)

    try:
        result = downloader.config(fileName)
    except Exception:
        common.logger.info("ERROR: Problem reading file of allowed CMSSW releases.")
        # Nothing to parse: stop here rather than fall through to the parser
        # with 'result' unbound.
        return False

    try:
        arch = None
        release = None
        for (event, node) in pulldom.parseString(result):
            if event != pulldom.START_ELEMENT:
                continue
            if node.tagName == 'architecture':
                arch = node.attributes.getNamedItem('name').nodeValue
            if node.tagName == 'project':
                relType = node.attributes.getNamedItem('type').nodeValue
                relState = node.attributes.getNamedItem('state').nodeValue
                if relType == 'Production' and relState == 'Announced':
                    release = node.attributes.getNamedItem('label').nodeValue
            if self.executable_arch == arch and self.version == release:
                return True

        # The whole file was scanned without finding a match.
        msg = "WARNING: %s on %s is not a supported release. " % \
              (self.version, self.executable_arch)
        msg += "Submission may fail."
        common.logger.info(msg)
    except Exception:
        common.logger.info("Problems parsing file of allowed CMSSW releases.")

    return False
1232